implement remaining subsystem metrics (#1770)

* overseer metrics: messages relayed

* provisioner metrics: cosmetic changes

* candidate selection metrics: cosmetic changes

* availability bitfields metrics

* availability distribution metrics

* PoV distribution metrics

* statement-distribution: small simplification

* statement-distribution: extract log target into a const

* statement-distribution: metrics

* address review nits
This commit is contained in:
Andronik Ordian
2020-10-01 12:08:03 +02:00
committed by GitHub
parent 693d40831d
commit 579614d127
12 changed files with 450 additions and 177 deletions
@@ -49,6 +49,9 @@ use polkadot_subsystem::{
ActiveLeavesUpdate, FromOverseer, OverseerSignal, SpawnedSubsystem, Subsystem,
SubsystemContext, SubsystemError,
};
use polkadot_node_subsystem_util::{
metrics::{self, prometheus},
};
use polkadot_node_network_protocol::{
v1 as protocol_v1, View, ReputationChange as Rep, PeerId,
NetworkBridgeEvent,
@@ -292,6 +295,7 @@ async fn handle_network_msg<Context>(
ctx: &mut Context,
keystore: KeyStorePtr,
state: &mut ProtocolState,
metrics: &Metrics,
bridge_message: NetworkBridgeEvent<protocol_v1::AvailabilityDistributionMessage>,
) -> Result<()>
where
@@ -307,10 +311,10 @@ where
state.peer_views.remove(&peerid);
}
NetworkBridgeEvent::PeerViewChange(peerid, view) => {
handle_peer_view_change(ctx, state, peerid, view).await?;
handle_peer_view_change(ctx, state, peerid, view, metrics).await?;
}
NetworkBridgeEvent::OurViewChange(view) => {
handle_our_view_change(ctx, keystore, state, view).await?;
handle_our_view_change(ctx, keystore, state, view, metrics).await?;
}
NetworkBridgeEvent::PeerMessage(remote, msg) => {
let gossiped_availability = match msg {
@@ -318,7 +322,7 @@ where
AvailabilityGossipMessage { candidate_hash, erasure_chunk: chunk }
};
process_incoming_peer_message(ctx, state, remote, gossiped_availability).await?;
process_incoming_peer_message(ctx, state, remote, gossiped_availability, metrics).await?;
}
}
Ok(())
@@ -331,6 +335,7 @@ async fn handle_our_view_change<Context>(
keystore: KeyStorePtr,
state: &mut ProtocolState,
view: View,
metrics: &Metrics,
) -> Result<()>
where
Context: SubsystemContext<Message = AvailabilityDistributionMessage>,
@@ -426,7 +431,7 @@ where
erasure_chunk,
};
send_tracked_gossip_message_to_peers(ctx, per_candidate, peers, message).await?;
send_tracked_gossip_message_to_peers(ctx, per_candidate, metrics, peers, message).await?;
}
}
@@ -442,31 +447,34 @@ where
async fn send_tracked_gossip_message_to_peers<Context>(
ctx: &mut Context,
per_candidate: &mut PerCandidate,
metrics: &Metrics,
peers: Vec<PeerId>,
message: AvailabilityGossipMessage,
) -> Result<()>
where
Context: SubsystemContext<Message = AvailabilityDistributionMessage>,
{
send_tracked_gossip_messages_to_peers(ctx, per_candidate, peers, iter::once(message)).await
send_tracked_gossip_messages_to_peers(ctx, per_candidate, metrics, peers, iter::once(message)).await
}
#[inline(always)]
async fn send_tracked_gossip_messages_to_peer<Context>(
ctx: &mut Context,
per_candidate: &mut PerCandidate,
metrics: &Metrics,
peer: PeerId,
message_iter: impl IntoIterator<Item = AvailabilityGossipMessage>,
) -> Result<()>
where
Context: SubsystemContext<Message = AvailabilityDistributionMessage>,
{
send_tracked_gossip_messages_to_peers(ctx, per_candidate, vec![peer], message_iter).await
send_tracked_gossip_messages_to_peers(ctx, per_candidate, metrics, vec![peer], message_iter).await
}
async fn send_tracked_gossip_messages_to_peers<Context>(
ctx: &mut Context,
per_candidate: &mut PerCandidate,
metrics: &Metrics,
peers: Vec<PeerId>,
message_iter: impl IntoIterator<Item = AvailabilityGossipMessage>,
) -> Result<()>
@@ -503,6 +511,8 @@ where
))
.await
.map_err::<Error, _>(Into::into)?;
metrics.on_chunk_distributed();
}
Ok(())
@@ -515,6 +525,7 @@ async fn handle_peer_view_change<Context>(
state: &mut ProtocolState,
origin: PeerId,
view: View,
metrics: &Metrics,
) -> Result<()>
where
Context: SubsystemContext<Message = AvailabilityDistributionMessage>,
@@ -560,7 +571,7 @@ where
.cloned()
.collect::<HashSet<_>>();
send_tracked_gossip_messages_to_peer(ctx, per_candidate, origin.clone(), messages).await?;
send_tracked_gossip_messages_to_peer(ctx, per_candidate, metrics, origin.clone(), messages).await?;
}
Ok(())
}
@@ -588,6 +599,7 @@ async fn process_incoming_peer_message<Context>(
state: &mut ProtocolState,
origin: PeerId,
message: AvailabilityGossipMessage,
metrics: &Metrics,
) -> Result<()>
where
Context: SubsystemContext<Message = AvailabilityDistributionMessage>,
@@ -694,13 +706,15 @@ where
.collect::<Vec<_>>();
// gossip that message to interested peers
send_tracked_gossip_message_to_peers(ctx, per_candidate, peers, message).await
send_tracked_gossip_message_to_peers(ctx, per_candidate, metrics, peers, message).await
}
/// The bitfield distribution subsystem.
pub struct AvailabilityDistributionSubsystem {
/// Pointer to a keystore, which is required for determining this nodes validator index.
keystore: KeyStorePtr,
/// Prometheus metrics.
metrics: Metrics,
}
impl AvailabilityDistributionSubsystem {
@@ -708,8 +722,8 @@ impl AvailabilityDistributionSubsystem {
const K: usize = 3;
/// Create a new instance of the availability distribution.
pub fn new(keystore: KeyStorePtr) -> Self {
Self { keystore }
pub fn new(keystore: KeyStorePtr, metrics: Metrics) -> Self {
Self { keystore, metrics }
}
/// Start processing work as passed on from the Overseer.
@@ -729,7 +743,8 @@ impl AvailabilityDistributionSubsystem {
&mut ctx,
self.keystore.clone(),
&mut state,
event
&self.metrics,
event,
).await {
warn!(
target: TARGET,
@@ -1073,5 +1088,37 @@ where
}
#[derive(Clone)]
struct MetricsInner {
gossipped_availability_chunks: prometheus::Counter<prometheus::U64>,
}
/// Availability Distribution metrics.
#[derive(Default, Clone)]
pub struct Metrics(Option<MetricsInner>);
impl Metrics {
fn on_chunk_distributed(&self) {
if let Some(metrics) = &self.0 {
metrics.gossipped_availability_chunks.inc();
}
}
}
impl metrics::Metrics for Metrics {
fn try_register(registry: &prometheus::Registry) -> std::result::Result<Self, prometheus::PrometheusError> {
let metrics = MetricsInner {
gossipped_availability_chunks: prometheus::register(
prometheus::Counter::new(
"parachain_gossipped_availability_chunks_total",
"Number of availability chunks gossipped to other peers."
)?,
registry,
)?,
};
Ok(Metrics(Some(metrics)))
}
}
#[cfg(test)]
mod tests;
@@ -72,7 +72,7 @@ fn test_harness<T: Future<Output = ()>>(
let (context, virtual_overseer) = test_helpers::make_subsystem_context(pool.clone());
let subsystem = AvailabilityDistributionSubsystem::new(keystore);
let subsystem = AvailabilityDistributionSubsystem::new(keystore, Default::default());
let subsystem = subsystem.run(context);
let test_fut = test(TestHarness { virtual_overseer });