mirror of
https://github.com/pezkuwichain/pezkuwi-subxt.git
synced 2026-06-13 21:01:05 +00:00
implement remaining subsystem metrics (#1770)
* overseer metrics: messages relayed * provisioner metrics: cosmetic changes * candidate selection metrics: cosmetic changes * availability bitfields metrics * availability distribution metrics * PoV distribution metrics * statement-distribution: small simplification * statement-distribution: extract log target into a const * statement-distribution: metrics * address review nits
This commit is contained in:
@@ -22,10 +22,13 @@
|
||||
use polkadot_subsystem::{
|
||||
Subsystem, SubsystemResult, SubsystemContext, SpawnedSubsystem,
|
||||
ActiveLeavesUpdate, FromOverseer, OverseerSignal,
|
||||
messages::{
|
||||
AllMessages, NetworkBridgeMessage, StatementDistributionMessage, CandidateBackingMessage,
|
||||
RuntimeApiMessage, RuntimeApiRequest,
|
||||
},
|
||||
};
|
||||
use polkadot_subsystem::messages::{
|
||||
AllMessages, NetworkBridgeMessage, StatementDistributionMessage, CandidateBackingMessage,
|
||||
RuntimeApiMessage, RuntimeApiRequest,
|
||||
use polkadot_node_subsystem_util::{
|
||||
metrics::{self, prometheus},
|
||||
};
|
||||
use node_primitives::SignedFullStatement;
|
||||
use polkadot_primitives::v1::{
|
||||
@@ -59,8 +62,13 @@ const BENEFIT_VALID_STATEMENT_FIRST: Rep = Rep::new(
|
||||
/// Typically we will only keep 1, but when a validator equivocates we will need to track 2.
|
||||
const VC_THRESHOLD: usize = 2;
|
||||
|
||||
const LOG_TARGET: &str = "statement_distribution";
|
||||
|
||||
/// The statement distribution subsystem.
|
||||
pub struct StatementDistribution;
|
||||
pub struct StatementDistribution {
|
||||
// Prometheus metrics
|
||||
metrics: Metrics,
|
||||
}
|
||||
|
||||
impl<C> Subsystem<C> for StatementDistribution
|
||||
where C: SubsystemContext<Message=StatementDistributionMessage>
|
||||
@@ -70,7 +78,7 @@ impl<C> Subsystem<C> for StatementDistribution
|
||||
// within `run`.
|
||||
SpawnedSubsystem {
|
||||
name: "statement-distribution-subsystem",
|
||||
future: run(ctx).map(|_| ()).boxed(),
|
||||
future: self.run(ctx).map(|_| ()).boxed(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -111,14 +119,7 @@ fn note_hash(
|
||||
) -> bool {
|
||||
if observed.contains(&h) { return true; }
|
||||
|
||||
if observed.is_full() {
|
||||
false
|
||||
} else {
|
||||
observed.try_push(h).expect("length of storage guarded above; \
|
||||
only panics if length exceeds capacity; qed");
|
||||
|
||||
true
|
||||
}
|
||||
observed.try_push(h).is_ok()
|
||||
}
|
||||
|
||||
/// knowledge that a peer has about goings-on in a relay parent.
|
||||
@@ -502,6 +503,7 @@ async fn circulate_statement_and_dependents(
|
||||
ctx: &mut impl SubsystemContext<Message = StatementDistributionMessage>,
|
||||
relay_parent: Hash,
|
||||
statement: SignedFullStatement,
|
||||
metrics: &Metrics,
|
||||
) -> SubsystemResult<()> {
|
||||
if let Some(active_head)= active_heads.get_mut(&relay_parent) {
|
||||
|
||||
@@ -529,7 +531,8 @@ async fn circulate_statement_and_dependents(
|
||||
ctx,
|
||||
relay_parent,
|
||||
candidate_hash,
|
||||
&*active_head
|
||||
&*active_head,
|
||||
metrics,
|
||||
).await?;
|
||||
}
|
||||
}
|
||||
@@ -589,6 +592,7 @@ async fn send_statements_about(
|
||||
relay_parent: Hash,
|
||||
candidate_hash: Hash,
|
||||
active_head: &ActiveHeadData,
|
||||
metrics: &Metrics,
|
||||
) -> SubsystemResult<()> {
|
||||
for statement in active_head.statements_about(candidate_hash) {
|
||||
if peer_data.send(&relay_parent, &statement.fingerprint()).is_some() {
|
||||
@@ -600,6 +604,8 @@ async fn send_statements_about(
|
||||
ctx.send_message(AllMessages::NetworkBridge(
|
||||
NetworkBridgeMessage::SendValidationMessage(vec![peer.clone()], payload)
|
||||
)).await?;
|
||||
|
||||
metrics.on_statement_distributed();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -612,7 +618,8 @@ async fn send_statements(
|
||||
peer_data: &mut PeerData,
|
||||
ctx: &mut impl SubsystemContext<Message = StatementDistributionMessage>,
|
||||
relay_parent: Hash,
|
||||
active_head: &ActiveHeadData
|
||||
active_head: &ActiveHeadData,
|
||||
metrics: &Metrics,
|
||||
) -> SubsystemResult<()> {
|
||||
for statement in active_head.statements() {
|
||||
if peer_data.send(&relay_parent, &statement.fingerprint()).is_some() {
|
||||
@@ -624,6 +631,8 @@ async fn send_statements(
|
||||
ctx.send_message(AllMessages::NetworkBridge(
|
||||
NetworkBridgeMessage::SendValidationMessage(vec![peer.clone()], payload)
|
||||
)).await?;
|
||||
|
||||
metrics.on_statement_distributed();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -652,6 +661,7 @@ async fn handle_incoming_message<'a>(
|
||||
active_heads: &'a mut HashMap<Hash, ActiveHeadData>,
|
||||
ctx: &mut impl SubsystemContext<Message = StatementDistributionMessage>,
|
||||
message: protocol_v1::StatementDistributionMessage,
|
||||
metrics: &Metrics,
|
||||
) -> SubsystemResult<Option<(Hash, &'a StoredStatement)>> {
|
||||
let (relay_parent, statement) = match message {
|
||||
protocol_v1::StatementDistributionMessage::Statement(r, s) => (r, s),
|
||||
@@ -697,6 +707,7 @@ async fn handle_incoming_message<'a>(
|
||||
relay_parent,
|
||||
fingerprint.0.candidate_hash().clone(),
|
||||
&*active_head,
|
||||
metrics,
|
||||
).await?
|
||||
}
|
||||
Ok(false) => {}
|
||||
@@ -724,6 +735,7 @@ async fn update_peer_view_and_send_unlocked(
|
||||
ctx: &mut impl SubsystemContext<Message = StatementDistributionMessage>,
|
||||
active_heads: &HashMap<Hash, ActiveHeadData>,
|
||||
new_view: View,
|
||||
metrics: &Metrics,
|
||||
) -> SubsystemResult<()> {
|
||||
let old_view = std::mem::replace(&mut peer_data.view, new_view);
|
||||
|
||||
@@ -745,6 +757,7 @@ async fn update_peer_view_and_send_unlocked(
|
||||
ctx,
|
||||
new,
|
||||
active_head,
|
||||
metrics,
|
||||
).await?;
|
||||
}
|
||||
}
|
||||
@@ -758,6 +771,7 @@ async fn handle_network_update(
|
||||
ctx: &mut impl SubsystemContext<Message = StatementDistributionMessage>,
|
||||
our_view: &mut View,
|
||||
update: NetworkBridgeEvent<protocol_v1::StatementDistributionMessage>,
|
||||
metrics: &Metrics,
|
||||
) -> SubsystemResult<()> {
|
||||
match update {
|
||||
NetworkBridgeEvent::PeerConnected(peer, _role) => {
|
||||
@@ -782,6 +796,7 @@ async fn handle_network_update(
|
||||
active_heads,
|
||||
ctx,
|
||||
message,
|
||||
metrics,
|
||||
).await?;
|
||||
|
||||
if let Some((relay_parent, new)) = new_stored {
|
||||
@@ -808,6 +823,7 @@ async fn handle_network_update(
|
||||
ctx,
|
||||
&*active_heads,
|
||||
view,
|
||||
metrics,
|
||||
).await
|
||||
}
|
||||
None => Ok(()),
|
||||
@@ -819,7 +835,7 @@ async fn handle_network_update(
|
||||
|
||||
for new in our_view.difference(&old_view) {
|
||||
if !active_heads.contains_key(&new) {
|
||||
log::warn!(target: "statement_distribution", "Our network bridge view update \
|
||||
log::warn!(target: LOG_TARGET, "Our network bridge view update \
|
||||
inconsistent with `StartWork` messages we have received from overseer. \
|
||||
Contains unknown hash {}", new);
|
||||
}
|
||||
@@ -831,94 +847,132 @@ async fn handle_network_update(
|
||||
|
||||
}
|
||||
|
||||
async fn run(
|
||||
mut ctx: impl SubsystemContext<Message = StatementDistributionMessage>,
|
||||
) -> SubsystemResult<()> {
|
||||
let mut peers: HashMap<PeerId, PeerData> = HashMap::new();
|
||||
let mut our_view = View::default();
|
||||
let mut active_heads: HashMap<Hash, ActiveHeadData> = HashMap::new();
|
||||
let mut statement_listeners: Vec<mpsc::Sender<SignedFullStatement>> = Vec::new();
|
||||
impl StatementDistribution {
|
||||
async fn run(
|
||||
self,
|
||||
mut ctx: impl SubsystemContext<Message = StatementDistributionMessage>,
|
||||
) -> SubsystemResult<()> {
|
||||
let mut peers: HashMap<PeerId, PeerData> = HashMap::new();
|
||||
let mut our_view = View::default();
|
||||
let mut active_heads: HashMap<Hash, ActiveHeadData> = HashMap::new();
|
||||
let mut statement_listeners: Vec<mpsc::Sender<SignedFullStatement>> = Vec::new();
|
||||
let metrics = self.metrics;
|
||||
|
||||
loop {
|
||||
let message = ctx.recv().await?;
|
||||
match message {
|
||||
FromOverseer::Signal(OverseerSignal::ActiveLeaves(ActiveLeavesUpdate { activated, .. })) => {
|
||||
for relay_parent in activated {
|
||||
let (validators, session_index) = {
|
||||
let (val_tx, val_rx) = oneshot::channel();
|
||||
let (session_tx, session_rx) = oneshot::channel();
|
||||
loop {
|
||||
let message = ctx.recv().await?;
|
||||
match message {
|
||||
FromOverseer::Signal(OverseerSignal::ActiveLeaves(ActiveLeavesUpdate { activated, .. })) => {
|
||||
for relay_parent in activated {
|
||||
let (validators, session_index) = {
|
||||
let (val_tx, val_rx) = oneshot::channel();
|
||||
let (session_tx, session_rx) = oneshot::channel();
|
||||
|
||||
let val_message = AllMessages::RuntimeApi(
|
||||
RuntimeApiMessage::Request(
|
||||
relay_parent,
|
||||
RuntimeApiRequest::Validators(val_tx),
|
||||
),
|
||||
);
|
||||
let session_message = AllMessages::RuntimeApi(
|
||||
RuntimeApiMessage::Request(
|
||||
relay_parent,
|
||||
RuntimeApiRequest::SessionIndexForChild(session_tx),
|
||||
),
|
||||
);
|
||||
let val_message = AllMessages::RuntimeApi(
|
||||
RuntimeApiMessage::Request(
|
||||
relay_parent,
|
||||
RuntimeApiRequest::Validators(val_tx),
|
||||
),
|
||||
);
|
||||
let session_message = AllMessages::RuntimeApi(
|
||||
RuntimeApiMessage::Request(
|
||||
relay_parent,
|
||||
RuntimeApiRequest::SessionIndexForChild(session_tx),
|
||||
),
|
||||
);
|
||||
|
||||
ctx.send_messages(
|
||||
std::iter::once(val_message).chain(std::iter::once(session_message))
|
||||
).await?;
|
||||
ctx.send_messages(
|
||||
std::iter::once(val_message).chain(std::iter::once(session_message))
|
||||
).await?;
|
||||
|
||||
match (val_rx.await?, session_rx.await?) {
|
||||
(Ok(v), Ok(s)) => (v, s),
|
||||
(Err(e), _) | (_, Err(e)) => {
|
||||
log::warn!(
|
||||
target: "statement_distribution",
|
||||
"Failed to fetch runtime API data for active leaf: {:?}",
|
||||
e,
|
||||
);
|
||||
match (val_rx.await?, session_rx.await?) {
|
||||
(Ok(v), Ok(s)) => (v, s),
|
||||
(Err(e), _) | (_, Err(e)) => {
|
||||
log::warn!(
|
||||
target: LOG_TARGET,
|
||||
"Failed to fetch runtime API data for active leaf: {:?}",
|
||||
e,
|
||||
);
|
||||
|
||||
// Lacking this bookkeeping might make us behave funny, although
|
||||
// not in any slashable way. But we shouldn't take down the node
|
||||
// on what are likely spurious runtime API errors.
|
||||
continue;
|
||||
// Lacking this bookkeeping might make us behave funny, although
|
||||
// not in any slashable way. But we shouldn't take down the node
|
||||
// on what are likely spurious runtime API errors.
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
active_heads.entry(relay_parent)
|
||||
.or_insert(ActiveHeadData::new(validators, session_index));
|
||||
active_heads.entry(relay_parent)
|
||||
.or_insert(ActiveHeadData::new(validators, session_index));
|
||||
}
|
||||
}
|
||||
}
|
||||
FromOverseer::Signal(OverseerSignal::BlockFinalized(_block_hash)) => {
|
||||
// do nothing
|
||||
}
|
||||
FromOverseer::Signal(OverseerSignal::Conclude) => break,
|
||||
FromOverseer::Communication { msg } => match msg {
|
||||
StatementDistributionMessage::Share(relay_parent, statement) => {
|
||||
inform_statement_listeners(
|
||||
&statement,
|
||||
&mut statement_listeners,
|
||||
).await;
|
||||
circulate_statement_and_dependents(
|
||||
&mut peers,
|
||||
&mut active_heads,
|
||||
&mut ctx,
|
||||
relay_parent,
|
||||
statement,
|
||||
).await?;
|
||||
FromOverseer::Signal(OverseerSignal::BlockFinalized(_block_hash)) => {
|
||||
// do nothing
|
||||
}
|
||||
StatementDistributionMessage::NetworkBridgeUpdateV1(event) =>
|
||||
handle_network_update(
|
||||
&mut peers,
|
||||
&mut active_heads,
|
||||
&mut ctx,
|
||||
&mut our_view,
|
||||
event,
|
||||
).await?,
|
||||
StatementDistributionMessage::RegisterStatementListener(tx) => {
|
||||
statement_listeners.push(tx);
|
||||
FromOverseer::Signal(OverseerSignal::Conclude) => break,
|
||||
FromOverseer::Communication { msg } => match msg {
|
||||
StatementDistributionMessage::Share(relay_parent, statement) => {
|
||||
inform_statement_listeners(
|
||||
&statement,
|
||||
&mut statement_listeners,
|
||||
).await;
|
||||
circulate_statement_and_dependents(
|
||||
&mut peers,
|
||||
&mut active_heads,
|
||||
&mut ctx,
|
||||
relay_parent,
|
||||
statement,
|
||||
&metrics,
|
||||
).await?;
|
||||
}
|
||||
StatementDistributionMessage::NetworkBridgeUpdateV1(event) =>
|
||||
handle_network_update(
|
||||
&mut peers,
|
||||
&mut active_heads,
|
||||
&mut ctx,
|
||||
&mut our_view,
|
||||
event,
|
||||
&metrics,
|
||||
).await?,
|
||||
StatementDistributionMessage::RegisterStatementListener(tx) => {
|
||||
statement_listeners.push(tx);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
struct MetricsInner {
|
||||
statements_distributed: prometheus::Counter<prometheus::U64>,
|
||||
}
|
||||
|
||||
/// Statement Distribution metrics.
|
||||
#[derive(Default, Clone)]
|
||||
pub struct Metrics(Option<MetricsInner>);
|
||||
|
||||
impl Metrics {
|
||||
fn on_statement_distributed(&self) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.statements_distributed.inc();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl metrics::Metrics for Metrics {
|
||||
fn try_register(registry: &prometheus::Registry) -> std::result::Result<Self, prometheus::PrometheusError> {
|
||||
let metrics = MetricsInner {
|
||||
statements_distributed: prometheus::register(
|
||||
prometheus::Counter::new(
|
||||
"parachain_statements_distributed_total",
|
||||
"Number of candidate validity statements distributed to other peers."
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
};
|
||||
Ok(Metrics(Some(metrics)))
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -1256,6 +1310,7 @@ mod tests {
|
||||
&mut ctx,
|
||||
&active_heads,
|
||||
new_view.clone(),
|
||||
&Default::default(),
|
||||
).await.unwrap();
|
||||
|
||||
assert_eq!(peer_data.view, new_view);
|
||||
|
||||
Reference in New Issue
Block a user