Rework telemetry to replace the use of tracing with an object we pass around (#8143)

polkadot companion: paritytech/polkadot#2535
This commit is contained in:
Cecile Tonglet
2021-03-11 11:05:45 +01:00
committed by GitHub
parent 7aaba0c154
commit 8031b6eacb
55 changed files with 1028 additions and 838 deletions
@@ -90,7 +90,7 @@ use sc_network::{ObservedRole, PeerId, ReputationChange};
use parity_scale_codec::{Encode, Decode};
use sp_finality_grandpa::AuthorityId;
use sc_telemetry::{telemetry, CONSENSUS_DEBUG};
use sc_telemetry::{telemetry, TelemetryHandle, CONSENSUS_DEBUG};
use log::{trace, debug};
use sp_utils::mpsc::{tracing_unbounded, TracingUnboundedReceiver, TracingUnboundedSender};
use prometheus_endpoint::{CounterVec, Opts, PrometheusError, register, Registry, U64};
@@ -744,7 +744,7 @@ impl<Block: BlockT> Inner<Block> {
fn note_set(&mut self, set_id: SetId, authorities: Vec<AuthorityId>) -> MaybeMessage<Block> {
{
let local_view = match self.local_view {
ref mut x @ None => x.get_or_insert(LocalView::new(
ref mut x @ None => x.get_or_insert(LocalView::new(
set_id,
Round(1),
)),
@@ -828,7 +828,12 @@ impl<Block: BlockT> Inner<Block> {
// ensure authority is part of the set.
if !self.authorities.contains(&full.message.id) {
debug!(target: "afg", "Message from unknown voter: {}", full.message.id);
telemetry!(CONSENSUS_DEBUG; "afg.bad_msg_signature"; "signature" => ?full.message.id);
telemetry!(
self.config.telemetry;
CONSENSUS_DEBUG;
"afg.bad_msg_signature";
"signature" => ?full.message.id,
);
return Action::Discard(cost::UNKNOWN_VOTER);
}
@@ -840,7 +845,12 @@ impl<Block: BlockT> Inner<Block> {
full.set_id.0,
) {
debug!(target: "afg", "Bad message signature {}", full.message.id);
telemetry!(CONSENSUS_DEBUG; "afg.bad_msg_signature"; "signature" => ?full.message.id);
telemetry!(
self.config.telemetry;
CONSENSUS_DEBUG;
"afg.bad_msg_signature";
"signature" => ?full.message.id,
);
return Action::Discard(cost::BAD_SIGNATURE);
}
@@ -866,7 +876,10 @@ impl<Block: BlockT> Inner<Block> {
if full.message.precommits.len() != full.message.auth_data.len() || full.message.precommits.is_empty() {
debug!(target: "afg", "Malformed compact commit");
telemetry!(CONSENSUS_DEBUG; "afg.malformed_compact_commit";
telemetry!(
self.config.telemetry;
CONSENSUS_DEBUG;
"afg.malformed_compact_commit";
"precommits_len" => ?full.message.precommits.len(),
"auth_data_len" => ?full.message.auth_data.len(),
"precommits_is_empty" => ?full.message.precommits.is_empty(),
@@ -1277,6 +1290,7 @@ pub(super) struct GossipValidator<Block: BlockT> {
set_state: environment::SharedVoterSetState<Block>,
report_sender: TracingUnboundedSender<PeerReport>,
metrics: Option<Metrics>,
telemetry: Option<TelemetryHandle>,
}
impl<Block: BlockT> GossipValidator<Block> {
@@ -1287,6 +1301,7 @@ impl<Block: BlockT> GossipValidator<Block> {
config: crate::Config,
set_state: environment::SharedVoterSetState<Block>,
prometheus_registry: Option<&Registry>,
telemetry: Option<TelemetryHandle>,
) -> (GossipValidator<Block>, TracingUnboundedReceiver<PeerReport>) {
let metrics = match prometheus_registry.map(Metrics::register) {
Some(Ok(metrics)) => Some(metrics),
@@ -1303,6 +1318,7 @@ impl<Block: BlockT> GossipValidator<Block> {
set_state,
report_sender: tx,
metrics,
telemetry,
};
(val, rx)
@@ -1411,7 +1427,12 @@ impl<Block: BlockT> GossipValidator<Block> {
Err(e) => {
message_name = None;
debug!(target: "afg", "Error decoding message: {}", e);
telemetry!(CONSENSUS_DEBUG; "afg.err_decoding_msg"; "" => "");
telemetry!(
self.telemetry;
CONSENSUS_DEBUG;
"afg.err_decoding_msg";
"" => "",
);
let len = std::cmp::min(i32::max_value() as usize, data.len()) as i32;
Action::Discard(Misbehavior::UndecodablePacket(len).cost())
@@ -1630,6 +1651,7 @@ mod tests {
name: None,
is_authority: true,
observer_enabled: true,
telemetry: None,
}
}
@@ -1797,6 +1819,7 @@ mod tests {
config(),
voter_set_state(),
None,
None,
);
let set_id = 1;
@@ -1833,6 +1856,7 @@ mod tests {
config(),
voter_set_state(),
None,
None,
);
let set_id = 1;
let auth = AuthorityId::from_slice(&[1u8; 32]);
@@ -1878,6 +1902,7 @@ mod tests {
config(),
voter_set_state(),
None,
None,
);
let set_id = 1;
@@ -1947,6 +1972,7 @@ mod tests {
config(),
set_state.clone(),
None,
None,
);
let set_id = 1;
@@ -2002,6 +2028,7 @@ mod tests {
config(),
set_state.clone(),
None,
None,
);
// the validator starts at set id 2
@@ -2082,6 +2109,7 @@ mod tests {
config(),
voter_set_state(),
None,
None,
);
// the validator starts at set id 1.
@@ -2156,6 +2184,7 @@ mod tests {
config,
voter_set_state(),
None,
None,
);
// the validator starts at set id 1.
@@ -2190,6 +2219,7 @@ mod tests {
config(),
voter_set_state(),
None,
None,
);
// the validator starts at set id 1.
@@ -2250,6 +2280,7 @@ mod tests {
config,
voter_set_state(),
None,
None,
);
// the validator starts at set id 1.
@@ -2289,6 +2320,7 @@ mod tests {
config(),
voter_set_state(),
None,
None,
);
// the validator starts at set id 1.
@@ -2322,6 +2354,7 @@ mod tests {
config,
voter_set_state(),
None,
None,
);
// the validator start at set id 0
@@ -2401,6 +2434,7 @@ mod tests {
config(),
voter_set_state(),
None,
None,
);
// the validator start at set id 0
@@ -2441,6 +2475,7 @@ mod tests {
config,
voter_set_state(),
None,
None,
);
// the validator start at set id 0
@@ -2490,7 +2525,7 @@ mod tests {
#[test]
fn only_gossip_commits_to_peers_on_same_set() {
let (val, _) = GossipValidator::<Block>::new(config(), voter_set_state(), None);
let (val, _) = GossipValidator::<Block>::new(config(), voter_set_state(), None, None);
// the validator start at set id 1
val.note_set(SetId(1), Vec::new(), |_, _| {});
@@ -2568,7 +2603,7 @@ mod tests {
#[test]
fn expire_commits_from_older_rounds() {
let (val, _) = GossipValidator::<Block>::new(config(), voter_set_state(), None);
let (val, _) = GossipValidator::<Block>::new(config(), voter_set_state(), None, None);
let commit = |round, set_id, target_number| {
let commit = finality_grandpa::CompactCommit {
@@ -2619,7 +2654,7 @@ mod tests {
#[test]
fn allow_noting_different_authorities_for_same_set() {
let (val, _) = GossipValidator::<Block>::new(config(), voter_set_state(), None);
let (val, _) = GossipValidator::<Block>::new(config(), voter_set_state(), None, None);
let a1 = vec![AuthorityId::from_slice(&[0; 32])];
val.note_set(SetId(1), a1.clone(), |_, _| {});
@@ -42,7 +42,7 @@ use sc_network::{NetworkService, ReputationChange};
use sc_network_gossip::{GossipEngine, Network as GossipNetwork};
use parity_scale_codec::{Encode, Decode};
use sp_runtime::traits::{Block as BlockT, Hash as HashT, Header as HeaderT, NumberFor};
use sc_telemetry::{telemetry, CONSENSUS_DEBUG, CONSENSUS_INFO};
use sc_telemetry::{telemetry, TelemetryHandle, CONSENSUS_DEBUG, CONSENSUS_INFO};
use crate::{
CatchUp, Commit, CommunicationIn, CommunicationOutH,
@@ -192,6 +192,8 @@ pub(crate) struct NetworkBridge<B: BlockT, N: Network<B>> {
// just an `UnboundedReceiver`, one could also switch to a multi-producer-*multi*-consumer
// channel implementation.
gossip_validator_report_stream: Arc<Mutex<TracingUnboundedReceiver<PeerReport>>>,
telemetry: Option<TelemetryHandle>,
}
impl<B: BlockT, N: Network<B>> Unpin for NetworkBridge<B, N> {}
@@ -206,11 +208,13 @@ impl<B: BlockT, N: Network<B>> NetworkBridge<B, N> {
config: crate::Config,
set_state: crate::environment::SharedVoterSetState<B>,
prometheus_registry: Option<&Registry>,
telemetry: Option<TelemetryHandle>,
) -> Self {
let (validator, report_stream) = GossipValidator::new(
config,
set_state.clone(),
prometheus_registry,
telemetry.clone(),
);
let validator = Arc::new(validator);
@@ -268,6 +272,7 @@ impl<B: BlockT, N: Network<B>> NetworkBridge<B, N> {
neighbor_sender: neighbor_packet_sender,
neighbor_packet_worker: Arc::new(Mutex::new(neighbor_packet_worker)),
gossip_validator_report_stream: Arc::new(Mutex::new(report_stream)),
telemetry,
}
}
@@ -320,6 +325,7 @@ impl<B: BlockT, N: Network<B>> NetworkBridge<B, N> {
});
let topic = round_topic::<B>(round.0, set_id.0);
let telemetry = self.telemetry.clone();
let incoming = self.gossip_engine.lock().messages_for(topic)
.filter_map(move |notification| {
let decoded = GossipMessage::<B>::decode(&mut &notification.message[..]);
@@ -339,21 +345,30 @@ impl<B: BlockT, N: Network<B>> NetworkBridge<B, N> {
if voters.len().get() <= TELEMETRY_VOTERS_LIMIT {
match &msg.message.message {
PrimaryPropose(propose) => {
telemetry!(CONSENSUS_INFO; "afg.received_propose";
telemetry!(
telemetry;
CONSENSUS_INFO;
"afg.received_propose";
"voter" => ?format!("{}", msg.message.id),
"target_number" => ?propose.target_number,
"target_hash" => ?propose.target_hash,
);
},
Prevote(prevote) => {
telemetry!(CONSENSUS_INFO; "afg.received_prevote";
telemetry!(
telemetry;
CONSENSUS_INFO;
"afg.received_prevote";
"voter" => ?format!("{}", msg.message.id),
"target_number" => ?prevote.target_number,
"target_hash" => ?prevote.target_hash,
);
},
Precommit(precommit) => {
telemetry!(CONSENSUS_INFO; "afg.received_precommit";
telemetry!(
telemetry;
CONSENSUS_INFO;
"afg.received_precommit";
"voter" => ?format!("{}", msg.message.id),
"target_number" => ?precommit.target_number,
"target_hash" => ?precommit.target_hash,
@@ -379,6 +394,7 @@ impl<B: BlockT, N: Network<B>> NetworkBridge<B, N> {
network: self.gossip_engine.clone(),
sender: tx,
has_voted,
telemetry: self.telemetry.clone(),
};
// Combine incoming votes from external GRANDPA nodes with outgoing
@@ -412,6 +428,7 @@ impl<B: BlockT, N: Network<B>> NetworkBridge<B, N> {
voters,
self.validator.clone(),
self.neighbor_sender.clone(),
self.telemetry.clone(),
);
let outgoing = CommitsOut::<B>::new(
@@ -420,6 +437,7 @@ impl<B: BlockT, N: Network<B>> NetworkBridge<B, N> {
is_voter,
self.validator.clone(),
self.neighbor_sender.clone(),
self.telemetry.clone(),
);
let outgoing = outgoing.with(|out| {
@@ -491,72 +509,80 @@ fn incoming_global<B: BlockT>(
voters: Arc<VoterSet<AuthorityId>>,
gossip_validator: Arc<GossipValidator<B>>,
neighbor_sender: periodic::NeighborPacketSender<B>,
telemetry: Option<TelemetryHandle>,
) -> impl Stream<Item = CommunicationIn<B>> {
let process_commit = move |
msg: FullCommitMessage<B>,
mut notification: sc_network_gossip::TopicNotification,
gossip_engine: &Arc<Mutex<GossipEngine<B>>>,
gossip_validator: &Arc<GossipValidator<B>>,
voters: &VoterSet<AuthorityId>,
| {
if voters.len().get() <= TELEMETRY_VOTERS_LIMIT {
let precommits_signed_by: Vec<String> =
msg.message.auth_data.iter().map(move |(_, a)| {
format!("{}", a)
}).collect();
let process_commit = {
let telemetry = telemetry.clone();
move |
msg: FullCommitMessage<B>,
mut notification: sc_network_gossip::TopicNotification,
gossip_engine: &Arc<Mutex<GossipEngine<B>>>,
gossip_validator: &Arc<GossipValidator<B>>,
voters: &VoterSet<AuthorityId>,
| {
if voters.len().get() <= TELEMETRY_VOTERS_LIMIT {
let precommits_signed_by: Vec<String> =
msg.message.auth_data.iter().map(move |(_, a)| {
format!("{}", a)
}).collect();
telemetry!(CONSENSUS_INFO; "afg.received_commit";
"contains_precommits_signed_by" => ?precommits_signed_by,
"target_number" => ?msg.message.target_number.clone(),
"target_hash" => ?msg.message.target_hash.clone(),
);
}
if let Err(cost) = check_compact_commit::<B>(
&msg.message,
voters,
msg.round,
msg.set_id,
) {
if let Some(who) = notification.sender {
gossip_engine.lock().report(who, cost);
}
return None;
}
let round = msg.round;
let set_id = msg.set_id;
let commit = msg.message;
let finalized_number = commit.target_number;
let gossip_validator = gossip_validator.clone();
let gossip_engine = gossip_engine.clone();
let neighbor_sender = neighbor_sender.clone();
let cb = move |outcome| match outcome {
voter::CommitProcessingOutcome::Good(_) => {
// if it checks out, gossip it. not accounting for
// any discrepancy between the actual ghost and the claimed
// finalized number.
gossip_validator.note_commit_finalized(
round,
set_id,
finalized_number,
|to, neighbor| neighbor_sender.send(to, neighbor),
telemetry!(
telemetry;
CONSENSUS_INFO;
"afg.received_commit";
"contains_precommits_signed_by" => ?precommits_signed_by,
"target_number" => ?msg.message.target_number.clone(),
"target_hash" => ?msg.message.target_hash.clone(),
);
gossip_engine.lock().gossip_message(topic, notification.message.clone(), false);
}
voter::CommitProcessingOutcome::Bad(_) => {
// report peer and do not gossip.
if let Some(who) = notification.sender.take() {
gossip_engine.lock().report(who, cost::INVALID_COMMIT);
if let Err(cost) = check_compact_commit::<B>(
&msg.message,
voters,
msg.round,
msg.set_id,
telemetry.as_ref(),
) {
if let Some(who) = notification.sender {
gossip_engine.lock().report(who, cost);
}
return None;
}
};
let cb = voter::Callback::Work(Box::new(cb));
let round = msg.round;
let set_id = msg.set_id;
let commit = msg.message;
let finalized_number = commit.target_number;
let gossip_validator = gossip_validator.clone();
let gossip_engine = gossip_engine.clone();
let neighbor_sender = neighbor_sender.clone();
let cb = move |outcome| match outcome {
voter::CommitProcessingOutcome::Good(_) => {
// if it checks out, gossip it. not accounting for
// any discrepancy between the actual ghost and the claimed
// finalized number.
gossip_validator.note_commit_finalized(
round,
set_id,
finalized_number,
|to, neighbor| neighbor_sender.send(to, neighbor),
);
Some(voter::CommunicationIn::Commit(round.0, commit, cb))
gossip_engine.lock().gossip_message(topic, notification.message.clone(), false);
}
voter::CommitProcessingOutcome::Bad(_) => {
// report peer and do not gossip.
if let Some(who) = notification.sender.take() {
gossip_engine.lock().report(who, cost::INVALID_COMMIT);
}
}
};
let cb = voter::Callback::Work(Box::new(cb));
Some(voter::CommunicationIn::Commit(round.0, commit, cb))
}
};
let process_catch_up = move |
@@ -573,6 +599,7 @@ fn incoming_global<B: BlockT>(
&msg.message,
voters,
msg.set_id,
telemetry.clone(),
) {
if let Some(who) = notification.sender {
gossip_engine.lock().report(who, cost);
@@ -629,6 +656,7 @@ impl<B: BlockT, N: Network<B>> Clone for NetworkBridge<B, N> {
neighbor_sender: self.neighbor_sender.clone(),
neighbor_packet_worker: self.neighbor_packet_worker.clone(),
gossip_validator_report_stream: self.gossip_validator_report_stream.clone(),
telemetry: self.telemetry.clone(),
}
}
}
@@ -655,6 +683,7 @@ pub(crate) struct OutgoingMessages<Block: BlockT> {
sender: mpsc::Sender<SignedMessage<Block>>,
network: Arc<Mutex<GossipEngine<Block>>>,
has_voted: HasVoted<Block>,
telemetry: Option<TelemetryHandle>,
}
impl<B: BlockT> Unpin for OutgoingMessages<B> {}
@@ -717,7 +746,9 @@ impl<Block: BlockT> Sink<Message<Block>> for OutgoingMessages<Block>
);
telemetry!(
CONSENSUS_DEBUG; "afg.announcing_blocks_to_voted_peers";
self.telemetry;
CONSENSUS_DEBUG;
"afg.announcing_blocks_to_voted_peers";
"block" => ?target_hash, "round" => ?self.round, "set_id" => ?self.set_id,
);
@@ -756,6 +787,7 @@ fn check_compact_commit<Block: BlockT>(
voters: &VoterSet<AuthorityId>,
round: Round,
set_id: SetId,
telemetry: Option<&TelemetryHandle>,
) -> Result<(), ReputationChange> {
// 4f + 1 = equivocations from f voters.
let f = voters.total_weight() - voters.threshold();
@@ -797,7 +829,12 @@ fn check_compact_commit<Block: BlockT>(
&mut buf,
) {
debug!(target: "afg", "Bad commit message signature {}", id);
telemetry!(CONSENSUS_DEBUG; "afg.bad_commit_msg_signature"; "id" => ?id);
telemetry!(
telemetry;
CONSENSUS_DEBUG;
"afg.bad_commit_msg_signature";
"id" => ?id,
);
let cost = Misbehavior::BadCommitMessage {
signatures_checked: i as i32,
blocks_loaded: 0,
@@ -817,6 +854,7 @@ fn check_catch_up<Block: BlockT>(
msg: &CatchUp<Block>,
voters: &VoterSet<AuthorityId>,
set_id: SetId,
telemetry: Option<TelemetryHandle>,
) -> Result<(), ReputationChange> {
// 4f + 1 = equivocations from f voters.
let f = voters.total_weight() - voters.threshold();
@@ -867,6 +905,7 @@ fn check_catch_up<Block: BlockT>(
set_id: SetIdNumber,
mut signatures_checked: usize,
buf: &mut Vec<u8>,
telemetry: Option<TelemetryHandle>,
) -> Result<usize, ReputationChange> where
B: BlockT,
I: Iterator<Item=(Message<B>, &'a AuthorityId, &'a AuthoritySignature)>,
@@ -885,7 +924,12 @@ fn check_catch_up<Block: BlockT>(
buf,
) {
debug!(target: "afg", "Bad catch up message signature {}", id);
telemetry!(CONSENSUS_DEBUG; "afg.bad_catch_up_msg_signature"; "id" => ?id);
telemetry!(
telemetry;
CONSENSUS_DEBUG;
"afg.bad_catch_up_msg_signature";
"id" => ?id,
);
let cost = Misbehavior::BadCatchUpMessage {
signatures_checked: signatures_checked as i32,
@@ -909,6 +953,7 @@ fn check_catch_up<Block: BlockT>(
set_id.0,
0,
&mut buf,
telemetry.clone(),
)?;
// check signatures on all contained precommits.
@@ -920,6 +965,7 @@ fn check_catch_up<Block: BlockT>(
set_id.0,
signatures_checked,
&mut buf,
telemetry,
)?;
Ok(())
@@ -932,6 +978,7 @@ struct CommitsOut<Block: BlockT> {
is_voter: bool,
gossip_validator: Arc<GossipValidator<Block>>,
neighbor_sender: periodic::NeighborPacketSender<Block>,
telemetry: Option<TelemetryHandle>,
}
impl<Block: BlockT> CommitsOut<Block> {
@@ -942,6 +989,7 @@ impl<Block: BlockT> CommitsOut<Block> {
is_voter: bool,
gossip_validator: Arc<GossipValidator<Block>>,
neighbor_sender: periodic::NeighborPacketSender<Block>,
telemetry: Option<TelemetryHandle>,
) -> Self {
CommitsOut {
network,
@@ -949,6 +997,7 @@ impl<Block: BlockT> CommitsOut<Block> {
is_voter,
gossip_validator,
neighbor_sender,
telemetry,
}
}
}
@@ -968,8 +1017,12 @@ impl<Block: BlockT> Sink<(RoundNumber, Commit<Block>)> for CommitsOut<Block> {
let (round, commit) = input;
let round = Round(round);
telemetry!(CONSENSUS_DEBUG; "afg.commit_issued";
"target_number" => ?commit.target_number, "target_hash" => ?commit.target_hash,
telemetry!(
self.telemetry;
CONSENSUS_DEBUG;
"afg.commit_issued";
"target_number" => ?commit.target_number,
"target_hash" => ?commit.target_hash,
);
let (precommits, auth_data) = commit.precommits.into_iter()
.map(|signed| (signed.precommit, (signed.signature, signed.id)))
@@ -139,6 +139,7 @@ fn config() -> crate::Config {
name: None,
is_authority: true,
observer_enabled: true,
telemetry: None,
}
}
@@ -189,6 +190,7 @@ pub(crate) fn make_test_network() -> (
config(),
voter_set_state(),
None,
None,
);
(