mirror of
https://github.com/pezkuwichain/pezkuwi-subxt.git
synced 2026-04-26 22:47:56 +00:00
Add more debug logs to understand if statement-distribution misbehaving (#3419)
Add more debug logs to understand if statement-distribution is in a bad state, should be useful for debugging https://github.com/paritytech/polkadot-sdk/issues/3314 on production networks. Additionally, increase the number of parallel requests should make, since I notice that requests take around 100ms on kusama, and the 5 parallel request was picked mostly random, no reason why we can do more than that. --------- Signed-off-by: Alexandru Gheorghe <alexandru.gheorghe@parity.io> Co-authored-by: ordian <write@reusable.software>
This commit is contained in:
committed by
GitHub
parent
ce5de99540
commit
b9c792a4b8
@@ -55,8 +55,9 @@
|
||||
//! and to keep track of what we have sent to other validators in the group and what we may
|
||||
//! continue to send them.
|
||||
|
||||
use polkadot_primitives::{CandidateHash, CompactStatement, ValidatorIndex};
|
||||
use polkadot_primitives::{CandidateHash, CompactStatement, Hash, ValidatorIndex};
|
||||
|
||||
use crate::LOG_TARGET;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
||||
#[derive(Hash, PartialEq, Eq)]
|
||||
@@ -424,6 +425,28 @@ impl ClusterTracker {
|
||||
fn is_in_group(&self, validator: ValidatorIndex) -> bool {
|
||||
self.validators.contains(&validator)
|
||||
}
|
||||
|
||||
/// Dumps pending statement for this cluster.
|
||||
///
|
||||
/// Normally we should not have pending statements to validators in our cluster,
|
||||
/// but if we do for all validators in our cluster, then we don't participate
|
||||
/// in backing. Ocasional pending statements are expected if two authorities
|
||||
/// can't detect each otehr or after restart, where it takes a while to discover
|
||||
/// the whole network.
|
||||
|
||||
pub fn warn_if_too_many_pending_statements(&self, parent_hash: Hash) {
|
||||
if self.pending.iter().filter(|pending| !pending.1.is_empty()).count() >=
|
||||
self.validators.len()
|
||||
{
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
pending_statements = ?self.pending,
|
||||
?parent_hash,
|
||||
"Cluster has too many pending statements, something wrong with our connection to our group peers \n
|
||||
Restart might be needed if validator gets 0 backing rewards for more than 3-4 consecutive sessions"
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Incoming statement was accepted.
|
||||
|
||||
@@ -251,6 +251,13 @@ impl PerSessionState {
|
||||
if local_index.is_some() {
|
||||
self.local_validator.get_or_insert(LocalValidatorIndex::Inactive);
|
||||
}
|
||||
|
||||
gum::info!(
|
||||
target: LOG_TARGET,
|
||||
index_in_gossip_topology = ?local_index,
|
||||
index_in_parachain_authorities = ?self.local_validator,
|
||||
"Node uses the following topology indices"
|
||||
);
|
||||
}
|
||||
|
||||
/// Returns `true` if local is neither active or inactive validator node.
|
||||
@@ -768,7 +775,15 @@ pub(crate) fn handle_deactivate_leaves(state: &mut State, leaves: &[Hash]) {
|
||||
let pruned = state.implicit_view.deactivate_leaf(*leaf);
|
||||
for pruned_rp in pruned {
|
||||
// clean up per-relay-parent data based on everything removed.
|
||||
state.per_relay_parent.remove(&pruned_rp);
|
||||
state
|
||||
.per_relay_parent
|
||||
.remove(&pruned_rp)
|
||||
.as_ref()
|
||||
.and_then(|pruned| pruned.active_validator_state())
|
||||
.map(|active_state| {
|
||||
active_state.cluster_tracker.warn_if_too_many_pending_statements(pruned_rp)
|
||||
});
|
||||
|
||||
// clean up requests related to this relay parent.
|
||||
state.request_manager.remove_by_relay_parent(*leaf);
|
||||
}
|
||||
|
||||
@@ -315,7 +315,16 @@ impl RequestManager {
|
||||
request_props: impl Fn(&CandidateIdentifier) -> Option<RequestProperties>,
|
||||
peer_advertised: impl Fn(&CandidateIdentifier, &PeerId) -> Option<StatementFilter>,
|
||||
) -> Option<OutgoingRequest<AttestedCandidateRequest>> {
|
||||
if response_manager.len() >= MAX_PARALLEL_ATTESTED_CANDIDATE_REQUESTS as usize {
|
||||
// The number of parallel requests a node can answer is limited by
|
||||
// `MAX_PARALLEL_ATTESTED_CANDIDATE_REQUESTS`, however there is no
|
||||
// need for the current node to limit itself to the same amount the
|
||||
// requests, because the requests are going to different nodes anyways.
|
||||
// While looking at https://github.com/paritytech/polkadot-sdk/issues/3314,
|
||||
// found out that this requests take around 100ms to fullfill, so it
|
||||
// would make sense to try to request things as early as we can, given
|
||||
// we would need to request it for each candidate, around 25 right now
|
||||
// on kusama.
|
||||
if response_manager.len() >= 2 * MAX_PARALLEL_ATTESTED_CANDIDATE_REQUESTS as usize {
|
||||
return None
|
||||
}
|
||||
|
||||
@@ -1027,6 +1036,7 @@ mod tests {
|
||||
let peer_advertised = |_identifier: &CandidateIdentifier, _peer: &_| {
|
||||
Some(StatementFilter::full(group_size))
|
||||
};
|
||||
|
||||
let outgoing = request_manager
|
||||
.next_request(&mut response_manager, request_props, peer_advertised)
|
||||
.unwrap();
|
||||
@@ -1148,6 +1158,7 @@ mod tests {
|
||||
{
|
||||
let request_props =
|
||||
|_identifier: &CandidateIdentifier| Some((&request_properties).clone());
|
||||
|
||||
let outgoing = request_manager
|
||||
.next_request(&mut response_manager, request_props, peer_advertised)
|
||||
.unwrap();
|
||||
@@ -1230,6 +1241,7 @@ mod tests {
|
||||
{
|
||||
let request_props =
|
||||
|_identifier: &CandidateIdentifier| Some((&request_properties).clone());
|
||||
|
||||
let outgoing = request_manager
|
||||
.next_request(&mut response_manager, request_props, peer_advertised)
|
||||
.unwrap();
|
||||
|
||||
Reference in New Issue
Block a user