Log info about low connectivity and unreachable validators (#3916)

* Attempt to add log stats to gossip-support.

* WIP: Keep track of connected validators.

* Clarify metric.

* WIP: Make gossip support report connectivity.

* WIP: Fixing tests.

* Fix network bridge + integrate in overseer.

* Consistent naming.

* Fix logic error

* cargo fmt

* Pretty logs.

* cargo fmt

* Use `Delay` to trigger periodic checks.

* fmt

* Fix warning for authority set size of 1.

* More correct ratio report if there are no resolved validators.

* Prettier rendering of empty set.

* Fix typo.

* Another typo.

* Don't check on every leaf update.

* Make compatible with older rustc.

* Fix tests.

* Demote warning.
This commit is contained in:
Robert Klotzner
2021-09-27 16:30:02 +02:00
committed by GitHub
parent fdebbbf4b3
commit 7c3b3c4a59
11 changed files with 635 additions and 315 deletions
+216 -114
View File
@@ -16,7 +16,17 @@
//! Unit tests for Gossip Support Subsystem.
use super::*;
use std::{sync::Arc, time::Duration};
use assert_matches::assert_matches;
use async_trait::async_trait;
use futures::{executor, future, Future};
use lazy_static::lazy_static;
use sc_network::multiaddr::Protocol;
use sp_consensus_babe::{AllowedSlots, BabeEpochConfiguration, Epoch as BabeEpoch};
use sp_keyring::Sr25519Keyring;
use polkadot_node_subsystem::{
jaeger,
messages::{AllMessages, RuntimeApiMessage, RuntimeApiRequest},
@@ -24,47 +34,124 @@ use polkadot_node_subsystem::{
};
use polkadot_node_subsystem_test_helpers as test_helpers;
use polkadot_node_subsystem_util::TimeoutExt as _;
use sp_consensus_babe::{AllowedSlots, BabeEpochConfiguration, Epoch as BabeEpoch};
use sp_keyring::Sr25519Keyring;
use test_helpers::mock::make_ferdie_keystore;
use assert_matches::assert_matches;
use futures::{executor, future, Future};
use std::{sync::Arc, time::Duration};
use super::*;
lazy_static! {
static ref MOCK_AUTHORITY_DISCOVERY: MockAuthorityDiscovery = MockAuthorityDiscovery::new();
static ref AUTHORITIES: Vec<AuthorityDiscoveryId> = {
let mut authorities = OTHER_AUTHORITIES.clone();
authorities.push(Sr25519Keyring::Ferdie.public().into());
authorities
};
static ref OTHER_AUTHORITIES: Vec<AuthorityDiscoveryId> = vec![
Sr25519Keyring::Alice.public().into(),
Sr25519Keyring::Bob.public().into(),
Sr25519Keyring::Charlie.public().into(),
Sr25519Keyring::Eve.public().into(),
Sr25519Keyring::One.public().into(),
Sr25519Keyring::Two.public().into(),
];
static ref NEIGHBORS: Vec<AuthorityDiscoveryId> = vec![
Sr25519Keyring::Two.public().into(),
Sr25519Keyring::Charlie.public().into(),
Sr25519Keyring::Eve.public().into(),
];
}
type VirtualOverseer = test_helpers::TestSubsystemContextHandle<GossipSupportMessage>;
fn test_harness<T: Future<Output = VirtualOverseer>>(
mut state: State,
#[derive(Debug, Clone)]
struct MockAuthorityDiscovery {
addrs: HashMap<AuthorityDiscoveryId, Vec<Multiaddr>>,
authorities: HashMap<PeerId, AuthorityDiscoveryId>,
}
impl MockAuthorityDiscovery {
fn new() -> Self {
let authorities: HashMap<_, _> =
AUTHORITIES.clone().into_iter().map(|a| (PeerId::random(), a)).collect();
let addrs = authorities
.clone()
.into_iter()
.map(|(p, a)| {
let multiaddr = Multiaddr::empty().with(Protocol::P2p(p.into()));
(a, vec![multiaddr])
})
.collect();
Self { addrs, authorities }
}
}
#[async_trait]
impl AuthorityDiscovery for MockAuthorityDiscovery {
async fn get_addresses_by_authority_id(
&mut self,
authority: polkadot_primitives::v1::AuthorityDiscoveryId,
) -> Option<Vec<sc_network::Multiaddr>> {
self.addrs.get(&authority).cloned()
}
async fn get_authority_id_by_peer_id(
&mut self,
peer_id: polkadot_node_network_protocol::PeerId,
) -> Option<polkadot_primitives::v1::AuthorityDiscoveryId> {
self.authorities.get(&peer_id).cloned()
}
}
async fn get_other_authorities_addrs() -> Vec<Vec<Multiaddr>> {
let mut addrs = Vec::with_capacity(OTHER_AUTHORITIES.len());
let mut discovery = MOCK_AUTHORITY_DISCOVERY.clone();
for authority in OTHER_AUTHORITIES.iter().cloned() {
if let Some(addr) = discovery.get_addresses_by_authority_id(authority).await {
addrs.push(addr);
}
}
addrs
}
async fn get_other_authorities_addrs_map() -> HashMap<AuthorityDiscoveryId, Vec<Multiaddr>> {
let mut addrs = HashMap::with_capacity(OTHER_AUTHORITIES.len());
let mut discovery = MOCK_AUTHORITY_DISCOVERY.clone();
for authority in OTHER_AUTHORITIES.iter().cloned() {
if let Some(addr) = discovery.get_addresses_by_authority_id(authority.clone()).await {
addrs.insert(authority, addr);
}
}
addrs
}
fn make_subsystem() -> GossipSupport<MockAuthorityDiscovery> {
GossipSupport::new(make_ferdie_keystore(), MOCK_AUTHORITY_DISCOVERY.clone())
}
fn test_harness<T: Future<Output = VirtualOverseer>, AD: AuthorityDiscovery>(
subsystem: GossipSupport<AD>,
test_fn: impl FnOnce(VirtualOverseer) -> T,
) -> State {
) -> GossipSupport<AD> {
let pool = sp_core::testing::TaskExecutor::new();
let (context, virtual_overseer) = test_helpers::make_subsystem_context(pool.clone());
let keystore = make_ferdie_keystore();
let subsystem = GossipSupport::new(keystore);
{
let subsystem = subsystem.run_inner(context, &mut state);
let subsystem = subsystem.run(context);
let test_fut = test_fn(virtual_overseer);
let test_fut = test_fn(virtual_overseer);
futures::pin_mut!(test_fut);
futures::pin_mut!(subsystem);
futures::pin_mut!(test_fut);
futures::pin_mut!(subsystem);
executor::block_on(future::join(
async move {
let mut overseer = test_fut.await;
overseer
.send(FromOverseer::Signal(OverseerSignal::Conclude))
.timeout(TIMEOUT)
.await
.expect("Conclude send timeout");
},
subsystem,
));
}
state
let (_, subsystem) = executor::block_on(future::join(
async move {
let mut overseer = test_fut.await;
overseer
.send(FromOverseer::Signal(OverseerSignal::Conclude))
.timeout(TIMEOUT)
.await
.expect("Conclude send timeout");
},
subsystem,
));
subsystem
}
const TIMEOUT: Duration = Duration::from_millis(100);
@@ -91,32 +178,6 @@ async fn overseer_recv(overseer: &mut VirtualOverseer) -> AllMessages {
msg
}
fn authorities() -> Vec<AuthorityDiscoveryId> {
let mut authorities = other_authorities();
authorities.push(Sr25519Keyring::Ferdie.public().into());
authorities
}
// Authorities other than ourselves:
fn other_authorities() -> Vec<AuthorityDiscoveryId> {
vec![
Sr25519Keyring::Alice.public().into(),
Sr25519Keyring::Bob.public().into(),
Sr25519Keyring::Charlie.public().into(),
Sr25519Keyring::Eve.public().into(),
Sr25519Keyring::One.public().into(),
Sr25519Keyring::Two.public().into(),
]
}
fn neighbors() -> Vec<AuthorityDiscoveryId> {
vec![
Sr25519Keyring::Two.public().into(),
Sr25519Keyring::Charlie.public().into(),
Sr25519Keyring::Eve.public().into(),
]
}
async fn test_neighbors(overseer: &mut VirtualOverseer) {
assert_matches!(
overseer_recv(overseer).await,
@@ -145,7 +206,7 @@ async fn test_neighbors(overseer: &mut VirtualOverseer) {
}) => {
let mut got: Vec<_> = our_neighbors.into_iter().collect();
got.sort();
assert_eq!(got, neighbors());
assert_eq!(got, NEIGHBORS.clone());
}
);
}
@@ -153,7 +214,7 @@ async fn test_neighbors(overseer: &mut VirtualOverseer) {
#[test]
fn issues_a_connection_request_on_new_session() {
let hash = Hash::repeat_byte(0xAA);
let state = test_harness(State::default(), |mut virtual_overseer| async move {
let state = test_harness(make_subsystem(), |mut virtual_overseer| async move {
let overseer = &mut virtual_overseer;
overseer_signal_active_leaves(overseer, hash).await;
assert_matches!(
@@ -173,20 +234,18 @@ fn issues_a_connection_request_on_new_session() {
RuntimeApiRequest::Authorities(tx),
)) => {
assert_eq!(relay_parent, hash);
tx.send(Ok(authorities())).unwrap();
tx.send(Ok(AUTHORITIES.clone())).unwrap();
}
);
assert_matches!(
overseer_recv(overseer).await,
AllMessages::NetworkBridge(NetworkBridgeMessage::ConnectToValidators {
validator_ids,
AllMessages::NetworkBridge(NetworkBridgeMessage::ConnectToResolvedValidators {
validator_addrs,
peer_set,
failed,
}) => {
assert_eq!(validator_ids, other_authorities());
assert_eq!(validator_addrs, get_other_authorities_addrs().await);
assert_eq!(peer_set, PeerSet::Validation);
failed.send(0).unwrap();
}
);
@@ -241,20 +300,18 @@ fn issues_a_connection_request_on_new_session() {
RuntimeApiRequest::Authorities(tx),
)) => {
assert_eq!(relay_parent, hash);
tx.send(Ok(authorities())).unwrap();
tx.send(Ok(AUTHORITIES.clone())).unwrap();
}
);
assert_matches!(
overseer_recv(overseer).await,
AllMessages::NetworkBridge(NetworkBridgeMessage::ConnectToValidators {
validator_ids,
AllMessages::NetworkBridge(NetworkBridgeMessage::ConnectToResolvedValidators {
validator_addrs,
peer_set,
failed,
}) => {
assert_eq!(validator_ids, other_authorities());
assert_eq!(validator_addrs, get_other_authorities_addrs().await);
assert_eq!(peer_set, PeerSet::Validation);
failed.send(0).unwrap();
}
);
@@ -266,54 +323,96 @@ fn issues_a_connection_request_on_new_session() {
assert!(state.last_failure.is_none());
}
#[test]
fn test_log_output() {
sp_tracing::try_init_simple();
let alice: AuthorityDiscoveryId = Sr25519Keyring::Alice.public().into();
let bob = Sr25519Keyring::Bob.public().into();
let unconnected_authorities = {
let mut m = HashMap::new();
let peer_id = PeerId::random();
let addr = Multiaddr::empty().with(Protocol::P2p(peer_id.into()));
let addrs = vec![addr.clone(), addr];
m.insert(alice, addrs);
let peer_id = PeerId::random();
let addr = Multiaddr::empty().with(Protocol::P2p(peer_id.into()));
let addrs = vec![addr.clone(), addr];
m.insert(bob, addrs);
m
};
tracing::debug!(
target: LOG_TARGET,
unconnected_authorities = %PrettyAuthorities(unconnected_authorities.iter()),
"Connectivity Report"
);
}
#[test]
fn issues_a_connection_request_when_last_request_was_mostly_unresolved() {
let hash = Hash::repeat_byte(0xAA);
let mut state = test_harness(State::default(), |mut virtual_overseer| async move {
let overseer = &mut virtual_overseer;
overseer_signal_active_leaves(overseer, hash).await;
assert_matches!(
overseer_recv(overseer).await,
AllMessages::RuntimeApi(RuntimeApiMessage::Request(
relay_parent,
RuntimeApiRequest::SessionIndexForChild(tx),
)) => {
assert_eq!(relay_parent, hash);
tx.send(Ok(1)).unwrap();
}
);
assert_matches!(
overseer_recv(overseer).await,
AllMessages::RuntimeApi(RuntimeApiMessage::Request(
relay_parent,
RuntimeApiRequest::Authorities(tx),
)) => {
assert_eq!(relay_parent, hash);
tx.send(Ok(authorities())).unwrap();
}
);
let mut state = make_subsystem();
// There will be two lookup failures:
let alice = Sr25519Keyring::Alice.public().into();
let bob = Sr25519Keyring::Bob.public().into();
let alice_addr = state.authority_discovery.addrs.remove(&alice);
state.authority_discovery.addrs.remove(&bob);
assert_matches!(
overseer_recv(overseer).await,
AllMessages::NetworkBridge(NetworkBridgeMessage::ConnectToValidators {
validator_ids,
peer_set,
failed,
}) => {
assert_eq!(validator_ids, other_authorities());
assert_eq!(peer_set, PeerSet::Validation);
failed.send(2).unwrap();
}
);
let mut state = {
let alice = alice.clone();
let bob = bob.clone();
test_neighbors(overseer).await;
test_harness(state, |mut virtual_overseer| async move {
let overseer = &mut virtual_overseer;
overseer_signal_active_leaves(overseer, hash).await;
assert_matches!(
overseer_recv(overseer).await,
AllMessages::RuntimeApi(RuntimeApiMessage::Request(
relay_parent,
RuntimeApiRequest::SessionIndexForChild(tx),
)) => {
assert_eq!(relay_parent, hash);
tx.send(Ok(1)).unwrap();
}
);
assert_matches!(
overseer_recv(overseer).await,
AllMessages::RuntimeApi(RuntimeApiMessage::Request(
relay_parent,
RuntimeApiRequest::Authorities(tx),
)) => {
assert_eq!(relay_parent, hash);
tx.send(Ok(AUTHORITIES.clone())).unwrap();
}
);
virtual_overseer
});
assert_matches!(
overseer_recv(overseer).await,
AllMessages::NetworkBridge(NetworkBridgeMessage::ConnectToResolvedValidators {
mut validator_addrs,
peer_set,
}) => {
let mut expected = get_other_authorities_addrs_map().await;
expected.remove(&alice);
expected.remove(&bob);
let mut expected: Vec<Vec<Multiaddr>> = expected.into_iter().map(|(_,v)| v).collect();
validator_addrs.sort();
expected.sort();
assert_eq!(validator_addrs, expected);
assert_eq!(peer_set, PeerSet::Validation);
}
);
test_neighbors(overseer).await;
virtual_overseer
})
};
assert_eq!(state.last_session_index, Some(1));
assert!(state.last_failure.is_some());
state.last_failure = state.last_failure.and_then(|i| i.checked_sub(BACKOFF_DURATION));
// One error less:
state.authority_discovery.addrs.insert(alice, alice_addr.unwrap());
let hash = Hash::repeat_byte(0xBB);
let state = test_harness(state, |mut virtual_overseer| async move {
@@ -336,20 +435,23 @@ fn issues_a_connection_request_when_last_request_was_mostly_unresolved() {
RuntimeApiRequest::Authorities(tx),
)) => {
assert_eq!(relay_parent, hash);
tx.send(Ok(authorities())).unwrap();
tx.send(Ok(AUTHORITIES.clone())).unwrap();
}
);
assert_matches!(
overseer_recv(overseer).await,
AllMessages::NetworkBridge(NetworkBridgeMessage::ConnectToValidators {
validator_ids,
AllMessages::NetworkBridge(NetworkBridgeMessage::ConnectToResolvedValidators {
mut validator_addrs,
peer_set,
failed,
}) => {
assert_eq!(validator_ids, other_authorities());
let mut expected = get_other_authorities_addrs_map().await;
expected.remove(&bob);
let mut expected: Vec<Vec<Multiaddr>> = expected.into_iter().map(|(_,v)| v).collect();
expected.sort();
validator_addrs.sort();
assert_eq!(validator_addrs, expected);
assert_eq!(peer_set, PeerSet::Validation);
failed.send(1).unwrap();
}
);