Try to fix out of view statements (#5177)

This issue happens when some peer sends a good but already known Seconded statement and the statement-distribution code does not update the statements_received field in the peer_knowledge structure. Subsequently, a Valid statement causes out-of-view message that is incorrectly emitted and causes reputation lose.

This PR also introduces a concept of passing the specific pseudo-random generator to subsystems to make it easier to write deterministic tests. This functionality is not really necessary for the specific issue and unit test but it can be useful for other tests and subsystems.
This commit is contained in:
Vsevolod Stakhov
2022-03-24 20:18:43 +00:00
committed by GitHub
parent b424e5e741
commit af94fc9534
10 changed files with 474 additions and 46 deletions
@@ -32,7 +32,7 @@ use polkadot_node_network_protocol::{
IfDisconnected, PeerId, UnifiedReputationChange as Rep, View,
};
use polkadot_node_primitives::{SignedFullStatement, Statement, UncheckedSignedFullStatement};
use polkadot_node_subsystem_util::{self as util, MIN_GOSSIP_PEERS};
use polkadot_node_subsystem_util::{self as util, rand, MIN_GOSSIP_PEERS};
use polkadot_primitives::v2::{
AuthorityDiscoveryId, CandidateHash, CommittedCandidateReceipt, CompactStatement, Hash,
@@ -115,16 +115,19 @@ const LOG_TARGET: &str = "parachain::statement-distribution";
const MAX_LARGE_STATEMENTS_PER_SENDER: usize = 20;
/// The statement distribution subsystem.
pub struct StatementDistributionSubsystem {
pub struct StatementDistributionSubsystem<R> {
/// Pointer to a keystore, which is required for determining this node's validator index.
keystore: SyncCryptoStorePtr,
/// Receiver for incoming large statement requests.
req_receiver: Option<IncomingRequestReceiver<request_v1::StatementFetchingRequest>>,
/// Prometheus metrics
metrics: Metrics,
/// Pseudo-random generator for peers selection logic
rng: R,
}
impl<Context> overseer::Subsystem<Context, SubsystemError> for StatementDistributionSubsystem
impl<Context, R: rand::Rng + Send + Sync + 'static> overseer::Subsystem<Context, SubsystemError>
for StatementDistributionSubsystem<R>
where
Context: SubsystemContext<Message = StatementDistributionMessage>,
Context: overseer::SubsystemContext<Message = StatementDistributionMessage>,
@@ -142,17 +145,6 @@ where
}
}
impl StatementDistributionSubsystem {
/// Create a new Statement Distribution Subsystem
pub fn new(
keystore: SyncCryptoStorePtr,
req_receiver: IncomingRequestReceiver<request_v1::StatementFetchingRequest>,
metrics: Metrics,
) -> Self {
Self { keystore, req_receiver: Some(req_receiver), metrics }
}
}
#[derive(Default)]
struct RecentOutdatedHeads {
buf: VecDeque<Hash>,
@@ -906,6 +898,7 @@ async fn circulate_statement_and_dependents(
statement: SignedFullStatement,
priority_peers: Vec<PeerId>,
metrics: &Metrics,
rng: &mut impl rand::Rng,
) {
let active_head = match active_heads.get_mut(&relay_parent) {
Some(res) => res,
@@ -932,6 +925,7 @@ async fn circulate_statement_and_dependents(
stored,
priority_peers,
metrics,
rng,
)
.await,
)),
@@ -1019,6 +1013,7 @@ async fn circulate_statement<'a>(
stored: StoredStatement<'a>,
mut priority_peers: Vec<PeerId>,
metrics: &Metrics,
rng: &mut impl rand::Rng,
) -> Vec<PeerId> {
let fingerprint = stored.fingerprint();
@@ -1041,8 +1036,12 @@ async fn circulate_statement<'a>(
let priority_set: HashSet<&PeerId> = priority_peers.iter().collect();
peers_to_send.retain(|p| !priority_set.contains(p));
let mut peers_to_send =
util::choose_random_subset(|e| gossip_peers.contains(e), peers_to_send, MIN_GOSSIP_PEERS);
util::choose_random_subset_with_rng(
|e| gossip_peers.contains(e),
&mut peers_to_send,
rng,
MIN_GOSSIP_PEERS,
);
// We don't want to use less peers, than we would without any priority peers:
let min_size = std::cmp::max(peers_to_send.len(), MIN_GOSSIP_PEERS);
// Make set full:
@@ -1313,6 +1312,7 @@ async fn handle_incoming_message_and_circulate<'a>(
message: protocol_v1::StatementDistributionMessage,
req_sender: &mpsc::Sender<RequesterMessage>,
metrics: &Metrics,
rng: &mut impl rand::Rng,
) {
let handled_incoming = match peers.get_mut(&peer) {
Some(data) =>
@@ -1348,6 +1348,7 @@ async fn handle_incoming_message_and_circulate<'a>(
statement,
Vec::new(),
metrics,
rng,
)
.await;
}
@@ -1458,7 +1459,12 @@ async fn handle_incoming_message<'a>(
Ok(()) => {},
Err(DeniedStatement::NotUseful) => return None,
Err(DeniedStatement::UsefulButKnown) => {
// Note a received statement in the peer data
peer_data
.receive(&relay_parent, &fingerprint, max_message_count)
.expect("checked in `check_can_receive` above; qed");
report_peer(ctx, peer, BENEFIT_VALID_STATEMENT).await;
return None
},
}
@@ -1558,6 +1564,7 @@ async fn update_peer_view_and_maybe_send_unlocked(
active_heads: &HashMap<Hash, ActiveHeadData>,
new_view: View,
metrics: &Metrics,
rng: &mut impl rand::Rng,
) {
let old_view = std::mem::replace(&mut peer_data.view, new_view);
@@ -1568,9 +1575,10 @@ async fn update_peer_view_and_maybe_send_unlocked(
let is_gossip_peer = gossip_peers.contains(&peer);
let lucky = is_gossip_peer ||
util::gen_ratio(
util::gen_ratio_rng(
util::MIN_GOSSIP_PEERS.saturating_sub(gossip_peers.len()),
util::MIN_GOSSIP_PEERS,
rng,
);
// Add entries for all relay-parents in the new view but not the old.
@@ -1597,6 +1605,7 @@ async fn handle_network_update(
req_sender: &mpsc::Sender<RequesterMessage>,
update: NetworkBridgeEvent<protocol_v1::StatementDistributionMessage>,
metrics: &Metrics,
rng: &mut impl rand::Rng,
) {
match update {
NetworkBridgeEvent::PeerConnected(peer, role, maybe_authority) => {
@@ -1638,6 +1647,7 @@ async fn handle_network_update(
&*active_heads,
view,
metrics,
rng,
)
.await
}
@@ -1654,6 +1664,7 @@ async fn handle_network_update(
message,
req_sender,
metrics,
rng,
)
.await;
},
@@ -1670,6 +1681,7 @@ async fn handle_network_update(
&*active_heads,
view,
metrics,
rng,
)
.await,
None => (),
@@ -1681,7 +1693,17 @@ async fn handle_network_update(
}
}
impl StatementDistributionSubsystem {
impl<R: rand::Rng> StatementDistributionSubsystem<R> {
/// Create a new Statement Distribution Subsystem
pub fn new(
keystore: SyncCryptoStorePtr,
req_receiver: IncomingRequestReceiver<request_v1::StatementFetchingRequest>,
metrics: Metrics,
rng: R,
) -> Self {
Self { keystore, req_receiver: Some(req_receiver), metrics, rng }
}
async fn run(
mut self,
mut ctx: (impl SubsystemContext<Message = StatementDistributionMessage>
@@ -1803,7 +1825,7 @@ impl StatementDistributionSubsystem {
}
async fn handle_requester_message(
&self,
&mut self,
ctx: &mut impl SubsystemContext,
gossip_peers: &HashSet<PeerId>,
peers: &mut HashMap<PeerId, PeerData>,
@@ -1861,6 +1883,7 @@ impl StatementDistributionSubsystem {
message,
req_sender,
&self.metrics,
&mut self.rng,
)
.await;
}
@@ -1910,7 +1933,7 @@ impl StatementDistributionSubsystem {
}
async fn handle_subsystem_message(
&self,
&mut self,
ctx: &mut (impl SubsystemContext + overseer::SubsystemContext),
runtime: &mut RuntimeInfo,
peers: &mut HashMap<PeerId, PeerData>,
@@ -2022,6 +2045,7 @@ impl StatementDistributionSubsystem {
statement,
group_peers,
metrics,
&mut self.rng,
)
.await;
},
@@ -2036,6 +2060,7 @@ impl StatementDistributionSubsystem {
req_sender,
event,
metrics,
&mut self.rng,
)
.await;
},
@@ -29,7 +29,9 @@ use polkadot_node_network_protocol::{
use polkadot_node_primitives::{Statement, UncheckedSignedFullStatement};
use polkadot_node_subsystem_test_helpers::mock::make_ferdie_keystore;
use polkadot_primitives::v2::{Hash, SessionInfo, ValidationCode};
use polkadot_primitives_test_helpers::{dummy_committed_candidate_receipt, dummy_hash};
use polkadot_primitives_test_helpers::{
dummy_committed_candidate_receipt, dummy_hash, AlwaysZeroRng,
};
use polkadot_subsystem::{
jaeger,
messages::{RuntimeApiMessage, RuntimeApiRequest},
@@ -511,6 +513,7 @@ fn peer_view_update_sends_messages() {
&active_heads,
new_view.clone(),
&Default::default(),
&mut AlwaysZeroRng,
)
.await;
@@ -640,6 +643,7 @@ fn circulated_statement_goes_to_all_peers_with_view() {
statement,
Vec::new(),
&Metrics::default(),
&mut AlwaysZeroRng,
)
.await;
@@ -723,6 +727,7 @@ fn receiving_from_one_sends_to_another_and_to_candidate_backing() {
Arc::new(LocalKeystore::in_memory()),
statement_req_receiver,
Default::default(),
AlwaysZeroRng,
);
s.run(ctx).await.unwrap();
};
@@ -915,6 +920,7 @@ fn receiving_large_statement_from_one_sends_to_another_and_to_candidate_backing(
make_ferdie_keystore(),
statement_req_receiver,
Default::default(),
AlwaysZeroRng,
);
s.run(ctx).await.unwrap();
};
@@ -1412,6 +1418,7 @@ fn share_prioritizes_backing_group() {
make_ferdie_keystore(),
statement_req_receiver,
Default::default(),
AlwaysZeroRng,
);
s.run(ctx).await.unwrap();
};
@@ -1695,6 +1702,7 @@ fn peer_cant_flood_with_large_statements() {
make_ferdie_keystore(),
statement_req_receiver,
Default::default(),
AlwaysZeroRng,
);
s.run(ctx).await.unwrap();
};
@@ -1842,6 +1850,347 @@ fn peer_cant_flood_with_large_statements() {
executor::block_on(future::join(test_fut, bg));
}
// This test addresses an issue when received knowledge is not updated on a
// subsequent `Seconded` statements
// See https://github.com/paritytech/polkadot/pull/5177
#[test]
fn handle_multiple_seconded_statements() {
let relay_parent_hash = Hash::repeat_byte(1);
let candidate = dummy_committed_candidate_receipt(relay_parent_hash);
let candidate_hash = candidate.hash();
// We want to ensure that our peers are not lucky
let mut all_peers: Vec<PeerId> = Vec::with_capacity(MIN_GOSSIP_PEERS + 4);
let peer_a = PeerId::random();
let peer_b = PeerId::random();
assert_ne!(peer_a, peer_b);
for _ in 0..MIN_GOSSIP_PEERS + 2 {
all_peers.push(PeerId::random());
}
all_peers.push(peer_a.clone());
all_peers.push(peer_b.clone());
let mut lucky_peers = all_peers.clone();
util::choose_random_subset_with_rng(
|_| false,
&mut lucky_peers,
&mut AlwaysZeroRng,
MIN_GOSSIP_PEERS,
);
lucky_peers.sort();
assert_eq!(lucky_peers.len(), MIN_GOSSIP_PEERS);
assert!(!lucky_peers.contains(&peer_a));
assert!(!lucky_peers.contains(&peer_b));
let validators = vec![
Sr25519Keyring::Alice.pair(),
Sr25519Keyring::Bob.pair(),
Sr25519Keyring::Charlie.pair(),
];
let session_info = make_session_info(validators, vec![]);
let session_index = 1;
let pool = sp_core::testing::TaskExecutor::new();
let (ctx, mut handle) = polkadot_node_subsystem_test_helpers::make_subsystem_context(pool);
let (statement_req_receiver, _) = IncomingRequest::get_config_receiver();
let virtual_overseer_fut = async move {
let s = StatementDistributionSubsystem::new(
Arc::new(LocalKeystore::in_memory()),
statement_req_receiver,
Default::default(),
AlwaysZeroRng,
);
s.run(ctx).await.unwrap();
};
let test_fut = async move {
// register our active heads.
handle
.send(FromOverseer::Signal(OverseerSignal::ActiveLeaves(
ActiveLeavesUpdate::start_work(ActivatedLeaf {
hash: relay_parent_hash,
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
}),
)))
.await;
assert_matches!(
handle.recv().await,
AllMessages::RuntimeApi(
RuntimeApiMessage::Request(r, RuntimeApiRequest::SessionIndexForChild(tx))
)
if r == relay_parent_hash
=> {
let _ = tx.send(Ok(session_index));
}
);
assert_matches!(
handle.recv().await,
AllMessages::RuntimeApi(
RuntimeApiMessage::Request(r, RuntimeApiRequest::SessionInfo(sess_index, tx))
)
if r == relay_parent_hash && sess_index == session_index
=> {
let _ = tx.send(Ok(Some(session_info)));
}
);
// notify of peers and view
for peer in all_peers.iter() {
handle
.send(FromOverseer::Communication {
msg: StatementDistributionMessage::NetworkBridgeUpdateV1(
NetworkBridgeEvent::PeerConnected(peer.clone(), ObservedRole::Full, None),
),
})
.await;
handle
.send(FromOverseer::Communication {
msg: StatementDistributionMessage::NetworkBridgeUpdateV1(
NetworkBridgeEvent::PeerViewChange(peer.clone(), view![relay_parent_hash]),
),
})
.await;
}
// Explicitly add all `lucky` peers to the gossip peers to ensure that neither `peerA` not `peerB`
// receive statements
handle
.send(FromOverseer::Communication {
msg: StatementDistributionMessage::NetworkBridgeUpdateV1(
NetworkBridgeEvent::NewGossipTopology(
lucky_peers.iter().cloned().collect::<HashSet<_>>(),
),
),
})
.await;
// receive a seconded statement from peer A. it should be propagated onwards to peer B and to
// candidate backing.
let statement = {
let signing_context = SigningContext { parent_hash: relay_parent_hash, session_index };
let keystore: SyncCryptoStorePtr = Arc::new(LocalKeystore::in_memory());
let alice_public = CryptoStore::sr25519_generate_new(
&*keystore,
ValidatorId::ID,
Some(&Sr25519Keyring::Alice.to_seed()),
)
.await
.unwrap();
SignedFullStatement::sign(
&keystore,
Statement::Seconded(candidate.clone()),
&signing_context,
ValidatorIndex(0),
&alice_public.into(),
)
.await
.ok()
.flatten()
.expect("should be signed")
};
// `PeerA` sends a `Seconded` message
handle
.send(FromOverseer::Communication {
msg: StatementDistributionMessage::NetworkBridgeUpdateV1(
NetworkBridgeEvent::PeerMessage(
peer_a.clone(),
protocol_v1::StatementDistributionMessage::Statement(
relay_parent_hash,
statement.clone().into(),
),
),
),
})
.await;
assert_matches!(
handle.recv().await,
AllMessages::NetworkBridge(
NetworkBridgeMessage::ReportPeer(p, r)
) => {
assert_eq!(p, peer_a);
assert_eq!(r, BENEFIT_VALID_STATEMENT_FIRST);
}
);
// After the first valid statement, we expect messages to be circulated
assert_matches!(
handle.recv().await,
AllMessages::CandidateBacking(
CandidateBackingMessage::Statement(r, s)
) => {
assert_eq!(r, relay_parent_hash);
assert_eq!(s, statement);
}
);
assert_matches!(
handle.recv().await,
AllMessages::NetworkBridge(
NetworkBridgeMessage::SendValidationMessage(
recipients,
protocol_v1::ValidationProtocol::StatementDistribution(
protocol_v1::StatementDistributionMessage::Statement(r, s)
),
)
) => {
assert!(!recipients.contains(&peer_b));
assert_eq!(r, relay_parent_hash);
assert_eq!(s, statement.clone().into());
}
);
// `PeerB` sends a `Seconded` message: valid but known
handle
.send(FromOverseer::Communication {
msg: StatementDistributionMessage::NetworkBridgeUpdateV1(
NetworkBridgeEvent::PeerMessage(
peer_b.clone(),
protocol_v1::StatementDistributionMessage::Statement(
relay_parent_hash,
statement.clone().into(),
),
),
),
})
.await;
assert_matches!(
handle.recv().await,
AllMessages::NetworkBridge(
NetworkBridgeMessage::ReportPeer(p, r)
) => {
assert_eq!(p, peer_b);
assert_eq!(r, BENEFIT_VALID_STATEMENT);
}
);
// Create a `Valid` statement
let statement = {
let signing_context = SigningContext { parent_hash: relay_parent_hash, session_index };
let keystore: SyncCryptoStorePtr = Arc::new(LocalKeystore::in_memory());
let alice_public = CryptoStore::sr25519_generate_new(
&*keystore,
ValidatorId::ID,
Some(&Sr25519Keyring::Alice.to_seed()),
)
.await
.unwrap();
SignedFullStatement::sign(
&keystore,
Statement::Valid(candidate_hash),
&signing_context,
ValidatorIndex(0),
&alice_public.into(),
)
.await
.ok()
.flatten()
.expect("should be signed")
};
// `PeerA` sends a `Valid` message
handle
.send(FromOverseer::Communication {
msg: StatementDistributionMessage::NetworkBridgeUpdateV1(
NetworkBridgeEvent::PeerMessage(
peer_a.clone(),
protocol_v1::StatementDistributionMessage::Statement(
relay_parent_hash,
statement.clone().into(),
),
),
),
})
.await;
assert_matches!(
handle.recv().await,
AllMessages::NetworkBridge(
NetworkBridgeMessage::ReportPeer(p, r)
) => {
assert_eq!(p, peer_a);
assert_eq!(r, BENEFIT_VALID_STATEMENT_FIRST);
}
);
assert_matches!(
handle.recv().await,
AllMessages::CandidateBacking(
CandidateBackingMessage::Statement(r, s)
) => {
assert_eq!(r, relay_parent_hash);
assert_eq!(s, statement);
}
);
assert_matches!(
handle.recv().await,
AllMessages::NetworkBridge(
NetworkBridgeMessage::SendValidationMessage(
recipients,
protocol_v1::ValidationProtocol::StatementDistribution(
protocol_v1::StatementDistributionMessage::Statement(r, s)
),
)
) => {
assert!(!recipients.contains(&peer_b));
assert_eq!(r, relay_parent_hash);
assert_eq!(s, statement.clone().into());
}
);
// `PeerB` sends a `Valid` message
handle
.send(FromOverseer::Communication {
msg: StatementDistributionMessage::NetworkBridgeUpdateV1(
NetworkBridgeEvent::PeerMessage(
peer_b.clone(),
protocol_v1::StatementDistributionMessage::Statement(
relay_parent_hash,
statement.clone().into(),
),
),
),
})
.await;
// We expect that this is still valid despite the fact that `PeerB` was not
// the first when sending `Seconded`
assert_matches!(
handle.recv().await,
AllMessages::NetworkBridge(
NetworkBridgeMessage::ReportPeer(p, r)
) => {
assert_eq!(p, peer_b);
assert_eq!(r, BENEFIT_VALID_STATEMENT);
}
);
handle.send(FromOverseer::Signal(OverseerSignal::Conclude)).await;
};
futures::pin_mut!(test_fut);
futures::pin_mut!(virtual_overseer_fut);
executor::block_on(future::join(test_fut, virtual_overseer_fut));
}
fn make_session_info(validators: Vec<Pair>, groups: Vec<Vec<u32>>) -> SessionInfo {
let validator_groups: Vec<Vec<ValidatorIndex>> = groups
.iter()