Delay reputation updates (#7214)

* Add futures-timer

* Make cost_or_benefit public

* Update ReportPeer message format

* Add delay to reputation updates (dirtywork)

* Update ReputationAggregator

* Update tests

* Fix flaky tests

* Move reputation to state

* Use the main loop for handling reputation sendings

* Update

* Move reputation to utils

* Update reputation sending

* Fix arguments order

* Update state

* Remove new from state

* Add constant

* Add failing test for delay

* Change mocking approach

* Fix type errors

* Fix comments

* Add message handling to select

* Fix bitfields-distribution tests

* Add docs to reputation aggregator

* Replace .into_base_rep

* Use one REPUTATION_CHANGE_INTERVAL by default

* Add reputation change to statement-distribution

* Update polkadot-availability-bitfield-distribution

* Update futures selecting in subsystems

* Update reputation adding

* Send malicious changes right away without adding to state

* Add reputation to StatementDistributionSubsystem

* Handle reputation in statement distribution

* Add delay test for polkadot-statement-distribution

* Fix collator-protocol tests before applying reputation delay

* Remove into_base_rep

* Add reputation to State

* Fix failed tests

* Add reputation delay

* Update tests

* Add batched network message for peer reporting

* Update approval-distribution tests

* Update bitfield-distribution tests

* Update statement-distribution tests

* Update collator-protocol tests

* Remove levels in matching

* Address clippy errors

* Fix overseer test

* Add a metric for original count of rep changes

* Update Reputation

* Revert "Add a metric for original count of rep changes"

This reverts commit 6c9b0c1ec34491d16e562bdcba8db6b9dcf484db.

* Update node/subsystem-util/src/reputation.rs

Co-authored-by: Vsevolod Stakhov <vsevolod.stakhov@parity.io>

* Remove redundant vec

---------

Co-authored-by: Vsevolod Stakhov <vsevolod.stakhov@parity.io>
This commit is contained in:
Andrei Eres
2023-06-15 15:46:06 +02:00
committed by GitHub
parent d3d9d4ae66
commit 0a1bc654d9
27 changed files with 2231 additions and 805 deletions
@@ -20,7 +20,7 @@
#![warn(missing_docs)]
use futures::{channel::oneshot, FutureExt as _};
use futures::{channel::oneshot, select, FutureExt as _};
use polkadot_node_jaeger as jaeger;
use polkadot_node_network_protocol::{
self as net_protocol,
@@ -38,11 +38,15 @@ use polkadot_node_subsystem::{
},
overseer, FromOrchestra, OverseerSignal, SpawnedSubsystem, SubsystemError,
};
use polkadot_node_subsystem_util::reputation::{ReputationAggregator, REPUTATION_CHANGE_INTERVAL};
use polkadot_primitives::{
BlockNumber, CandidateIndex, Hash, SessionIndex, ValidatorIndex, ValidatorSignature,
};
use rand::{CryptoRng, Rng, SeedableRng};
use std::collections::{hash_map, BTreeMap, HashMap, HashSet, VecDeque};
use std::{
collections::{hash_map, BTreeMap, HashMap, HashSet, VecDeque},
time::Duration,
};
use self::metrics::Metrics;
@@ -187,6 +191,9 @@ struct State {
/// Current approval checking finality lag.
approval_checking_lag: BlockNumber,
/// Aggregated reputation change
reputation: ReputationAggregator,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
@@ -755,7 +762,13 @@ impl State {
"Unexpected assignment",
);
if !self.recent_outdated_blocks.is_recent_outdated(&block_hash) {
modify_reputation(ctx.sender(), peer_id, COST_UNEXPECTED_MESSAGE).await;
modify_reputation(
&mut self.reputation,
ctx.sender(),
peer_id,
COST_UNEXPECTED_MESSAGE,
)
.await;
}
}
return
@@ -780,7 +793,13 @@ impl State {
?message_subject,
"Duplicate assignment",
);
modify_reputation(ctx.sender(), peer_id, COST_DUPLICATE_MESSAGE).await;
modify_reputation(
&mut self.reputation,
ctx.sender(),
peer_id,
COST_DUPLICATE_MESSAGE,
)
.await;
}
return
}
@@ -792,13 +811,25 @@ impl State {
?message_subject,
"Assignment from a peer is out of view",
);
modify_reputation(ctx.sender(), peer_id, COST_UNEXPECTED_MESSAGE).await;
modify_reputation(
&mut self.reputation,
ctx.sender(),
peer_id,
COST_UNEXPECTED_MESSAGE,
)
.await;
},
}
// if the assignment is known to be valid, reward the peer
if entry.knowledge.contains(&message_subject, message_kind) {
modify_reputation(ctx.sender(), peer_id, BENEFIT_VALID_MESSAGE).await;
modify_reputation(
&mut self.reputation,
ctx.sender(),
peer_id,
BENEFIT_VALID_MESSAGE,
)
.await;
if let Some(peer_knowledge) = entry.known_by.get_mut(&peer_id) {
gum::trace!(target: LOG_TARGET, ?peer_id, ?message_subject, "Known assignment");
peer_knowledge.received.insert(message_subject, message_kind);
@@ -834,7 +865,13 @@ impl State {
);
match result {
AssignmentCheckResult::Accepted => {
modify_reputation(ctx.sender(), peer_id, BENEFIT_VALID_MESSAGE_FIRST).await;
modify_reputation(
&mut self.reputation,
ctx.sender(),
peer_id,
BENEFIT_VALID_MESSAGE_FIRST,
)
.await;
entry.knowledge.known_messages.insert(message_subject.clone(), message_kind);
if let Some(peer_knowledge) = entry.known_by.get_mut(&peer_id) {
peer_knowledge.received.insert(message_subject.clone(), message_kind);
@@ -862,8 +899,13 @@ impl State {
?peer_id,
"Got an assignment too far in the future",
);
modify_reputation(ctx.sender(), peer_id, COST_ASSIGNMENT_TOO_FAR_IN_THE_FUTURE)
.await;
modify_reputation(
&mut self.reputation,
ctx.sender(),
peer_id,
COST_ASSIGNMENT_TOO_FAR_IN_THE_FUTURE,
)
.await;
return
},
AssignmentCheckResult::Bad(error) => {
@@ -874,7 +916,13 @@ impl State {
%error,
"Got a bad assignment from peer",
);
modify_reputation(ctx.sender(), peer_id, COST_INVALID_MESSAGE).await;
modify_reputation(
&mut self.reputation,
ctx.sender(),
peer_id,
COST_INVALID_MESSAGE,
)
.await;
return
},
}
@@ -1024,7 +1072,13 @@ impl State {
_ => {
if let Some(peer_id) = source.peer_id() {
if !self.recent_outdated_blocks.is_recent_outdated(&block_hash) {
modify_reputation(ctx.sender(), peer_id, COST_UNEXPECTED_MESSAGE).await;
modify_reputation(
&mut self.reputation,
ctx.sender(),
peer_id,
COST_UNEXPECTED_MESSAGE,
)
.await;
}
}
return
@@ -1043,7 +1097,13 @@ impl State {
?message_subject,
"Unknown approval assignment",
);
modify_reputation(ctx.sender(), peer_id, COST_UNEXPECTED_MESSAGE).await;
modify_reputation(
&mut self.reputation,
ctx.sender(),
peer_id,
COST_UNEXPECTED_MESSAGE,
)
.await;
return
}
@@ -1060,7 +1120,13 @@ impl State {
"Duplicate approval",
);
modify_reputation(ctx.sender(), peer_id, COST_DUPLICATE_MESSAGE).await;
modify_reputation(
&mut self.reputation,
ctx.sender(),
peer_id,
COST_DUPLICATE_MESSAGE,
)
.await;
}
return
}
@@ -1072,14 +1138,26 @@ impl State {
?message_subject,
"Approval from a peer is out of view",
);
modify_reputation(ctx.sender(), peer_id, COST_UNEXPECTED_MESSAGE).await;
modify_reputation(
&mut self.reputation,
ctx.sender(),
peer_id,
COST_UNEXPECTED_MESSAGE,
)
.await;
},
}
// if the approval is known to be valid, reward the peer
if entry.knowledge.contains(&message_subject, message_kind) {
gum::trace!(target: LOG_TARGET, ?peer_id, ?message_subject, "Known approval");
modify_reputation(ctx.sender(), peer_id, BENEFIT_VALID_MESSAGE).await;
modify_reputation(
&mut self.reputation,
ctx.sender(),
peer_id,
BENEFIT_VALID_MESSAGE,
)
.await;
if let Some(peer_knowledge) = entry.known_by.get_mut(&peer_id) {
peer_knowledge.received.insert(message_subject.clone(), message_kind);
}
@@ -1110,7 +1188,13 @@ impl State {
);
match result {
ApprovalCheckResult::Accepted => {
modify_reputation(ctx.sender(), peer_id, BENEFIT_VALID_MESSAGE_FIRST).await;
modify_reputation(
&mut self.reputation,
ctx.sender(),
peer_id,
BENEFIT_VALID_MESSAGE_FIRST,
)
.await;
entry.knowledge.insert(message_subject.clone(), message_kind);
if let Some(peer_knowledge) = entry.known_by.get_mut(&peer_id) {
@@ -1118,7 +1202,13 @@ impl State {
}
},
ApprovalCheckResult::Bad(error) => {
modify_reputation(ctx.sender(), peer_id, COST_INVALID_MESSAGE).await;
modify_reputation(
&mut self.reputation,
ctx.sender(),
peer_id,
COST_INVALID_MESSAGE,
)
.await;
gum::info!(
target: LOG_TARGET,
?peer_id,
@@ -1669,6 +1759,7 @@ async fn adjust_required_routing_and_propagate<Context, BlockFilter, RoutingModi
/// Modify the reputation of a peer based on its behavior.
async fn modify_reputation(
reputation: &mut ReputationAggregator,
sender: &mut impl overseer::ApprovalDistributionSenderTrait,
peer_id: PeerId,
rep: Rep,
@@ -1679,8 +1770,7 @@ async fn modify_reputation(
?peer_id,
"Reputation change for peer",
);
sender.send_message(NetworkBridgeTxMessage::ReportPeer(peer_id, rep)).await;
reputation.modify(sender, peer_id, rep).await;
}
#[overseer::contextbounds(ApprovalDistribution, prefix = self::overseer)]
@@ -1696,7 +1786,7 @@ impl ApprovalDistribution {
// According to the docs of `rand`, this is a ChaCha12 RNG in practice
// and will always be chosen for strong performance and security properties.
let mut rng = rand::rngs::StdRng::from_entropy();
self.run_inner(ctx, &mut state, &mut rng).await
self.run_inner(ctx, &mut state, REPUTATION_CHANGE_INTERVAL, &mut rng).await
}
/// Used for testing.
@@ -1704,37 +1794,49 @@ impl ApprovalDistribution {
self,
mut ctx: Context,
state: &mut State,
reputation_interval: Duration,
rng: &mut (impl CryptoRng + Rng),
) {
let new_reputation_delay = || futures_timer::Delay::new(reputation_interval).fuse();
let mut reputation_delay = new_reputation_delay();
loop {
let message = match ctx.recv().await {
Ok(message) => message,
Err(e) => {
gum::debug!(target: LOG_TARGET, err = ?e, "Failed to receive a message from Overseer, exiting");
return
select! {
_ = reputation_delay => {
state.reputation.send(ctx.sender()).await;
reputation_delay = new_reputation_delay();
},
};
match message {
FromOrchestra::Communication { msg } =>
Self::handle_incoming(&mut ctx, state, msg, &self.metrics, rng).await,
FromOrchestra::Signal(OverseerSignal::ActiveLeaves(update)) => {
gum::trace!(target: LOG_TARGET, "active leaves signal (ignored)");
// the relay chain blocks relevant to the approval subsystems
// are those that are available, but not finalized yet
// activated and deactivated heads hence are irrelevant to this subsystem, other than
// for tracing purposes.
if let Some(activated) = update.activated {
let head = activated.hash;
let approval_distribution_span =
jaeger::PerLeafSpan::new(activated.span, "approval-distribution");
state.spans.insert(head, approval_distribution_span);
message = ctx.recv().fuse() => {
let message = match message {
Ok(message) => message,
Err(e) => {
gum::debug!(target: LOG_TARGET, err = ?e, "Failed to receive a message from Overseer, exiting");
return
},
};
match message {
FromOrchestra::Communication { msg } =>
Self::handle_incoming(&mut ctx, state, msg, &self.metrics, rng).await,
FromOrchestra::Signal(OverseerSignal::ActiveLeaves(update)) => {
gum::trace!(target: LOG_TARGET, "active leaves signal (ignored)");
// the relay chain blocks relevant to the approval subsystems
// are those that are available, but not finalized yet
// activated and deactivated heads hence are irrelevant to this subsystem, other than
// for tracing purposes.
if let Some(activated) = update.activated {
let head = activated.hash;
let approval_distribution_span =
jaeger::PerLeafSpan::new(activated.span, "approval-distribution");
state.spans.insert(head, approval_distribution_span);
}
},
FromOrchestra::Signal(OverseerSignal::BlockFinalized(_hash, number)) => {
gum::trace!(target: LOG_TARGET, number = %number, "finalized signal");
state.handle_block_finalized(&mut ctx, &self.metrics, number).await;
},
FromOrchestra::Signal(OverseerSignal::Conclude) => return,
}
},
FromOrchestra::Signal(OverseerSignal::BlockFinalized(_hash, number)) => {
gum::trace!(target: LOG_TARGET, number = %number, "finalized signal");
state.handle_block_finalized(&mut ctx, &self.metrics, number).await;
},
FromOrchestra::Signal(OverseerSignal::Conclude) => return,
}
}
}
@@ -26,9 +26,11 @@ use polkadot_node_network_protocol::{
use polkadot_node_primitives::approval::{
AssignmentCertKind, VrfOutput, VrfProof, VrfSignature, RELAY_VRF_MODULO_CONTEXT,
};
use polkadot_node_subsystem::messages::{network_bridge_event, AllMessages, ApprovalCheckError};
use polkadot_node_subsystem::messages::{
network_bridge_event, AllMessages, ApprovalCheckError, ReportPeerMessage,
};
use polkadot_node_subsystem_test_helpers as test_helpers;
use polkadot_node_subsystem_util::TimeoutExt as _;
use polkadot_node_subsystem_util::{reputation::add_reputation, TimeoutExt as _};
use polkadot_primitives::{AuthorityDiscoveryId, BlakeTwo256, HashT};
use polkadot_primitives_test_helpers::dummy_signature;
use rand::SeedableRng;
@@ -54,7 +56,8 @@ fn test_harness<T: Future<Output = VirtualOverseer>>(
{
let mut rng = rand_chacha::ChaCha12Rng::seed_from_u64(12345);
let subsystem = subsystem.run_inner(context, &mut state, &mut rng);
let subsystem =
subsystem.run_inner(context, &mut state, REPUTATION_CHANGE_TEST_INTERVAL, &mut rng);
let test_fut = test_fn(virtual_overseer);
@@ -78,6 +81,7 @@ fn test_harness<T: Future<Output = VirtualOverseer>>(
}
const TIMEOUT: Duration = Duration::from_millis(200);
const REPUTATION_CHANGE_TEST_INTERVAL: Duration = Duration::from_millis(1);
async fn overseer_send(overseer: &mut VirtualOverseer, msg: ApprovalDistributionMessage) {
gum::trace!(msg = ?msg, "Sending message");
@@ -273,22 +277,46 @@ fn fake_assignment_cert(block_hash: Hash, validator: ValidatorIndex) -> Indirect
async fn expect_reputation_change(
virtual_overseer: &mut VirtualOverseer,
peer_id: &PeerId,
expected_reputation_change: Rep,
rep: Rep,
) {
assert_matches!(
overseer_recv(virtual_overseer).await,
AllMessages::NetworkBridgeTx(
NetworkBridgeTxMessage::ReportPeer(
rep_peer,
rep,
)
) => {
assert_eq!(peer_id, &rep_peer);
assert_eq!(expected_reputation_change, rep);
AllMessages::NetworkBridgeTx(NetworkBridgeTxMessage::ReportPeer(
ReportPeerMessage::Single(p, r),
)) => {
assert_eq!(p, *peer_id);
assert_eq!(r, rep.into());
}
);
}
async fn expect_reputation_changes(
virtual_overseer: &mut VirtualOverseer,
peer_id: &PeerId,
reps: Vec<Rep>,
) {
let mut acc = HashMap::new();
for rep in reps {
add_reputation(&mut acc, *peer_id, rep);
}
assert_matches!(
overseer_recv(virtual_overseer).await,
AllMessages::NetworkBridgeTx(NetworkBridgeTxMessage::ReportPeer(
ReportPeerMessage::Batch(v),
)) => {
assert_eq!(v, acc);
}
);
}
fn state_without_reputation_delay() -> State {
State { reputation: ReputationAggregator::new(|_| true), ..Default::default() }
}
fn state_with_reputation_delay() -> State {
State { reputation: ReputationAggregator::new(|_| false), ..Default::default() }
}
/// import an assignment
/// connect a new peer
/// the new peer sends us the same assignment
@@ -301,7 +329,7 @@ fn try_import_the_same_assignment() {
let parent_hash = Hash::repeat_byte(0xFF);
let hash = Hash::repeat_byte(0xAA);
let _ = test_harness(State::default(), |mut virtual_overseer| async move {
let _ = test_harness(state_without_reputation_delay(), |mut virtual_overseer| async move {
let overseer = &mut virtual_overseer;
// setup peers
setup_peer_with_view(overseer, &peer_a, view![]).await;
@@ -373,6 +401,65 @@ fn try_import_the_same_assignment() {
});
}
/// Reputation changes are reported as one aggregated batch when the state is built
/// with `state_with_reputation_delay`.
///
/// 1. connect a peer with an empty view
/// 2. announce a new block with a single candidate
/// 3. the peer sends an assignment for that block and approval voting accepts it
/// 4. a single batched report carrying both reputation changes is expected,
///    and no further message arrives within `TIMEOUT`
#[test]
fn delay_reputation_change() {
    let block_hash = Hash::repeat_byte(0xAA);
    let block_parent = Hash::repeat_byte(0xFF);
    let sender_peer = PeerId::random();

    let _ = test_harness(state_with_reputation_delay(), |mut virtual_overseer| async move {
        let overseer = &mut virtual_overseer;

        // The peer joins with an empty view.
        setup_peer_with_view(overseer, &sender_peer, view![]).await;

        // Announce block `block_hash` (number 2) carrying exactly one candidate.
        let new_blocks = ApprovalDistributionMessage::NewBlocks(vec![BlockApprovalMeta {
            hash: block_hash,
            parent_hash: block_parent,
            number: 2,
            candidates: vec![Default::default()],
            slot: 1.into(),
            session: 1,
        }]);
        overseer_send(overseer, new_blocks).await;

        // The peer gossips an assignment for candidate 0 of that block.
        let cert = fake_assignment_cert(block_hash, ValidatorIndex(0));
        let peer_message =
            protocol_v1::ApprovalDistributionMessage::Assignments(vec![(cert.clone(), 0u32)]);
        send_message_from_peer(overseer, &sender_peer, peer_message).await;

        // Approval voting is asked to check the assignment; reply with `Accepted`.
        assert_matches!(
            overseer_recv(overseer).await,
            AllMessages::ApprovalVoting(ApprovalVotingMessage::CheckAndImportAssignment(
                assignment,
                0u32,
                tx,
            )) => {
                assert_eq!(assignment, cert);
                tx.send(AssignmentCheckResult::Accepted).unwrap();
            }
        );

        // Both changes must arrive together in one batch.
        // NOTE(review): the cost presumably stems from the assignment being outside the
        // peer's (empty) view, the benefit from the accepted import — confirm against
        // the assignment import path.
        let expected_changes = vec![COST_UNEXPECTED_MESSAGE, BENEFIT_VALID_MESSAGE_FIRST];
        expect_reputation_changes(overseer, &sender_peer, expected_changes).await;

        // Nothing else may follow the batch.
        let trailing = overseer.recv().timeout(TIMEOUT).await;
        assert!(trailing.is_none(), "no message should be sent");

        virtual_overseer
    });
}
/// <https://github.com/paritytech/polkadot/pull/2160#discussion_r547594835>
///
/// 1. Send a view update that removes block B from their view.
@@ -385,7 +472,7 @@ fn spam_attack_results_in_negative_reputation_change() {
let peer_a = PeerId::random();
let hash_b = Hash::repeat_byte(0xBB);
let _ = test_harness(State::default(), |mut virtual_overseer| async move {
let _ = test_harness(state_without_reputation_delay(), |mut virtual_overseer| async move {
let overseer = &mut virtual_overseer;
let peer = &peer_a;
setup_peer_with_view(overseer, peer, view![]).await;
@@ -469,7 +556,7 @@ fn peer_sending_us_the_same_we_just_sent_them_is_ok() {
let peer_a = PeerId::random();
let hash = Hash::repeat_byte(0xAA);
let _ = test_harness(State::default(), |mut virtual_overseer| async move {
let _ = test_harness(state_without_reputation_delay(), |mut virtual_overseer| async move {
let overseer = &mut virtual_overseer;
let peer = &peer_a;
setup_peer_with_view(overseer, peer, view![]).await;
@@ -545,7 +632,7 @@ fn import_approval_happy_path() {
let parent_hash = Hash::repeat_byte(0xFF);
let hash = Hash::repeat_byte(0xAA);
let _ = test_harness(State::default(), |mut virtual_overseer| async move {
let _ = test_harness(state_without_reputation_delay(), |mut virtual_overseer| async move {
let overseer = &mut virtual_overseer;
// setup peers
setup_peer_with_view(overseer, &peer_a, view![]).await;
@@ -633,7 +720,7 @@ fn import_approval_bad() {
let parent_hash = Hash::repeat_byte(0xFF);
let hash = Hash::repeat_byte(0xAA);
let _ = test_harness(State::default(), |mut virtual_overseer| async move {
let _ = test_harness(state_without_reputation_delay(), |mut virtual_overseer| async move {
let overseer = &mut virtual_overseer;
// setup peers
setup_peer_with_view(overseer, &peer_a, view![]).await;
@@ -942,7 +1029,7 @@ fn import_remotely_then_locally() {
let hash = Hash::repeat_byte(0xAA);
let peer = &peer_a;
let _ = test_harness(State::default(), |mut virtual_overseer| async move {
let _ = test_harness(state_without_reputation_delay(), |mut virtual_overseer| async move {
let overseer = &mut virtual_overseer;
// setup the peer
setup_peer_with_view(overseer, peer, view![hash]).await;
@@ -1114,7 +1201,7 @@ fn race_condition_in_local_vs_remote_view_update() {
let peer_a = PeerId::random();
let hash_b = Hash::repeat_byte(0xBB);
let _ = test_harness(State::default(), |mut virtual_overseer| async move {
let _ = test_harness(state_without_reputation_delay(), |mut virtual_overseer| async move {
let overseer = &mut virtual_overseer;
let peer = &peer_a;
@@ -1294,7 +1381,7 @@ fn propagates_assignments_along_unshared_dimension() {
let peers = make_peers_and_authority_ids(100);
let _ = test_harness(State::default(), |mut virtual_overseer| async move {
let _ = test_harness(state_without_reputation_delay(), |mut virtual_overseer| async move {
let overseer = &mut virtual_overseer;
// Connect all peers.
@@ -1883,7 +1970,7 @@ fn non_originator_aggression_l1() {
let peers = make_peers_and_authority_ids(100);
let mut state = State::default();
let mut state = state_without_reputation_delay();
state.aggression_config.resend_unfinalized_period = None;
let aggression_l1_threshold = state.aggression_config.l1_threshold.clone().unwrap();
@@ -1987,7 +2074,7 @@ fn non_originator_aggression_l2() {
let peers = make_peers_and_authority_ids(100);
let mut state = State::default();
let mut state = state_without_reputation_delay();
state.aggression_config.resend_unfinalized_period = None;
let aggression_l1_threshold = state.aggression_config.l1_threshold.clone().unwrap();
@@ -2154,7 +2241,7 @@ fn resends_messages_periodically() {
let peers = make_peers_and_authority_ids(100);
let mut state = State::default();
let mut state = state_without_reputation_delay();
state.aggression_config.l1_threshold = None;
state.aggression_config.l2_threshold = None;
state.aggression_config.resend_unfinalized_period = Some(2);
@@ -2298,7 +2385,8 @@ fn batch_test_round(message_count: usize) {
let subsystem = ApprovalDistribution::new(Default::default());
let mut rng = rand_chacha::ChaCha12Rng::seed_from_u64(12345);
let mut sender = context.sender().clone();
let subsystem = subsystem.run_inner(context, &mut state, &mut rng);
let subsystem =
subsystem.run_inner(context, &mut state, REPUTATION_CHANGE_TEST_INTERVAL, &mut rng);
let test_fut = async move {
let overseer = &mut virtual_overseer;