Delay reputation updates (#7214)

* Add futures-timer

* Make cost_or_benefit public

* Update ReportPeer message format

* Add delay to reputation updates (dirtywork)

* Update ReputationAggregator

* Update tests

* Fix flaky tests

* Move reputation to state

* Use the main loop for handling reputation sendings

* Update

* Move reputation to utils

* Update reputation sending

* Fix arguments order

* Update state

* Remove new from state

* Add constant

* Add failing test for delay

* Change mocking approach

* Fix type errors

* Fix comments

* Add message handling to select

* Fix bitfields-distribution tests

* Add docs to reputation aggregator

* Replace .into_base_rep

* Use one REPUTATION_CHANGE_INTERVAL by default

* Add reputation change to statement-distribution

* Update polkadot-availability-bitfield-distribution

* Update futures selecting in subsystems

* Update reputation adding

* Send malicious changes right away without adding to state

* Add reputation to StatementDistributionSubsystem

* Handle reputation in statement distribution

* Add delay test for polkadot-statement-distribution

* Fix collator-protocol tests before applying reputation delay

* Remove into_base_rep

* Add reputation to State

* Fix failed tests

* Add reputation delay

* Update tests

* Add batched network message for peer reporting

* Update approval-distribution tests

* Update bitfield-distribution tests

* Update statement-distribution tests

* Update collator-protocol tests

* Remove levels in matching

* Address clippy errors

* Fix overseer test

* Add a metric for original count of rep changes

* Update Reputation

* Revert "Add a metric for original count of rep changes"

This reverts commit 6c9b0c1ec34491d16e562bdcba8db6b9dcf484db.

* Update node/subsystem-util/src/reputation.rs

Co-authored-by: Vsevolod Stakhov <vsevolod.stakhov@parity.io>

* Remove redundant vec

---------

Co-authored-by: Vsevolod Stakhov <vsevolod.stakhov@parity.io>
This commit is contained in:
Andrei Eres
2023-06-15 15:46:06 +02:00
committed by GitHub
parent d3d9d4ae66
commit 0a1bc654d9
27 changed files with 2231 additions and 805 deletions
@@ -55,12 +55,19 @@ use futures::{
channel::{mpsc, oneshot},
future::RemoteHandle,
prelude::*,
select,
};
use indexmap::{map::Entry as IEntry, IndexMap};
use sp_keystore::KeystorePtr;
use util::runtime::RuntimeInfo;
use util::{
reputation::{ReputationAggregator, REPUTATION_CHANGE_INTERVAL},
runtime::RuntimeInfo,
};
use std::collections::{hash_map::Entry, HashMap, HashSet, VecDeque};
use std::{
collections::{hash_map::Entry, HashMap, HashSet, VecDeque},
time::Duration,
};
use fatality::Nested;
@@ -126,6 +133,8 @@ pub struct StatementDistributionSubsystem<R> {
metrics: Metrics,
/// Pseudo-random generator for peers selection logic
rng: R,
/// Aggregated reputation change
reputation: ReputationAggregator,
}
#[overseer::subsystem(StatementDistribution, error=SubsystemError, prefix=self::overseer)]
@@ -1167,12 +1176,14 @@ async fn send_statements<Context>(
}
}
async fn report_peer(
/// Modify the reputation of a peer based on its behavior.
async fn modify_reputation(
reputation: &mut ReputationAggregator,
sender: &mut impl overseer::StatementDistributionSenderTrait,
peer: PeerId,
rep: Rep,
) {
sender.send_message(NetworkBridgeTxMessage::ReportPeer(peer, rep)).await
reputation.modify(sender, peer, rep).await;
}
/// If message contains a statement, then retrieve it, otherwise fork task to fetch it.
@@ -1319,6 +1330,7 @@ async fn handle_incoming_message_and_circulate<'a, Context, R>(
metrics: &Metrics,
runtime: &mut RuntimeInfo,
rng: &mut R,
reputation: &mut ReputationAggregator,
) where
R: rand::Rng,
{
@@ -1333,6 +1345,7 @@ async fn handle_incoming_message_and_circulate<'a, Context, R>(
message,
req_sender,
metrics,
reputation,
)
.await,
None => None,
@@ -1397,6 +1410,7 @@ async fn handle_incoming_message<'a, Context>(
message: protocol_v1::StatementDistributionMessage,
req_sender: &mpsc::Sender<RequesterMessage>,
metrics: &Metrics,
reputation: &mut ReputationAggregator,
) -> Option<(Hash, StoredStatement<'a>)> {
let relay_parent = message.get_relay_parent();
let _ = metrics.time_network_bridge_update_v1("handle_incoming_message");
@@ -1411,7 +1425,7 @@ async fn handle_incoming_message<'a, Context>(
);
if !recent_outdated_heads.is_recent_outdated(&relay_parent) {
report_peer(ctx.sender(), peer, COST_UNEXPECTED_STATEMENT).await;
modify_reputation(reputation, ctx.sender(), peer, COST_UNEXPECTED_STATEMENT).await;
}
return None
@@ -1421,7 +1435,7 @@ async fn handle_incoming_message<'a, Context>(
if let protocol_v1::StatementDistributionMessage::LargeStatement(_) = message {
if let Err(rep) = peer_data.receive_large_statement(&relay_parent) {
gum::debug!(target: LOG_TARGET, ?peer, ?message, ?rep, "Unexpected large statement.",);
report_peer(ctx.sender(), peer, rep).await;
modify_reputation(reputation, ctx.sender(), peer, rep).await;
return None
}
}
@@ -1462,16 +1476,16 @@ async fn handle_incoming_message<'a, Context>(
// Report peer merely if this is not a duplicate out-of-view statement that
// was caused by a missing Seconded statement from this peer
if unexpected_count == 0_usize {
report_peer(ctx.sender(), peer, rep).await;
modify_reputation(reputation, ctx.sender(), peer, rep).await;
}
},
// This happens when we have an unexpected remote peer that announced Seconded
COST_UNEXPECTED_STATEMENT_REMOTE => {
metrics.on_unexpected_statement_seconded();
report_peer(ctx.sender(), peer, rep).await;
modify_reputation(reputation, ctx.sender(), peer, rep).await;
},
_ => {
report_peer(ctx.sender(), peer, rep).await;
modify_reputation(reputation, ctx.sender(), peer, rep).await;
},
}
@@ -1492,7 +1506,7 @@ async fn handle_incoming_message<'a, Context>(
peer_data
.receive(&relay_parent, &fingerprint, max_message_count)
.expect("checked in `check_can_receive` above; qed");
report_peer(ctx.sender(), peer, BENEFIT_VALID_STATEMENT).await;
modify_reputation(reputation, ctx.sender(), peer, BENEFIT_VALID_STATEMENT).await;
return None
},
@@ -1502,7 +1516,7 @@ async fn handle_incoming_message<'a, Context>(
match check_statement_signature(&active_head, relay_parent, unchecked_compact) {
Err(statement) => {
gum::debug!(target: LOG_TARGET, ?peer, ?statement, "Invalid statement signature");
report_peer(ctx.sender(), peer, COST_INVALID_SIGNATURE).await;
modify_reputation(reputation, ctx.sender(), peer, COST_INVALID_SIGNATURE).await;
return None
},
Ok(statement) => statement,
@@ -1528,7 +1542,7 @@ async fn handle_incoming_message<'a, Context>(
is_large_statement,
"Full statement had bad payload."
);
report_peer(ctx.sender(), peer, COST_WRONG_HASH).await;
modify_reputation(reputation, ctx.sender(), peer, COST_WRONG_HASH).await;
return None
},
Ok(statement) => statement,
@@ -1567,7 +1581,7 @@ async fn handle_incoming_message<'a, Context>(
unreachable!("checked in `is_useful_or_unknown` above; qed");
},
NotedStatement::Fresh(statement) => {
report_peer(ctx.sender(), peer, BENEFIT_VALID_STATEMENT_FIRST).await;
modify_reputation(reputation, ctx.sender(), peer, BENEFIT_VALID_STATEMENT_FIRST).await;
let mut _span = handle_incoming_span.child("notify-backing");
@@ -1641,6 +1655,7 @@ async fn handle_network_update<Context, R>(
metrics: &Metrics,
runtime: &mut RuntimeInfo,
rng: &mut R,
reputation: &mut ReputationAggregator,
) where
R: rand::Rng,
{
@@ -1713,6 +1728,7 @@ async fn handle_network_update<Context, R>(
metrics,
runtime,
rng,
reputation,
)
.await;
},
@@ -1750,10 +1766,27 @@ impl<R: rand::Rng> StatementDistributionSubsystem<R> {
metrics: Metrics,
rng: R,
) -> Self {
Self { keystore, req_receiver: Some(req_receiver), metrics, rng }
Self {
keystore,
req_receiver: Some(req_receiver),
metrics,
rng,
reputation: Default::default(),
}
}
async fn run<Context>(mut self, mut ctx: Context) -> std::result::Result<(), FatalError> {
async fn run<Context>(self, ctx: Context) -> std::result::Result<(), FatalError> {
self.run_inner(ctx, REPUTATION_CHANGE_INTERVAL).await
}
async fn run_inner<Context>(
mut self,
mut ctx: Context,
reputation_interval: Duration,
) -> std::result::Result<(), FatalError> {
let new_reputation_delay = || futures_timer::Delay::new(reputation_interval).fuse();
let mut reputation_delay = new_reputation_delay();
let mut peers: HashMap<PeerId, PeerData> = HashMap::new();
let mut topology_storage: SessionBoundGridTopologyStorage = Default::default();
let mut authorities: HashMap<AuthorityDiscoveryId, PeerId> = HashMap::new();
@@ -1778,55 +1811,61 @@ impl<R: rand::Rng> StatementDistributionSubsystem<R> {
.map_err(FatalError::SpawnTask)?;
loop {
let message =
MuxedMessage::receive(&mut ctx, &mut req_receiver, &mut res_receiver).await;
match message {
MuxedMessage::Subsystem(result) => {
let result = self
.handle_subsystem_message(
&mut ctx,
&mut runtime,
&mut peers,
&mut topology_storage,
&mut authorities,
&mut active_heads,
&mut recent_outdated_heads,
&req_sender,
result?,
)
.await;
match result.into_nested()? {
Ok(true) => break,
Ok(false) => {},
Err(jfyi) => gum::debug!(target: LOG_TARGET, error = ?jfyi),
}
select! {
_ = reputation_delay => {
self.reputation.send(ctx.sender()).await;
reputation_delay = new_reputation_delay();
},
MuxedMessage::Requester(result) => {
let result = self
.handle_requester_message(
&mut ctx,
&topology_storage,
&mut peers,
&mut active_heads,
&recent_outdated_heads,
&req_sender,
&mut runtime,
result.ok_or(FatalError::RequesterReceiverFinished)?,
)
.await;
log_error(result.map_err(From::from), "handle_requester_message")?;
},
MuxedMessage::Responder(result) => {
let result = self
.handle_responder_message(
&peers,
&mut active_heads,
result.ok_or(FatalError::ResponderReceiverFinished)?,
)
.await;
log_error(result.map_err(From::from), "handle_responder_message")?;
},
};
message = MuxedMessage::receive(&mut ctx, &mut req_receiver, &mut res_receiver).fuse() => {
match message {
MuxedMessage::Subsystem(result) => {
let result = self
.handle_subsystem_message(
&mut ctx,
&mut runtime,
&mut peers,
&mut topology_storage,
&mut authorities,
&mut active_heads,
&mut recent_outdated_heads,
&req_sender,
result?,
)
.await;
match result.into_nested()? {
Ok(true) => break,
Ok(false) => {},
Err(jfyi) => gum::debug!(target: LOG_TARGET, error = ?jfyi),
}
},
MuxedMessage::Requester(result) => {
let result = self
.handle_requester_message(
&mut ctx,
&topology_storage,
&mut peers,
&mut active_heads,
&recent_outdated_heads,
&req_sender,
&mut runtime,
result.ok_or(FatalError::RequesterReceiverFinished)?,
)
.await;
log_error(result.map_err(From::from), "handle_requester_message")?;
},
MuxedMessage::Responder(result) => {
let result = self
.handle_responder_message(
&peers,
&mut active_heads,
result.ok_or(FatalError::ResponderReceiverFinished)?,
)
.await;
log_error(result.map_err(From::from), "handle_responder_message")?;
},
};
}
}
}
Ok(())
}
@@ -1890,9 +1929,16 @@ impl<R: rand::Rng> StatementDistributionSubsystem<R> {
bad_peers,
} => {
for bad in bad_peers {
report_peer(ctx.sender(), bad, COST_FETCH_FAIL).await;
modify_reputation(&mut self.reputation, ctx.sender(), bad, COST_FETCH_FAIL)
.await;
}
report_peer(ctx.sender(), from_peer, BENEFIT_VALID_RESPONSE).await;
modify_reputation(
&mut self.reputation,
ctx.sender(),
from_peer,
BENEFIT_VALID_RESPONSE,
)
.await;
let active_head = active_heads
.get_mut(&relay_parent)
@@ -1932,6 +1978,7 @@ impl<R: rand::Rng> StatementDistributionSubsystem<R> {
&self.metrics,
runtime,
&mut self.rng,
&mut self.reputation,
)
.await;
}
@@ -1975,7 +2022,8 @@ impl<R: rand::Rng> StatementDistributionSubsystem<R> {
}
}
},
RequesterMessage::ReportPeer(peer, rep) => report_peer(ctx.sender(), peer, rep).await,
RequesterMessage::ReportPeer(peer, rep) =>
modify_reputation(&mut self.reputation, ctx.sender(), peer, rep).await,
}
Ok(())
}
@@ -2113,6 +2161,7 @@ impl<R: rand::Rng> StatementDistributionSubsystem<R> {
metrics,
runtime,
&mut self.rng,
&mut self.reputation,
)
.await;
},