mirror of
https://github.com/pezkuwichain/pezkuwi-subxt.git
synced 2026-06-22 05:41:03 +00:00
Dispute distribution implementation (#3282)
* Dispute protocol. * Dispute distribution protocol. * Get network requests routed. * WIP: Basic dispute sender logic. * Basic validator determination logic. * WIP: Getting things to typecheck. * Slightly larger timeout. * More typechecking stuff. * Cleanup. * Finished most of the sending logic. * Handle active leaves updates - Cleanup dead disputes - Update sends for new sessions - Retry on errors * Pass sessions in already. * Startup dispute sending. * Provide incoming decoding facilities and use them in statement-distribution. * Relaxed runtime util requirements. We only need a `SubsystemSender` not a full `SubsystemContext`. * Better usability of incoming requests. Make it possible to consume stuff without clones. * Add basic receiver functionality. * Cleanup + fixes for sender. * One more sender fix. * Start receiver. * Make sure to send responses back. * WIP: Exposed authority discovery * Make tests pass. * Fully featured receiver. * Decrease cost of `NotAValidator`. * Make `RuntimeInfo` LRU cache size configurable. * Cache more sessions. * Fix collator protocol. * Disable metrics for now. * Make dispute-distribution a proper subsystem. * Fix naming. * Code style fixes. * Factored out 4x copied mock function. * WIP: Tests. * Whitespace cleanup. * Accessor functions. * More testing. * More Debug instances. * Fix busy loop. * Working tests. * More tests. * Cleanup. * Fix build. * Basic receiving test. * Non validator message gets dropped. * More receiving tests. * Test nested and subsequent imports. * Fix spaces. * Better formatted imports. * Import cleanup. * Metrics. * Message -> MuxedMessage * Message -> MuxedMessage * More review remarks. * Add missing metrics.rs. * Fix flaky test. * Dispute coordinator - deliver confirmations. * Send out `DisputeMessage` on issue local statement. * Unwire dispute distribution. * Review remarks. * Review remarks. * Better docs.
This commit is contained in:
@@ -0,0 +1,110 @@
|
||||
// Copyright 2021 Parity Technologies (UK) Ltd.
|
||||
// This file is part of Polkadot.
|
||||
|
||||
// Polkadot is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Polkadot is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
|
||||
//
|
||||
|
||||
//! Error handling related code and Error/Result definitions.
|
||||
|
||||
use thiserror::Error;
|
||||
|
||||
use polkadot_node_network_protocol::PeerId;
|
||||
use polkadot_node_network_protocol::request_response::request::ReceiveError;
|
||||
use polkadot_node_subsystem_util::{Fault, runtime, unwrap_non_fatal};
|
||||
|
||||
use crate::LOG_TARGET;
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
#[error(transparent)]
|
||||
pub struct Error(pub Fault<NonFatal, Fatal>);
|
||||
|
||||
impl From<NonFatal> for Error {
|
||||
fn from(e: NonFatal) -> Self {
|
||||
Self(Fault::from_non_fatal(e))
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Fatal> for Error {
|
||||
fn from(f: Fatal) -> Self {
|
||||
Self(Fault::from_fatal(f))
|
||||
}
|
||||
}
|
||||
|
||||
impl From<runtime::Error> for Error {
|
||||
fn from(o: runtime::Error) -> Self {
|
||||
Self(Fault::from_other(o))
|
||||
}
|
||||
}
|
||||
|
||||
/// Fatal errors of this subsystem.
|
||||
#[derive(Debug, Error)]
|
||||
pub enum Fatal {
|
||||
/// Request channel returned `None`. Likely a system shutdown.
|
||||
#[error("Request channel stream finished.")]
|
||||
RequestChannelFinished,
|
||||
|
||||
/// Errors coming from runtime::Runtime.
|
||||
#[error("Error while accessing runtime information")]
|
||||
Runtime(#[from] #[source] runtime::Fatal),
|
||||
}
|
||||
|
||||
/// Non-fatal errors of this subsystem.
|
||||
#[derive(Debug, Error)]
|
||||
pub enum NonFatal {
|
||||
/// Answering request failed.
|
||||
#[error("Sending back response to peer {0} failed.")]
|
||||
SendResponse(PeerId),
|
||||
|
||||
/// Getting request from raw request failed.
|
||||
#[error("Decoding request failed.")]
|
||||
FromRawRequest(#[source] ReceiveError),
|
||||
|
||||
/// Setting reputation for peer failed.
|
||||
#[error("Changing peer's ({0}) reputation failed.")]
|
||||
SetPeerReputation(PeerId),
|
||||
|
||||
/// Peer sent us request with invalid signature.
|
||||
#[error("Dispute request with invalid signatures, from peer {0}.")]
|
||||
InvalidSignature(PeerId),
|
||||
|
||||
/// Import oneshot got canceled.
|
||||
#[error("Import of dispute got canceled for peer {0} - import failed for some reason.")]
|
||||
ImportCanceled(PeerId),
|
||||
|
||||
/// Non validator tried to participate in dispute.
|
||||
#[error("Peer {0} is not a validator.")]
|
||||
NotAValidator(PeerId),
|
||||
|
||||
/// Errors coming from runtime::Runtime.
|
||||
#[error("Error while accessing runtime information")]
|
||||
Runtime(#[from] #[source] runtime::NonFatal),
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
|
||||
pub type FatalResult<T> = std::result::Result<T, Fatal>;
|
||||
pub type NonFatalResult<T> = std::result::Result<T, NonFatal>;
|
||||
|
||||
/// Utility for eating top level errors and log them.
|
||||
///
|
||||
/// We basically always want to try and continue on error. This utility function is meant to
|
||||
/// consume top-level errors by simply logging them
|
||||
pub fn log_error(result: Result<()>)
|
||||
-> std::result::Result<(), Fatal>
|
||||
{
|
||||
if let Some(error) = unwrap_non_fatal(result.map_err(|e| e.0))? {
|
||||
tracing::warn!(target: LOG_TARGET, error = ?error);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
@@ -0,0 +1,429 @@
|
||||
// Copyright 2021 Parity Technologies (UK) Ltd.
|
||||
// This file is part of Polkadot.
|
||||
|
||||
// Polkadot is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Polkadot is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
use std::collections::HashSet;
|
||||
use std::pin::Pin;
|
||||
use std::task::{Context, Poll};
|
||||
|
||||
use futures::FutureExt;
|
||||
use futures::Stream;
|
||||
use futures::future::{BoxFuture, poll_fn};
|
||||
use futures::stream::FusedStream;
|
||||
use lru::LruCache;
|
||||
use futures::{channel::mpsc, channel::oneshot, stream::StreamExt, stream::FuturesUnordered};
|
||||
|
||||
use polkadot_node_network_protocol::{
|
||||
PeerId,
|
||||
UnifiedReputationChange as Rep,
|
||||
authority_discovery::AuthorityDiscovery,
|
||||
request_response::{
|
||||
IncomingRequest,
|
||||
request::OutgoingResponse,
|
||||
request::OutgoingResponseSender,
|
||||
v1::DisputeRequest,
|
||||
v1::DisputeResponse,
|
||||
},
|
||||
};
|
||||
use polkadot_node_primitives::DISPUTE_WINDOW;
|
||||
use polkadot_node_subsystem_util::{
|
||||
runtime,
|
||||
runtime::RuntimeInfo,
|
||||
};
|
||||
use polkadot_subsystem::{
|
||||
SubsystemSender,
|
||||
messages::{
|
||||
AllMessages, DisputeCoordinatorMessage, ImportStatementsResult,
|
||||
},
|
||||
};
|
||||
|
||||
use crate::metrics::{FAILED, SUCCEEDED};
|
||||
use crate::{LOG_TARGET, Metrics};
|
||||
|
||||
mod error;
|
||||
use self::error::{log_error, FatalResult, NonFatalResult, NonFatal, Fatal, Result};
|
||||
|
||||
const COST_INVALID_REQUEST: Rep = Rep::CostMajor("Received message could not be decoded.");
|
||||
const COST_INVALID_SIGNATURE: Rep = Rep::Malicious("Signatures were invalid.");
|
||||
const COST_INVALID_CANDIDATE: Rep = Rep::Malicious("Reported candidate was not available.");
|
||||
const COST_NOT_A_VALIDATOR: Rep = Rep::CostMajor("Reporting peer was not a validator.");
|
||||
|
||||
/// How many statement imports we want to issue in parallel:
|
||||
pub const MAX_PARALLEL_IMPORTS: usize = 10;
|
||||
|
||||
/// State for handling incoming `DisputeRequest` messages.
|
||||
///
|
||||
/// This is supposed to run as its own task in order to easily impose back pressure on the incoming
|
||||
/// request channel and at the same time to drop flood messages as fast as possible.
|
||||
pub struct DisputesReceiver<Sender, AD> {
|
||||
/// Access to session information.
|
||||
runtime: RuntimeInfo,
|
||||
|
||||
/// Subsystem sender for communication with other subsystems.
|
||||
sender: Sender,
|
||||
|
||||
/// Channel to retrieve incoming requests from.
|
||||
receiver: mpsc::Receiver<sc_network::config::IncomingRequest>,
|
||||
|
||||
/// Authority discovery service:
|
||||
authority_discovery: AD,
|
||||
|
||||
/// Imports currently being processed.
|
||||
pending_imports: PendingImports,
|
||||
|
||||
/// We keep record of the last banned peers.
|
||||
///
|
||||
/// This is needed because once we ban a peer, we will very likely still have pending requests
|
||||
/// in the incoming channel - we should not waste time recovering availability for those, as we
|
||||
/// already know the peer is malicious.
|
||||
banned_peers: LruCache<PeerId, ()>,
|
||||
|
||||
/// Log received requests.
|
||||
metrics: Metrics,
|
||||
}
|
||||
|
||||
/// Messages as handled by this receiver internally.
|
||||
enum MuxedMessage {
|
||||
/// An import got confirmed by the coordinator.
|
||||
///
|
||||
/// We need to handle those for two reasons:
|
||||
///
|
||||
/// - We need to make sure responses are actually sent (therefore we need to await futures
|
||||
/// promptly).
|
||||
/// - We need to update banned_peers accordingly to the result.
|
||||
ConfirmedImport(NonFatalResult<(PeerId, ImportStatementsResult)>),
|
||||
|
||||
/// A new request has arrived and should be handled.
|
||||
NewRequest(sc_network::config::IncomingRequest),
|
||||
}
|
||||
|
||||
impl MuxedMessage {
|
||||
async fn receive(
|
||||
pending_imports: &mut PendingImports,
|
||||
pending_requests: &mut mpsc::Receiver<sc_network::config::IncomingRequest>,
|
||||
) -> FatalResult<MuxedMessage> {
|
||||
poll_fn(|ctx| {
|
||||
if let Poll::Ready(v) = pending_requests.poll_next_unpin(ctx) {
|
||||
let r = match v {
|
||||
None => Err(Fatal::RequestChannelFinished),
|
||||
Some(msg) => Ok(MuxedMessage::NewRequest(msg)),
|
||||
};
|
||||
return Poll::Ready(r)
|
||||
}
|
||||
// In case of Ready(None) return `Pending` below - we want to wait for the next request
|
||||
// in that case.
|
||||
if let Poll::Ready(Some(v)) = pending_imports.poll_next_unpin(ctx) {
|
||||
return Poll::Ready(Ok(MuxedMessage::ConfirmedImport(v)))
|
||||
}
|
||||
Poll::Pending
|
||||
}).await
|
||||
}
|
||||
}
|
||||
|
||||
impl<Sender: SubsystemSender, AD> DisputesReceiver<Sender, AD>
|
||||
where
|
||||
AD: AuthorityDiscovery,
|
||||
{
|
||||
/// Create a new receiver which can be `run`.
|
||||
pub fn new(
|
||||
sender: Sender,
|
||||
receiver: mpsc::Receiver<sc_network::config::IncomingRequest>,
|
||||
authority_discovery: AD,
|
||||
metrics: Metrics,
|
||||
) -> Self {
|
||||
let runtime = RuntimeInfo::new_with_config(runtime::Config {
|
||||
keystore: None,
|
||||
session_cache_lru_size: DISPUTE_WINDOW as usize,
|
||||
});
|
||||
Self {
|
||||
runtime,
|
||||
sender,
|
||||
receiver,
|
||||
authority_discovery,
|
||||
pending_imports: PendingImports::new(),
|
||||
// Size of MAX_PARALLEL_IMPORTS ensures we are going to immediately get rid of any
|
||||
// malicious requests still pending in the incoming queue.
|
||||
banned_peers: LruCache::new(MAX_PARALLEL_IMPORTS),
|
||||
metrics,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get that receiver started.
|
||||
///
|
||||
/// This is an endless loop and should be spawned into its own task.
|
||||
pub async fn run(mut self) {
|
||||
loop {
|
||||
match log_error(self.run_inner().await) {
|
||||
Ok(()) => {}
|
||||
Err(Fatal::RequestChannelFinished) => {
|
||||
tracing::debug!(
|
||||
target: LOG_TARGET,
|
||||
"Incoming request stream exhausted - shutting down?"
|
||||
);
|
||||
return
|
||||
}
|
||||
Err(err) => {
|
||||
tracing::warn!(
|
||||
target: LOG_TARGET,
|
||||
?err,
|
||||
"Dispute receiver died."
|
||||
);
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Actual work happening here.
|
||||
async fn run_inner(&mut self) -> Result<()> {
|
||||
|
||||
let msg = MuxedMessage::receive(
|
||||
&mut self.pending_imports,
|
||||
&mut self.receiver
|
||||
)
|
||||
.await?;
|
||||
|
||||
let raw = match msg {
|
||||
// We need to clean up futures, to make sure responses are sent:
|
||||
MuxedMessage::ConfirmedImport(m_bad) => {
|
||||
self.ban_bad_peer(m_bad)?;
|
||||
return Ok(())
|
||||
}
|
||||
MuxedMessage::NewRequest(req) => req,
|
||||
};
|
||||
|
||||
self.metrics.on_received_request();
|
||||
|
||||
let peer = raw.peer;
|
||||
|
||||
// Only accept messages from validators:
|
||||
if self.authority_discovery.get_authority_id_by_peer_id(raw.peer).await.is_none() {
|
||||
raw.pending_response.send(
|
||||
sc_network::config::OutgoingResponse {
|
||||
result: Err(()),
|
||||
reputation_changes: vec![COST_NOT_A_VALIDATOR.into_base_rep()],
|
||||
sent_feedback: None,
|
||||
}
|
||||
)
|
||||
.map_err(|_| NonFatal::SendResponse(peer))?;
|
||||
|
||||
return Err(NonFatal::NotAValidator(peer).into())
|
||||
}
|
||||
|
||||
let incoming = IncomingRequest::<DisputeRequest>::try_from_raw(
|
||||
raw,
|
||||
vec![COST_INVALID_REQUEST]
|
||||
)
|
||||
.map_err(NonFatal::FromRawRequest)?;
|
||||
|
||||
// Immediately drop requests from peers that already have requests in flight or have
|
||||
// been banned recently (flood protection):
|
||||
if self.pending_imports.peer_is_pending(&peer) || self.banned_peers.contains(&peer) {
|
||||
tracing::trace!(
|
||||
target: LOG_TARGET,
|
||||
?peer,
|
||||
"Dropping message from peer (banned/pending import)"
|
||||
);
|
||||
return Ok(())
|
||||
}
|
||||
|
||||
// Wait for a free slot:
|
||||
if self.pending_imports.len() >= MAX_PARALLEL_IMPORTS as usize {
|
||||
// Wait for one to finish:
|
||||
let r = self.pending_imports.next().await;
|
||||
self.ban_bad_peer(r.expect("pending_imports.len() is greater 0. qed."))?;
|
||||
}
|
||||
|
||||
// All good - initiate import.
|
||||
self.start_import(incoming).await
|
||||
}
|
||||
|
||||
/// Start importing votes for the given request.
|
||||
async fn start_import(
|
||||
&mut self,
|
||||
incoming: IncomingRequest<DisputeRequest>,
|
||||
) -> Result<()> {
|
||||
|
||||
let IncomingRequest {
|
||||
peer, payload, pending_response,
|
||||
} = incoming;
|
||||
|
||||
let info = self.runtime.get_session_info_by_index(
|
||||
&mut self.sender,
|
||||
payload.0.candidate_receipt.descriptor.relay_parent,
|
||||
payload.0.session_index
|
||||
)
|
||||
.await?;
|
||||
|
||||
let votes_result = payload.0.try_into_signed_votes(&info.session_info);
|
||||
|
||||
let (candidate_receipt, valid_vote, invalid_vote) = match votes_result {
|
||||
Err(()) => { // Signature invalid:
|
||||
pending_response.send_outgoing_response(
|
||||
OutgoingResponse {
|
||||
result: Err(()),
|
||||
reputation_changes: vec![COST_INVALID_SIGNATURE],
|
||||
sent_feedback: None,
|
||||
}
|
||||
)
|
||||
.map_err(|_| NonFatal::SetPeerReputation(peer))?;
|
||||
|
||||
return Err(From::from(NonFatal::InvalidSignature(peer)))
|
||||
}
|
||||
Ok(votes) => votes,
|
||||
};
|
||||
|
||||
let (pending_confirmation, confirmation_rx) = oneshot::channel();
|
||||
let candidate_hash = candidate_receipt.hash();
|
||||
self.sender.send_message(
|
||||
AllMessages::DisputeCoordinator(
|
||||
DisputeCoordinatorMessage::ImportStatements {
|
||||
candidate_hash,
|
||||
candidate_receipt,
|
||||
session: valid_vote.0.session_index(),
|
||||
statements: vec![valid_vote, invalid_vote],
|
||||
pending_confirmation,
|
||||
}
|
||||
)
|
||||
)
|
||||
.await;
|
||||
|
||||
self.pending_imports.push(peer, confirmation_rx, pending_response);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Await an import and ban any misbehaving peers.
|
||||
///
|
||||
/// In addition we report import metrics.
|
||||
fn ban_bad_peer(
|
||||
&mut self,
|
||||
result: NonFatalResult<(PeerId, ImportStatementsResult)>
|
||||
) -> NonFatalResult<()> {
|
||||
match result? {
|
||||
(_, ImportStatementsResult::ValidImport) => {
|
||||
self.metrics.on_imported(SUCCEEDED);
|
||||
}
|
||||
(bad_peer, ImportStatementsResult::InvalidImport) => {
|
||||
self.metrics.on_imported(FAILED);
|
||||
self.banned_peers.put(bad_peer, ());
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Manage pending imports in a way that preserves invariants.
|
||||
struct PendingImports {
|
||||
/// Futures in flight.
|
||||
futures: FuturesUnordered<BoxFuture<'static, (PeerId, NonFatalResult<ImportStatementsResult>)>>,
|
||||
/// Peers whose requests are currently in flight.
|
||||
peers: HashSet<PeerId>,
|
||||
}
|
||||
|
||||
impl PendingImports {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
futures: FuturesUnordered::new(),
|
||||
peers: HashSet::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn push(
|
||||
&mut self,
|
||||
peer: PeerId,
|
||||
handled: oneshot::Receiver<ImportStatementsResult>,
|
||||
pending_response: OutgoingResponseSender<DisputeRequest>
|
||||
) {
|
||||
self.peers.insert(peer);
|
||||
self.futures.push(
|
||||
async move {
|
||||
let r = respond_to_request(peer, handled, pending_response).await;
|
||||
(peer, r)
|
||||
}.boxed()
|
||||
)
|
||||
}
|
||||
|
||||
/// Returns the number of contained futures.
|
||||
pub fn len(&self) -> usize {
|
||||
self.futures.len()
|
||||
}
|
||||
|
||||
/// Check whether a peer has a pending import.
|
||||
pub fn peer_is_pending(&self, peer: &PeerId) -> bool {
|
||||
self.peers.contains(peer)
|
||||
}
|
||||
}
|
||||
|
||||
impl Stream for PendingImports {
|
||||
type Item = NonFatalResult<(PeerId, ImportStatementsResult)>;
|
||||
fn poll_next(
|
||||
mut self: Pin<&mut Self>,
|
||||
ctx: &mut Context<'_>
|
||||
) -> Poll<Option<Self::Item>> {
|
||||
match Pin::new(&mut self.futures).poll_next(ctx) {
|
||||
Poll::Pending => Poll::Pending,
|
||||
Poll::Ready(None) => Poll::Ready(None),
|
||||
Poll::Ready(Some((peer, result))) => {
|
||||
self.peers.remove(&peer);
|
||||
Poll::Ready(Some(result.map(|r| (peer,r))))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
impl FusedStream for PendingImports {
|
||||
fn is_terminated(&self) -> bool {
|
||||
self.futures.is_terminated()
|
||||
}
|
||||
}
|
||||
|
||||
// Future for `PendingImports`
|
||||
//
|
||||
// - Wait for import
|
||||
// - Punish peer
|
||||
// - Deliver result
|
||||
async fn respond_to_request(
|
||||
peer: PeerId,
|
||||
handled: oneshot::Receiver<ImportStatementsResult>,
|
||||
pending_response: OutgoingResponseSender<DisputeRequest>
|
||||
) -> NonFatalResult<ImportStatementsResult> {
|
||||
|
||||
let result = handled
|
||||
.await
|
||||
.map_err(|_| NonFatal::ImportCanceled(peer))?
|
||||
;
|
||||
|
||||
let response = match result {
|
||||
ImportStatementsResult::ValidImport =>
|
||||
OutgoingResponse {
|
||||
result: Ok(DisputeResponse::Confirmed),
|
||||
reputation_changes: Vec::new(),
|
||||
sent_feedback: None,
|
||||
},
|
||||
ImportStatementsResult::InvalidImport =>
|
||||
OutgoingResponse {
|
||||
result: Err(()),
|
||||
reputation_changes: vec![COST_INVALID_CANDIDATE],
|
||||
sent_feedback: None,
|
||||
},
|
||||
};
|
||||
|
||||
pending_response
|
||||
.send_outgoing_response(response)
|
||||
.map_err(|_| NonFatal::SendResponse(peer))?;
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
Reference in New Issue
Block a user