Dispute distribution implementation (#3282)

* Dispute protocol. * Dispute distribution protocol. * Get network requests routed. * WIP: Basic dispute sender logic. * Basic validator determination logic. * WIP: Getting things to typecheck. * Slightly larger timeout. * More typechecking stuff. * Cleanup. * Finished most of the sending logic. * Handle active leaves updates - Cleanup dead disputes - Update sends for new sessions - Retry on errors * Pass sessions in already. * Startup dispute sending. * Provide incoming decoding facilities and use them in statement-distribution. * Relaxed runtime util requirements. We only need a `SubsystemSender` not a full `SubsystemContext`. * Better usability of incoming requests. Make it possible to consume stuff without clones. * Add basic receiver functionality. * Cleanup + fixes for sender. * One more sender fix. * Start receiver. * Make sure to send responses back. * WIP: Exposed authority discovery * Make tests pass. * Fully featured receiver. * Decrease cost of `NotAValidator`. * Make `RuntimeInfo` LRU cache size configurable. * Cache more sessions. * Fix collator protocol. * Disable metrics for now. * Make dispute-distribution a proper subsystem. * Fix naming. * Code style fixes. * Factored out 4x copied mock function. * WIP: Tests. * Whitespace cleanup. * Accessor functions. * More testing. * More Debug instances. * Fix busy loop. * Working tests. * More tests. * Cleanup. * Fix build. * Basic receiving test. * Non validator message gets dropped. * More receiving tests. * Test nested and subsequent imports. * Fix spaces. * Better formatted imports. * Import cleanup. * Metrics. * Message -> MuxedMessage * Message -> MuxedMessage * More review remarks. * Add missing metrics.rs. * Fix flaky test. * Dispute coordinator - deliver confirmations. * Send out `DisputeMessage` on issue local statement. * Unwire dispute distribution. * Review remarks. * Review remarks. * Better docs.
2026-04-27 13:57:58 +00:00 · 2021-07-09 04:29:53 +02:00
parent 20993b32b1
commit b5257b2407
52 changed files with 4040 additions and 407 deletions
@@ -0,0 +1,48 @@
+// Copyright 2021 Parity Technologies (UK) Ltd.
+// This file is part of Polkadot.
+
+// Polkadot is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Polkadot is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Polkadot.  If not, see <http://www.gnu.org/licenses/>.
+
+//! Authority discovery service interfacing.
+
+use std::fmt::Debug;
+
+use async_trait::async_trait;
+
+use sc_authority_discovery::Service as AuthorityDiscoveryService;
+
+use polkadot_primitives::v1::AuthorityDiscoveryId;
+use sc_network::{Multiaddr, PeerId};
+
+/// An abstraction over the authority discovery service.
+///
+/// Needed for mocking in tests mostly.
+#[async_trait]
+pub trait AuthorityDiscovery: Send + Debug + 'static {
+	/// Get the addresses for the given [`AuthorityDiscoveryId`] from the local address cache.
+	async fn get_addresses_by_authority_id(&mut self, authority: AuthorityDiscoveryId) -> Option<Vec<Multiaddr>>;
+	/// Get the [`AuthorityDiscoveryId`] for the given [`PeerId`] from the local address cache.
+	async fn get_authority_id_by_peer_id(&mut self, peer_id: PeerId) -> Option<AuthorityDiscoveryId>;
+}
+
+// Implementation for the real service: both methods delegate to the identically named
+// functions on `AuthorityDiscoveryService`.
+#[async_trait]
+impl AuthorityDiscovery for AuthorityDiscoveryService {
+	async fn get_addresses_by_authority_id(&mut self, authority: AuthorityDiscoveryId) -> Option<Vec<Multiaddr>> {
+		// NOTE(review): the fully qualified path presumably resolves to the service's own
+		// inherent method rather than this trait method (which would recurse) - confirm.
+		AuthorityDiscoveryService::get_addresses_by_authority_id(self, authority).await
+	}
+
+	async fn get_authority_id_by_peer_id(&mut self, peer_id: PeerId) -> Option<AuthorityDiscoveryId> {
+		// Same delegation as above, for the reverse lookup by `PeerId`.
+		AuthorityDiscoveryService::get_authority_id_by_peer_id(self, peer_id).await
+	}
+}
@@ -38,6 +38,9 @@ pub mod peer_set;
 /// Request/response protocols used in Polkadot.
 pub mod request_response;

+/// Access to the authority discovery service.
+pub mod authority_discovery;
+
 /// A version of the protocol.
 pub type ProtocolVersion = u32;
 /// The minimum amount of peers to send gossip messages to.
@@ -66,6 +66,8 @@ pub enum Protocol {
 	AvailableDataFetching,
 	/// Fetching of statements that are too large for gossip.
 	StatementFetching,
+	/// Sending of dispute statements with application level confirmations.
+	DisputeSending,
 }


@@ -98,7 +100,7 @@ const STATEMENTS_TIMEOUT: Duration = Duration::from_secs(1);
 /// We don't want a slow peer to slow down all the others, at the same time we want to get out the
 /// data quickly in full to at least some peers (as this will reduce load on us as they then can
 /// start serving the data). So this value is a tradeoff. 3 seems to be sensible. So we would need
-/// to have 3 slow noded connected, to delay transfer for others by `STATEMENTS_TIMEOUT`.
+/// to have 3 slow nodes connected, to delay transfer for others by `STATEMENTS_TIMEOUT`.
 pub const MAX_PARALLEL_STATEMENT_REQUESTS: u32 = 3;

 impl Protocol {
@@ -106,9 +108,6 @@ impl Protocol {
 	///
 	/// Returns a receiver for messages received on this protocol and the requested
 	/// `ProtocolConfig`.
-	///
-	/// See also `dispatcher::RequestDispatcher`,  which makes use of this function and provides a more
-	/// high-level interface.
 	pub fn get_config(
 		self,
 	) -> (
@@ -167,6 +166,17 @@ impl Protocol {
 				request_timeout: Duration::from_secs(1),
 				inbound_queue: Some(tx),
 			},
+			Protocol::DisputeSending => RequestResponseConfig {
+				name: p_name,
+				max_request_size: 1_000,
+				// Responses are just confirmations, in essence not even a bit. So 100 seems
+				// plenty.
+				max_response_size: 100,
+				// We can have relatively large timeouts here, there is no value in hitting a
+				// timeout as we want to get statements through to each node in any case.
+				request_timeout: Duration::from_secs(12),
+				inbound_queue: Some(tx),
+			},
 		};
 		(rx, cfg)
 	}
@@ -195,7 +205,7 @@ impl Protocol {
 				// This is just a guess/estimate, with the following considerations: If we are
 				// faster than that, queue size will stay low anyway, even if not - requesters will
 				// get an immediate error, but if we are slower, requesters will run in a timeout -
-				// waisting precious time.
+				// wasting precious time.
 				let available_bandwidth = 7 * MIN_BANDWIDTH_BYTES / 10;
 				let size = u64::saturating_sub(
 					STATEMENTS_TIMEOUT.as_millis() as u64 * available_bandwidth / (1000 * MAX_CODE_SIZE as u64),
@@ -207,6 +217,10 @@ impl Protocol {
 				);
 				size as usize
 			}
+			// Incoming requests can get bursty, we should also be able to handle them fast on
+			// average, so something in the ballpark of 100 should be fine. Nodes will retry on
+			// failure, so having a good value here is mostly about performance tuning.
+			Protocol::DisputeSending => 100,
 		}
 	}

@@ -223,6 +237,7 @@ impl Protocol {
 			Protocol::PoVFetching => "/polkadot/req_pov/1",
 			Protocol::AvailableDataFetching => "/polkadot/req_available_data/1",
 			Protocol::StatementFetching => "/polkadot/req_statement/1",
+			Protocol::DisputeSending => "/polkadot/send_dispute/1",
 		}
 	}
 }
@@ -14,6 +14,8 @@
 // You should have received a copy of the GNU General Public License
 // along with Polkadot.  If not, see <http://www.gnu.org/licenses/>.

+use std::marker::PhantomData;
+
 use futures::channel::oneshot;
 use futures::prelude::Future;

@@ -54,6 +56,8 @@ pub enum Requests {
 	AvailableDataFetching(OutgoingRequest<v1::AvailableDataFetchingRequest>),
 	/// Requests for fetching large statements as part of statement distribution.
 	StatementFetching(OutgoingRequest<v1::StatementFetchingRequest>),
+	/// Requests for notifying about an ongoing dispute.
+	DisputeSending(OutgoingRequest<v1::DisputeRequest>),
 }

 impl Requests {
@@ -65,6 +69,7 @@ impl Requests {
 			Self::PoVFetching(_) => Protocol::PoVFetching,
 			Self::AvailableDataFetching(_) => Protocol::AvailableDataFetching,
 			Self::StatementFetching(_) => Protocol::StatementFetching,
+			Self::DisputeSending(_) => Protocol::DisputeSending,
 		}
 	}

@@ -82,12 +87,13 @@ impl Requests {
 			Self::PoVFetching(r) => r.encode_request(),
 			Self::AvailableDataFetching(r) => r.encode_request(),
 			Self::StatementFetching(r) => r.encode_request(),
+			Self::DisputeSending(r) => r.encode_request(),
 		}
 	}
 }

 /// Potential recipients of an outgoing request.
-#[derive(Debug, Eq, Hash, PartialEq)]
+#[derive(Debug, Eq, Hash, PartialEq, Clone)]
 pub enum Recipient {
 	/// Recipient is a regular peer and we know its peer id.
 	Peer(PeerId),
@@ -131,6 +137,18 @@ pub enum RequestError {
 	Canceled(#[source] oneshot::Canceled),
 }

+/// Things that can go wrong when decoding an incoming request.
+///
+/// Both variants carry the peer that sent the request and the underlying decoding error.
+#[derive(Debug, Error)]
+pub enum ReceiveError {
+	/// Decoding failed; the error response carrying the reputation changes was handed to the
+	/// response channel successfully.
+	#[error("Decoding request failed for peer {0}.")]
+	DecodingError(PeerId, #[source] DecodingError),
+
+	/// Decoding failed, and sending the error response (and with it the reputation changes)
+	/// failed as well.
+	#[error("Decoding request failed for peer {0}, and changing reputation failed.")]
+	DecodingErrorNoReputationChange(PeerId, #[source] DecodingError),
+}
+
 /// Responses received for an `OutgoingRequest`.
 pub type OutgoingResult<Res> = Result<Res, RequestError>;

@@ -205,43 +223,22 @@ pub struct IncomingRequest<Req> {
 	pub peer: PeerId,
 	/// The sent request.
 	pub payload: Req,
+	/// Sender for sending response back.
+	pub pending_response: OutgoingResponseSender<Req>,
+}
+
+/// Sender for sending back responses on an `IncomingRequest`.
+#[derive(Debug)]
+pub struct OutgoingResponseSender<Req>{
+	/// Channel on which the untyped response is sent back.
 	pending_response: oneshot::Sender<netconfig::OutgoingResponse>,
+	/// Marker tying this sender to the request type `Req`; no `Req` value is stored.
+	phantom: PhantomData<Req>,
 }

-/// Typed variant of [`netconfig::OutgoingResponse`].
-///
-/// Responses to `IncomingRequest`s.
-pub struct OutgoingResponse<Response> {
-	/// The payload of the response.
-	pub result: Result<Response, ()>,
-
-	/// Reputation changes accrued while handling the request. To be applied to the reputation of
-	/// the peer sending the request.
-	pub reputation_changes: Vec<UnifiedReputationChange>,
-
-	/// If provided, the `oneshot::Sender` will be notified when the request has been sent to the
-	/// peer.
-	pub sent_feedback: Option<oneshot::Sender<()>>,
-}
-
-impl<Req> IncomingRequest<Req>
+impl<Req> OutgoingResponseSender<Req> 
 where
-	Req: IsRequest,
+	Req: IsRequest + Decode,
 	Req::Response: Encode,
 {
-	/// Create new `IncomingRequest`.
-	pub fn new(
-		peer: PeerId,
-		payload: Req,
-		pending_response: oneshot::Sender<netconfig::OutgoingResponse>,
-	) -> Self {
-		Self {
-			peer,
-			payload,
-			pending_response,
-		}
-	}
-
 	/// Send the response back.
 	///
 	/// On success we return Ok(()), on error we return the not sent `Response`.
@@ -284,6 +281,100 @@ where
 	}
 }

+/// Typed variant of [`netconfig::OutgoingResponse`].
+///
+/// Responses to `IncomingRequest`s.
+pub struct OutgoingResponse<Response> {
+	/// The payload of the response.
+	///
+	/// `Err(())` if none is available, e.g. due to an error while handling the request.
+	pub result: Result<Response, ()>,
+
+	/// Reputation changes accrued while handling the request. To be applied to the reputation of
+	/// the peer sending the request.
+	pub reputation_changes: Vec<UnifiedReputationChange>,
+
+	/// If provided, the `oneshot::Sender` will be notified when the request has been sent to the
+	/// peer.
+	pub sent_feedback: Option<oneshot::Sender<()>>,
+}
+
+impl<Req> IncomingRequest<Req>
+where
+	Req: IsRequest + Decode,
+	Req::Response: Encode,
+{
+	/// Create new `IncomingRequest`.
+	///
+	/// The raw `pending_response` channel gets wrapped in an [`OutgoingResponseSender`] typed by
+	/// `Req`.
+	pub fn new(
+		peer: PeerId,
+		payload: Req,
+		pending_response: oneshot::Sender<netconfig::OutgoingResponse>,
+	) -> Self {
+		Self {
+			peer,
+			payload,
+			pending_response: OutgoingResponseSender {
+				pending_response,
+				phantom: PhantomData,
+			},
+		}
+	}
+
+	/// Try building from raw substrate request.
+	///
+	/// This function will fail if the request cannot be decoded and will apply passed in
+	/// reputation changes in that case.
+	///
+	/// Params:
+	/// - `raw`: The raw request to decode.
+	/// - `reputation_changes`: Reputation changes to apply for the peer in case decoding fails.
+	///
+	/// Errors:
+	/// - [`ReceiveError::DecodingError`] if decoding failed and the error response (carrying the
+	///   reputation changes) could be sent.
+	/// - [`ReceiveError::DecodingErrorNoReputationChange`] if decoding failed and sending the
+	///   error response failed as well.
+	pub fn try_from_raw(
+		raw: sc_network::config::IncomingRequest,
+		reputation_changes: Vec<UnifiedReputationChange>,
+	) -> Result<Self, ReceiveError> {
+		let sc_network::config::IncomingRequest {
+			payload,
+			peer,
+			pending_response,
+		} = raw;
+		match Req::decode(&mut payload.as_ref()) {
+			Ok(payload) => Ok(Self::new(peer, payload, pending_response)),
+			Err(err) => {
+				// Answer with an error and attach the reputation changes, so the network layer
+				// can apply them to the sending peer.
+				let response = sc_network::config::OutgoingResponse {
+					result: Err(()),
+					reputation_changes: reputation_changes
+						.into_iter()
+						.map(|r| r.into_base_rep())
+						.collect(),
+					sent_feedback: None,
+				};
+
+				if pending_response.send(response).is_err() {
+					Err(ReceiveError::DecodingErrorNoReputationChange(peer, err))
+				} else {
+					Err(ReceiveError::DecodingError(peer, err))
+				}
+			}
+		}
+	}
+
+	/// Send the response back.
+	///
+	/// Calls [`OutgoingResponseSender::send_response`].
+	pub fn send_response(self, resp: Req::Response) -> Result<(), Req::Response> {
+		self.pending_response.send_response(resp)
+	}
+
+	/// Send response with additional options.
+	///
+	/// Calls [`OutgoingResponseSender::send_outgoing_response`].
+	pub fn send_outgoing_response(self, resp: OutgoingResponse<<Req as IsRequest>::Response>)
+		-> Result<(), ()> {
+		self.pending_response.send_outgoing_response(resp)
+	}
+}
+
 /// Future for actually receiving a typed response for an OutgoingRequest.
 async fn receive_response<Req>(
 	rec: oneshot::Receiver<Result<Vec<u8>, network::RequestFailure>>,
@@ -20,7 +20,7 @@ use parity_scale_codec::{Decode, Encode};

 use polkadot_primitives::v1::{CandidateHash, CandidateReceipt, CommittedCandidateReceipt, Hash, ValidatorIndex};
 use polkadot_primitives::v1::Id as ParaId;
-use polkadot_node_primitives::{AvailableData, PoV, ErasureChunk};
+use polkadot_node_primitives::{AvailableData, DisputeMessage, ErasureChunk, PoV, UncheckedDisputeMessage};

 use super::request::IsRequest;
 use super::Protocol;
@@ -192,3 +192,28 @@ impl IsRequest for StatementFetchingRequest {
 	type Response = StatementFetchingResponse;
 	const PROTOCOL: Protocol = Protocol::StatementFetching;
 }
+
+/// A dispute request.
+///
+/// Contains an invalid vote and a valid one for a particular candidate in a given session.
+#[derive(Clone, Encode, Decode, Debug)]
+pub struct DisputeRequest(pub UncheckedDisputeMessage);
+
+impl From<DisputeMessage> for DisputeRequest {
+	/// Wrap a `DisputeMessage` for sending, converting it into its unchecked form first.
+	fn from(msg: DisputeMessage) -> Self {
+		let unchecked: UncheckedDisputeMessage = msg.into();
+		DisputeRequest(unchecked)
+	}
+}
+
+/// Responses a recipient can give to a `DisputeRequest`.
+#[derive(Encode, Decode, Debug, PartialEq, Eq)]
+pub enum DisputeResponse {
+	/// The recipient processed the dispute request successfully.
+	#[codec(index = 0)]
+	Confirmed,
+}
+
+// Ties `DisputeRequest` to its response type and to the `DisputeSending` protocol.
+impl IsRequest for DisputeRequest {
+	type Response = DisputeResponse;
+	const PROTOCOL: Protocol = Protocol::DisputeSending;
+}