mirror of
https://github.com/pezkuwichain/pezkuwi-subxt.git
synced 2026-06-14 07:31:08 +00:00
Dispute distribution implementation (#3282)
* Dispute protocol. * Dispute distribution protocol. * Get network requests routed. * WIP: Basic dispute sender logic. * Basic validator determination logic. * WIP: Getting things to typecheck. * Slightly larger timeout. * More typechecking stuff. * Cleanup. * Finished most of the sending logic. * Handle active leaves updates - Cleanup dead disputes - Update sends for new sessions - Retry on errors * Pass sessions in already. * Startup dispute sending. * Provide incoming decoding facilities and use them in statement-distribution. * Relaxed runtime util requirements. We only need a `SubsystemSender` not a full `SubsystemContext`. * Better usability of incoming requests. Make it possible to consume stuff without clones. * Add basic receiver functionality. * Cleanup + fixes for sender. * One more sender fix. * Start receiver. * Make sure to send responses back. * WIP: Exposed authority discovery * Make tests pass. * Fully featured receiver. * Decrease cost of `NotAValidator`. * Make `RuntimeInfo` LRU cache size configurable. * Cache more sessions. * Fix collator protocol. * Disable metrics for now. * Make dispute-distribution a proper subsystem. * Fix naming. * Code style fixes. * Factored out 4x copied mock function. * WIP: Tests. * Whitespace cleanup. * Accessor functions. * More testing. * More Debug instances. * Fix busy loop. * Working tests. * More tests. * Cleanup. * Fix build. * Basic receiving test. * Non validator message gets dropped. * More receiving tests. * Test nested and subsequent imports. * Fix spaces. * Better formatted imports. * Import cleanup. * Metrics. * Message -> MuxedMessage * Message -> MuxedMessage * More review remarks. * Add missing metrics.rs. * Fix flaky test. * Dispute coordinator - deliver confirmations. * Send out `DisputeMessage` on issue local statement. * Unwire dispute distribution. * Review remarks. * Review remarks. * Better docs.
This commit is contained in:
@@ -0,0 +1,328 @@
|
||||
// Copyright 2021 Parity Technologies (UK) Ltd.
|
||||
// This file is part of Polkadot.
|
||||
|
||||
// Polkadot is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Polkadot is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::collections::HashSet;
|
||||
|
||||
use futures::Future;
|
||||
use futures::FutureExt;
|
||||
use futures::SinkExt;
|
||||
use futures::channel::mpsc;
|
||||
use futures::future::RemoteHandle;
|
||||
|
||||
use polkadot_node_network_protocol::{
|
||||
IfDisconnected,
|
||||
request_response::{
|
||||
OutgoingRequest, OutgoingResult, Recipient, Requests,
|
||||
v1::{DisputeRequest, DisputeResponse},
|
||||
}
|
||||
};
|
||||
use polkadot_node_subsystem_util::runtime::RuntimeInfo;
|
||||
use polkadot_primitives::v1::{
|
||||
AuthorityDiscoveryId, CandidateHash, Hash, SessionIndex, ValidatorIndex,
|
||||
};
|
||||
use polkadot_subsystem::{
|
||||
SubsystemContext,
|
||||
messages::{AllMessages, NetworkBridgeMessage},
|
||||
};
|
||||
|
||||
use super::error::{Fatal, Result};
|
||||
|
||||
use crate::LOG_TARGET;
|
||||
use crate::metrics::FAILED;
|
||||
use crate::metrics::SUCCEEDED;
|
||||
|
||||
/// Delivery status for a particular dispute.
|
||||
///
|
||||
/// Keeps track of all the validators that have to be reached for a dispute.
|
||||
pub struct SendTask {
|
||||
/// The request we are supposed to get out to all parachain validators of the dispute's session
|
||||
/// and to all current authorities.
|
||||
request: DisputeRequest,
|
||||
|
||||
/// The set of authorities we need to send our messages to. This set will change at session
|
||||
/// boundaries. It will always be at least the parachain validators of the session where the
|
||||
/// dispute happened and the authorities of the current sessions as determined by active heads.
|
||||
deliveries: HashMap<AuthorityDiscoveryId, DeliveryStatus>,
|
||||
|
||||
/// Whether or not we have any tasks failed since the last refresh.
|
||||
has_failed_sends: bool,
|
||||
|
||||
/// Sender to be cloned for tasks.
|
||||
tx: mpsc::Sender<TaskFinish>,
|
||||
}
|
||||
|
||||
/// Status of a particular vote/statement delivery to a particular validator.
|
||||
enum DeliveryStatus {
|
||||
/// Request is still in flight.
|
||||
Pending(RemoteHandle<()>),
|
||||
/// Succeeded - no need to send request to this peer anymore.
|
||||
Succeeded,
|
||||
}
|
||||
|
||||
/// A sending task finishes with this result:
|
||||
#[derive(Debug)]
|
||||
pub struct TaskFinish {
|
||||
/// The candidate this task was running for.
|
||||
pub candidate_hash: CandidateHash,
|
||||
/// The authority the request was sent to.
|
||||
pub receiver: AuthorityDiscoveryId,
|
||||
/// The result of the delivery attempt.
|
||||
pub result: TaskResult,
|
||||
}
|
||||
|
||||
/// Outcome of a single attempt to deliver a dispute request to one peer.
///
/// The enum carries no data, so it is cheap to copy and can be compared
/// directly with `==`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TaskResult {
    /// Task succeeded in getting the request to its peer.
    Succeeded,
    /// Task was not able to get the request out to its peer.
    ///
    /// It should be retried in that case.
    Failed,
}
|
||||
|
||||
impl TaskResult {
|
||||
pub fn as_metrics_label(&self) -> &'static str {
|
||||
match self {
|
||||
Self::Succeeded => SUCCEEDED,
|
||||
Self::Failed => FAILED,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl SendTask
|
||||
{
|
||||
/// Initiates sending a dispute message to peers.
|
||||
pub async fn new<Context: SubsystemContext>(
|
||||
ctx: &mut Context,
|
||||
runtime: &mut RuntimeInfo,
|
||||
active_sessions: &HashMap<SessionIndex,Hash>,
|
||||
tx: mpsc::Sender<TaskFinish>,
|
||||
request: DisputeRequest,
|
||||
) -> Result<Self> {
|
||||
let mut send_task = Self {
|
||||
request,
|
||||
deliveries: HashMap::new(),
|
||||
has_failed_sends: false,
|
||||
tx,
|
||||
};
|
||||
send_task.refresh_sends(
|
||||
ctx,
|
||||
runtime,
|
||||
active_sessions,
|
||||
).await?;
|
||||
Ok(send_task)
|
||||
}
|
||||
|
||||
/// Make sure we are sending to all relevant authorities.
|
||||
///
|
||||
/// This function is called at construction and should also be called whenever a session change
|
||||
/// happens and on a regular basis to ensure we are retrying failed attempts.
|
||||
pub async fn refresh_sends<Context: SubsystemContext>(
|
||||
&mut self,
|
||||
ctx: &mut Context,
|
||||
runtime: &mut RuntimeInfo,
|
||||
active_sessions: &HashMap<SessionIndex, Hash>,
|
||||
) -> Result<()> {
|
||||
let new_authorities = self.get_relevant_validators(ctx, runtime, active_sessions).await?;
|
||||
|
||||
let add_authorities = new_authorities
|
||||
.iter()
|
||||
.filter(|a| !self.deliveries.contains_key(a))
|
||||
.map(Clone::clone)
|
||||
.collect();
|
||||
|
||||
// Get rid of dead/irrelevant tasks/statuses:
|
||||
self.deliveries.retain(|k, _| new_authorities.contains(k));
|
||||
|
||||
// Start any new tasks that are needed:
|
||||
let new_statuses = send_requests(
|
||||
ctx,
|
||||
self.tx.clone(),
|
||||
add_authorities,
|
||||
self.request.clone(),
|
||||
).await?;
|
||||
|
||||
self.deliveries.extend(new_statuses.into_iter());
|
||||
self.has_failed_sends = false;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Whether or not any sends have failed since the last refreshed.
|
||||
pub fn has_failed_sends(&self) -> bool {
|
||||
self.has_failed_sends
|
||||
}
|
||||
|
||||
/// Handle a finished response waiting task.
|
||||
pub fn on_finished_send(&mut self, authority: &AuthorityDiscoveryId, result: TaskResult) {
|
||||
match result {
|
||||
TaskResult::Failed => {
|
||||
tracing::warn!(
|
||||
target: LOG_TARGET,
|
||||
candidate = ?self.request.0.candidate_receipt.hash(),
|
||||
?authority,
|
||||
"Could not get our message out! If this keeps happening, then check chain whether the dispute made it there."
|
||||
);
|
||||
self.has_failed_sends = true;
|
||||
// Remove state, so we know what to try again:
|
||||
self.deliveries.remove(authority);
|
||||
}
|
||||
TaskResult::Succeeded => {
|
||||
let status = match self.deliveries.get_mut(&authority) {
|
||||
None => {
|
||||
// Can happen when a sending became irrelevant while the response was already
|
||||
// queued.
|
||||
tracing::debug!(
|
||||
target: LOG_TARGET,
|
||||
candidate = ?self.request.0.candidate_receipt.hash(),
|
||||
?authority,
|
||||
?result,
|
||||
"Received `FromSendingTask::Finished` for non existing task."
|
||||
);
|
||||
return
|
||||
}
|
||||
Some(status) => status,
|
||||
};
|
||||
// We are done here:
|
||||
*status = DeliveryStatus::Succeeded;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// Determine all validators that should receive the given dispute requests.
|
||||
///
|
||||
/// This is all parachain validators of the session the candidate occurred and all authorities
|
||||
/// of all currently active sessions, determined by currently active heads.
|
||||
async fn get_relevant_validators<Context: SubsystemContext>(
|
||||
&self,
|
||||
ctx: &mut Context,
|
||||
runtime: &mut RuntimeInfo,
|
||||
active_sessions: &HashMap<SessionIndex, Hash>,
|
||||
) -> Result<HashSet<AuthorityDiscoveryId>> {
|
||||
let ref_head = self.request.0.candidate_receipt.descriptor.relay_parent;
|
||||
// Parachain validators:
|
||||
let info = runtime
|
||||
.get_session_info_by_index(ctx.sender(), ref_head, self.request.0.session_index)
|
||||
.await?;
|
||||
let session_info = &info.session_info;
|
||||
let validator_count = session_info.validators.len();
|
||||
let mut authorities: HashSet<_> = session_info
|
||||
.discovery_keys
|
||||
.iter()
|
||||
.take(validator_count)
|
||||
.enumerate()
|
||||
.filter(|(i, _)| Some(ValidatorIndex(*i as _)) != info.validator_info.our_index)
|
||||
.map(|(_, v)| v.clone())
|
||||
.collect();
|
||||
|
||||
// Current authorities:
|
||||
for (session_index, head) in active_sessions.iter() {
|
||||
let info = runtime.get_session_info_by_index(ctx.sender(), *head, *session_index).await?;
|
||||
let session_info = &info.session_info;
|
||||
let new_set = session_info
|
||||
.discovery_keys
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter(|(i, _)| Some(ValidatorIndex(*i as _)) != info.validator_info.our_index)
|
||||
.map(|(_, v)| v.clone());
|
||||
authorities.extend(new_set);
|
||||
}
|
||||
Ok(authorities)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// Start sending of the given msg to all given authorities.
|
||||
///
|
||||
/// And spawn tasks for handling the response.
|
||||
async fn send_requests<Context: SubsystemContext>(
|
||||
ctx: &mut Context,
|
||||
tx: mpsc::Sender<TaskFinish>,
|
||||
receivers: Vec<AuthorityDiscoveryId>,
|
||||
req: DisputeRequest,
|
||||
) -> Result<HashMap<AuthorityDiscoveryId, DeliveryStatus>> {
|
||||
let mut statuses = HashMap::with_capacity(receivers.len());
|
||||
let mut reqs = Vec::with_capacity(receivers.len());
|
||||
|
||||
for receiver in receivers {
|
||||
let (outgoing, pending_response) = OutgoingRequest::new(
|
||||
Recipient::Authority(receiver.clone()),
|
||||
req.clone(),
|
||||
);
|
||||
|
||||
reqs.push(Requests::DisputeSending(outgoing));
|
||||
|
||||
let fut = wait_response_task(
|
||||
pending_response,
|
||||
req.0.candidate_receipt.hash(),
|
||||
receiver.clone(),
|
||||
tx.clone(),
|
||||
);
|
||||
|
||||
let (remote, remote_handle) = fut.remote_handle();
|
||||
ctx.spawn("dispute-sender", remote.boxed())
|
||||
.map_err(Fatal::SpawnTask)?;
|
||||
statuses.insert(receiver, DeliveryStatus::Pending(remote_handle));
|
||||
}
|
||||
|
||||
let msg = NetworkBridgeMessage::SendRequests(
|
||||
reqs,
|
||||
// We should be connected, but the hell - if not, try!
|
||||
IfDisconnected::TryConnect,
|
||||
);
|
||||
ctx.send_message(AllMessages::NetworkBridge(msg)).await;
|
||||
Ok(statuses)
|
||||
}
|
||||
|
||||
/// Future to be spawned in a task for awaiting a response.
|
||||
async fn wait_response_task(
|
||||
pending_response: impl Future<Output = OutgoingResult<DisputeResponse>>,
|
||||
candidate_hash: CandidateHash,
|
||||
receiver: AuthorityDiscoveryId,
|
||||
mut tx: mpsc::Sender<TaskFinish>,
|
||||
) {
|
||||
let result = pending_response.await;
|
||||
let msg = match result {
|
||||
Err(err) => {
|
||||
tracing::warn!(
|
||||
target: LOG_TARGET,
|
||||
%candidate_hash,
|
||||
%receiver,
|
||||
%err,
|
||||
"Error sending dispute statements to node."
|
||||
);
|
||||
TaskFinish { candidate_hash, receiver, result: TaskResult::Failed}
|
||||
}
|
||||
Ok(DisputeResponse::Confirmed) => {
|
||||
tracing::trace!(
|
||||
target: LOG_TARGET,
|
||||
%candidate_hash,
|
||||
%receiver,
|
||||
"Sending dispute message succeeded"
|
||||
);
|
||||
TaskFinish { candidate_hash, receiver, result: TaskResult::Succeeded }
|
||||
}
|
||||
};
|
||||
if let Err(err) = tx.feed(msg).await {
|
||||
tracing::debug!(
|
||||
target: LOG_TARGET,
|
||||
%err,
|
||||
"Failed to notify susystem about dispute sending result."
|
||||
);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user