mirror of
https://github.com/pezkuwichain/pezkuwi-subxt.git
synced 2026-06-19 18:11:03 +00:00
Retry availability until the receiver of the request is dropped (#2763)
* guide updates
* keep interactions alive until receivers drop
* retry indefinitely
* cancel approval tasks on finality
* use swap_remove instead of remove
This commit is contained in:
committed by
GitHub
parent
6514e00144
commit
08d5b268a0
@@ -53,6 +53,7 @@ use sp_application_crypto::Pair;
|
|||||||
use kvdb::KeyValueDB;
|
use kvdb::KeyValueDB;
|
||||||
|
|
||||||
use futures::prelude::*;
|
use futures::prelude::*;
|
||||||
|
use futures::future::RemoteHandle;
|
||||||
use futures::channel::{mpsc, oneshot};
|
use futures::channel::{mpsc, oneshot};
|
||||||
|
|
||||||
use std::collections::{BTreeMap, HashMap};
|
use std::collections::{BTreeMap, HashMap};
|
||||||
@@ -444,6 +445,7 @@ enum Action {
|
|||||||
WriteCandidateEntry(CandidateHash, CandidateEntry),
|
WriteCandidateEntry(CandidateHash, CandidateEntry),
|
||||||
LaunchApproval {
|
LaunchApproval {
|
||||||
indirect_cert: IndirectAssignmentCert,
|
indirect_cert: IndirectAssignmentCert,
|
||||||
|
relay_block_number: BlockNumber,
|
||||||
candidate_index: CandidateIndex,
|
candidate_index: CandidateIndex,
|
||||||
session: SessionIndex,
|
session: SessionIndex,
|
||||||
candidate: CandidateReceipt,
|
candidate: CandidateReceipt,
|
||||||
@@ -452,6 +454,8 @@ enum Action {
|
|||||||
Conclude,
|
Conclude,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type BackgroundTaskMap = BTreeMap<BlockNumber, Vec<RemoteHandle<()>>>;
|
||||||
|
|
||||||
async fn run<C>(
|
async fn run<C>(
|
||||||
mut ctx: C,
|
mut ctx: C,
|
||||||
subsystem: ApprovalVotingSubsystem,
|
subsystem: ApprovalVotingSubsystem,
|
||||||
@@ -472,6 +476,9 @@ async fn run<C>(
|
|||||||
|
|
||||||
let mut wakeups = Wakeups::default();
|
let mut wakeups = Wakeups::default();
|
||||||
|
|
||||||
|
// map block numbers to background work.
|
||||||
|
let mut background_tasks = BTreeMap::new();
|
||||||
|
|
||||||
let mut last_finalized_height: Option<BlockNumber> = None;
|
let mut last_finalized_height: Option<BlockNumber> = None;
|
||||||
let mut background_rx = background_rx.fuse();
|
let mut background_rx = background_rx.fuse();
|
||||||
|
|
||||||
@@ -489,7 +496,7 @@ async fn run<C>(
|
|||||||
)?
|
)?
|
||||||
}
|
}
|
||||||
next_msg = ctx.recv().fuse() => {
|
next_msg = ctx.recv().fuse() => {
|
||||||
handle_from_overseer(
|
let actions = handle_from_overseer(
|
||||||
&mut ctx,
|
&mut ctx,
|
||||||
&mut state,
|
&mut state,
|
||||||
&subsystem.metrics,
|
&subsystem.metrics,
|
||||||
@@ -497,7 +504,13 @@ async fn run<C>(
|
|||||||
next_msg?,
|
next_msg?,
|
||||||
&mut last_finalized_height,
|
&mut last_finalized_height,
|
||||||
&wakeups,
|
&wakeups,
|
||||||
).await?
|
).await?;
|
||||||
|
|
||||||
|
if let Some(finalized_height) = last_finalized_height {
|
||||||
|
cleanup_background_tasks(finalized_height, &mut background_tasks);
|
||||||
|
}
|
||||||
|
|
||||||
|
actions
|
||||||
}
|
}
|
||||||
background_request = background_rx.next().fuse() => {
|
background_request = background_rx.next().fuse() => {
|
||||||
if let Some(req) = background_request {
|
if let Some(req) = background_request {
|
||||||
@@ -519,6 +532,7 @@ async fn run<C>(
|
|||||||
&mut wakeups,
|
&mut wakeups,
|
||||||
db_writer,
|
db_writer,
|
||||||
&background_tx,
|
&background_tx,
|
||||||
|
&mut background_tasks,
|
||||||
actions,
|
actions,
|
||||||
).await? {
|
).await? {
|
||||||
break;
|
break;
|
||||||
@@ -535,6 +549,7 @@ async fn handle_actions(
|
|||||||
wakeups: &mut Wakeups,
|
wakeups: &mut Wakeups,
|
||||||
db: &dyn KeyValueDB,
|
db: &dyn KeyValueDB,
|
||||||
background_tx: &mpsc::Sender<BackgroundRequest>,
|
background_tx: &mpsc::Sender<BackgroundRequest>,
|
||||||
|
background_tasks: &mut BackgroundTaskMap,
|
||||||
actions: impl IntoIterator<Item = Action>,
|
actions: impl IntoIterator<Item = Action>,
|
||||||
) -> SubsystemResult<bool> {
|
) -> SubsystemResult<bool> {
|
||||||
let mut transaction = approval_db::v1::Transaction::default();
|
let mut transaction = approval_db::v1::Transaction::default();
|
||||||
@@ -555,6 +570,7 @@ async fn handle_actions(
|
|||||||
}
|
}
|
||||||
Action::LaunchApproval {
|
Action::LaunchApproval {
|
||||||
indirect_cert,
|
indirect_cert,
|
||||||
|
relay_block_number,
|
||||||
candidate_index,
|
candidate_index,
|
||||||
session,
|
session,
|
||||||
candidate,
|
candidate,
|
||||||
@@ -569,7 +585,7 @@ async fn handle_actions(
|
|||||||
candidate_index,
|
candidate_index,
|
||||||
).into());
|
).into());
|
||||||
|
|
||||||
launch_approval(
|
let handle = launch_approval(
|
||||||
ctx,
|
ctx,
|
||||||
background_tx.clone(),
|
background_tx.clone(),
|
||||||
session,
|
session,
|
||||||
@@ -578,7 +594,11 @@ async fn handle_actions(
|
|||||||
block_hash,
|
block_hash,
|
||||||
candidate_index as _,
|
candidate_index as _,
|
||||||
backing_group,
|
backing_group,
|
||||||
).await?
|
).await?;
|
||||||
|
|
||||||
|
if let Some(handle) = handle {
|
||||||
|
background_tasks.entry(relay_block_number).or_default().push(handle);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Action::Conclude => { conclude = true; }
|
Action::Conclude => { conclude = true; }
|
||||||
}
|
}
|
||||||
@@ -594,6 +614,19 @@ async fn handle_actions(
|
|||||||
Ok(conclude)
|
Ok(conclude)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Clean up all background tasks which are no longer needed as they correspond to a
|
||||||
|
// finalized block.
|
||||||
|
fn cleanup_background_tasks(
|
||||||
|
current_finalized_block: BlockNumber,
|
||||||
|
tasks: &mut BackgroundTaskMap,
|
||||||
|
) {
|
||||||
|
let after = tasks.split_off(&(current_finalized_block + 1));
|
||||||
|
*tasks = after;
|
||||||
|
|
||||||
|
// tasks up to the finalized block are dropped, and `RemoteHandle` cancels
|
||||||
|
// the task on drop.
|
||||||
|
}
|
||||||
|
|
||||||
// Handle an incoming signal from the overseer. Returns true if execution should conclude.
|
// Handle an incoming signal from the overseer. Returns true if execution should conclude.
|
||||||
async fn handle_from_overseer(
|
async fn handle_from_overseer(
|
||||||
ctx: &mut impl SubsystemContext,
|
ctx: &mut impl SubsystemContext,
|
||||||
@@ -1533,6 +1566,7 @@ fn process_wakeup(
|
|||||||
// sanity: should always be present.
|
// sanity: should always be present.
|
||||||
actions.push(Action::LaunchApproval {
|
actions.push(Action::LaunchApproval {
|
||||||
indirect_cert,
|
indirect_cert,
|
||||||
|
relay_block_number: block_entry.block_number(),
|
||||||
candidate_index: i as _,
|
candidate_index: i as _,
|
||||||
session: block_entry.session(),
|
session: block_entry.session(),
|
||||||
candidate: candidate_entry.candidate_receipt().clone(),
|
candidate: candidate_entry.candidate_receipt().clone(),
|
||||||
@@ -1566,6 +1600,9 @@ fn process_wakeup(
|
|||||||
Ok(actions)
|
Ok(actions)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Launch approval work, returning a `RemoteHandle` which corresponds to the background task
|
||||||
|
// spawned. When the background work is no longer needed, the `RemoteHandle` should be dropped
|
||||||
|
// to cancel the background work and any requests it has spawned.
|
||||||
async fn launch_approval(
|
async fn launch_approval(
|
||||||
ctx: &mut impl SubsystemContext,
|
ctx: &mut impl SubsystemContext,
|
||||||
mut background_tx: mpsc::Sender<BackgroundRequest>,
|
mut background_tx: mpsc::Sender<BackgroundRequest>,
|
||||||
@@ -1575,7 +1612,7 @@ async fn launch_approval(
|
|||||||
block_hash: Hash,
|
block_hash: Hash,
|
||||||
candidate_index: usize,
|
candidate_index: usize,
|
||||||
backing_group: GroupIndex,
|
backing_group: GroupIndex,
|
||||||
) -> SubsystemResult<()> {
|
) -> SubsystemResult<Option<RemoteHandle<()>>> {
|
||||||
let (a_tx, a_rx) = oneshot::channel();
|
let (a_tx, a_rx) = oneshot::channel();
|
||||||
let (code_tx, code_rx) = oneshot::channel();
|
let (code_tx, code_rx) = oneshot::channel();
|
||||||
let (context_num_tx, context_num_rx) = oneshot::channel();
|
let (context_num_tx, context_num_rx) = oneshot::channel();
|
||||||
@@ -1610,7 +1647,7 @@ async fn launch_approval(
|
|||||||
candidate.descriptor.relay_parent,
|
candidate.descriptor.relay_parent,
|
||||||
);
|
);
|
||||||
|
|
||||||
return Ok(());
|
return Ok(None);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -1719,7 +1756,10 @@ async fn launch_approval(
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
ctx.spawn("approval-checks", Box::pin(background)).await
|
let (background, remote_handle) = background.remote_handle();
|
||||||
|
ctx.spawn("approval-checks", Box::pin(background))
|
||||||
|
.await
|
||||||
|
.map(move |()| Some(remote_handle))
|
||||||
}
|
}
|
||||||
|
|
||||||
// Issue and import a local approval vote. Should only be invoked after approval checks
|
// Issue and import a local approval vote. Should only be invoked after approval checks
|
||||||
|
|||||||
@@ -397,6 +397,11 @@ impl BlockEntry {
|
|||||||
pub fn candidates(&self) -> &[(CoreIndex, CandidateHash)] {
|
pub fn candidates(&self) -> &[(CoreIndex, CandidateHash)] {
|
||||||
&self.candidates
|
&self.candidates
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Access the block number of the block entry.
|
||||||
|
pub fn block_number(&self) -> BlockNumber {
|
||||||
|
self.block_number
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<crate::approval_db::v1::BlockEntry> for BlockEntry {
|
impl From<crate::approval_db::v1::BlockEntry> for BlockEntry {
|
||||||
|
|||||||
@@ -16,7 +16,7 @@
|
|||||||
|
|
||||||
//! The `Error` and `Result` types used by the subsystem.
|
//! The `Error` and `Result` types used by the subsystem.
|
||||||
|
|
||||||
use futures::channel::{mpsc, oneshot};
|
use futures::channel::oneshot;
|
||||||
use thiserror::Error;
|
use thiserror::Error;
|
||||||
|
|
||||||
/// Error type used by the Availability Recovery subsystem.
|
/// Error type used by the Availability Recovery subsystem.
|
||||||
@@ -34,9 +34,6 @@ pub enum Error {
|
|||||||
#[error("failed to send response")]
|
#[error("failed to send response")]
|
||||||
CanceledResponseSender,
|
CanceledResponseSender,
|
||||||
|
|
||||||
#[error("to_state channel is closed")]
|
|
||||||
ClosedToState(#[source] mpsc::SendError),
|
|
||||||
|
|
||||||
#[error(transparent)]
|
#[error(transparent)]
|
||||||
Runtime(#[from] polkadot_subsystem::errors::RuntimeApiError),
|
Runtime(#[from] polkadot_subsystem::errors::RuntimeApiError),
|
||||||
|
|
||||||
|
|||||||
@@ -19,9 +19,11 @@
|
|||||||
#![warn(missing_docs)]
|
#![warn(missing_docs)]
|
||||||
|
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
use std::pin::Pin;
|
||||||
|
|
||||||
use futures::{channel::{oneshot, mpsc}, prelude::*, stream::FuturesUnordered};
|
use futures::{channel::oneshot, prelude::*, stream::FuturesUnordered};
|
||||||
use futures::future::BoxFuture;
|
use futures::future::{BoxFuture, RemoteHandle, FutureExt};
|
||||||
|
use futures::task::{Context, Poll};
|
||||||
use lru::LruCache;
|
use lru::LruCache;
|
||||||
use rand::seq::SliceRandom;
|
use rand::seq::SliceRandom;
|
||||||
|
|
||||||
@@ -33,7 +35,7 @@ use polkadot_primitives::v1::{
|
|||||||
use polkadot_node_primitives::{ErasureChunk, AvailableData};
|
use polkadot_node_primitives::{ErasureChunk, AvailableData};
|
||||||
use polkadot_subsystem::{
|
use polkadot_subsystem::{
|
||||||
SubsystemContext, SubsystemResult, SubsystemError, Subsystem, SpawnedSubsystem, FromOverseer,
|
SubsystemContext, SubsystemResult, SubsystemError, Subsystem, SpawnedSubsystem, FromOverseer,
|
||||||
OverseerSignal, ActiveLeavesUpdate,
|
OverseerSignal, ActiveLeavesUpdate, SubsystemSender,
|
||||||
errors::RecoveryError,
|
errors::RecoveryError,
|
||||||
jaeger,
|
jaeger,
|
||||||
messages::{
|
messages::{
|
||||||
@@ -67,21 +69,6 @@ pub struct AvailabilityRecoverySubsystem {
|
|||||||
fast_path: bool,
|
fast_path: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Accumulate all awaiting sides for some particular `AvailableData`.
|
|
||||||
struct InteractionHandle {
|
|
||||||
awaiting: Vec<oneshot::Sender<Result<AvailableData, RecoveryError>>>,
|
|
||||||
}
|
|
||||||
|
|
||||||
/// A message received by main code from an async `Interaction` task.
|
|
||||||
#[derive(Debug)]
|
|
||||||
enum FromInteraction {
|
|
||||||
/// An interaction concluded.
|
|
||||||
Concluded(CandidateHash, Result<AvailableData, RecoveryError>),
|
|
||||||
|
|
||||||
/// Send a request on the network service.
|
|
||||||
NetworkRequest(Requests),
|
|
||||||
}
|
|
||||||
|
|
||||||
struct RequestFromBackersPhase {
|
struct RequestFromBackersPhase {
|
||||||
// a random shuffling of the validators from the backing group which indicates the order
|
// a random shuffling of the validators from the backing group which indicates the order
|
||||||
// in which we connect to them and request the chunk.
|
// in which we connect to them and request the chunk.
|
||||||
@@ -95,7 +82,7 @@ struct RequestChunksPhase {
|
|||||||
received_chunks: HashMap<ValidatorIndex, ErasureChunk>,
|
received_chunks: HashMap<ValidatorIndex, ErasureChunk>,
|
||||||
requesting_chunks: FuturesUnordered<BoxFuture<
|
requesting_chunks: FuturesUnordered<BoxFuture<
|
||||||
'static,
|
'static,
|
||||||
Result<Option<ErasureChunk>, RequestError>>,
|
Result<Option<ErasureChunk>, (ValidatorIndex, RequestError)>>,
|
||||||
>,
|
>,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -122,9 +109,8 @@ enum InteractionPhase {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// A state of a single interaction reconstructing an available data.
|
/// A state of a single interaction reconstructing an available data.
|
||||||
struct Interaction {
|
struct Interaction<S> {
|
||||||
/// A communication channel with the `State`.
|
sender: S,
|
||||||
to_state: mpsc::Sender<FromInteraction>,
|
|
||||||
|
|
||||||
/// The parameters of the interaction.
|
/// The parameters of the interaction.
|
||||||
params: InteractionParams,
|
params: InteractionParams,
|
||||||
@@ -142,13 +128,12 @@ impl RequestFromBackersPhase {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Run this phase to completion, returning `true` if data was successfully recovered and
|
// Run this phase to completion.
|
||||||
// false otherwise.
|
|
||||||
async fn run(
|
async fn run(
|
||||||
&mut self,
|
&mut self,
|
||||||
params: &InteractionParams,
|
params: &InteractionParams,
|
||||||
to_state: &mut mpsc::Sender<FromInteraction>
|
sender: &mut impl SubsystemSender,
|
||||||
) -> Result<bool, mpsc::SendError> {
|
) -> Result<AvailableData, RecoveryError> {
|
||||||
tracing::trace!(
|
tracing::trace!(
|
||||||
target: LOG_TARGET,
|
target: LOG_TARGET,
|
||||||
candidate_hash = ?params.candidate_hash,
|
candidate_hash = ?params.candidate_hash,
|
||||||
@@ -158,7 +143,7 @@ impl RequestFromBackersPhase {
|
|||||||
loop {
|
loop {
|
||||||
// Pop the next backer, and proceed to next phase if we're out.
|
// Pop the next backer, and proceed to next phase if we're out.
|
||||||
let validator_index = match self.shuffled_backers.pop() {
|
let validator_index = match self.shuffled_backers.pop() {
|
||||||
None => return Ok(false),
|
None => return Err(RecoveryError::Unavailable),
|
||||||
Some(i) => i,
|
Some(i) => i,
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -168,21 +153,21 @@ impl RequestFromBackersPhase {
|
|||||||
req_res::v1::AvailableDataFetchingRequest { candidate_hash: params.candidate_hash },
|
req_res::v1::AvailableDataFetchingRequest { candidate_hash: params.candidate_hash },
|
||||||
);
|
);
|
||||||
|
|
||||||
to_state.send(FromInteraction::NetworkRequest(Requests::AvailableDataFetching(req))).await?;
|
sender.send_message(NetworkBridgeMessage::SendRequests(
|
||||||
|
vec![Requests::AvailableDataFetching(req)],
|
||||||
|
IfDisconnected::TryConnect,
|
||||||
|
).into()).await;
|
||||||
|
|
||||||
match res.await {
|
match res.await {
|
||||||
Ok(req_res::v1::AvailableDataFetchingResponse::AvailableData(data)) => {
|
Ok(req_res::v1::AvailableDataFetchingResponse::AvailableData(data)) => {
|
||||||
if reconstructed_data_matches_root(params.validators.len(), &params.erasure_root, &data) {
|
if reconstructed_data_matches_root(params.validators.len(), &params.erasure_root, &data) {
|
||||||
to_state.send(
|
|
||||||
FromInteraction::Concluded(params.candidate_hash.clone(), Ok(data))
|
|
||||||
).await?;
|
|
||||||
|
|
||||||
tracing::trace!(
|
tracing::trace!(
|
||||||
target: LOG_TARGET,
|
target: LOG_TARGET,
|
||||||
candidate_hash = ?params.candidate_hash,
|
candidate_hash = ?params.candidate_hash,
|
||||||
"Received full data",
|
"Received full data",
|
||||||
);
|
);
|
||||||
return Ok(true);
|
|
||||||
|
return Ok(data);
|
||||||
} else {
|
} else {
|
||||||
tracing::debug!(
|
tracing::debug!(
|
||||||
target: LOG_TARGET,
|
target: LOG_TARGET,
|
||||||
@@ -222,8 +207,8 @@ impl RequestChunksPhase {
|
|||||||
async fn launch_parallel_requests(
|
async fn launch_parallel_requests(
|
||||||
&mut self,
|
&mut self,
|
||||||
params: &InteractionParams,
|
params: &InteractionParams,
|
||||||
to_state: &mut mpsc::Sender<FromInteraction>,
|
sender: &mut impl SubsystemSender,
|
||||||
) -> Result<(), mpsc::SendError> {
|
) {
|
||||||
let max_requests = std::cmp::min(N_PARALLEL, params.threshold);
|
let max_requests = std::cmp::min(N_PARALLEL, params.threshold);
|
||||||
while self.requesting_chunks.len() < max_requests {
|
while self.requesting_chunks.len() < max_requests {
|
||||||
if let Some(validator_index) = self.shuffling.pop() {
|
if let Some(validator_index) = self.shuffling.pop() {
|
||||||
@@ -247,39 +232,36 @@ impl RequestChunksPhase {
|
|||||||
raw_request.clone(),
|
raw_request.clone(),
|
||||||
);
|
);
|
||||||
|
|
||||||
to_state.send(FromInteraction::NetworkRequest(Requests::ChunkFetching(req))).await?;
|
sender.send_message(NetworkBridgeMessage::SendRequests(
|
||||||
|
vec![Requests::ChunkFetching(req)],
|
||||||
|
IfDisconnected::TryConnect,
|
||||||
|
).into()).await;
|
||||||
|
|
||||||
self.requesting_chunks.push(Box::pin(async move {
|
self.requesting_chunks.push(Box::pin(async move {
|
||||||
match res.await {
|
match res.await {
|
||||||
Ok(req_res::v1::ChunkFetchingResponse::Chunk(chunk))
|
Ok(req_res::v1::ChunkFetchingResponse::Chunk(chunk))
|
||||||
=> Ok(Some(chunk.recombine_into_chunk(&raw_request))),
|
=> Ok(Some(chunk.recombine_into_chunk(&raw_request))),
|
||||||
Ok(req_res::v1::ChunkFetchingResponse::NoSuchChunk) => Ok(None),
|
Ok(req_res::v1::ChunkFetchingResponse::NoSuchChunk) => Ok(None),
|
||||||
Err(e) => Err(e),
|
Err(e) => Err((validator_index, e)),
|
||||||
}
|
}
|
||||||
}));
|
}));
|
||||||
} else {
|
} else {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn wait_for_chunks(
|
async fn wait_for_chunks(
|
||||||
&mut self,
|
&mut self,
|
||||||
params: &InteractionParams,
|
params: &InteractionParams,
|
||||||
) -> Result<(), mpsc::SendError> {
|
) {
|
||||||
// Check if the requesting chunks is not empty not to poll to completion.
|
|
||||||
if self.requesting_chunks.is_empty() {
|
|
||||||
return Ok(());
|
|
||||||
}
|
|
||||||
|
|
||||||
// Poll for new updates from requesting_chunks.
|
// Poll for new updates from requesting_chunks.
|
||||||
while let Some(request_result) = self.requesting_chunks.next().await {
|
while let Poll::Ready(Some(request_result))
|
||||||
|
= futures::poll!(self.requesting_chunks.next())
|
||||||
|
{
|
||||||
match request_result {
|
match request_result {
|
||||||
Ok(Some(chunk)) => {
|
Ok(Some(chunk)) => {
|
||||||
// Check merkle proofs of any received chunks, and any failures should
|
// Check merkle proofs of any received chunks.
|
||||||
// lead to issuance of a FromInteraction::ReportPeer message.
|
|
||||||
|
|
||||||
let validator_index = chunk.index;
|
let validator_index = chunk.index;
|
||||||
|
|
||||||
@@ -313,24 +295,30 @@ impl RequestChunksPhase {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(None) => {}
|
Ok(None) => {}
|
||||||
Err(e) => {
|
Err((validator_index, e)) => {
|
||||||
tracing::debug!(
|
tracing::debug!(
|
||||||
target: LOG_TARGET,
|
target: LOG_TARGET,
|
||||||
err = ?e,
|
err = ?e,
|
||||||
|
?validator_index,
|
||||||
"Failure requesting chunk",
|
"Failure requesting chunk",
|
||||||
);
|
);
|
||||||
|
|
||||||
|
match e {
|
||||||
|
RequestError::InvalidResponse(_) => {}
|
||||||
|
RequestError::NetworkError(_) | RequestError::Canceled(_) => {
|
||||||
|
self.shuffling.push(validator_index);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn run(
|
async fn run(
|
||||||
&mut self,
|
&mut self,
|
||||||
params: &InteractionParams,
|
params: &InteractionParams,
|
||||||
to_state: &mut mpsc::Sender<FromInteraction>,
|
sender: &mut impl SubsystemSender,
|
||||||
) -> Result<(), mpsc::SendError> {
|
) -> Result<AvailableData, RecoveryError> {
|
||||||
loop {
|
loop {
|
||||||
if is_unavailable(
|
if is_unavailable(
|
||||||
self.received_chunks.len(),
|
self.received_chunks.len(),
|
||||||
@@ -347,23 +335,18 @@ impl RequestChunksPhase {
|
|||||||
n_validators = %params.validators.len(),
|
n_validators = %params.validators.len(),
|
||||||
"Data recovery is not possible",
|
"Data recovery is not possible",
|
||||||
);
|
);
|
||||||
to_state.send(FromInteraction::Concluded(
|
|
||||||
params.candidate_hash,
|
|
||||||
Err(RecoveryError::Unavailable),
|
|
||||||
)).await?;
|
|
||||||
|
|
||||||
return Ok(());
|
return Err(RecoveryError::Unavailable);
|
||||||
}
|
}
|
||||||
|
|
||||||
self.launch_parallel_requests(params, to_state).await?;
|
self.launch_parallel_requests(params, sender).await;
|
||||||
self.wait_for_chunks(params).await?;
|
self.wait_for_chunks(params).await;
|
||||||
|
|
||||||
// If received_chunks has more than threshold entries, attempt to recover the data.
|
// If received_chunks has more than threshold entries, attempt to recover the data.
|
||||||
// If that fails, or a re-encoding of it doesn't match the expected erasure root,
|
// If that fails, or a re-encoding of it doesn't match the expected erasure root,
|
||||||
// break and issue a FromInteraction::Concluded(RecoveryError::Invalid).
|
// return Err(RecoveryError::Invalid)
|
||||||
// Otherwise, issue a FromInteraction::Concluded(Ok(())).
|
|
||||||
if self.received_chunks.len() >= params.threshold {
|
if self.received_chunks.len() >= params.threshold {
|
||||||
let concluded = match polkadot_erasure_coding::reconstruct_v1(
|
return match polkadot_erasure_coding::reconstruct_v1(
|
||||||
params.validators.len(),
|
params.validators.len(),
|
||||||
self.received_chunks.values().map(|c| (&c.chunk[..], c.index.0 as usize)),
|
self.received_chunks.values().map(|c| (&c.chunk[..], c.index.0 as usize)),
|
||||||
) {
|
) {
|
||||||
@@ -375,7 +358,8 @@ impl RequestChunksPhase {
|
|||||||
erasure_root = ?params.erasure_root,
|
erasure_root = ?params.erasure_root,
|
||||||
"Data recovery complete",
|
"Data recovery complete",
|
||||||
);
|
);
|
||||||
FromInteraction::Concluded(params.candidate_hash.clone(), Ok(data))
|
|
||||||
|
Ok(data)
|
||||||
} else {
|
} else {
|
||||||
tracing::trace!(
|
tracing::trace!(
|
||||||
target: LOG_TARGET,
|
target: LOG_TARGET,
|
||||||
@@ -383,10 +367,8 @@ impl RequestChunksPhase {
|
|||||||
erasure_root = ?params.erasure_root,
|
erasure_root = ?params.erasure_root,
|
||||||
"Data recovery - root mismatch",
|
"Data recovery - root mismatch",
|
||||||
);
|
);
|
||||||
FromInteraction::Concluded(
|
|
||||||
params.candidate_hash.clone(),
|
Err(RecoveryError::Invalid)
|
||||||
Err(RecoveryError::Invalid),
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
@@ -397,15 +379,10 @@ impl RequestChunksPhase {
|
|||||||
?err,
|
?err,
|
||||||
"Data recovery error ",
|
"Data recovery error ",
|
||||||
);
|
);
|
||||||
FromInteraction::Concluded(
|
|
||||||
params.candidate_hash.clone(),
|
Err(RecoveryError::Invalid)
|
||||||
Err(RecoveryError::Invalid),
|
|
||||||
)
|
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
to_state.send(concluded).await?;
|
|
||||||
return Ok(());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -442,59 +419,99 @@ fn reconstructed_data_matches_root(
|
|||||||
branches.root() == *expected_root
|
branches.root() == *expected_root
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Interaction {
|
impl<S: SubsystemSender> Interaction<S> {
|
||||||
async fn run(mut self) -> error::Result<()> {
|
async fn run(mut self) -> Result<AvailableData, RecoveryError> {
|
||||||
loop {
|
loop {
|
||||||
// These only fail if we cannot reach the underlying subsystem, in which case there is nothing
|
// These only fail if we cannot reach the underlying subsystem, in which case there is nothing
|
||||||
// meaningful we can do.
|
// meaningful we can do.
|
||||||
match self.phase {
|
match self.phase {
|
||||||
InteractionPhase::RequestFromBackers(ref mut from_backers) => {
|
InteractionPhase::RequestFromBackers(ref mut from_backers) => {
|
||||||
if from_backers.run(&self.params, &mut self.to_state).await
|
match from_backers.run(&self.params, &mut self.sender).await {
|
||||||
.map_err(error::Error::ClosedToState)?
|
Ok(data) => break Ok(data),
|
||||||
{
|
Err(RecoveryError::Invalid) => break Err(RecoveryError::Invalid),
|
||||||
break Ok(())
|
Err(RecoveryError::Unavailable) => {
|
||||||
} else {
|
self.phase = InteractionPhase::RequestChunks(
|
||||||
self.phase = InteractionPhase::RequestChunks(
|
RequestChunksPhase::new(self.params.validators.len() as _)
|
||||||
RequestChunksPhase::new(self.params.validators.len() as _)
|
)
|
||||||
);
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
InteractionPhase::RequestChunks(ref mut from_all) => {
|
InteractionPhase::RequestChunks(ref mut from_all) => {
|
||||||
break from_all.run(&self.params, &mut self.to_state).await
|
break from_all.run(&self.params, &mut self.sender).await;
|
||||||
.map_err(error::Error::ClosedToState)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Accumulate all awaiting sides for some particular `AvailableData`.
|
||||||
|
struct InteractionHandle {
|
||||||
|
candidate_hash: CandidateHash,
|
||||||
|
remote: RemoteHandle<Result<AvailableData, RecoveryError>>,
|
||||||
|
awaiting: Vec<oneshot::Sender<Result<AvailableData, RecoveryError>>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Future for InteractionHandle {
|
||||||
|
type Output = Option<(CandidateHash, Result<AvailableData, RecoveryError>)>;
|
||||||
|
|
||||||
|
fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
|
||||||
|
let mut indices_to_remove = Vec::new();
|
||||||
|
for (i, awaiting) in self.awaiting.iter_mut().enumerate().rev() {
|
||||||
|
if let Poll::Ready(()) = awaiting.poll_canceled(cx) {
|
||||||
|
indices_to_remove.push(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// these are in reverse order, so `swap_remove` is fine.
|
||||||
|
for index in indices_to_remove {
|
||||||
|
tracing::debug!(
|
||||||
|
target: LOG_TARGET,
|
||||||
|
candidate_hash = ?self.candidate_hash,
|
||||||
|
"Receiver for available data dropped.",
|
||||||
|
);
|
||||||
|
|
||||||
|
self.awaiting.swap_remove(index);
|
||||||
|
}
|
||||||
|
|
||||||
|
if self.awaiting.is_empty() {
|
||||||
|
tracing::debug!(
|
||||||
|
target: LOG_TARGET,
|
||||||
|
candidate_hash = ?self.candidate_hash,
|
||||||
|
"All receivers for available data dropped.",
|
||||||
|
);
|
||||||
|
|
||||||
|
return Poll::Ready(None);
|
||||||
|
}
|
||||||
|
|
||||||
|
let remote = &mut self.remote;
|
||||||
|
futures::pin_mut!(remote);
|
||||||
|
let result = futures::ready!(remote.poll(cx));
|
||||||
|
|
||||||
|
for awaiting in self.awaiting.drain(..) {
|
||||||
|
let _ = awaiting.send(result.clone());
|
||||||
|
}
|
||||||
|
|
||||||
|
Poll::Ready(Some((self.candidate_hash, result)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
struct State {
|
struct State {
|
||||||
/// Each interaction is implemented as its own async task,
|
/// Each interaction is implemented as its own async task,
|
||||||
/// and these handles are for communicating with them.
|
/// and these handles are for communicating with them.
|
||||||
interactions: HashMap<CandidateHash, InteractionHandle>,
|
interactions: FuturesUnordered<InteractionHandle>,
|
||||||
|
|
||||||
/// A recent block hash for which state should be available.
|
/// A recent block hash for which state should be available.
|
||||||
live_block: (BlockNumber, Hash),
|
live_block: (BlockNumber, Hash),
|
||||||
|
|
||||||
/// interaction communication. This is cloned and given to interactions that are spun up.
|
|
||||||
from_interaction_tx: mpsc::Sender<FromInteraction>,
|
|
||||||
|
|
||||||
/// receiver for messages from interactions.
|
|
||||||
from_interaction_rx: mpsc::Receiver<FromInteraction>,
|
|
||||||
|
|
||||||
/// An LRU cache of recently recovered data.
|
/// An LRU cache of recently recovered data.
|
||||||
availability_lru: LruCache<CandidateHash, Result<AvailableData, RecoveryError>>,
|
availability_lru: LruCache<CandidateHash, Result<AvailableData, RecoveryError>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for State {
|
impl Default for State {
|
||||||
fn default() -> Self {
|
fn default() -> Self {
|
||||||
let (from_interaction_tx, from_interaction_rx) = mpsc::channel(16);
|
|
||||||
|
|
||||||
Self {
|
Self {
|
||||||
interactions: HashMap::new(),
|
interactions: FuturesUnordered::new(),
|
||||||
live_block: (0, Hash::default()),
|
live_block: (0, Hash::default()),
|
||||||
from_interaction_tx,
|
|
||||||
from_interaction_rx,
|
|
||||||
availability_lru: LruCache::new(LRU_SIZE),
|
availability_lru: LruCache::new(LRU_SIZE),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -546,15 +563,7 @@ async fn launch_interaction(
|
|||||||
backing_group: Option<GroupIndex>,
|
backing_group: Option<GroupIndex>,
|
||||||
response_sender: oneshot::Sender<Result<AvailableData, RecoveryError>>,
|
response_sender: oneshot::Sender<Result<AvailableData, RecoveryError>>,
|
||||||
) -> error::Result<()> {
|
) -> error::Result<()> {
|
||||||
let to_state = state.from_interaction_tx.clone();
|
|
||||||
|
|
||||||
let candidate_hash = receipt.hash();
|
let candidate_hash = receipt.hash();
|
||||||
state.interactions.insert(
|
|
||||||
candidate_hash.clone(),
|
|
||||||
InteractionHandle {
|
|
||||||
awaiting: vec![response_sender],
|
|
||||||
}
|
|
||||||
);
|
|
||||||
|
|
||||||
let params = InteractionParams {
|
let params = InteractionParams {
|
||||||
validator_authority_keys: session_info.discovery_keys.clone(),
|
validator_authority_keys: session_info.discovery_keys.clone(),
|
||||||
@@ -574,22 +583,20 @@ async fn launch_interaction(
|
|||||||
));
|
));
|
||||||
|
|
||||||
let interaction = Interaction {
|
let interaction = Interaction {
|
||||||
to_state,
|
sender: ctx.sender().clone(),
|
||||||
params,
|
params,
|
||||||
phase,
|
phase,
|
||||||
};
|
};
|
||||||
|
|
||||||
let future = async move {
|
let (remote, remote_handle) = interaction.run().remote_handle();
|
||||||
if let Err(e) = interaction.run().await {
|
|
||||||
tracing::debug!(
|
|
||||||
target: LOG_TARGET,
|
|
||||||
err = ?e,
|
|
||||||
"Interaction finished with an error",
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}.boxed();
|
|
||||||
|
|
||||||
if let Err(e) = ctx.spawn("recovery interaction", future).await {
|
state.interactions.push(InteractionHandle {
|
||||||
|
candidate_hash,
|
||||||
|
remote: remote_handle,
|
||||||
|
awaiting: vec![response_sender],
|
||||||
|
});
|
||||||
|
|
||||||
|
if let Err(e) = ctx.spawn("recovery interaction", Box::pin(remote)).await {
|
||||||
tracing::warn!(
|
tracing::warn!(
|
||||||
target: LOG_TARGET,
|
target: LOG_TARGET,
|
||||||
err = ?e,
|
err = ?e,
|
||||||
@@ -626,8 +633,8 @@ async fn handle_recover(
|
|||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(interaction) = state.interactions.get_mut(&candidate_hash) {
|
if let Some(i) = state.interactions.iter_mut().find(|i| i.candidate_hash == candidate_hash) {
|
||||||
interaction.awaiting.push(response_sender);
|
i.awaiting.push(response_sender);
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -678,48 +685,6 @@ async fn query_full_data(
|
|||||||
Ok(rx.await.map_err(error::Error::CanceledQueryFullData)?)
|
Ok(rx.await.map_err(error::Error::CanceledQueryFullData)?)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Handles message from interaction.
|
|
||||||
#[tracing::instrument(level = "trace", skip(ctx, state), fields(subsystem = LOG_TARGET))]
|
|
||||||
async fn handle_from_interaction(
|
|
||||||
state: &mut State,
|
|
||||||
ctx: &mut impl SubsystemContext<Message = AvailabilityRecoveryMessage>,
|
|
||||||
from_interaction: FromInteraction,
|
|
||||||
) -> error::Result<()> {
|
|
||||||
match from_interaction {
|
|
||||||
FromInteraction::Concluded(candidate_hash, result) => {
|
|
||||||
// Load the entry from the interactions map.
|
|
||||||
// It should always exist, if not for logic errors.
|
|
||||||
if let Some(interaction) = state.interactions.remove(&candidate_hash) {
|
|
||||||
// Send the result to each member of awaiting.
|
|
||||||
for awaiting in interaction.awaiting {
|
|
||||||
if let Err(_) = awaiting.send(result.clone()) {
|
|
||||||
tracing::debug!(
|
|
||||||
target: LOG_TARGET,
|
|
||||||
"An awaiting side of the interaction has been canceled",
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
tracing::warn!(
|
|
||||||
target: LOG_TARGET,
|
|
||||||
"Interaction under candidate hash {} is missing",
|
|
||||||
candidate_hash,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
state.availability_lru.put(candidate_hash, result);
|
|
||||||
}
|
|
||||||
FromInteraction::NetworkRequest(request) => {
|
|
||||||
ctx.send_message(NetworkBridgeMessage::SendRequests(
|
|
||||||
vec![request],
|
|
||||||
IfDisconnected::TryConnect,
|
|
||||||
).into()).await;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
impl AvailabilityRecoverySubsystem {
|
impl AvailabilityRecoverySubsystem {
|
||||||
/// Create a new instance of `AvailabilityRecoverySubsystem` which starts with a fast path to request data from backers.
|
/// Create a new instance of `AvailabilityRecoverySubsystem` which starts with a fast path to request data from backers.
|
||||||
pub fn with_fast_path() -> Self {
|
pub fn with_fast_path() -> Self {
|
||||||
@@ -790,19 +755,9 @@ impl AvailabilityRecoverySubsystem {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
from_interaction = state.from_interaction_rx.next() => {
|
output = state.interactions.next() => {
|
||||||
if let Some(from_interaction) = from_interaction {
|
if let Some((candidate_hash, result)) = output.flatten() {
|
||||||
if let Err(e) = handle_from_interaction(
|
state.availability_lru.put(candidate_hash, result);
|
||||||
&mut state,
|
|
||||||
&mut ctx,
|
|
||||||
from_interaction,
|
|
||||||
).await {
|
|
||||||
tracing::warn!(
|
|
||||||
target: LOG_TARGET,
|
|
||||||
err = ?e,
|
|
||||||
"Error handling message from interaction",
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -141,11 +141,19 @@ async fn overseer_recv(
|
|||||||
|
|
||||||
use sp_keyring::Sr25519Keyring;
|
use sp_keyring::Sr25519Keyring;
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug)]
|
||||||
enum Has {
|
enum Has {
|
||||||
No,
|
No,
|
||||||
Yes,
|
Yes,
|
||||||
Timeout,
|
NetworkError(sc_network::RequestFailure),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Has {
|
||||||
|
fn timeout() -> Self {
|
||||||
|
Has::NetworkError(sc_network::RequestFailure::Network(
|
||||||
|
sc_network::OutboundFailure::Timeout
|
||||||
|
))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
@@ -172,18 +180,6 @@ impl TestState {
|
|||||||
self.validators.len() - self.threshold() + 1
|
self.validators.len() - self.threshold() + 1
|
||||||
}
|
}
|
||||||
|
|
||||||
fn all_have(&self) -> Vec<Has> {
|
|
||||||
(0..self.validators.len()).map(|_| Has::Yes).collect()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn all_dont_have(&self) -> Vec<Has> {
|
|
||||||
(0..self.validators.len()).map(|_| Has::Yes).collect()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn all_timeout(&self) -> Vec<Has> {
|
|
||||||
(0..self.validators.len()).map(|_| Has::Timeout).collect()
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn test_runtime_api(
|
async fn test_runtime_api(
|
||||||
&self,
|
&self,
|
||||||
virtual_overseer: &mut VirtualOverseer,
|
virtual_overseer: &mut VirtualOverseer,
|
||||||
@@ -216,7 +212,7 @@ impl TestState {
|
|||||||
candidate_hash: CandidateHash,
|
candidate_hash: CandidateHash,
|
||||||
virtual_overseer: &mut VirtualOverseer,
|
virtual_overseer: &mut VirtualOverseer,
|
||||||
n: usize,
|
n: usize,
|
||||||
who_has: &[Has],
|
who_has: impl Fn(usize) -> Has,
|
||||||
) {
|
) {
|
||||||
// arbitrary order.
|
// arbitrary order.
|
||||||
for _ in 0..n {
|
for _ in 0..n {
|
||||||
@@ -237,14 +233,10 @@ impl TestState {
|
|||||||
assert_eq!(req.payload.candidate_hash, candidate_hash);
|
assert_eq!(req.payload.candidate_hash, candidate_hash);
|
||||||
|
|
||||||
let validator_index = req.payload.index.0 as usize;
|
let validator_index = req.payload.index.0 as usize;
|
||||||
let available_data = match who_has[validator_index] {
|
let available_data = match who_has(validator_index) {
|
||||||
Has::No => Ok(None),
|
Has::No => Ok(None),
|
||||||
Has::Yes => Ok(Some(self.chunks[validator_index].clone().into())),
|
Has::Yes => Ok(Some(self.chunks[validator_index].clone().into())),
|
||||||
Has::Timeout => {
|
Has::NetworkError(e) => Err(e),
|
||||||
Err(sc_network::RequestFailure::Network(
|
|
||||||
sc_network::OutboundFailure::Timeout
|
|
||||||
))
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
let _ = req.pending_response.send(
|
let _ = req.pending_response.send(
|
||||||
@@ -263,7 +255,7 @@ impl TestState {
|
|||||||
&self,
|
&self,
|
||||||
candidate_hash: CandidateHash,
|
candidate_hash: CandidateHash,
|
||||||
virtual_overseer: &mut VirtualOverseer,
|
virtual_overseer: &mut VirtualOverseer,
|
||||||
who_has: &[Has],
|
who_has: impl Fn(usize) -> Has,
|
||||||
) {
|
) {
|
||||||
for _ in 0..self.validators.len() {
|
for _ in 0..self.validators.len() {
|
||||||
// Receive a request for a chunk.
|
// Receive a request for a chunk.
|
||||||
@@ -286,27 +278,21 @@ impl TestState {
|
|||||||
.position(|a| Recipient::Authority(a.clone()) == req.peer)
|
.position(|a| Recipient::Authority(a.clone()) == req.peer)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
let available_data = match who_has[validator_index] {
|
let available_data = match who_has(validator_index) {
|
||||||
Has::No => Ok(None),
|
Has::No => Ok(None),
|
||||||
Has::Yes => Ok(Some(self.available_data.clone())),
|
Has::Yes => Ok(Some(self.available_data.clone())),
|
||||||
Has::Timeout => {
|
Has::NetworkError(e) => Err(e),
|
||||||
Err(sc_network::RequestFailure::Network(
|
|
||||||
sc_network::OutboundFailure::Timeout
|
|
||||||
))
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
let done = available_data.as_ref().ok().map_or(false, |x| x.is_some());
|
||||||
|
|
||||||
let _ = req.pending_response.send(
|
let _ = req.pending_response.send(
|
||||||
available_data.map(|r|
|
available_data.map(|r|
|
||||||
req_res::v1::AvailableDataFetchingResponse::from(r).encode()
|
req_res::v1::AvailableDataFetchingResponse::from(r).encode()
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
|
|
||||||
match who_has[validator_index].clone() {
|
if done { break }
|
||||||
Has::Yes => break, // done
|
|
||||||
Has::No => {}
|
|
||||||
Has::Timeout => {}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
@@ -448,7 +434,7 @@ fn availability_is_recovered_from_chunks_if_no_group_provided() {
|
|||||||
candidate_hash,
|
candidate_hash,
|
||||||
&mut virtual_overseer,
|
&mut virtual_overseer,
|
||||||
test_state.threshold(),
|
test_state.threshold(),
|
||||||
&test_state.all_have(),
|
|_| Has::Yes,
|
||||||
).await;
|
).await;
|
||||||
|
|
||||||
// Recovered data should match the original one.
|
// Recovered data should match the original one.
|
||||||
@@ -477,7 +463,7 @@ fn availability_is_recovered_from_chunks_if_no_group_provided() {
|
|||||||
new_candidate.hash(),
|
new_candidate.hash(),
|
||||||
&mut virtual_overseer,
|
&mut virtual_overseer,
|
||||||
test_state.impossibility_threshold(),
|
test_state.impossibility_threshold(),
|
||||||
&test_state.all_dont_have(),
|
|_| Has::No,
|
||||||
).await;
|
).await;
|
||||||
|
|
||||||
// A request times out with `Unavailable` error.
|
// A request times out with `Unavailable` error.
|
||||||
@@ -524,7 +510,7 @@ fn availability_is_recovered_from_chunks_even_if_backing_group_supplied_if_chunk
|
|||||||
candidate_hash,
|
candidate_hash,
|
||||||
&mut virtual_overseer,
|
&mut virtual_overseer,
|
||||||
test_state.threshold(),
|
test_state.threshold(),
|
||||||
&test_state.all_have(),
|
|_| Has::Yes,
|
||||||
).await;
|
).await;
|
||||||
|
|
||||||
// Recovered data should match the original one.
|
// Recovered data should match the original one.
|
||||||
@@ -553,7 +539,7 @@ fn availability_is_recovered_from_chunks_even_if_backing_group_supplied_if_chunk
|
|||||||
new_candidate.hash(),
|
new_candidate.hash(),
|
||||||
&mut virtual_overseer,
|
&mut virtual_overseer,
|
||||||
test_state.impossibility_threshold(),
|
test_state.impossibility_threshold(),
|
||||||
&test_state.all_dont_have(),
|
|_| Has::No,
|
||||||
).await;
|
).await;
|
||||||
|
|
||||||
// A request times out with `Unavailable` error.
|
// A request times out with `Unavailable` error.
|
||||||
@@ -607,7 +593,7 @@ fn bad_merkle_path_leads_to_recovery_error() {
|
|||||||
candidate_hash,
|
candidate_hash,
|
||||||
&mut virtual_overseer,
|
&mut virtual_overseer,
|
||||||
test_state.impossibility_threshold(),
|
test_state.impossibility_threshold(),
|
||||||
&test_state.all_have(),
|
|_| Has::Yes,
|
||||||
).await;
|
).await;
|
||||||
|
|
||||||
// A request times out with `Unavailable` error.
|
// A request times out with `Unavailable` error.
|
||||||
@@ -656,14 +642,11 @@ fn wrong_chunk_index_leads_to_recovery_error() {
|
|||||||
test_state.chunks[3] = test_state.chunks[0].clone();
|
test_state.chunks[3] = test_state.chunks[0].clone();
|
||||||
test_state.chunks[4] = test_state.chunks[0].clone();
|
test_state.chunks[4] = test_state.chunks[0].clone();
|
||||||
|
|
||||||
let mut have = test_state.all_dont_have();
|
|
||||||
have[0] = Has::No;
|
|
||||||
|
|
||||||
test_state.test_chunk_requests(
|
test_state.test_chunk_requests(
|
||||||
candidate_hash,
|
candidate_hash,
|
||||||
&mut virtual_overseer,
|
&mut virtual_overseer,
|
||||||
test_state.impossibility_threshold(),
|
test_state.impossibility_threshold(),
|
||||||
&have,
|
|_| Has::No,
|
||||||
).await;
|
).await;
|
||||||
|
|
||||||
// A request times out with `Unavailable` error as there are no good peers.
|
// A request times out with `Unavailable` error as there are no good peers.
|
||||||
@@ -726,7 +709,7 @@ fn invalid_erasure_coding_leads_to_invalid_error() {
|
|||||||
candidate_hash,
|
candidate_hash,
|
||||||
&mut virtual_overseer,
|
&mut virtual_overseer,
|
||||||
test_state.threshold(),
|
test_state.threshold(),
|
||||||
&test_state.all_have(),
|
|_| Has::Yes,
|
||||||
).await;
|
).await;
|
||||||
|
|
||||||
// f+1 'valid' chunks can't produce correct data.
|
// f+1 'valid' chunks can't produce correct data.
|
||||||
@@ -769,13 +752,15 @@ fn fast_path_backing_group_recovers() {
|
|||||||
|
|
||||||
let candidate_hash = test_state.candidate.hash();
|
let candidate_hash = test_state.candidate.hash();
|
||||||
|
|
||||||
let mut who_has = test_state.all_dont_have();
|
let who_has = |i| match i {
|
||||||
who_has[3] = Has::Yes;
|
3 => Has::Yes,
|
||||||
|
_ => Has::No,
|
||||||
|
};
|
||||||
|
|
||||||
test_state.test_full_data_requests(
|
test_state.test_full_data_requests(
|
||||||
candidate_hash,
|
candidate_hash,
|
||||||
&mut virtual_overseer,
|
&mut virtual_overseer,
|
||||||
&who_has,
|
who_has,
|
||||||
).await;
|
).await;
|
||||||
|
|
||||||
// Recovered data should match the original one.
|
// Recovered data should match the original one.
|
||||||
@@ -819,24 +804,124 @@ fn no_answers_in_fast_path_causes_chunk_requests() {
|
|||||||
let candidate_hash = test_state.candidate.hash();
|
let candidate_hash = test_state.candidate.hash();
|
||||||
|
|
||||||
// mix of timeout and no.
|
// mix of timeout and no.
|
||||||
let mut who_has = test_state.all_timeout();
|
let who_has = |i| match i {
|
||||||
who_has[0] = Has::No;
|
0 | 3 => Has::No,
|
||||||
who_has[3] = Has::No;
|
_ => Has::timeout(),
|
||||||
|
};
|
||||||
test_state.test_full_data_requests(
|
test_state.test_full_data_requests(
|
||||||
candidate_hash,
|
candidate_hash,
|
||||||
&mut virtual_overseer,
|
&mut virtual_overseer,
|
||||||
&who_has,
|
who_has,
|
||||||
).await;
|
).await;
|
||||||
|
|
||||||
test_state.test_chunk_requests(
|
test_state.test_chunk_requests(
|
||||||
candidate_hash,
|
candidate_hash,
|
||||||
&mut virtual_overseer,
|
&mut virtual_overseer,
|
||||||
test_state.threshold(),
|
test_state.threshold(),
|
||||||
&test_state.all_have(),
|
|_| Has::Yes,
|
||||||
).await;
|
).await;
|
||||||
|
|
||||||
// Recovered data should match the original one.
|
// Recovered data should match the original one.
|
||||||
assert_eq!(rx.await.unwrap().unwrap(), test_state.available_data);
|
assert_eq!(rx.await.unwrap().unwrap(), test_state.available_data);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn task_canceled_when_receivers_dropped() {
|
||||||
|
let test_state = TestState::default();
|
||||||
|
|
||||||
|
test_harness_chunks_only(|test_harness| async move {
|
||||||
|
let TestHarness { mut virtual_overseer } = test_harness;
|
||||||
|
|
||||||
|
overseer_signal(
|
||||||
|
&mut virtual_overseer,
|
||||||
|
OverseerSignal::ActiveLeaves(ActiveLeavesUpdate {
|
||||||
|
activated: smallvec![ActivatedLeaf {
|
||||||
|
hash: test_state.current.clone(),
|
||||||
|
number: 1,
|
||||||
|
span: Arc::new(jaeger::Span::Disabled),
|
||||||
|
}],
|
||||||
|
deactivated: smallvec![],
|
||||||
|
}),
|
||||||
|
).await;
|
||||||
|
|
||||||
|
let (tx, _) = oneshot::channel();
|
||||||
|
|
||||||
|
overseer_send(
|
||||||
|
&mut virtual_overseer,
|
||||||
|
AvailabilityRecoveryMessage::RecoverAvailableData(
|
||||||
|
test_state.candidate.clone(),
|
||||||
|
test_state.session_index,
|
||||||
|
None,
|
||||||
|
tx,
|
||||||
|
)
|
||||||
|
).await;
|
||||||
|
|
||||||
|
test_state.test_runtime_api(&mut virtual_overseer).await;
|
||||||
|
|
||||||
|
for _ in 0..test_state.validators.len() {
|
||||||
|
match virtual_overseer.recv().timeout(TIMEOUT).await {
|
||||||
|
None => return,
|
||||||
|
Some(_) => continue,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
panic!("task requested all validators without concluding")
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn chunks_retry_until_all_nodes_respond() {
|
||||||
|
let test_state = TestState::default();
|
||||||
|
|
||||||
|
test_harness_chunks_only(|test_harness| async move {
|
||||||
|
let TestHarness { mut virtual_overseer } = test_harness;
|
||||||
|
|
||||||
|
overseer_signal(
|
||||||
|
&mut virtual_overseer,
|
||||||
|
OverseerSignal::ActiveLeaves(ActiveLeavesUpdate {
|
||||||
|
activated: smallvec![ActivatedLeaf {
|
||||||
|
hash: test_state.current.clone(),
|
||||||
|
number: 1,
|
||||||
|
span: Arc::new(jaeger::Span::Disabled),
|
||||||
|
}],
|
||||||
|
deactivated: smallvec![],
|
||||||
|
}),
|
||||||
|
).await;
|
||||||
|
|
||||||
|
let (tx, rx) = oneshot::channel();
|
||||||
|
|
||||||
|
overseer_send(
|
||||||
|
&mut virtual_overseer,
|
||||||
|
AvailabilityRecoveryMessage::RecoverAvailableData(
|
||||||
|
test_state.candidate.clone(),
|
||||||
|
test_state.session_index,
|
||||||
|
Some(GroupIndex(0)),
|
||||||
|
tx,
|
||||||
|
)
|
||||||
|
).await;
|
||||||
|
|
||||||
|
test_state.test_runtime_api(&mut virtual_overseer).await;
|
||||||
|
|
||||||
|
let candidate_hash = test_state.candidate.hash();
|
||||||
|
|
||||||
|
test_state.test_chunk_requests(
|
||||||
|
candidate_hash,
|
||||||
|
&mut virtual_overseer,
|
||||||
|
test_state.validators.len(),
|
||||||
|
|_| Has::timeout(),
|
||||||
|
).await;
|
||||||
|
|
||||||
|
// we get to go another round!
|
||||||
|
|
||||||
|
test_state.test_chunk_requests(
|
||||||
|
candidate_hash,
|
||||||
|
&mut virtual_overseer,
|
||||||
|
test_state.impossibility_threshold(),
|
||||||
|
|_| Has::No,
|
||||||
|
).await;
|
||||||
|
|
||||||
|
// Recovery should fail with `Unavailable`: after the retry round, validators answered that they have no chunk.
|
||||||
|
assert_eq!(rx.await.unwrap().unwrap_err(), RecoveryError::Unavailable);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|||||||
+21
-28
@@ -25,31 +25,27 @@ We hold a state which tracks the current recovery interactions we have live, as
|
|||||||
|
|
||||||
```rust
|
```rust
|
||||||
struct State {
|
struct State {
|
||||||
/// Each interaction is implemented as its own async task, and these handles are for communicating with them.
|
/// Each interaction is implemented as its own remote async task, and these handles are remote
|
||||||
interactions: Map<CandidateHash, InteractionHandle>,
|
/// for it.
|
||||||
|
interactions: FuturesUnordered<InteractionHandle>,
|
||||||
|
/// A multiplexer over receivers from live interactions.
|
||||||
|
interaction_receivers: FuturesUnordered<ResponseReceiver<Concluded>>,
|
||||||
/// A recent block hash for which state should be available.
|
/// A recent block hash for which state should be available.
|
||||||
live_block_hash: Hash,
|
live_block_hash: Hash,
|
||||||
|
|
||||||
/// interaction communication. This is cloned and given to interactions that are spun up.
|
|
||||||
from_interaction_tx: Sender<FromInteraction>,
|
|
||||||
/// receiver for messages from interactions.
|
|
||||||
from_interaction_rx: Receiver<FromInteraction>,
|
|
||||||
|
|
||||||
// An LRU cache of recently recovered data.
|
// An LRU cache of recently recovered data.
|
||||||
availability_lru: LruCache<CandidateHash, Result<AvailableData, RecoveryError>>,
|
availability_lru: LruCache<CandidateHash, Result<AvailableData, RecoveryError>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// This is a future, which concludes either when a response is received from the interaction,
|
||||||
|
/// or all the `awaiting` channels have closed.
|
||||||
struct InteractionHandle {
|
struct InteractionHandle {
|
||||||
|
candidate_hash: CandidateHash,
|
||||||
|
interaction_response: RemoteHandle<Concluded>,
|
||||||
awaiting: Vec<ResponseChannel<Result<AvailableData, RecoveryError>>>,
|
awaiting: Vec<ResponseChannel<Result<AvailableData, RecoveryError>>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
struct Unavailable;
|
struct Unavailable;
|
||||||
enum FromInteraction {
|
struct Concluded(CandidateHash, Result<AvailableData, RecoveryError>);
|
||||||
// An interaction concluded.
|
|
||||||
Concluded(CandidateHash, Result<AvailableData, RecoveryError>),
|
|
||||||
// Send a request on the network.
|
|
||||||
NetworkRequest(Requests),
|
|
||||||
}
|
|
||||||
|
|
||||||
struct InteractionParams {
|
struct InteractionParams {
|
||||||
validator_authority_keys: Vec<AuthorityId>,
|
validator_authority_keys: Vec<AuthorityId>,
|
||||||
@@ -71,12 +67,12 @@ enum InteractionPhase {
|
|||||||
// request the chunk from them.
|
// request the chunk from them.
|
||||||
shuffling: Vec<ValidatorIndex>,
|
shuffling: Vec<ValidatorIndex>,
|
||||||
received_chunks: Map<ValidatorIndex, ErasureChunk>,
|
received_chunks: Map<ValidatorIndex, ErasureChunk>,
|
||||||
requesting_chunks: FuturesUnordered<Receiver<DataResponse<ErasureChunk>>>,
|
requesting_chunks: FuturesUnordered<Receiver<ErasureChunkRequestResponse>>,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
struct Interaction {
|
struct Interaction {
|
||||||
to_state: Sender<FromInteraction>,
|
to_subsystems: SubsystemSender,
|
||||||
params: InteractionParams,
|
params: InteractionParams,
|
||||||
phase: InteractionPhase,
|
phase: InteractionPhase,
|
||||||
}
|
}
|
||||||
@@ -104,10 +100,6 @@ On `Conclude`, shut down the subsystem.
|
|||||||
1. Load the entry from the `interactions` map. It should always exist, if not for logic errors. Send the result to each member of `awaiting`.
|
1. Load the entry from the `interactions` map. It should always exist, if not for logic errors. Send the result to each member of `awaiting`.
|
||||||
1. Add the entry to the availability_lru.
|
1. Add the entry to the availability_lru.
|
||||||
|
|
||||||
#### `FromInteraction::NetworkRequest(requests)`
|
|
||||||
|
|
||||||
1. Forward with `NetworkBridgeMessage::SendRequests`.
|
|
||||||
|
|
||||||
### Interaction logic
|
### Interaction logic
|
||||||
|
|
||||||
#### `launch_interaction(session_index, session_info, candidate_receipt, candidate_hash, Option<backing_group_index>)`
|
#### `launch_interaction(session_index, session_info, candidate_receipt, candidate_hash, Option<backing_group_index>)`
|
||||||
@@ -115,13 +107,13 @@ On `Conclude`, shut down the subsystem.
|
|||||||
1. Compute the threshold from the session info. It should be `f + 1`, where `n = 3f + k`, where `k in {1, 2, 3}`, and `n` is the number of validators.
|
1. Compute the threshold from the session info. It should be `f + 1`, where `n = 3f + k`, where `k in {1, 2, 3}`, and `n` is the number of validators.
|
||||||
1. Set the various fields of `InteractionParams` based on the validator lists in `session_info` and information about the candidate.
|
1. Set the various fields of `InteractionParams` based on the validator lists in `session_info` and information about the candidate.
|
||||||
1. If the `backing_group_index` is `Some`, start in the `RequestFromBackers` phase with a shuffling of the backing group validator indices and a `None` requesting value.
|
1. If the `backing_group_index` is `Some`, start in the `RequestFromBackers` phase with a shuffling of the backing group validator indices and a `None` requesting value.
|
||||||
1. Otherwise, start in the `RequestChunks` phase with `received_chunks` and `requesting_chunks` both empty.
|
1. Otherwise, start in the `RequestChunks` phase with `received_chunks`, `requesting_chunks`, and `next_shuffling` all empty.
|
||||||
1. Set the `to_state` sender to be equal to a clone of `state.from_interaction_tx`.
|
1. Set the `to_subsystems` sender to be equal to a clone of the `SubsystemContext`'s sender.
|
||||||
1. Initialize `received_chunks` to an empty set, as well as `requesting_chunks`.
|
1. Initialize `received_chunks` to an empty set, as well as `requesting_chunks`.
|
||||||
|
|
||||||
Launch the interaction as a background task running `interaction_loop(interaction)`.
|
Launch the interaction as a background task running `interaction_loop(interaction)`.
|
||||||
|
|
||||||
#### `interaction_loop(interaction)`
|
#### `interaction_loop(interaction) -> Result<AvailableData, RecoveryError>`
|
||||||
|
|
||||||
```rust
|
```rust
|
||||||
// How many parallel requests to have going at once.
|
// How many parallel requests to have going at once.
|
||||||
@@ -135,13 +127,14 @@ Loop:
|
|||||||
* If the backer is `Some`, issue a `FromInteraction::NetworkRequest` with a network request for the `AvailableData` and wait for the response.
|
* If the backer is `Some`, issue a `FromInteraction::NetworkRequest` with a network request for the `AvailableData` and wait for the response.
|
||||||
* If it concludes with a `None` result, return to beginning.
|
* If it concludes with a `None` result, return to beginning.
|
||||||
* If it concludes with available data, attempt a re-encoding.
|
* If it concludes with available data, attempt a re-encoding.
|
||||||
* If it has the correct erasure-root, break and issue a `Concluded(Ok(available_data))`.
|
* If it has the correct erasure-root, break and issue an `Ok(available_data)`.
|
||||||
* If it has an incorrect erasure-root, issue a `FromInteraction::ReportPeer` message and return to beginning.
|
* If it has an incorrect erasure-root, issue a `FromInteraction::ReportPeer` message and return to beginning.
|
||||||
* If the backer is `None`, set the phase to `InteractionPhase::RequestChunks` with a random shuffling of validators and empty `received_chunks` and `requesting_chunks`.
|
* If the backer is `None`, set the phase to `InteractionPhase::RequestChunks` with a random shuffling of validators and empty `next_shuffling`, `received_chunks`, and `requesting_chunks`.
|
||||||
|
|
||||||
* If the phase is `InteractionPhase::RequestChunks`:
|
* If the phase is `InteractionPhase::RequestChunks`:
|
||||||
* Poll for new updates from `requesting_chunks`. Check merkle proofs of any received chunks, and any failures should lead to issuance of a `FromInteraction::ReportPeer` message.
|
* If `received_chunks + requesting_chunks + shuffling` lengths are less than the threshold, break and return `Err(Unavailable)`.
|
||||||
* If `received_chunks` has more than `threshold` entries, attempt to recover the data. If that fails, or a re-encoding produces an incorrect erasure-root, break and issue a `Concluded(RecoveryError::Invalid)`. If correct, break and issue `Concluded(Ok(available_data))`.
|
* Poll for new updates from `requesting_chunks`. Check merkle proofs of any received chunks. If the request simply fails due to network issues, push onto the back of `shuffling` to be retried.
|
||||||
|
* If `received_chunks` has more than `threshold` entries, attempt to recover the data. If that fails, or a re-encoding produces an incorrect erasure-root, break and issue an `Err(RecoveryError::Invalid)`. If correct, break and issue `Ok(available_data)`.
|
||||||
* While there are fewer than `N_PARALLEL` entries in `requesting_chunks`,
|
* While there are fewer than `N_PARALLEL` entries in `requesting_chunks`,
|
||||||
* Pop the next item from `shuffling`. If it's empty and `requesting_chunks` is empty, break and set the phase to `Concluded(None)`.
|
* Pop the next item from `shuffling`. If it's empty and `requesting_chunks` is empty, return `Err(RecoveryError::Unavailable)`.
|
||||||
* Issue a `FromInteraction::NetworkRequest` and wait for the response in `requesting_chunks`.
|
* Issue a `FromInteraction::NetworkRequest` and wait for the response in `requesting_chunks`.
|
||||||
|
|||||||
Reference in New Issue
Block a user