feat: initialize Kurdistan SDK - independent fork of Polkadot SDK
This commit is contained in:
@@ -0,0 +1,67 @@
|
||||
[package]
|
||||
name = "pezkuwi-availability-recovery"
|
||||
description = "The Availability Recovery subsystem. Handles requests for recovering the availability data of included candidates."
|
||||
version = "7.0.0"
|
||||
authors.workspace = true
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
homepage.workspace = true
|
||||
repository.workspace = true
|
||||
|
||||
[lints]
|
||||
workspace = true
|
||||
|
||||
[[bench]]
|
||||
name = "availability-recovery-regression-bench"
|
||||
path = "benches/availability-recovery-regression-bench.rs"
|
||||
harness = false
|
||||
required-features = ["subsystem-benchmarks"]
|
||||
|
||||
[dependencies]
|
||||
async-trait = { workspace = true }
|
||||
fatality = { workspace = true }
|
||||
futures = { workspace = true }
|
||||
gum = { workspace = true, default-features = true }
|
||||
rand = { workspace = true, default-features = true }
|
||||
schnellru = { workspace = true }
|
||||
thiserror = { workspace = true }
|
||||
tokio = { workspace = true, default-features = true }
|
||||
|
||||
codec = { features = ["derive"], workspace = true }
|
||||
pezkuwi-erasure-coding = { workspace = true, default-features = true }
|
||||
pezkuwi-node-network-protocol = { workspace = true, default-features = true }
|
||||
pezkuwi-node-primitives = { workspace = true, default-features = true }
|
||||
pezkuwi-node-subsystem = { workspace = true, default-features = true }
|
||||
pezkuwi-node-subsystem-util = { workspace = true, default-features = true }
|
||||
pezkuwi-primitives = { workspace = true, default-features = true }
|
||||
sc-network = { workspace = true, default-features = true }
|
||||
|
||||
[dev-dependencies]
|
||||
assert_matches = { workspace = true }
|
||||
futures-timer = { workspace = true }
|
||||
rstest = { workspace = true }
|
||||
|
||||
sp-core = { workspace = true, default-features = true }
|
||||
sp-keyring = { workspace = true, default-features = true }
|
||||
sp-tracing = { workspace = true, default-features = true }
|
||||
|
||||
pezkuwi-node-subsystem-test-helpers = { workspace = true }
|
||||
pezkuwi-primitives-test-helpers = { workspace = true }
|
||||
pezkuwi-subsystem-bench = { workspace = true }
|
||||
|
||||
[features]
|
||||
subsystem-benchmarks = []
|
||||
runtime-benchmarks = [
|
||||
"gum/runtime-benchmarks",
|
||||
"pezkuwi-erasure-coding/runtime-benchmarks",
|
||||
"pezkuwi-node-network-protocol/runtime-benchmarks",
|
||||
"pezkuwi-node-primitives/runtime-benchmarks",
|
||||
"pezkuwi-node-subsystem-test-helpers/runtime-benchmarks",
|
||||
"pezkuwi-node-subsystem-util/runtime-benchmarks",
|
||||
"pezkuwi-node-subsystem/runtime-benchmarks",
|
||||
"pezkuwi-primitives-test-helpers/runtime-benchmarks",
|
||||
"pezkuwi-primitives/runtime-benchmarks",
|
||||
"pezkuwi-subsystem-bench/runtime-benchmarks",
|
||||
"sc-network/runtime-benchmarks",
|
||||
"sp-keyring/runtime-benchmarks",
|
||||
]
|
||||
+81
@@ -0,0 +1,81 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! availability-read regression tests
|
||||
//!
|
||||
//! Availability read benchmark based on Kusama parameters and scale.
|
||||
//!
|
||||
//! Subsystems involved:
|
||||
//! - availability-recovery
|
||||
|
||||
use pezkuwi_subsystem_bench::{
|
||||
availability::{
|
||||
benchmark_availability_read, prepare_test, DataAvailabilityReadOptions, Strategy,
|
||||
TestDataAvailability, TestState,
|
||||
},
|
||||
configuration::TestConfiguration,
|
||||
usage::BenchmarkUsage,
|
||||
utils::save_to_file,
|
||||
};
|
||||
use std::io::Write;
|
||||
|
||||
const BENCH_COUNT: usize = 10;
|
||||
|
||||
fn main() -> Result<(), String> {
|
||||
let mut messages = vec![];
|
||||
|
||||
let options = DataAvailabilityReadOptions { strategy: Strategy::FullFromBackers };
|
||||
let mut config = TestConfiguration::default();
|
||||
config.num_blocks = 3;
|
||||
config.generate_pov_sizes();
|
||||
|
||||
let state = TestState::new(&config);
|
||||
|
||||
println!("Benchmarking...");
|
||||
let usages: Vec<BenchmarkUsage> = (0..BENCH_COUNT)
|
||||
.map(|n| {
|
||||
print!("\r[{}{}]", "#".repeat(n), "_".repeat(BENCH_COUNT - n));
|
||||
std::io::stdout().flush().unwrap();
|
||||
let (mut env, _cfgs) =
|
||||
prepare_test(&state, TestDataAvailability::Read(options.clone()), false);
|
||||
env.runtime().block_on(benchmark_availability_read(&mut env, &state))
|
||||
})
|
||||
.collect();
|
||||
println!("\rDone!{}", " ".repeat(BENCH_COUNT));
|
||||
|
||||
let average_usage = BenchmarkUsage::average(&usages);
|
||||
save_to_file(
|
||||
"charts/availability-recovery-regression-bench.json",
|
||||
average_usage.to_chart_json().map_err(|e| e.to_string())?,
|
||||
)
|
||||
.map_err(|e| e.to_string())?;
|
||||
println!("{}", average_usage);
|
||||
|
||||
// We expect no variance for received and sent
|
||||
// but use 0.001 because we operate with floats
|
||||
messages.extend(average_usage.check_network_usage(&[
|
||||
("Received from peers", 307203.0000, 0.001),
|
||||
("Sent to peers", 1.6667, 0.001),
|
||||
]));
|
||||
messages.extend(average_usage.check_cpu_usage(&[("availability-recovery", 11.2758, 0.1)]));
|
||||
|
||||
if messages.is_empty() {
|
||||
Ok(())
|
||||
} else {
|
||||
eprintln!("{}", messages.join("\n"));
|
||||
Err("Regressions found".to_string())
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,91 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! The `Error` and `Result` types used by the subsystem.
|
||||
|
||||
use crate::LOG_TARGET;
|
||||
use fatality::{fatality, Nested};
|
||||
use futures::channel::oneshot;
|
||||
use pezkuwi_node_network_protocol::request_response::incoming;
|
||||
use pezkuwi_node_subsystem::{RecoveryError, SubsystemError};
|
||||
use pezkuwi_primitives::Hash;
|
||||
|
||||
/// Error type used by the Availability Recovery subsystem.
|
||||
#[fatality(splitable)]
|
||||
pub enum Error {
|
||||
#[fatal]
|
||||
#[error("Spawning subsystem task failed: {0}")]
|
||||
SpawnTask(#[source] SubsystemError),
|
||||
|
||||
/// Receiving subsystem message from overseer failed.
|
||||
#[fatal]
|
||||
#[error("Receiving message from overseer failed: {0}")]
|
||||
SubsystemReceive(#[source] SubsystemError),
|
||||
|
||||
#[fatal]
|
||||
#[error("failed to query full data from store")]
|
||||
CanceledQueryFullData(#[source] oneshot::Canceled),
|
||||
|
||||
#[error("`SessionInfo` is `None` at {0}")]
|
||||
SessionInfoUnavailable(Hash),
|
||||
|
||||
#[error("failed to query node features from runtime")]
|
||||
RequestNodeFeatures(#[source] pezkuwi_node_subsystem_util::runtime::Error),
|
||||
|
||||
#[error("failed to send response")]
|
||||
CanceledResponseSender,
|
||||
|
||||
#[error(transparent)]
|
||||
Runtime(#[from] pezkuwi_node_subsystem::errors::RuntimeApiError),
|
||||
|
||||
#[error(transparent)]
|
||||
Erasure(#[from] pezkuwi_erasure_coding::Error),
|
||||
|
||||
#[fatal]
|
||||
#[error(transparent)]
|
||||
Oneshot(#[from] oneshot::Canceled),
|
||||
|
||||
#[fatal(forward)]
|
||||
#[error("Error during recovery: {0}")]
|
||||
Recovery(#[from] RecoveryError),
|
||||
|
||||
#[fatal(forward)]
|
||||
#[error("Retrieving next incoming request failed: {0}")]
|
||||
IncomingRequest(#[from] incoming::Error),
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
|
||||
/// Utility for eating top level errors and log them.
|
||||
///
|
||||
/// We basically always want to try and continue on error, unless the error is fatal for the entire
|
||||
/// subsystem.
|
||||
pub fn log_error(result: Result<()>) -> std::result::Result<(), FatalError> {
|
||||
match result.into_nested()? {
|
||||
Ok(()) => Ok(()),
|
||||
Err(jfyi) => {
|
||||
jfyi.log();
|
||||
Ok(())
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
impl JfyiError {
|
||||
/// Log a `JfyiError`.
|
||||
pub fn log(self) {
|
||||
gum::warn!(target: LOG_TARGET, "{}", self);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,236 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! FuturesUndead: A `FuturesUnordered` with support for semi canceled futures. Those undead
|
||||
//! futures will still get polled, but will not count towards length. So length will only count
|
||||
//! futures, which are still considered live.
|
||||
//!
|
||||
//! Use case: If futures take longer than we would like them too, we may be able to request the data
|
||||
//! from somewhere else as well. We don't really want to cancel the old future, because maybe it
|
||||
//! was almost done, thus we would have wasted time with our impatience. By simply making them
|
||||
//! not count towards length, we can make sure to have enough "live" requests ongoing, while at the
|
||||
//! same time taking advantage of some maybe "late" response from the undead.
|
||||
|
||||
use std::{
|
||||
pin::Pin,
|
||||
task::{Context, Poll},
|
||||
time::Duration,
|
||||
};
|
||||
|
||||
use futures::{future::BoxFuture, stream::FuturesUnordered, Future, Stream, StreamExt};
|
||||
use pezkuwi_node_subsystem_util::TimeoutExt;
|
||||
|
||||
/// FuturesUndead - `FuturesUnordered` with semi canceled (undead) futures.
|
||||
///
|
||||
/// Limitations: Keeps track of undead futures by means of a counter, which is limited to 64
|
||||
/// bits, so after `1.8*10^19` pushed futures, this implementation will panic.
|
||||
pub struct FuturesUndead<Output> {
|
||||
/// Actual `FuturesUnordered`.
|
||||
inner: FuturesUnordered<Undead<Output>>,
|
||||
/// Next sequence number to assign to the next future that gets pushed.
|
||||
next_sequence: SequenceNumber,
|
||||
/// Sequence number of first future considered live.
|
||||
first_live: Option<SequenceNumber>,
|
||||
/// How many undead are there right now.
|
||||
undead: usize,
|
||||
}
|
||||
|
||||
/// Number assigned to every pushed future; comparing it with `first_live` tells live and
/// undead futures apart.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd)]
struct SequenceNumber(usize);
|
||||
|
||||
struct Undead<Output> {
|
||||
inner: BoxFuture<'static, Output>,
|
||||
our_sequence: SequenceNumber,
|
||||
}
|
||||
|
||||
impl<Output> FuturesUndead<Output> {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
inner: FuturesUnordered::new(),
|
||||
next_sequence: SequenceNumber(0),
|
||||
first_live: None,
|
||||
undead: 0,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn push(&mut self, f: BoxFuture<'static, Output>) {
|
||||
self.inner.push(Undead { inner: f, our_sequence: self.next_sequence });
|
||||
self.next_sequence.inc();
|
||||
}
|
||||
|
||||
/// Make all contained futures undead.
|
||||
///
|
||||
/// They will no longer be counted on a call to `len`.
|
||||
pub fn soft_cancel(&mut self) {
|
||||
self.undead = self.inner.len();
|
||||
self.first_live = Some(self.next_sequence);
|
||||
}
|
||||
|
||||
/// Number of contained futures minus undead.
|
||||
pub fn len(&self) -> usize {
|
||||
self.inner.len() - self.undead
|
||||
}
|
||||
|
||||
/// Total number of futures, including undead.
|
||||
pub fn total_len(&self) -> usize {
|
||||
self.inner.len()
|
||||
}
|
||||
|
||||
/// Wait for next future to return with timeout.
|
||||
///
|
||||
/// When timeout passes, return `None` and make all currently contained futures undead.
|
||||
pub async fn next_with_timeout(&mut self, timeout: Duration) -> Option<Output> {
|
||||
match self.next().timeout(timeout).await {
|
||||
// Timeout:
|
||||
None => {
|
||||
self.soft_cancel();
|
||||
None
|
||||
},
|
||||
Some(inner) => inner,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<Output> Stream for FuturesUndead<Output> {
|
||||
type Item = Output;
|
||||
|
||||
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
|
||||
match self.inner.poll_next_unpin(cx) {
|
||||
Poll::Pending => Poll::Pending,
|
||||
Poll::Ready(None) => Poll::Ready(None),
|
||||
Poll::Ready(Some((sequence, v))) => {
|
||||
// Cleanup in case we became completely empty:
|
||||
if self.inner.len() == 0 {
|
||||
*self = Self::new();
|
||||
return Poll::Ready(Some(v));
|
||||
}
|
||||
|
||||
let first_live = match self.first_live {
|
||||
None => return Poll::Ready(Some(v)),
|
||||
Some(first_live) => first_live,
|
||||
};
|
||||
// An undead came back:
|
||||
if sequence < first_live {
|
||||
self.undead = self.undead.saturating_sub(1);
|
||||
}
|
||||
Poll::Ready(Some(v))
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl SequenceNumber {
|
||||
pub fn inc(&mut self) {
|
||||
self.0 = self.0.checked_add(1).expect(
|
||||
"We don't expect an `UndeadFuture` to live long enough for 2^64 entries ever getting inserted."
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Future for Undead<T> {
|
||||
type Output = (SequenceNumber, T);
|
||||
fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
|
||||
match self.inner.as_mut().poll(cx) {
|
||||
Poll::Pending => Poll::Pending,
|
||||
Poll::Ready(v) => Poll::Ready((self.our_sequence, v)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use futures::{executor, pending, FutureExt};

    /// A boxed future that returns `Pending` once before resolving to `value`.
    fn pending_once(value: i32) -> BoxFuture<'static, i32> {
        async move {
            pending!();
            value
        }
        .boxed()
    }

    /// Soft canceling the only contained future drops `len` to zero.
    #[test]
    fn cancel_sets_len_to_zero() {
        let mut undead = FuturesUndead::new();
        undead.push(async {}.boxed());
        assert_eq!(undead.len(), 1);
        undead.soft_cancel();
        assert_eq!(undead.len(), 0);
    }

    /// An undead future finishing must not reduce the count of live futures.
    #[test]
    fn finished_undead_does_not_change_len() {
        executor::block_on(async {
            let mut undead = FuturesUndead::new();
            undead.push(async { 1_i32 }.boxed());
            undead.push(async { 2_i32 }.boxed());
            assert_eq!(undead.len(), 2);
            undead.soft_cancel();
            assert_eq!(undead.len(), 0);

            undead.push(pending_once(0));
            undead.next().await;
            assert_eq!(undead.len(), 1);

            undead.push(async { 9_i32 }.boxed());
            undead.soft_cancel();
            assert_eq!(undead.len(), 0);
        });
    }

    /// Live futures finishing reduce `len` one by one while undead ones are still pending.
    #[test]
    fn len_stays_correct_when_live_future_ends() {
        executor::block_on(async {
            let mut undead = FuturesUndead::new();
            undead.push(pending_once(1));
            undead.push(pending_once(2));
            assert_eq!(undead.len(), 2);
            undead.soft_cancel();
            assert_eq!(undead.len(), 0);

            undead.push(async { 0_i32 }.boxed());
            undead.push(async { 1_i32 }.boxed());
            undead.next().await;
            assert_eq!(undead.len(), 1);
            undead.next().await;
            assert_eq!(undead.len(), 0);

            undead.push(async { 9_i32 }.boxed());
            assert_eq!(undead.len(), 1);
        });
    }

    /// Once every future has finished, the bookkeeping is reset to its initial state.
    #[test]
    fn cleanup_works() {
        executor::block_on(async {
            let mut undead = FuturesUndead::new();
            undead.push(async { 1_i32 }.boxed());
            undead.soft_cancel();
            undead.push(async { 2_i32 }.boxed());
            undead.next().await;
            undead.next().await;
            assert_eq!(undead.first_live, None);
        });
    }
}
|
||||
@@ -0,0 +1,925 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! Availability Recovery Subsystem of Pezkuwi.
|
||||
|
||||
#![warn(missing_docs)]
|
||||
|
||||
use std::{
|
||||
collections::{BTreeMap, VecDeque},
|
||||
iter::Iterator,
|
||||
num::NonZeroUsize,
|
||||
pin::Pin,
|
||||
};
|
||||
|
||||
use futures::{
|
||||
channel::oneshot,
|
||||
future::{Future, FutureExt, RemoteHandle},
|
||||
pin_mut,
|
||||
prelude::*,
|
||||
sink::SinkExt,
|
||||
stream::{FuturesUnordered, StreamExt},
|
||||
task::{Context, Poll},
|
||||
};
|
||||
use sc_network::ProtocolName;
|
||||
use schnellru::{ByLength, LruMap};
|
||||
use task::{
|
||||
FetchChunks, FetchChunksParams, FetchFull, FetchFullParams, FetchSystematicChunks,
|
||||
FetchSystematicChunksParams,
|
||||
};
|
||||
|
||||
use pezkuwi_erasure_coding::{
|
||||
branches, obtain_chunks_v1, recovery_threshold, systematic_recovery_threshold,
|
||||
Error as ErasureEncodingError,
|
||||
};
|
||||
use task::{RecoveryParams, RecoveryStrategy, RecoveryTask};
|
||||
|
||||
use error::{log_error, Error, FatalError, Result};
|
||||
use pezkuwi_node_network_protocol::{
|
||||
request_response::{
|
||||
v1 as request_v1, v2 as request_v2, IncomingRequestReceiver, IsRequest, ReqProtocolNames,
|
||||
},
|
||||
UnifiedReputationChange as Rep,
|
||||
};
|
||||
use pezkuwi_node_primitives::AvailableData;
|
||||
use pezkuwi_node_subsystem::{
|
||||
errors::RecoveryError,
|
||||
messages::{AvailabilityRecoveryMessage, AvailabilityStoreMessage},
|
||||
overseer, ActiveLeavesUpdate, FromOrchestra, OverseerSignal, SpawnedSubsystem,
|
||||
SubsystemContext, SubsystemError,
|
||||
};
|
||||
use pezkuwi_node_subsystem_util::{
|
||||
availability_chunks::availability_chunk_indices,
|
||||
runtime::{ExtendedSessionInfo, RuntimeInfo},
|
||||
};
|
||||
use pezkuwi_primitives::{
|
||||
node_features, BlockNumber, CandidateHash, CandidateReceiptV2 as CandidateReceipt, ChunkIndex,
|
||||
CoreIndex, GroupIndex, Hash, SessionIndex, ValidatorIndex,
|
||||
};
|
||||
|
||||
mod error;
|
||||
mod futures_undead;
|
||||
mod metrics;
|
||||
mod task;
|
||||
pub use metrics::Metrics;
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
||||
type RecoveryResult = std::result::Result<AvailableData, RecoveryError>;
|
||||
|
||||
const LOG_TARGET: &str = "teyrchain::availability-recovery";
|
||||
|
||||
// Size of the LRU cache where we keep recovered data.
|
||||
const LRU_SIZE: u32 = 16;
|
||||
|
||||
const COST_INVALID_REQUEST: Rep = Rep::CostMajor("Peer sent unparsable request");
|
||||
|
||||
/// PoV size limit in bytes for which prefer fetching from backers. (conservative, Pezkuwi for now)
|
||||
pub(crate) const CONSERVATIVE_FETCH_CHUNKS_THRESHOLD: usize = 1 * 1024 * 1024;
|
||||
/// PoV size limit in bytes for which prefer fetching from backers. (Kusama and all testnets)
|
||||
pub const FETCH_CHUNKS_THRESHOLD: usize = 4 * 1024 * 1024;
|
||||
|
||||
/// The strategy we use to recover the PoV.
#[derive(Clone, PartialEq)]
pub enum RecoveryStrategyKind {
    /// Try the backing group first if the PoV size is lower than the given value, then fall
    /// back to validator chunks.
    BackersFirstIfSizeLower(usize),
    /// Try the backing group first if the PoV size is lower than the given value, then fall
    /// back to systematic chunks. Regular chunk recovery as a last resort.
    BackersFirstIfSizeLowerThenSystematicChunks(usize),

    // The following variants are only helpful for integration tests.
    //
    /// Always try the backing group first, then fall back to validator chunks.
    ///
    /// Only helpful for integration tests.
    #[allow(dead_code)]
    BackersFirstAlways,
    /// Always recover using validator chunks.
    #[allow(dead_code)]
    ChunksAlways,
    /// First try the backing group. Then systematic chunks.
    #[allow(dead_code)]
    BackersThenSystematicChunks,
    /// Always recover using systematic chunks, fall back to regular chunks.
    #[allow(dead_code)]
    SystematicChunks,
}
|
||||
|
||||
/// The Availability Recovery Subsystem.
|
||||
pub struct AvailabilityRecoverySubsystem {
|
||||
/// PoV recovery strategy to use.
|
||||
recovery_strategy_kind: RecoveryStrategyKind,
|
||||
// If this is true, do not request data from the availability store.
|
||||
/// This is the useful for nodes where the
|
||||
/// availability-store subsystem is not expected to run,
|
||||
/// such as collators.
|
||||
bypass_availability_store: bool,
|
||||
/// Receiver for available data requests.
|
||||
req_receiver: IncomingRequestReceiver<request_v1::AvailableDataFetchingRequest>,
|
||||
/// Metrics for this subsystem.
|
||||
metrics: Metrics,
|
||||
/// The type of check to perform after available data was recovered.
|
||||
post_recovery_check: PostRecoveryCheck,
|
||||
/// Full protocol name for ChunkFetchingV1.
|
||||
req_v1_protocol_name: ProtocolName,
|
||||
/// Full protocol name for ChunkFetchingV2.
|
||||
req_v2_protocol_name: ProtocolName,
|
||||
}
|
||||
|
||||
/// The type of check to perform after available data was recovered.
#[derive(Debug, Clone, PartialEq)]
enum PostRecoveryCheck {
    /// Reencode the data and check erasure root. For validators.
    Reencode,
    /// Only check the pov hash. For collators only.
    PovHash,
}
|
||||
|
||||
/// Expensive erasure coding computations that we want to run on a blocking thread.
|
||||
enum ErasureTask {
|
||||
/// Reconstructs `AvailableData` from chunks given `n_validators`.
|
||||
Reconstruct(
|
||||
usize,
|
||||
BTreeMap<ChunkIndex, Vec<u8>>,
|
||||
oneshot::Sender<std::result::Result<AvailableData, ErasureEncodingError>>,
|
||||
),
|
||||
/// Re-encode `AvailableData` into erasure chunks in order to verify the provided root hash of
|
||||
/// the Merkle tree.
|
||||
Reencode(usize, Hash, AvailableData, oneshot::Sender<Option<AvailableData>>),
|
||||
}
|
||||
|
||||
/// Re-encode the data into erasure chunks in order to verify
|
||||
/// the root hash of the provided Merkle tree, which is built
|
||||
/// on-top of the encoded chunks.
|
||||
///
|
||||
/// This (expensive) check is necessary, as otherwise we can't be sure that some chunks won't have
|
||||
/// been tampered with by the backers, which would result in some validators considering the data
|
||||
/// valid and some invalid as having fetched different set of chunks. The checking of the Merkle
|
||||
/// proof for individual chunks only gives us guarantees, that we have fetched a chunk belonging to
|
||||
/// a set the backers have committed to.
|
||||
///
|
||||
/// NOTE: It is fine to do this check with already decoded data, because if the decoding failed for
|
||||
/// some validators, we can be sure that chunks have been tampered with (by the backers) or the
|
||||
/// data was invalid to begin with. In the former case, validators fetching valid chunks will see
|
||||
/// invalid data as well, because the root won't match. In the latter case the situation is the
|
||||
/// same for anyone anyways.
|
||||
fn reconstructed_data_matches_root(
|
||||
n_validators: usize,
|
||||
expected_root: &Hash,
|
||||
data: &AvailableData,
|
||||
metrics: &Metrics,
|
||||
) -> bool {
|
||||
let _timer = metrics.time_reencode_chunks();
|
||||
|
||||
let chunks = match obtain_chunks_v1(n_validators, data) {
|
||||
Ok(chunks) => chunks,
|
||||
Err(e) => {
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
err = ?e,
|
||||
"Failed to obtain chunks",
|
||||
);
|
||||
return false;
|
||||
},
|
||||
};
|
||||
|
||||
let branches = branches(&chunks);
|
||||
|
||||
branches.root() == *expected_root
|
||||
}
|
||||
|
||||
/// Accumulate all awaiting sides for some particular `AvailableData`.
|
||||
struct RecoveryHandle {
|
||||
candidate_hash: CandidateHash,
|
||||
remote: RemoteHandle<RecoveryResult>,
|
||||
awaiting: Vec<oneshot::Sender<RecoveryResult>>,
|
||||
}
|
||||
|
||||
impl Future for RecoveryHandle {
|
||||
type Output = Option<(CandidateHash, RecoveryResult)>;
|
||||
|
||||
fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
|
||||
let mut indices_to_remove = Vec::new();
|
||||
for (i, awaiting) in self.awaiting.iter_mut().enumerate().rev() {
|
||||
if let Poll::Ready(()) = awaiting.poll_canceled(cx) {
|
||||
indices_to_remove.push(i);
|
||||
}
|
||||
}
|
||||
|
||||
// these are reverse order, so remove is fine.
|
||||
for index in indices_to_remove {
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
candidate_hash = ?self.candidate_hash,
|
||||
"Receiver for available data dropped.",
|
||||
);
|
||||
|
||||
self.awaiting.swap_remove(index);
|
||||
}
|
||||
|
||||
if self.awaiting.is_empty() {
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
candidate_hash = ?self.candidate_hash,
|
||||
"All receivers for available data dropped.",
|
||||
);
|
||||
|
||||
return Poll::Ready(None);
|
||||
}
|
||||
|
||||
let remote = &mut self.remote;
|
||||
futures::pin_mut!(remote);
|
||||
let result = futures::ready!(remote.poll(cx));
|
||||
|
||||
for awaiting in self.awaiting.drain(..) {
|
||||
let _ = awaiting.send(result.clone());
|
||||
}
|
||||
|
||||
Poll::Ready(Some((self.candidate_hash, result)))
|
||||
}
|
||||
}
|
||||
|
||||
/// Cached result of an availability recovery operation.
|
||||
#[derive(Debug, Clone)]
|
||||
enum CachedRecovery {
|
||||
/// Availability was successfully retrieved before.
|
||||
Valid(AvailableData),
|
||||
/// Availability was successfully retrieved before, but was found to be invalid.
|
||||
Invalid,
|
||||
}
|
||||
|
||||
impl CachedRecovery {
|
||||
/// Convert back to `Result` to deliver responses.
|
||||
fn into_result(self) -> RecoveryResult {
|
||||
match self {
|
||||
Self::Valid(d) => Ok(d),
|
||||
Self::Invalid => Err(RecoveryError::Invalid),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<RecoveryResult> for CachedRecovery {
|
||||
type Error = ();
|
||||
fn try_from(o: RecoveryResult) -> std::result::Result<CachedRecovery, Self::Error> {
|
||||
match o {
|
||||
Ok(d) => Ok(Self::Valid(d)),
|
||||
Err(RecoveryError::Invalid) => Ok(Self::Invalid),
|
||||
// We don't want to cache unavailable state, as that state might change, so if
|
||||
// requested again we want to try again!
|
||||
Err(RecoveryError::Unavailable) => Err(()),
|
||||
Err(RecoveryError::ChannelClosed) => Err(()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct State {
|
||||
/// Each recovery task is implemented as its own async task,
|
||||
/// and these handles are for communicating with them.
|
||||
ongoing_recoveries: FuturesUnordered<RecoveryHandle>,
|
||||
|
||||
/// A recent block hash for which state should be available.
|
||||
live_block: (BlockNumber, Hash),
|
||||
|
||||
/// An LRU cache of recently recovered data.
|
||||
availability_lru: LruMap<CandidateHash, CachedRecovery>,
|
||||
|
||||
/// Cached runtime info.
|
||||
runtime_info: RuntimeInfo,
|
||||
}
|
||||
|
||||
impl Default for State {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
ongoing_recoveries: FuturesUnordered::new(),
|
||||
live_block: (0, Hash::default()),
|
||||
availability_lru: LruMap::new(ByLength::new(LRU_SIZE)),
|
||||
runtime_info: RuntimeInfo::new(None),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[overseer::subsystem(AvailabilityRecovery, error=SubsystemError, prefix=self::overseer)]
|
||||
impl<Context> AvailabilityRecoverySubsystem {
|
||||
fn start(self, ctx: Context) -> SpawnedSubsystem {
|
||||
let future = self
|
||||
.run(ctx)
|
||||
.map_err(|e| SubsystemError::with_origin("availability-recovery", e))
|
||||
.boxed();
|
||||
SpawnedSubsystem { name: "availability-recovery-subsystem", future }
|
||||
}
|
||||
}
|
||||
|
||||
/// Handles a signal from the overseer.
|
||||
/// Returns true if subsystem receives a deadly signal.
|
||||
async fn handle_signal(state: &mut State, signal: OverseerSignal) -> bool {
|
||||
match signal {
|
||||
OverseerSignal::Conclude => true,
|
||||
OverseerSignal::ActiveLeaves(ActiveLeavesUpdate { activated, .. }) => {
|
||||
// if activated is non-empty, set state.live_block to the highest block in `activated`
|
||||
if let Some(activated) = activated {
|
||||
if activated.number > state.live_block.0 {
|
||||
state.live_block = (activated.number, activated.hash)
|
||||
}
|
||||
}
|
||||
|
||||
false
|
||||
},
|
||||
OverseerSignal::BlockFinalized(_, _) => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Machinery around launching recovery tasks into the background.
|
||||
#[overseer::contextbounds(AvailabilityRecovery, prefix = self::overseer)]
|
||||
async fn launch_recovery_task<Context>(
|
||||
state: &mut State,
|
||||
ctx: &mut Context,
|
||||
response_sender: oneshot::Sender<RecoveryResult>,
|
||||
recovery_strategies: VecDeque<Box<dyn RecoveryStrategy<<Context as SubsystemContext>::Sender>>>,
|
||||
params: RecoveryParams,
|
||||
) -> Result<()> {
|
||||
let candidate_hash = params.candidate_hash;
|
||||
let recovery_task = RecoveryTask::new(ctx.sender().clone(), params, recovery_strategies);
|
||||
|
||||
let (remote, remote_handle) = recovery_task.run().remote_handle();
|
||||
|
||||
state.ongoing_recoveries.push(RecoveryHandle {
|
||||
candidate_hash,
|
||||
remote: remote_handle,
|
||||
awaiting: vec![response_sender],
|
||||
});
|
||||
|
||||
ctx.spawn("recovery-task", Box::pin(remote))
|
||||
.map_err(|err| Error::SpawnTask(err))
|
||||
}
|
||||
|
||||
/// Handles an availability recovery request.
|
||||
#[overseer::contextbounds(AvailabilityRecovery, prefix = self::overseer)]
|
||||
async fn handle_recover<Context>(
|
||||
state: &mut State,
|
||||
ctx: &mut Context,
|
||||
receipt: CandidateReceipt,
|
||||
session_index: SessionIndex,
|
||||
backing_group: Option<GroupIndex>,
|
||||
response_sender: oneshot::Sender<RecoveryResult>,
|
||||
metrics: &Metrics,
|
||||
erasure_task_tx: futures::channel::mpsc::Sender<ErasureTask>,
|
||||
recovery_strategy_kind: RecoveryStrategyKind,
|
||||
bypass_availability_store: bool,
|
||||
post_recovery_check: PostRecoveryCheck,
|
||||
maybe_core_index: Option<CoreIndex>,
|
||||
req_v1_protocol_name: ProtocolName,
|
||||
req_v2_protocol_name: ProtocolName,
|
||||
) -> Result<()> {
|
||||
let candidate_hash = receipt.hash();
|
||||
|
||||
if let Some(result) =
|
||||
state.availability_lru.get(&candidate_hash).cloned().map(|v| v.into_result())
|
||||
{
|
||||
return response_sender.send(result).map_err(|_| Error::CanceledResponseSender);
|
||||
}
|
||||
|
||||
if let Some(i) =
|
||||
state.ongoing_recoveries.iter_mut().find(|i| i.candidate_hash == candidate_hash)
|
||||
{
|
||||
i.awaiting.push(response_sender);
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let session_info_res = state
|
||||
.runtime_info
|
||||
.get_session_info_by_index(ctx.sender(), state.live_block.1, session_index)
|
||||
.await;
|
||||
|
||||
match session_info_res {
|
||||
Ok(ExtendedSessionInfo { session_info, node_features, .. }) => {
|
||||
let mut backer_group = None;
|
||||
let n_validators = session_info.validators.len();
|
||||
let systematic_threshold = systematic_recovery_threshold(n_validators)?;
|
||||
let mut recovery_strategies: VecDeque<
|
||||
Box<dyn RecoveryStrategy<<Context as SubsystemContext>::Sender>>,
|
||||
> = VecDeque::with_capacity(3);
|
||||
|
||||
if let Some(backing_group) = backing_group {
|
||||
if let Some(backing_validators) = session_info.validator_groups.get(backing_group) {
|
||||
let mut small_pov_size = true;
|
||||
|
||||
match recovery_strategy_kind {
|
||||
RecoveryStrategyKind::BackersFirstIfSizeLower(fetch_chunks_threshold) |
|
||||
RecoveryStrategyKind::BackersFirstIfSizeLowerThenSystematicChunks(
|
||||
fetch_chunks_threshold,
|
||||
) => {
|
||||
// Get our own chunk size to get an estimate of the PoV size.
|
||||
let chunk_size: Result<Option<usize>> =
|
||||
query_chunk_size(ctx, candidate_hash).await;
|
||||
if let Ok(Some(chunk_size)) = chunk_size {
|
||||
let pov_size_estimate = chunk_size * systematic_threshold;
|
||||
small_pov_size = pov_size_estimate < fetch_chunks_threshold;
|
||||
|
||||
if small_pov_size {
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
?candidate_hash,
|
||||
pov_size_estimate,
|
||||
fetch_chunks_threshold,
|
||||
"Prefer fetch from backing group",
|
||||
);
|
||||
}
|
||||
} else {
|
||||
// we have a POV limit but were not able to query the chunk size, so
|
||||
// don't use the backing group.
|
||||
small_pov_size = false;
|
||||
}
|
||||
},
|
||||
_ => {},
|
||||
};
|
||||
|
||||
match (&recovery_strategy_kind, small_pov_size) {
|
||||
(RecoveryStrategyKind::BackersFirstAlways, _) |
|
||||
(RecoveryStrategyKind::BackersFirstIfSizeLower(_), true) |
|
||||
(
|
||||
RecoveryStrategyKind::BackersFirstIfSizeLowerThenSystematicChunks(_),
|
||||
true,
|
||||
) |
|
||||
(RecoveryStrategyKind::BackersThenSystematicChunks, _) =>
|
||||
recovery_strategies.push_back(Box::new(FetchFull::new(
|
||||
FetchFullParams { validators: backing_validators.to_vec() },
|
||||
))),
|
||||
_ => {},
|
||||
};
|
||||
|
||||
backer_group = Some(backing_validators);
|
||||
}
|
||||
}
|
||||
|
||||
let chunk_mapping_enabled = if let Some(&true) = node_features
|
||||
.get(usize::from(node_features::FeatureIndex::AvailabilityChunkMapping as u8))
|
||||
.as_deref()
|
||||
{
|
||||
true
|
||||
} else {
|
||||
false
|
||||
};
|
||||
|
||||
// We can only attempt systematic recovery if we received the core index of the
|
||||
// candidate and chunk mapping is enabled.
|
||||
if let Some(core_index) = maybe_core_index {
|
||||
if matches!(
|
||||
recovery_strategy_kind,
|
||||
RecoveryStrategyKind::BackersThenSystematicChunks |
|
||||
RecoveryStrategyKind::SystematicChunks |
|
||||
RecoveryStrategyKind::BackersFirstIfSizeLowerThenSystematicChunks(_)
|
||||
) && chunk_mapping_enabled
|
||||
{
|
||||
let chunk_indices =
|
||||
availability_chunk_indices(node_features, n_validators, core_index)?;
|
||||
|
||||
let chunk_indices: VecDeque<_> = chunk_indices
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(v_index, c_index)| {
|
||||
(
|
||||
*c_index,
|
||||
ValidatorIndex(
|
||||
u32::try_from(v_index)
|
||||
.expect("validator count should not exceed u32"),
|
||||
),
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Only get the validators according to the threshold.
|
||||
let validators = chunk_indices
|
||||
.clone()
|
||||
.into_iter()
|
||||
.filter(|(c_index, _)| {
|
||||
usize::try_from(c_index.0)
|
||||
.expect("usize is at least u32 bytes on all modern targets.") <
|
||||
systematic_threshold
|
||||
})
|
||||
.collect();
|
||||
|
||||
recovery_strategies.push_back(Box::new(FetchSystematicChunks::new(
|
||||
FetchSystematicChunksParams {
|
||||
validators,
|
||||
backers: backer_group.map(|v| v.to_vec()).unwrap_or_else(|| vec![]),
|
||||
},
|
||||
)));
|
||||
}
|
||||
}
|
||||
|
||||
recovery_strategies.push_back(Box::new(FetchChunks::new(FetchChunksParams {
|
||||
n_validators: session_info.validators.len(),
|
||||
})));
|
||||
|
||||
let session_info = session_info.clone();
|
||||
|
||||
let n_validators = session_info.validators.len();
|
||||
|
||||
launch_recovery_task(
|
||||
state,
|
||||
ctx,
|
||||
response_sender,
|
||||
recovery_strategies,
|
||||
RecoveryParams {
|
||||
validator_authority_keys: session_info.discovery_keys.clone(),
|
||||
n_validators,
|
||||
threshold: recovery_threshold(n_validators)?,
|
||||
systematic_threshold,
|
||||
candidate_hash,
|
||||
erasure_root: receipt.descriptor.erasure_root(),
|
||||
metrics: metrics.clone(),
|
||||
bypass_availability_store,
|
||||
post_recovery_check,
|
||||
pov_hash: receipt.descriptor.pov_hash(),
|
||||
req_v1_protocol_name,
|
||||
req_v2_protocol_name,
|
||||
chunk_mapping_enabled,
|
||||
erasure_task_tx,
|
||||
},
|
||||
)
|
||||
.await
|
||||
},
|
||||
Err(_) => {
|
||||
response_sender
|
||||
.send(Err(RecoveryError::Unavailable))
|
||||
.map_err(|_| Error::CanceledResponseSender)?;
|
||||
|
||||
Err(Error::SessionInfoUnavailable(state.live_block.1))
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Queries the full `AvailableData` from av-store.
|
||||
#[overseer::contextbounds(AvailabilityRecovery, prefix = self::overseer)]
|
||||
async fn query_full_data<Context>(
|
||||
ctx: &mut Context,
|
||||
candidate_hash: CandidateHash,
|
||||
) -> Result<Option<AvailableData>> {
|
||||
let (tx, rx) = oneshot::channel();
|
||||
ctx.send_message(AvailabilityStoreMessage::QueryAvailableData(candidate_hash, tx))
|
||||
.await;
|
||||
|
||||
rx.await.map_err(Error::CanceledQueryFullData)
|
||||
}
|
||||
|
||||
/// Queries a chunk from av-store.
|
||||
#[overseer::contextbounds(AvailabilityRecovery, prefix = self::overseer)]
|
||||
async fn query_chunk_size<Context>(
|
||||
ctx: &mut Context,
|
||||
candidate_hash: CandidateHash,
|
||||
) -> Result<Option<usize>> {
|
||||
let (tx, rx) = oneshot::channel();
|
||||
ctx.send_message(AvailabilityStoreMessage::QueryChunkSize(candidate_hash, tx))
|
||||
.await;
|
||||
|
||||
rx.await.map_err(Error::CanceledQueryFullData)
|
||||
}
|
||||
|
||||
#[overseer::contextbounds(AvailabilityRecovery, prefix = self::overseer)]
|
||||
impl AvailabilityRecoverySubsystem {
|
||||
/// Create a new instance of `AvailabilityRecoverySubsystem` suitable for collator nodes,
|
||||
/// which never requests the `AvailabilityStoreSubsystem` subsystem and only checks the POV hash
|
||||
/// instead of reencoding the available data.
|
||||
pub fn for_collator(
|
||||
fetch_chunks_threshold: Option<usize>,
|
||||
req_receiver: IncomingRequestReceiver<request_v1::AvailableDataFetchingRequest>,
|
||||
req_protocol_names: &ReqProtocolNames,
|
||||
metrics: Metrics,
|
||||
) -> Self {
|
||||
Self {
|
||||
recovery_strategy_kind: RecoveryStrategyKind::BackersFirstIfSizeLower(
|
||||
fetch_chunks_threshold.unwrap_or(CONSERVATIVE_FETCH_CHUNKS_THRESHOLD),
|
||||
),
|
||||
bypass_availability_store: true,
|
||||
post_recovery_check: PostRecoveryCheck::PovHash,
|
||||
req_receiver,
|
||||
metrics,
|
||||
req_v1_protocol_name: req_protocol_names
|
||||
.get_name(request_v1::ChunkFetchingRequest::PROTOCOL),
|
||||
req_v2_protocol_name: req_protocol_names
|
||||
.get_name(request_v2::ChunkFetchingRequest::PROTOCOL),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create an optimised new instance of `AvailabilityRecoverySubsystem` suitable for validator
|
||||
/// nodes, which:
|
||||
/// - for small POVs (over the `fetch_chunks_threshold` or the
|
||||
/// `CONSERVATIVE_FETCH_CHUNKS_THRESHOLD`), it attempts full recovery from backers, if backing
|
||||
/// group supplied.
|
||||
/// - for large POVs, attempts systematic recovery, if core_index supplied and
|
||||
/// AvailabilityChunkMapping node feature is enabled.
|
||||
/// - as a last resort, attempt regular chunk recovery from all validators.
|
||||
pub fn for_validator(
|
||||
fetch_chunks_threshold: Option<usize>,
|
||||
req_receiver: IncomingRequestReceiver<request_v1::AvailableDataFetchingRequest>,
|
||||
req_protocol_names: &ReqProtocolNames,
|
||||
metrics: Metrics,
|
||||
) -> Self {
|
||||
Self {
|
||||
recovery_strategy_kind:
|
||||
RecoveryStrategyKind::BackersFirstIfSizeLowerThenSystematicChunks(
|
||||
fetch_chunks_threshold.unwrap_or(CONSERVATIVE_FETCH_CHUNKS_THRESHOLD),
|
||||
),
|
||||
bypass_availability_store: false,
|
||||
post_recovery_check: PostRecoveryCheck::Reencode,
|
||||
req_receiver,
|
||||
metrics,
|
||||
req_v1_protocol_name: req_protocol_names
|
||||
.get_name(request_v1::ChunkFetchingRequest::PROTOCOL),
|
||||
req_v2_protocol_name: req_protocol_names
|
||||
.get_name(request_v2::ChunkFetchingRequest::PROTOCOL),
|
||||
}
|
||||
}
|
||||
|
||||
/// Customise the recovery strategy kind
|
||||
/// Currently only useful for tests.
|
||||
#[cfg(any(test, feature = "subsystem-benchmarks"))]
|
||||
pub fn with_recovery_strategy_kind(
|
||||
req_receiver: IncomingRequestReceiver<request_v1::AvailableDataFetchingRequest>,
|
||||
req_protocol_names: &ReqProtocolNames,
|
||||
metrics: Metrics,
|
||||
recovery_strategy_kind: RecoveryStrategyKind,
|
||||
) -> Self {
|
||||
Self {
|
||||
recovery_strategy_kind,
|
||||
bypass_availability_store: false,
|
||||
post_recovery_check: PostRecoveryCheck::Reencode,
|
||||
req_receiver,
|
||||
metrics,
|
||||
req_v1_protocol_name: req_protocol_names
|
||||
.get_name(request_v1::ChunkFetchingRequest::PROTOCOL),
|
||||
req_v2_protocol_name: req_protocol_names
|
||||
.get_name(request_v2::ChunkFetchingRequest::PROTOCOL),
|
||||
}
|
||||
}
|
||||
|
||||
/// Starts the inner subsystem loop.
|
||||
pub async fn run<Context>(self, mut ctx: Context) -> std::result::Result<(), FatalError> {
|
||||
let mut state = State::default();
|
||||
let Self {
|
||||
mut req_receiver,
|
||||
metrics,
|
||||
recovery_strategy_kind,
|
||||
bypass_availability_store,
|
||||
post_recovery_check,
|
||||
req_v1_protocol_name,
|
||||
req_v2_protocol_name,
|
||||
} = self;
|
||||
|
||||
let (erasure_task_tx, erasure_task_rx) = futures::channel::mpsc::channel(16);
|
||||
let mut erasure_task_rx = erasure_task_rx.fuse();
|
||||
|
||||
// `ThreadPoolBuilder` spawns the tasks using `spawn_blocking`. For each worker there will
|
||||
// be a `mpsc` channel created. Each of these workers take the `Receiver` and poll it in an
|
||||
// infinite loop. All of the sender ends of the channel are sent as a vec which we then use
|
||||
// to create a `Cycle` iterator. We use this iterator to assign work in a round-robin
|
||||
// fashion to the workers in the pool.
|
||||
//
|
||||
// How work is dispatched to the pool from the recovery tasks:
|
||||
// - Once a recovery task finishes retrieving the availability data, it needs to reconstruct
|
||||
// from chunks and/or
|
||||
// re-encode the data which are heavy CPU computations.
|
||||
// To do so it sends an `ErasureTask` to the main loop via the `erasure_task` channel, and
|
||||
// waits for the results over a `oneshot` channel.
|
||||
// - In the subsystem main loop we poll the `erasure_task_rx` receiver.
|
||||
// - We forward the received `ErasureTask` to the `next()` sender yielded by the `Cycle`
|
||||
// iterator.
|
||||
// - Some worker thread handles it and sends the response over the `oneshot` channel.
|
||||
|
||||
// Create a thread pool with 2 workers.
|
||||
let mut to_pool = ThreadPoolBuilder::build(
|
||||
// Pool is guaranteed to have at least 1 worker thread.
|
||||
NonZeroUsize::new(2).expect("There are 2 threads; qed"),
|
||||
metrics.clone(),
|
||||
&mut ctx,
|
||||
)
|
||||
.into_iter()
|
||||
.cycle();
|
||||
|
||||
loop {
|
||||
let recv_req = req_receiver.recv(|| vec![COST_INVALID_REQUEST]).fuse();
|
||||
pin_mut!(recv_req);
|
||||
let res = futures::select! {
|
||||
erasure_task = erasure_task_rx.next() => {
|
||||
match erasure_task {
|
||||
Some(task) => {
|
||||
to_pool
|
||||
.next()
|
||||
.expect("Pool size is `NonZeroUsize`; qed")
|
||||
.send(task)
|
||||
.await
|
||||
.map_err(|_| RecoveryError::ChannelClosed)
|
||||
},
|
||||
None => {
|
||||
Err(RecoveryError::ChannelClosed)
|
||||
}
|
||||
}.map_err(Into::into)
|
||||
}
|
||||
signal = ctx.recv().fuse() => {
|
||||
match signal {
|
||||
Ok(signal) => {
|
||||
match signal {
|
||||
FromOrchestra::Signal(signal) => if handle_signal(
|
||||
&mut state,
|
||||
signal,
|
||||
).await {
|
||||
gum::debug!(target: LOG_TARGET, "subsystem concluded");
|
||||
return Ok(());
|
||||
} else {
|
||||
Ok(())
|
||||
},
|
||||
FromOrchestra::Communication {
|
||||
msg: AvailabilityRecoveryMessage::RecoverAvailableData(
|
||||
receipt,
|
||||
session_index,
|
||||
maybe_backing_group,
|
||||
maybe_core_index,
|
||||
response_sender,
|
||||
)
|
||||
} => handle_recover(
|
||||
&mut state,
|
||||
&mut ctx,
|
||||
receipt,
|
||||
session_index,
|
||||
maybe_backing_group,
|
||||
response_sender,
|
||||
&metrics,
|
||||
erasure_task_tx.clone(),
|
||||
recovery_strategy_kind.clone(),
|
||||
bypass_availability_store,
|
||||
post_recovery_check.clone(),
|
||||
maybe_core_index,
|
||||
req_v1_protocol_name.clone(),
|
||||
req_v2_protocol_name.clone(),
|
||||
).await
|
||||
}
|
||||
},
|
||||
Err(e) => Err(Error::SubsystemReceive(e))
|
||||
}
|
||||
}
|
||||
in_req = recv_req => {
|
||||
match in_req {
|
||||
Ok(req) => {
|
||||
if bypass_availability_store {
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
"Skipping request to availability-store.",
|
||||
);
|
||||
let _ = req.send_response(None.into());
|
||||
Ok(())
|
||||
} else {
|
||||
match query_full_data(&mut ctx, req.payload.candidate_hash).await {
|
||||
Ok(res) => {
|
||||
let _ = req.send_response(res.into());
|
||||
Ok(())
|
||||
}
|
||||
Err(e) => {
|
||||
let _ = req.send_response(None.into());
|
||||
Err(e)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => Err(Error::IncomingRequest(e))
|
||||
}
|
||||
}
|
||||
output = state.ongoing_recoveries.select_next_some() => {
|
||||
let mut res = Ok(());
|
||||
if let Some((candidate_hash, result)) = output {
|
||||
if let Err(ref e) = result {
|
||||
res = Err(Error::Recovery(e.clone()));
|
||||
}
|
||||
|
||||
if let Ok(recovery) = CachedRecovery::try_from(result) {
|
||||
state.availability_lru.insert(candidate_hash, recovery);
|
||||
}
|
||||
}
|
||||
|
||||
res
|
||||
}
|
||||
};
|
||||
|
||||
// Only bubble up fatal errors, but log all of them.
|
||||
if let Err(e) = res {
|
||||
log_error(Err(e))?;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Builder for a small pool of workers that run on `spawn_blocking` threads.
struct ThreadPoolBuilder;
|
||||
|
||||
/// Upper bound for the number of workers `ThreadPoolBuilder::build` will create.
///
/// Evaluated at compile time; a zero value would fail the build instead of panicking at
/// runtime.
const MAX_THREADS: NonZeroUsize = if let Some(limit) = NonZeroUsize::new(4) {
    limit
} else {
    panic!("MAX_THREADS must be non-zero")
};
|
||||
|
||||
impl ThreadPoolBuilder {
|
||||
// Creates a pool of `size` workers, where 1 <= `size` <= `MAX_THREADS`.
|
||||
//
|
||||
// Each worker is created by `spawn_blocking` and takes the receiver side of a channel
|
||||
// while all of the senders are returned to the caller. Each worker runs `erasure_task_thread`
|
||||
// that polls the `Receiver` for an `ErasureTask` which is expected to be CPU intensive. The
|
||||
// larger the input (more or larger chunks/availability data), the more CPU cycles will be
|
||||
// spent.
|
||||
//
|
||||
// For example, for 32KB PoVs, we'd expect re-encode to eat as much as 90ms and 500ms for
|
||||
// 2.5MiB.
|
||||
//
|
||||
// After executing such a task, the worker sends the response via a provided `oneshot` sender.
|
||||
//
|
||||
// The caller is responsible for routing work to the workers.
|
||||
#[overseer::contextbounds(AvailabilityRecovery, prefix = self::overseer)]
|
||||
pub fn build<Context>(
|
||||
size: NonZeroUsize,
|
||||
metrics: Metrics,
|
||||
ctx: &mut Context,
|
||||
) -> Vec<futures::channel::mpsc::Sender<ErasureTask>> {
|
||||
// At least 1 task, at most `MAX_THREADS.
|
||||
let size = std::cmp::min(size, MAX_THREADS);
|
||||
let mut senders = Vec::new();
|
||||
|
||||
for index in 0..size.into() {
|
||||
let (tx, rx) = futures::channel::mpsc::channel(8);
|
||||
senders.push(tx);
|
||||
|
||||
if let Err(e) = ctx
|
||||
.spawn_blocking("erasure-task", Box::pin(erasure_task_thread(metrics.clone(), rx)))
|
||||
{
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
err = ?e,
|
||||
index,
|
||||
"Failed to spawn a erasure task",
|
||||
);
|
||||
}
|
||||
}
|
||||
senders
|
||||
}
|
||||
}
|
||||
|
||||
// Handles CPU intensive operation on a dedicated blocking thread.
|
||||
async fn erasure_task_thread(
|
||||
metrics: Metrics,
|
||||
mut ingress: futures::channel::mpsc::Receiver<ErasureTask>,
|
||||
) {
|
||||
loop {
|
||||
match ingress.next().await {
|
||||
Some(ErasureTask::Reconstruct(n_validators, chunks, sender)) => {
|
||||
let _ = sender.send(pezkuwi_erasure_coding::reconstruct_v1(
|
||||
n_validators,
|
||||
chunks.iter().map(|(c_index, chunk)| {
|
||||
(
|
||||
&chunk[..],
|
||||
usize::try_from(c_index.0)
|
||||
.expect("usize is at least u32 bytes on all modern targets."),
|
||||
)
|
||||
}),
|
||||
));
|
||||
},
|
||||
Some(ErasureTask::Reencode(n_validators, root, available_data, sender)) => {
|
||||
let metrics = metrics.clone();
|
||||
|
||||
let maybe_data = if reconstructed_data_matches_root(
|
||||
n_validators,
|
||||
&root,
|
||||
&available_data,
|
||||
&metrics,
|
||||
) {
|
||||
Some(available_data)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let _ = sender.send(maybe_data);
|
||||
},
|
||||
None => {
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
"Erasure task channel closed. Node shutting down ?",
|
||||
);
|
||||
break;
|
||||
},
|
||||
}
|
||||
|
||||
// In benchmarks this is a very hot loop not yielding at all.
|
||||
// To update CPU metrics for the task we need to yield.
|
||||
#[cfg(feature = "subsystem-benchmarks")]
|
||||
tokio::task::yield_now().await;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,409 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use pezkuwi_node_subsystem::prometheus::HistogramVec;
|
||||
use pezkuwi_node_subsystem_util::metrics::{
|
||||
self,
|
||||
prometheus::{
|
||||
self, prometheus::HistogramTimer, Counter, CounterVec, Histogram, Opts, PrometheusError,
|
||||
Registry, U64,
|
||||
},
|
||||
};
|
||||
|
||||
/// Availability Distribution metrics.
|
||||
#[derive(Clone, Default)]
|
||||
pub struct Metrics(Option<MetricsInner>);
|
||||
|
||||
#[derive(Clone)]
|
||||
struct MetricsInner {
|
||||
/// Number of sent chunk requests.
|
||||
///
|
||||
/// Gets incremented on each sent chunk requests.
|
||||
///
|
||||
/// Split by chunk type:
|
||||
/// - `regular_chunks`
|
||||
/// - `systematic_chunks`
|
||||
chunk_requests_issued: CounterVec<U64>,
|
||||
|
||||
/// Total number of bytes recovered
|
||||
///
|
||||
/// Gets incremented on each successful recovery
|
||||
recovered_bytes_total: Counter<U64>,
|
||||
|
||||
/// A counter for finished chunk requests.
|
||||
///
|
||||
/// Split by the chunk type (`regular_chunks` or `systematic_chunks`)
|
||||
///
|
||||
/// Also split by result:
|
||||
/// - `no_such_chunk` ... peer did not have the requested chunk
|
||||
/// - `timeout` ... request timed out.
|
||||
/// - `error` ... Some networking issue except timeout
|
||||
/// - `invalid` ... Chunk was received, but not valid.
|
||||
/// - `success`
|
||||
chunk_requests_finished: CounterVec<U64>,
|
||||
|
||||
/// A counter for successful chunk requests, split by the network protocol version.
|
||||
chunk_request_protocols: CounterVec<U64>,
|
||||
|
||||
/// Number of sent available data requests.
|
||||
full_data_requests_issued: Counter<U64>,
|
||||
|
||||
/// Counter for finished available data requests.
|
||||
///
|
||||
/// Split by the result type:
|
||||
///
|
||||
/// - `no_such_data` ... peer did not have the requested data
|
||||
/// - `timeout` ... request timed out.
|
||||
/// - `error` ... Some networking issue except timeout
|
||||
/// - `invalid` ... data was received, but not valid.
|
||||
/// - `success`
|
||||
full_data_requests_finished: CounterVec<U64>,
|
||||
|
||||
/// The duration of request to response.
|
||||
///
|
||||
/// Split by chunk type (`regular_chunks` or `systematic_chunks`).
|
||||
time_chunk_request: HistogramVec,
|
||||
|
||||
/// The duration between the pure recovery and verification.
|
||||
///
|
||||
/// Split by recovery type (`regular_chunks`, `systematic_chunks` or `full_from_backers`).
|
||||
time_erasure_recovery: HistogramVec,
|
||||
|
||||
/// How much time it takes to reconstruct the available data from chunks.
|
||||
///
|
||||
/// Split by chunk type (`regular_chunks` or `systematic_chunks`), as the algorithms are
|
||||
/// different.
|
||||
time_erasure_reconstruct: HistogramVec,
|
||||
|
||||
/// How much time it takes to re-encode the data into erasure chunks in order to verify
|
||||
/// the root hash of the provided Merkle tree. See `reconstructed_data_matches_root`.
|
||||
time_reencode_chunks: Histogram,
|
||||
|
||||
/// Time of a full recovery, including erasure decoding or until we gave
|
||||
/// up.
|
||||
time_full_recovery: Histogram,
|
||||
|
||||
/// Number of full recoveries that have been finished one way or the other.
|
||||
///
|
||||
/// Split by recovery `strategy_type` (`full_from_backers, systematic_chunks, regular_chunks,
|
||||
/// all`). `all` is used for failed recoveries that tried all available strategies.
|
||||
/// Also split by `result` type.
|
||||
full_recoveries_finished: CounterVec<U64>,
|
||||
|
||||
/// Number of full recoveries that have been started on this subsystem.
|
||||
///
|
||||
/// Note: Those are only recoveries which could not get served locally already - so in other
|
||||
/// words: Only real recoveries.
|
||||
full_recoveries_started: Counter<U64>,
|
||||
}
|
||||
|
||||
impl Metrics {
|
||||
/// Create new dummy metrics, not reporting anything.
|
||||
pub fn new_dummy() -> Self {
|
||||
Metrics(None)
|
||||
}
|
||||
|
||||
/// Increment counter for chunk requests.
|
||||
pub fn on_chunk_request_issued(&self, chunk_type: &str) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.chunk_requests_issued.with_label_values(&[chunk_type]).inc()
|
||||
}
|
||||
}
|
||||
|
||||
/// Increment counter for full data requests.
|
||||
pub fn on_full_request_issued(&self) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.full_data_requests_issued.inc()
|
||||
}
|
||||
}
|
||||
|
||||
/// A chunk request timed out.
|
||||
pub fn on_chunk_request_timeout(&self, chunk_type: &str) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics
|
||||
.chunk_requests_finished
|
||||
.with_label_values(&[chunk_type, "timeout"])
|
||||
.inc()
|
||||
}
|
||||
}
|
||||
|
||||
/// A full data request timed out.
|
||||
pub fn on_full_request_timeout(&self) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.full_data_requests_finished.with_label_values(&["timeout"]).inc()
|
||||
}
|
||||
}
|
||||
|
||||
/// A chunk request failed because validator did not have its chunk.
|
||||
pub fn on_chunk_request_no_such_chunk(&self, chunk_type: &str) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics
|
||||
.chunk_requests_finished
|
||||
.with_label_values(&[chunk_type, "no_such_chunk"])
|
||||
.inc()
|
||||
}
|
||||
}
|
||||
|
||||
/// A full data request failed because the validator did not have it.
|
||||
pub fn on_full_request_no_such_data(&self) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.full_data_requests_finished.with_label_values(&["no_such_data"]).inc()
|
||||
}
|
||||
}
|
||||
|
||||
/// A chunk request failed for some non timeout related network error.
|
||||
pub fn on_chunk_request_error(&self, chunk_type: &str) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.chunk_requests_finished.with_label_values(&[chunk_type, "error"]).inc()
|
||||
}
|
||||
}
|
||||
|
||||
/// A full data request failed for some non timeout related network error.
|
||||
pub fn on_full_request_error(&self) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.full_data_requests_finished.with_label_values(&["error"]).inc()
|
||||
}
|
||||
}
|
||||
|
||||
/// A chunk request succeeded, but was not valid.
|
||||
pub fn on_chunk_request_invalid(&self, chunk_type: &str) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics
|
||||
.chunk_requests_finished
|
||||
.with_label_values(&[chunk_type, "invalid"])
|
||||
.inc()
|
||||
}
|
||||
}
|
||||
|
||||
/// A full data request succeeded, but was not valid.
|
||||
pub fn on_full_request_invalid(&self) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.full_data_requests_finished.with_label_values(&["invalid"]).inc()
|
||||
}
|
||||
}
|
||||
|
||||
/// A chunk request succeeded.
|
||||
pub fn on_chunk_request_succeeded(&self, chunk_type: &str) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics
|
||||
.chunk_requests_finished
|
||||
.with_label_values(&[chunk_type, "success"])
|
||||
.inc()
|
||||
}
|
||||
}
|
||||
|
||||
/// A chunk response was received on the v1 protocol.
|
||||
pub fn on_chunk_response_v1(&self) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.chunk_request_protocols.with_label_values(&["v1"]).inc()
|
||||
}
|
||||
}
|
||||
|
||||
/// A chunk response was received on the v2 protocol.
|
||||
pub fn on_chunk_response_v2(&self) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.chunk_request_protocols.with_label_values(&["v2"]).inc()
|
||||
}
|
||||
}
|
||||
|
||||
/// A full data request succeeded.
|
||||
pub fn on_full_request_succeeded(&self) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.full_data_requests_finished.with_label_values(&["success"]).inc()
|
||||
}
|
||||
}
|
||||
|
||||
/// Get a timer to time request/response duration.
|
||||
pub fn time_chunk_request(&self, chunk_type: &str) -> Option<HistogramTimer> {
|
||||
self.0.as_ref().map(|metrics| {
|
||||
metrics.time_chunk_request.with_label_values(&[chunk_type]).start_timer()
|
||||
})
|
||||
}
|
||||
|
||||
/// Get a timer to time erasure code recover.
|
||||
pub fn time_erasure_recovery(&self, chunk_type: &str) -> Option<HistogramTimer> {
|
||||
self.0.as_ref().map(|metrics| {
|
||||
metrics.time_erasure_recovery.with_label_values(&[chunk_type]).start_timer()
|
||||
})
|
||||
}
|
||||
|
||||
/// Get a timer for available data reconstruction.
|
||||
pub fn time_erasure_reconstruct(&self, chunk_type: &str) -> Option<HistogramTimer> {
|
||||
self.0.as_ref().map(|metrics| {
|
||||
metrics.time_erasure_reconstruct.with_label_values(&[chunk_type]).start_timer()
|
||||
})
|
||||
}
|
||||
|
||||
/// Get a timer to time chunk encoding.
|
||||
pub fn time_reencode_chunks(&self) -> Option<HistogramTimer> {
|
||||
self.0.as_ref().map(|metrics| metrics.time_reencode_chunks.start_timer())
|
||||
}
|
||||
|
||||
/// Get a timer to measure the time of the complete recovery process.
|
||||
pub fn time_full_recovery(&self) -> Option<HistogramTimer> {
|
||||
self.0.as_ref().map(|metrics| metrics.time_full_recovery.start_timer())
|
||||
}
|
||||
|
||||
/// A full recovery succeeded.
|
||||
pub fn on_recovery_succeeded(&self, strategy_type: &str, bytes: usize) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics
|
||||
.full_recoveries_finished
|
||||
.with_label_values(&["success", strategy_type])
|
||||
.inc();
|
||||
metrics.recovered_bytes_total.inc_by(bytes as u64)
|
||||
}
|
||||
}
|
||||
|
||||
/// A full recovery failed (data not available).
|
||||
pub fn on_recovery_failed(&self, strategy_type: &str) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics
|
||||
.full_recoveries_finished
|
||||
.with_label_values(&["failure", strategy_type])
|
||||
.inc()
|
||||
}
|
||||
}
|
||||
|
||||
/// A full recovery failed (data was recovered, but invalid).
|
||||
pub fn on_recovery_invalid(&self, strategy_type: &str) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics
|
||||
.full_recoveries_finished
|
||||
.with_label_values(&["invalid", strategy_type])
|
||||
.inc()
|
||||
}
|
||||
}
|
||||
|
||||
/// A recover was started.
|
||||
pub fn on_recovery_started(&self) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.full_recoveries_started.inc()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl metrics::Metrics for Metrics {
|
||||
fn try_register(registry: &Registry) -> Result<Self, PrometheusError> {
|
||||
let metrics = MetricsInner {
|
||||
chunk_requests_issued: prometheus::register(
|
||||
CounterVec::new(
|
||||
Opts::new("pezkuwi_teyrchain_availability_recovery_chunk_requests_issued",
|
||||
"Total number of issued chunk requests."),
|
||||
&["type"]
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
full_data_requests_issued: prometheus::register(
|
||||
Counter::new(
|
||||
"pezkuwi_teyrchain_availability_recovery_full_data_requests_issued",
|
||||
"Total number of issued full data requests.",
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
recovered_bytes_total: prometheus::register(
|
||||
Counter::new(
|
||||
"pezkuwi_teyrchain_availability_recovery_bytes_total",
|
||||
"Total number of bytes recovered",
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
chunk_requests_finished: prometheus::register(
|
||||
CounterVec::new(
|
||||
Opts::new(
|
||||
"pezkuwi_teyrchain_availability_recovery_chunk_requests_finished",
|
||||
"Total number of chunk requests finished.",
|
||||
),
|
||||
&["result", "type"],
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
chunk_request_protocols: prometheus::register(
|
||||
CounterVec::new(
|
||||
Opts::new(
|
||||
"pezkuwi_teyrchain_availability_recovery_chunk_request_protocols",
|
||||
"Total number of successful chunk requests, mapped by the protocol version (v1 or v2).",
|
||||
),
|
||||
&["protocol"],
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
full_data_requests_finished: prometheus::register(
|
||||
CounterVec::new(
|
||||
Opts::new(
|
||||
"pezkuwi_teyrchain_availability_recovery_full_data_requests_finished",
|
||||
"Total number of full data requests finished.",
|
||||
),
|
||||
&["result"],
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
time_chunk_request: prometheus::register(
|
||||
prometheus::HistogramVec::new(prometheus::HistogramOpts::new(
|
||||
"pezkuwi_teyrchain_availability_recovery_time_chunk_request",
|
||||
"Time spent waiting for a response to a chunk request",
|
||||
), &["type"])?,
|
||||
registry,
|
||||
)?,
|
||||
time_erasure_recovery: prometheus::register(
|
||||
prometheus::HistogramVec::new(prometheus::HistogramOpts::new(
|
||||
"pezkuwi_teyrchain_availability_recovery_time_erasure_recovery",
|
||||
"Time spent to recover the erasure code and verify the merkle root by re-encoding as erasure chunks",
|
||||
), &["type"])?,
|
||||
registry,
|
||||
)?,
|
||||
time_erasure_reconstruct: prometheus::register(
|
||||
prometheus::HistogramVec::new(prometheus::HistogramOpts::new(
|
||||
"pezkuwi_teyrchain_availability_recovery_time_erasure_reconstruct",
|
||||
"Time spent to reconstruct the data from chunks",
|
||||
), &["type"])?,
|
||||
registry,
|
||||
)?,
|
||||
time_reencode_chunks: prometheus::register(
|
||||
prometheus::Histogram::with_opts(prometheus::HistogramOpts::new(
|
||||
"pezkuwi_teyrchain_availability_reencode_chunks",
|
||||
"Time spent re-encoding the data as erasure chunks",
|
||||
))?,
|
||||
registry,
|
||||
)?,
|
||||
time_full_recovery: prometheus::register(
|
||||
prometheus::Histogram::with_opts(prometheus::HistogramOpts::new(
|
||||
"pezkuwi_teyrchain_availability_recovery_time_total",
|
||||
"Time a full recovery process took, either until failure or successful erasure decoding.",
|
||||
))?,
|
||||
registry,
|
||||
)?,
|
||||
full_recoveries_finished: prometheus::register(
|
||||
CounterVec::new(
|
||||
Opts::new(
|
||||
"pezkuwi_teyrchain_availability_recovery_recoveries_finished",
|
||||
"Total number of recoveries that finished.",
|
||||
),
|
||||
&["result", "strategy_type"],
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
full_recoveries_started: prometheus::register(
|
||||
Counter::new(
|
||||
"pezkuwi_teyrchain_availability_recovery_recoveries_started",
|
||||
"Total number of started recoveries.",
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
};
|
||||
Ok(Metrics(Some(metrics)))
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,197 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! Main recovery task logic. Runs recovery strategies.
|
||||
|
||||
#![warn(missing_docs)]
|
||||
|
||||
mod strategy;
|
||||
|
||||
pub use self::strategy::{
|
||||
FetchChunks, FetchChunksParams, FetchFull, FetchFullParams, FetchSystematicChunks,
|
||||
FetchSystematicChunksParams, RecoveryStrategy, State,
|
||||
};
|
||||
|
||||
#[cfg(test)]
|
||||
pub use self::strategy::{REGULAR_CHUNKS_REQ_RETRY_LIMIT, SYSTEMATIC_CHUNKS_REQ_RETRY_LIMIT};
|
||||
|
||||
use crate::{metrics::Metrics, ErasureTask, PostRecoveryCheck, LOG_TARGET};
|
||||
|
||||
use codec::Encode;
|
||||
use pezkuwi_node_primitives::AvailableData;
|
||||
use pezkuwi_node_subsystem::{messages::AvailabilityStoreMessage, overseer, RecoveryError};
|
||||
use pezkuwi_primitives::{AuthorityDiscoveryId, CandidateHash, Hash};
|
||||
use sc_network::ProtocolName;
|
||||
|
||||
use futures::channel::{mpsc, oneshot};
|
||||
use std::collections::VecDeque;
|
||||
|
||||
/// Recovery parameters common to all strategies in a `RecoveryTask`.
|
||||
#[derive(Clone)]
|
||||
pub struct RecoveryParams {
|
||||
/// Discovery ids of `validators`.
|
||||
pub validator_authority_keys: Vec<AuthorityDiscoveryId>,
|
||||
|
||||
/// Number of validators.
|
||||
pub n_validators: usize,
|
||||
|
||||
/// The number of regular chunks needed.
|
||||
pub threshold: usize,
|
||||
|
||||
/// The number of systematic chunks needed.
|
||||
pub systematic_threshold: usize,
|
||||
|
||||
/// A hash of the relevant candidate.
|
||||
pub candidate_hash: CandidateHash,
|
||||
|
||||
/// The root of the erasure encoding of the candidate.
|
||||
pub erasure_root: Hash,
|
||||
|
||||
/// Metrics to report.
|
||||
pub metrics: Metrics,
|
||||
|
||||
/// Do not request data from availability-store. Useful for collators.
|
||||
pub bypass_availability_store: bool,
|
||||
|
||||
/// The type of check to perform after available data was recovered.
|
||||
pub post_recovery_check: PostRecoveryCheck,
|
||||
|
||||
/// The blake2-256 hash of the PoV.
|
||||
pub pov_hash: Hash,
|
||||
|
||||
/// Protocol name for ChunkFetchingV1.
|
||||
pub req_v1_protocol_name: ProtocolName,
|
||||
|
||||
/// Protocol name for ChunkFetchingV2.
|
||||
pub req_v2_protocol_name: ProtocolName,
|
||||
|
||||
/// Whether or not chunk mapping is enabled.
|
||||
pub chunk_mapping_enabled: bool,
|
||||
|
||||
/// Channel to the erasure task handler.
|
||||
pub erasure_task_tx: mpsc::Sender<ErasureTask>,
|
||||
}
|
||||
|
||||
/// A stateful reconstruction of availability data in reference to
|
||||
/// a candidate hash.
|
||||
pub struct RecoveryTask<Sender: overseer::AvailabilityRecoverySenderTrait> {
|
||||
sender: Sender,
|
||||
params: RecoveryParams,
|
||||
strategies: VecDeque<Box<dyn RecoveryStrategy<Sender>>>,
|
||||
state: State,
|
||||
}
|
||||
|
||||
impl<Sender> RecoveryTask<Sender>
|
||||
where
|
||||
Sender: overseer::AvailabilityRecoverySenderTrait,
|
||||
{
|
||||
/// Instantiate a new recovery task.
|
||||
pub fn new(
|
||||
sender: Sender,
|
||||
params: RecoveryParams,
|
||||
strategies: VecDeque<Box<dyn RecoveryStrategy<Sender>>>,
|
||||
) -> Self {
|
||||
Self { sender, params, strategies, state: State::new() }
|
||||
}
|
||||
|
||||
async fn in_availability_store(&mut self) -> Option<AvailableData> {
|
||||
if !self.params.bypass_availability_store {
|
||||
let (tx, rx) = oneshot::channel();
|
||||
self.sender
|
||||
.send_message(AvailabilityStoreMessage::QueryAvailableData(
|
||||
self.params.candidate_hash,
|
||||
tx,
|
||||
))
|
||||
.await;
|
||||
|
||||
match rx.await {
|
||||
Ok(Some(data)) => return Some(data),
|
||||
Ok(None) => {},
|
||||
Err(oneshot::Canceled) => {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
candidate_hash = ?self.params.candidate_hash,
|
||||
"Failed to reach the availability store",
|
||||
)
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// Run this recovery task to completion. It will loop through the configured strategies
|
||||
/// in-order and return whenever the first one recovers the full `AvailableData`.
|
||||
pub async fn run(mut self) -> Result<AvailableData, RecoveryError> {
|
||||
if let Some(data) = self.in_availability_store().await {
|
||||
return Ok(data);
|
||||
}
|
||||
|
||||
self.params.metrics.on_recovery_started();
|
||||
|
||||
let _timer = self.params.metrics.time_full_recovery();
|
||||
|
||||
while let Some(current_strategy) = self.strategies.pop_front() {
|
||||
let display_name = current_strategy.display_name();
|
||||
let strategy_type = current_strategy.strategy_type();
|
||||
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
candidate_hash = ?self.params.candidate_hash,
|
||||
"Starting `{}` strategy",
|
||||
display_name
|
||||
);
|
||||
|
||||
let res = current_strategy.run(&mut self.state, &mut self.sender, &self.params).await;
|
||||
|
||||
match res {
|
||||
Err(RecoveryError::Unavailable) =>
|
||||
if self.strategies.front().is_some() {
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
candidate_hash = ?self.params.candidate_hash,
|
||||
"Recovery strategy `{}` did not conclude. Trying the next one.",
|
||||
display_name
|
||||
);
|
||||
continue;
|
||||
},
|
||||
Err(err) => {
|
||||
match &err {
|
||||
RecoveryError::Invalid =>
|
||||
self.params.metrics.on_recovery_invalid(strategy_type),
|
||||
_ => self.params.metrics.on_recovery_failed(strategy_type),
|
||||
}
|
||||
return Err(err);
|
||||
},
|
||||
Ok(data) => {
|
||||
self.params.metrics.on_recovery_succeeded(strategy_type, data.encoded_size());
|
||||
return Ok(data);
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// We have no other strategies to try.
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
candidate_hash = ?self.params.candidate_hash,
|
||||
"Recovery of available data failed.",
|
||||
);
|
||||
|
||||
self.params.metrics.on_recovery_failed("all");
|
||||
|
||||
Err(RecoveryError::Unavailable)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,334 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use crate::{
|
||||
futures_undead::FuturesUndead,
|
||||
task::{
|
||||
strategy::{
|
||||
do_post_recovery_check, is_unavailable, OngoingRequests, N_PARALLEL,
|
||||
REGULAR_CHUNKS_REQ_RETRY_LIMIT,
|
||||
},
|
||||
RecoveryParams, State,
|
||||
},
|
||||
ErasureTask, RecoveryStrategy, LOG_TARGET,
|
||||
};
|
||||
|
||||
use pezkuwi_node_primitives::AvailableData;
|
||||
use pezkuwi_node_subsystem::{overseer, RecoveryError};
|
||||
use pezkuwi_primitives::ValidatorIndex;
|
||||
|
||||
use futures::{channel::oneshot, SinkExt};
|
||||
use rand::seq::SliceRandom;
|
||||
use std::collections::VecDeque;
|
||||
|
||||
/// Parameters specific to the `FetchChunks` strategy.
pub struct FetchChunksParams {
    /// Number of validators; `FetchChunks::new` builds one `ValidatorIndex` for each
    /// value in `0..n_validators`.
    pub n_validators: usize,
}
|
||||
|
||||
/// `RecoveryStrategy` that requests chunks from validators, in parallel.
|
||||
pub struct FetchChunks {
|
||||
/// How many requests have been unsuccessful so far.
|
||||
error_count: usize,
|
||||
/// Total number of responses that have been received, including failed ones.
|
||||
total_received_responses: usize,
|
||||
/// A shuffled array of validator indices.
|
||||
validators: VecDeque<ValidatorIndex>,
|
||||
/// Collection of in-flight requests.
|
||||
requesting_chunks: OngoingRequests,
|
||||
}
|
||||
|
||||
impl FetchChunks {
|
||||
/// Instantiate a new strategy.
|
||||
pub fn new(params: FetchChunksParams) -> Self {
|
||||
// Shuffle the validators to make sure that we don't request chunks from the same
|
||||
// validators over and over.
|
||||
let mut validators: VecDeque<ValidatorIndex> =
|
||||
(0..params.n_validators).map(|i| ValidatorIndex(i as u32)).collect();
|
||||
validators.make_contiguous().shuffle(&mut rand::thread_rng());
|
||||
|
||||
Self {
|
||||
error_count: 0,
|
||||
total_received_responses: 0,
|
||||
validators,
|
||||
requesting_chunks: FuturesUndead::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn is_unavailable(
|
||||
unrequested_validators: usize,
|
||||
in_flight_requests: usize,
|
||||
chunk_count: usize,
|
||||
threshold: usize,
|
||||
) -> bool {
|
||||
is_unavailable(chunk_count, in_flight_requests, unrequested_validators, threshold)
|
||||
}
|
||||
|
||||
/// Desired number of parallel requests.
|
||||
///
|
||||
/// For the given threshold (total required number of chunks) get the desired number of
|
||||
/// requests we want to have running in parallel at this time.
|
||||
fn get_desired_request_count(&self, chunk_count: usize, threshold: usize) -> usize {
|
||||
// Upper bound for parallel requests.
|
||||
// We want to limit this, so requests can be processed within the timeout and we limit the
|
||||
// following feedback loop:
|
||||
// 1. Requests fail due to timeout
|
||||
// 2. We request more chunks to make up for it
|
||||
// 3. Bandwidth is spread out even more, so we get even more timeouts
|
||||
// 4. We request more chunks to make up for it ...
|
||||
let max_requests_boundary = std::cmp::min(N_PARALLEL, threshold);
|
||||
// How many chunks are still needed?
|
||||
let remaining_chunks = threshold.saturating_sub(chunk_count);
|
||||
// What is the current error rate, so we can make up for it?
|
||||
let inv_error_rate =
|
||||
self.total_received_responses.checked_div(self.error_count).unwrap_or(0);
|
||||
// Actual number of requests we want to have in flight in parallel:
|
||||
std::cmp::min(
|
||||
max_requests_boundary,
|
||||
remaining_chunks + remaining_chunks.checked_div(inv_error_rate).unwrap_or(0),
|
||||
)
|
||||
}
|
||||
|
||||
async fn attempt_recovery<Sender: overseer::AvailabilityRecoverySenderTrait>(
|
||||
&mut self,
|
||||
state: &mut State,
|
||||
common_params: &RecoveryParams,
|
||||
) -> Result<AvailableData, RecoveryError> {
|
||||
let recovery_duration =
|
||||
common_params
|
||||
.metrics
|
||||
.time_erasure_recovery(RecoveryStrategy::<Sender>::strategy_type(self));
|
||||
|
||||
// Send request to reconstruct available data from chunks.
|
||||
let (avilable_data_tx, available_data_rx) = oneshot::channel();
|
||||
|
||||
let mut erasure_task_tx = common_params.erasure_task_tx.clone();
|
||||
erasure_task_tx
|
||||
.send(ErasureTask::Reconstruct(
|
||||
common_params.n_validators,
|
||||
// Safe to leave an empty vec in place, as we're stopping the recovery process if
|
||||
// this reconstruct fails.
|
||||
std::mem::take(&mut state.received_chunks)
|
||||
.into_iter()
|
||||
.map(|(c_index, chunk)| (c_index, chunk.chunk))
|
||||
.collect(),
|
||||
avilable_data_tx,
|
||||
))
|
||||
.await
|
||||
.map_err(|_| RecoveryError::ChannelClosed)?;
|
||||
|
||||
let available_data_response =
|
||||
available_data_rx.await.map_err(|_| RecoveryError::ChannelClosed)?;
|
||||
|
||||
match available_data_response {
|
||||
// Attempt post-recovery check.
|
||||
Ok(data) => do_post_recovery_check(common_params, data)
|
||||
.await
|
||||
.inspect_err(|_| {
|
||||
recovery_duration.map(|rd| rd.stop_and_discard());
|
||||
})
|
||||
.inspect(|_| {
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
candidate_hash = ?common_params.candidate_hash,
|
||||
erasure_root = ?common_params.erasure_root,
|
||||
"Data recovery from chunks complete",
|
||||
);
|
||||
}),
|
||||
Err(err) => {
|
||||
recovery_duration.map(|rd| rd.stop_and_discard());
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
candidate_hash = ?common_params.candidate_hash,
|
||||
erasure_root = ?common_params.erasure_root,
|
||||
?err,
|
||||
"Data recovery error",
|
||||
);
|
||||
|
||||
Err(RecoveryError::Invalid)
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl<Sender: overseer::AvailabilityRecoverySenderTrait> RecoveryStrategy<Sender> for FetchChunks {
|
||||
fn display_name(&self) -> &'static str {
|
||||
"Fetch chunks"
|
||||
}
|
||||
|
||||
fn strategy_type(&self) -> &'static str {
|
||||
"regular_chunks"
|
||||
}
|
||||
|
||||
async fn run(
|
||||
mut self: Box<Self>,
|
||||
state: &mut State,
|
||||
sender: &mut Sender,
|
||||
common_params: &RecoveryParams,
|
||||
) -> Result<AvailableData, RecoveryError> {
|
||||
// First query the store for any chunks we've got.
|
||||
if !common_params.bypass_availability_store {
|
||||
let local_chunk_indices = state.populate_from_av_store(common_params, sender).await;
|
||||
self.validators.retain(|validator_index| {
|
||||
!local_chunk_indices.iter().any(|(v_index, _)| v_index == validator_index)
|
||||
});
|
||||
}
|
||||
|
||||
// No need to query the validators that have the chunks we already received or that we know
|
||||
// don't have the data from previous strategies.
|
||||
self.validators.retain(|v_index| {
|
||||
!state.received_chunks.values().any(|c| v_index == &c.validator_index) &&
|
||||
state.can_retry_request(
|
||||
&(common_params.validator_authority_keys[v_index.0 as usize].clone(), *v_index),
|
||||
REGULAR_CHUNKS_REQ_RETRY_LIMIT,
|
||||
)
|
||||
});
|
||||
|
||||
// Safe to `take` here, as we're consuming `self` anyway and we're not using the
|
||||
// `validators` field in other methods.
|
||||
let mut validators_queue: VecDeque<_> = std::mem::take(&mut self.validators)
|
||||
.into_iter()
|
||||
.map(|validator_index| {
|
||||
(
|
||||
common_params.validator_authority_keys[validator_index.0 as usize].clone(),
|
||||
validator_index,
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
|
||||
loop {
|
||||
// If received_chunks has more than threshold entries, attempt to recover the data.
|
||||
// If that fails, or a re-encoding of it doesn't match the expected erasure root,
|
||||
// return Err(RecoveryError::Invalid).
|
||||
// Do this before requesting any chunks because we may have enough of them coming from
|
||||
// past RecoveryStrategies.
|
||||
if state.chunk_count() >= common_params.threshold {
|
||||
return self.attempt_recovery::<Sender>(state, common_params).await;
|
||||
}
|
||||
|
||||
if Self::is_unavailable(
|
||||
validators_queue.len(),
|
||||
self.requesting_chunks.total_len(),
|
||||
state.chunk_count(),
|
||||
common_params.threshold,
|
||||
) {
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
candidate_hash = ?common_params.candidate_hash,
|
||||
erasure_root = ?common_params.erasure_root,
|
||||
received = %state.chunk_count(),
|
||||
requesting = %self.requesting_chunks.len(),
|
||||
total_requesting = %self.requesting_chunks.total_len(),
|
||||
n_validators = %common_params.n_validators,
|
||||
"Data recovery from chunks is not possible",
|
||||
);
|
||||
|
||||
return Err(RecoveryError::Unavailable);
|
||||
}
|
||||
|
||||
let desired_requests_count =
|
||||
self.get_desired_request_count(state.chunk_count(), common_params.threshold);
|
||||
let already_requesting_count = self.requesting_chunks.len();
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
?common_params.candidate_hash,
|
||||
?desired_requests_count,
|
||||
error_count= ?self.error_count,
|
||||
total_received = ?self.total_received_responses,
|
||||
threshold = ?common_params.threshold,
|
||||
?already_requesting_count,
|
||||
"Requesting availability chunks for a candidate",
|
||||
);
|
||||
|
||||
let strategy_type = RecoveryStrategy::<Sender>::strategy_type(&*self);
|
||||
|
||||
state
|
||||
.launch_parallel_chunk_requests(
|
||||
strategy_type,
|
||||
common_params,
|
||||
sender,
|
||||
desired_requests_count,
|
||||
&mut validators_queue,
|
||||
&mut self.requesting_chunks,
|
||||
)
|
||||
.await;
|
||||
|
||||
let (total_responses, error_count) = state
|
||||
.wait_for_chunks(
|
||||
strategy_type,
|
||||
common_params,
|
||||
REGULAR_CHUNKS_REQ_RETRY_LIMIT,
|
||||
&mut validators_queue,
|
||||
&mut self.requesting_chunks,
|
||||
&mut vec![],
|
||||
|unrequested_validators,
|
||||
in_flight_reqs,
|
||||
chunk_count,
|
||||
_systematic_chunk_count| {
|
||||
chunk_count >= common_params.threshold ||
|
||||
Self::is_unavailable(
|
||||
unrequested_validators,
|
||||
in_flight_reqs,
|
||||
chunk_count,
|
||||
common_params.threshold,
|
||||
)
|
||||
},
|
||||
)
|
||||
.await;
|
||||
|
||||
self.total_received_responses += total_responses;
|
||||
self.error_count += error_count;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use pezkuwi_erasure_coding::recovery_threshold;

    /// Exercises `get_desired_request_count` for different error rates and checks both
    /// upper bounds (threshold and `N_PARALLEL`).
    #[test]
    fn test_get_desired_request_count() {
        let n_validators = 100;
        let threshold = recovery_threshold(n_validators).unwrap();

        let mut fetch_chunks_task = FetchChunks::new(FetchChunksParams { n_validators });
        assert_eq!(fetch_chunks_task.get_desired_request_count(0, threshold), threshold);
        fetch_chunks_task.error_count = 1;
        fetch_chunks_task.total_received_responses = 1;
        // We saturate at threshold (34):
        assert_eq!(fetch_chunks_task.get_desired_request_count(0, threshold), threshold);

        // We saturate at the parallel limit.
        assert_eq!(fetch_chunks_task.get_desired_request_count(0, N_PARALLEL + 2), N_PARALLEL);

        fetch_chunks_task.total_received_responses = 2;
        // With given error rate - still saturating:
        assert_eq!(fetch_chunks_task.get_desired_request_count(1, threshold), threshold);
        fetch_chunks_task.total_received_responses = 10;
        // error rate: 1/10, i.e. inverse error rate 10
        // remaining chunks needed: threshold (34) - 9 = 25
        // expected: 25 + 25 / 10 (integer division, rounds down to 2) = 27
        assert_eq!(fetch_chunks_task.get_desired_request_count(9, threshold), 27);
        // We saturate at the parallel limit.
        assert_eq!(fetch_chunks_task.get_desired_request_count(9, N_PARALLEL + 9), N_PARALLEL);

        fetch_chunks_task.error_count = 0;
        // With error count zero - we should fetch exactly as needed:
        assert_eq!(fetch_chunks_task.get_desired_request_count(10, threshold), threshold - 10);
    }
}
|
||||
@@ -0,0 +1,174 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use crate::{
|
||||
task::{RecoveryParams, RecoveryStrategy, State},
|
||||
ErasureTask, PostRecoveryCheck, LOG_TARGET,
|
||||
};
|
||||
|
||||
use pezkuwi_node_network_protocol::request_response::{
|
||||
self as req_res, outgoing::RequestError, OutgoingRequest, Recipient, Requests,
|
||||
};
|
||||
use pezkuwi_node_primitives::AvailableData;
|
||||
use pezkuwi_node_subsystem::{messages::NetworkBridgeTxMessage, overseer, RecoveryError};
|
||||
use pezkuwi_primitives::ValidatorIndex;
|
||||
use sc_network::{IfDisconnected, OutboundFailure, RequestFailure};
|
||||
|
||||
use futures::{channel::oneshot, SinkExt};
|
||||
use rand::seq::SliceRandom;
|
||||
|
||||
/// Parameters specific to the `FetchFull` strategy.
|
||||
pub struct FetchFullParams {
|
||||
/// Validators that will be used for fetching the data.
|
||||
pub validators: Vec<ValidatorIndex>,
|
||||
}
|
||||
|
||||
/// `RecoveryStrategy` that sequentially tries to fetch the full `AvailableData` from
|
||||
/// already-connected validators in the configured validator set.
|
||||
pub struct FetchFull {
|
||||
params: FetchFullParams,
|
||||
}
|
||||
|
||||
impl FetchFull {
|
||||
/// Create a new `FetchFull` recovery strategy.
|
||||
pub fn new(mut params: FetchFullParams) -> Self {
|
||||
params.validators.shuffle(&mut rand::thread_rng());
|
||||
Self { params }
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl<Sender: overseer::AvailabilityRecoverySenderTrait> RecoveryStrategy<Sender> for FetchFull {
|
||||
fn display_name(&self) -> &'static str {
|
||||
"Full recovery from backers"
|
||||
}
|
||||
|
||||
fn strategy_type(&self) -> &'static str {
|
||||
"full_from_backers"
|
||||
}
|
||||
|
||||
async fn run(
|
||||
mut self: Box<Self>,
|
||||
_: &mut State,
|
||||
sender: &mut Sender,
|
||||
common_params: &RecoveryParams,
|
||||
) -> Result<AvailableData, RecoveryError> {
|
||||
let strategy_type = RecoveryStrategy::<Sender>::strategy_type(&*self);
|
||||
|
||||
loop {
|
||||
// Pop the next validator.
|
||||
let validator_index =
|
||||
self.params.validators.pop().ok_or_else(|| RecoveryError::Unavailable)?;
|
||||
|
||||
// Request data.
|
||||
let (req, response) = OutgoingRequest::new(
|
||||
Recipient::Authority(
|
||||
common_params.validator_authority_keys[validator_index.0 as usize].clone(),
|
||||
),
|
||||
req_res::v1::AvailableDataFetchingRequest {
|
||||
candidate_hash: common_params.candidate_hash,
|
||||
},
|
||||
);
|
||||
|
||||
sender
|
||||
.send_message(NetworkBridgeTxMessage::SendRequests(
|
||||
vec![Requests::AvailableDataFetchingV1(req)],
|
||||
IfDisconnected::ImmediateError,
|
||||
))
|
||||
.await;
|
||||
|
||||
common_params.metrics.on_full_request_issued();
|
||||
|
||||
match response.await {
|
||||
Ok(req_res::v1::AvailableDataFetchingResponse::AvailableData(data)) => {
|
||||
let recovery_duration =
|
||||
common_params.metrics.time_erasure_recovery(strategy_type);
|
||||
let maybe_data = match common_params.post_recovery_check {
|
||||
PostRecoveryCheck::Reencode => {
|
||||
let (reencode_tx, reencode_rx) = oneshot::channel();
|
||||
let mut erasure_task_tx = common_params.erasure_task_tx.clone();
|
||||
|
||||
erasure_task_tx
|
||||
.send(ErasureTask::Reencode(
|
||||
common_params.n_validators,
|
||||
common_params.erasure_root,
|
||||
data,
|
||||
reencode_tx,
|
||||
))
|
||||
.await
|
||||
.map_err(|_| RecoveryError::ChannelClosed)?;
|
||||
|
||||
reencode_rx.await.map_err(|_| RecoveryError::ChannelClosed)?
|
||||
},
|
||||
PostRecoveryCheck::PovHash =>
|
||||
(data.pov.hash() == common_params.pov_hash).then_some(data),
|
||||
};
|
||||
|
||||
match maybe_data {
|
||||
Some(data) => {
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
candidate_hash = ?common_params.candidate_hash,
|
||||
"Received full data",
|
||||
);
|
||||
|
||||
common_params.metrics.on_full_request_succeeded();
|
||||
return Ok(data);
|
||||
},
|
||||
None => {
|
||||
common_params.metrics.on_full_request_invalid();
|
||||
recovery_duration.map(|rd| rd.stop_and_discard());
|
||||
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
candidate_hash = ?common_params.candidate_hash,
|
||||
?validator_index,
|
||||
"Invalid data response",
|
||||
);
|
||||
|
||||
// it doesn't help to report the peer with req/res.
|
||||
// we'll try the next backer.
|
||||
},
|
||||
}
|
||||
},
|
||||
Ok(req_res::v1::AvailableDataFetchingResponse::NoSuchData) => {
|
||||
common_params.metrics.on_full_request_no_such_data();
|
||||
},
|
||||
Err(e) => {
|
||||
match &e {
|
||||
RequestError::Canceled(_) => common_params.metrics.on_full_request_error(),
|
||||
RequestError::InvalidResponse(_) =>
|
||||
common_params.metrics.on_full_request_invalid(),
|
||||
RequestError::NetworkError(req_failure) => {
|
||||
if let RequestFailure::Network(OutboundFailure::Timeout) = req_failure {
|
||||
common_params.metrics.on_full_request_timeout();
|
||||
} else {
|
||||
common_params.metrics.on_full_request_error();
|
||||
}
|
||||
},
|
||||
};
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
candidate_hash = ?common_params.candidate_hash,
|
||||
?validator_index,
|
||||
err = ?e,
|
||||
"Error fetching full available data."
|
||||
);
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,341 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use crate::{
|
||||
futures_undead::FuturesUndead,
|
||||
task::{
|
||||
strategy::{
|
||||
do_post_recovery_check, is_unavailable, OngoingRequests, N_PARALLEL,
|
||||
SYSTEMATIC_CHUNKS_REQ_RETRY_LIMIT,
|
||||
},
|
||||
RecoveryParams, RecoveryStrategy, State,
|
||||
},
|
||||
LOG_TARGET,
|
||||
};
|
||||
|
||||
use pezkuwi_node_primitives::AvailableData;
|
||||
use pezkuwi_node_subsystem::{overseer, RecoveryError};
|
||||
use pezkuwi_primitives::{ChunkIndex, ValidatorIndex};
|
||||
|
||||
use std::collections::VecDeque;
|
||||
|
||||
/// Parameters needed for fetching systematic chunks.
|
||||
pub struct FetchSystematicChunksParams {
|
||||
/// Validators that hold the systematic chunks.
|
||||
pub validators: Vec<(ChunkIndex, ValidatorIndex)>,
|
||||
/// Validators in the backing group, to be used as a backup for requesting systematic chunks.
|
||||
pub backers: Vec<ValidatorIndex>,
|
||||
}
|
||||
|
||||
/// `RecoveryStrategy` that attempts to recover the systematic chunks from the validators that
|
||||
/// hold them, in order to bypass the erasure code reconstruction step, which is costly.
|
||||
pub struct FetchSystematicChunks {
|
||||
/// Systematic recovery threshold.
|
||||
threshold: usize,
|
||||
/// Validators that hold the systematic chunks.
|
||||
validators: Vec<(ChunkIndex, ValidatorIndex)>,
|
||||
/// Backers to be used as a backup.
|
||||
backers: Vec<ValidatorIndex>,
|
||||
/// Collection of in-flight requests.
|
||||
requesting_chunks: OngoingRequests,
|
||||
}
|
||||
|
||||
impl FetchSystematicChunks {
|
||||
/// Instantiate a new systematic chunks strategy.
|
||||
pub fn new(params: FetchSystematicChunksParams) -> Self {
|
||||
Self {
|
||||
threshold: params.validators.len(),
|
||||
validators: params.validators,
|
||||
backers: params.backers,
|
||||
requesting_chunks: FuturesUndead::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn is_unavailable(
|
||||
unrequested_validators: usize,
|
||||
in_flight_requests: usize,
|
||||
systematic_chunk_count: usize,
|
||||
threshold: usize,
|
||||
) -> bool {
|
||||
is_unavailable(
|
||||
systematic_chunk_count,
|
||||
in_flight_requests,
|
||||
unrequested_validators,
|
||||
threshold,
|
||||
)
|
||||
}
|
||||
|
||||
/// Desired number of parallel requests.
|
||||
///
|
||||
/// For the given threshold (total required number of chunks) get the desired number of
|
||||
/// requests we want to have running in parallel at this time.
|
||||
fn get_desired_request_count(&self, chunk_count: usize, threshold: usize) -> usize {
|
||||
// Upper bound for parallel requests.
|
||||
let max_requests_boundary = std::cmp::min(N_PARALLEL, threshold);
|
||||
// How many chunks are still needed?
|
||||
let remaining_chunks = threshold.saturating_sub(chunk_count);
|
||||
// Actual number of requests we want to have in flight in parallel:
|
||||
// We don't have to make up for any error rate, as an error fetching a systematic chunk
|
||||
// results in failure of the entire strategy.
|
||||
std::cmp::min(max_requests_boundary, remaining_chunks)
|
||||
}
|
||||
|
||||
async fn attempt_systematic_recovery<Sender: overseer::AvailabilityRecoverySenderTrait>(
|
||||
&mut self,
|
||||
state: &mut State,
|
||||
common_params: &RecoveryParams,
|
||||
) -> Result<AvailableData, RecoveryError> {
|
||||
let strategy_type = RecoveryStrategy::<Sender>::strategy_type(self);
|
||||
let recovery_duration = common_params.metrics.time_erasure_recovery(strategy_type);
|
||||
let reconstruct_duration = common_params.metrics.time_erasure_reconstruct(strategy_type);
|
||||
let chunks = state
|
||||
.received_chunks
|
||||
.range(
|
||||
ChunkIndex(0)..
|
||||
ChunkIndex(
|
||||
u32::try_from(self.threshold)
|
||||
.expect("validator count should not exceed u32"),
|
||||
),
|
||||
)
|
||||
.map(|(_, chunk)| chunk.chunk.clone())
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let available_data = pezkuwi_erasure_coding::reconstruct_from_systematic_v1(
|
||||
common_params.n_validators,
|
||||
chunks,
|
||||
);
|
||||
|
||||
match available_data {
|
||||
Ok(data) => {
|
||||
drop(reconstruct_duration);
|
||||
|
||||
// Attempt post-recovery check.
|
||||
do_post_recovery_check(common_params, data)
|
||||
.await
|
||||
.inspect_err(|_| {
|
||||
recovery_duration.map(|rd| rd.stop_and_discard());
|
||||
})
|
||||
.inspect(|_| {
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
candidate_hash = ?common_params.candidate_hash,
|
||||
erasure_root = ?common_params.erasure_root,
|
||||
"Data recovery from systematic chunks complete",
|
||||
);
|
||||
})
|
||||
},
|
||||
Err(err) => {
|
||||
reconstruct_duration.map(|rd| rd.stop_and_discard());
|
||||
recovery_duration.map(|rd| rd.stop_and_discard());
|
||||
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
candidate_hash = ?common_params.candidate_hash,
|
||||
erasure_root = ?common_params.erasure_root,
|
||||
?err,
|
||||
"Systematic data recovery error",
|
||||
);
|
||||
|
||||
Err(RecoveryError::Invalid)
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl<Sender: overseer::AvailabilityRecoverySenderTrait> RecoveryStrategy<Sender>
|
||||
for FetchSystematicChunks
|
||||
{
|
||||
fn display_name(&self) -> &'static str {
|
||||
"Fetch systematic chunks"
|
||||
}
|
||||
|
||||
fn strategy_type(&self) -> &'static str {
|
||||
"systematic_chunks"
|
||||
}
|
||||
|
||||
async fn run(
|
||||
mut self: Box<Self>,
|
||||
state: &mut State,
|
||||
sender: &mut Sender,
|
||||
common_params: &RecoveryParams,
|
||||
) -> Result<AvailableData, RecoveryError> {
|
||||
// First query the store for any chunks we've got.
|
||||
if !common_params.bypass_availability_store {
|
||||
let local_chunk_indices = state.populate_from_av_store(common_params, sender).await;
|
||||
|
||||
for (_, our_c_index) in &local_chunk_indices {
|
||||
// If we are among the systematic validators but hold an invalid chunk, we cannot
|
||||
// perform the systematic recovery. Fall through to the next strategy.
|
||||
if self.validators.iter().any(|(c_index, _)| c_index == our_c_index) &&
|
||||
!state.received_chunks.contains_key(our_c_index)
|
||||
{
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
candidate_hash = ?common_params.candidate_hash,
|
||||
erasure_root = ?common_params.erasure_root,
|
||||
requesting = %self.requesting_chunks.len(),
|
||||
total_requesting = %self.requesting_chunks.total_len(),
|
||||
n_validators = %common_params.n_validators,
|
||||
chunk_index = ?our_c_index,
|
||||
"Systematic chunk recovery is not possible. We are among the systematic validators but hold an invalid chunk",
|
||||
);
|
||||
return Err(RecoveryError::Unavailable);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// No need to query the validators that have the chunks we already received or that we know
|
||||
// don't have the data from previous strategies.
|
||||
self.validators.retain(|(c_index, v_index)| {
|
||||
!state.received_chunks.contains_key(c_index) &&
|
||||
state.can_retry_request(
|
||||
&(common_params.validator_authority_keys[v_index.0 as usize].clone(), *v_index),
|
||||
SYSTEMATIC_CHUNKS_REQ_RETRY_LIMIT,
|
||||
)
|
||||
});
|
||||
|
||||
let mut systematic_chunk_count = state
|
||||
.received_chunks
|
||||
.range(ChunkIndex(0)..ChunkIndex(self.threshold as u32))
|
||||
.count();
|
||||
|
||||
// Safe to `take` here, as we're consuming `self` anyway and we're not using the
|
||||
// `validators` or `backers` fields in other methods.
|
||||
let mut validators_queue: VecDeque<_> = std::mem::take(&mut self.validators)
|
||||
.into_iter()
|
||||
.map(|(_, validator_index)| {
|
||||
(
|
||||
common_params.validator_authority_keys[validator_index.0 as usize].clone(),
|
||||
validator_index,
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
let mut backers: Vec<_> = std::mem::take(&mut self.backers)
|
||||
.into_iter()
|
||||
.map(|validator_index| {
|
||||
common_params.validator_authority_keys[validator_index.0 as usize].clone()
|
||||
})
|
||||
.collect();
|
||||
|
||||
loop {
|
||||
// If received_chunks has `systematic_chunk_threshold` entries, attempt to recover the
|
||||
// data.
|
||||
if systematic_chunk_count >= self.threshold {
|
||||
return self.attempt_systematic_recovery::<Sender>(state, common_params).await;
|
||||
}
|
||||
|
||||
if Self::is_unavailable(
|
||||
validators_queue.len(),
|
||||
self.requesting_chunks.total_len(),
|
||||
systematic_chunk_count,
|
||||
self.threshold,
|
||||
) {
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
candidate_hash = ?common_params.candidate_hash,
|
||||
erasure_root = ?common_params.erasure_root,
|
||||
%systematic_chunk_count,
|
||||
requesting = %self.requesting_chunks.len(),
|
||||
total_requesting = %self.requesting_chunks.total_len(),
|
||||
n_validators = %common_params.n_validators,
|
||||
systematic_threshold = ?self.threshold,
|
||||
"Data recovery from systematic chunks is not possible",
|
||||
);
|
||||
|
||||
return Err(RecoveryError::Unavailable);
|
||||
}
|
||||
|
||||
let desired_requests_count =
|
||||
self.get_desired_request_count(systematic_chunk_count, self.threshold);
|
||||
let already_requesting_count = self.requesting_chunks.len();
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
?common_params.candidate_hash,
|
||||
?desired_requests_count,
|
||||
total_received = ?systematic_chunk_count,
|
||||
systematic_threshold = ?self.threshold,
|
||||
?already_requesting_count,
|
||||
"Requesting systematic availability chunks for a candidate",
|
||||
);
|
||||
|
||||
let strategy_type = RecoveryStrategy::<Sender>::strategy_type(&*self);
|
||||
|
||||
state
|
||||
.launch_parallel_chunk_requests(
|
||||
strategy_type,
|
||||
common_params,
|
||||
sender,
|
||||
desired_requests_count,
|
||||
&mut validators_queue,
|
||||
&mut self.requesting_chunks,
|
||||
)
|
||||
.await;
|
||||
|
||||
let _ = state
|
||||
.wait_for_chunks(
|
||||
strategy_type,
|
||||
common_params,
|
||||
SYSTEMATIC_CHUNKS_REQ_RETRY_LIMIT,
|
||||
&mut validators_queue,
|
||||
&mut self.requesting_chunks,
|
||||
&mut backers,
|
||||
|unrequested_validators,
|
||||
in_flight_reqs,
|
||||
// Don't use this chunk count, as it may contain non-systematic chunks.
|
||||
_chunk_count,
|
||||
new_systematic_chunk_count| {
|
||||
systematic_chunk_count = new_systematic_chunk_count;
|
||||
|
||||
let is_unavailable = Self::is_unavailable(
|
||||
unrequested_validators,
|
||||
in_flight_reqs,
|
||||
systematic_chunk_count,
|
||||
self.threshold,
|
||||
);
|
||||
|
||||
systematic_chunk_count >= self.threshold || is_unavailable
|
||||
},
|
||||
)
|
||||
.await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use pezkuwi_erasure_coding::systematic_recovery_threshold;

    /// The desired request count is capped by `N_PARALLEL` and by the number of chunks that
    /// are still missing.
    #[test]
    fn test_get_desired_request_count() {
        let n_validators = 100;
        let threshold = systematic_recovery_threshold(n_validators).unwrap();

        let strategy = FetchSystematicChunks::new(FetchSystematicChunksParams {
            validators: vec![(1.into(), 1.into()); n_validators],
            backers: Vec::new(),
        });

        // (chunks already held, threshold, expected number of requests)
        let cases = [
            (0, threshold, threshold),
            (5, threshold, threshold - 5),
            (n_validators * 2, threshold, 0),
            (0, N_PARALLEL * 2, N_PARALLEL),
            (N_PARALLEL, N_PARALLEL + 2, 2),
        ];

        for (chunk_count, threshold, expected) in cases {
            assert_eq!(strategy.get_desired_request_count(chunk_count, threshold), expected);
        }
    }
}
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user