feat: initialize Kurdistan SDK - independent fork of Polkadot SDK
This commit is contained in:
@@ -0,0 +1 @@
|
||||
This folder holds all networking subsystem implementations, each in its own crate.
|
||||
@@ -0,0 +1,56 @@
|
||||
[package]
|
||||
name = "pezkuwi-approval-distribution"
|
||||
version = "7.0.0"
|
||||
description = "Pezkuwi Approval Distribution subsystem for the distribution of assignments and approvals for approval checks on candidates over the network."
|
||||
authors.workspace = true
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
homepage.workspace = true
|
||||
repository.workspace = true
|
||||
|
||||
[lints]
|
||||
workspace = true
|
||||
|
||||
[dependencies]
|
||||
itertools = { workspace = true }
|
||||
pezkuwi-node-metrics = { workspace = true, default-features = true }
|
||||
pezkuwi-node-network-protocol = { workspace = true, default-features = true }
|
||||
pezkuwi-node-primitives = { workspace = true, default-features = true }
|
||||
pezkuwi-node-subsystem = { workspace = true, default-features = true }
|
||||
pezkuwi-node-subsystem-util = { workspace = true, default-features = true }
|
||||
pezkuwi-primitives = { workspace = true, default-features = true }
|
||||
rand = { workspace = true, default-features = true }
|
||||
|
||||
futures = { workspace = true }
|
||||
futures-timer = { workspace = true }
|
||||
gum = { workspace = true, default-features = true }
|
||||
|
||||
[dev-dependencies]
|
||||
sc-keystore = { workspace = true }
|
||||
sp-application-crypto = { workspace = true, default-features = true }
|
||||
sp-authority-discovery = { workspace = true, default-features = true }
|
||||
sp-core = { features = ["std"], workspace = true, default-features = true }
|
||||
|
||||
pezkuwi-node-subsystem-test-helpers = { workspace = true }
|
||||
pezkuwi-primitives-test-helpers = { workspace = true }
|
||||
|
||||
assert_matches = { workspace = true }
|
||||
rand_chacha = { workspace = true, default-features = true }
|
||||
schnorrkel = { workspace = true }
|
||||
# rand_core should match schnorrkel
|
||||
rand_core = { workspace = true }
|
||||
sp-tracing = { workspace = true }
|
||||
|
||||
[features]
|
||||
runtime-benchmarks = [
|
||||
"gum/runtime-benchmarks",
|
||||
"pezkuwi-node-metrics/runtime-benchmarks",
|
||||
"pezkuwi-node-network-protocol/runtime-benchmarks",
|
||||
"pezkuwi-node-primitives/runtime-benchmarks",
|
||||
"pezkuwi-node-subsystem-test-helpers/runtime-benchmarks",
|
||||
"pezkuwi-node-subsystem-util/runtime-benchmarks",
|
||||
"pezkuwi-node-subsystem/runtime-benchmarks",
|
||||
"pezkuwi-primitives-test-helpers/runtime-benchmarks",
|
||||
"pezkuwi-primitives/runtime-benchmarks",
|
||||
"sp-authority-discovery/runtime-benchmarks",
|
||||
]
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,272 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use pezkuwi_node_metrics::metrics::{prometheus, Metrics as MetricsTrait};
|
||||
use pezkuwi_node_primitives::approval::v2::AssignmentCertKindV2;
|
||||
|
||||
/// Approval Distribution metrics.
|
||||
#[derive(Default, Clone)]
|
||||
pub struct Metrics(Option<MetricsInner>);
|
||||
|
||||
#[derive(Clone)]
|
||||
struct MetricsInner {
|
||||
assignments_imported_total: prometheus::CounterVec<prometheus::U64>,
|
||||
approvals_imported_total: prometheus::Counter<prometheus::U64>,
|
||||
unified_with_peer_total: prometheus::Counter<prometheus::U64>,
|
||||
aggression_l1_messages_total: prometheus::Counter<prometheus::U64>,
|
||||
aggression_l2_messages_total: prometheus::Counter<prometheus::U64>,
|
||||
time_unify_with_peer: prometheus::Histogram,
|
||||
time_import_pending_now_known: prometheus::Histogram,
|
||||
assignments_received_result: prometheus::CounterVec<prometheus::U64>,
|
||||
approvals_received_result: prometheus::CounterVec<prometheus::U64>,
|
||||
}
|
||||
|
||||
/// Types that can be rendered as the value of a metric label.
trait AsLabel {
    /// Returns the label text representing `self`.
    fn as_label(&self) -> &str;
}
|
||||
|
||||
impl AsLabel for &AssignmentCertKindV2 {
|
||||
fn as_label(&self) -> &str {
|
||||
match self {
|
||||
AssignmentCertKindV2::RelayVRFDelay { .. } => "VRF Delay",
|
||||
AssignmentCertKindV2::RelayVRFModulo { .. } => "VRF Modulo",
|
||||
AssignmentCertKindV2::RelayVRFModuloCompact { .. } => "VRF Modulo Compact",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Metrics {
|
||||
pub(crate) fn on_assignment_imported(&self, kind: &AssignmentCertKindV2) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.assignments_imported_total.with_label_values(&[kind.as_label()]).inc();
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn on_approval_imported(&self) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.approvals_imported_total.inc();
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn on_unify_with_peer(&self) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.unified_with_peer_total.inc();
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn time_unify_with_peer(&self) -> Option<prometheus::prometheus::HistogramTimer> {
|
||||
self.0.as_ref().map(|metrics| metrics.time_unify_with_peer.start_timer())
|
||||
}
|
||||
|
||||
pub(crate) fn time_import_pending_now_known(
|
||||
&self,
|
||||
) -> Option<prometheus::prometheus::HistogramTimer> {
|
||||
self.0
|
||||
.as_ref()
|
||||
.map(|metrics| metrics.time_import_pending_now_known.start_timer())
|
||||
}
|
||||
|
||||
pub(crate) fn on_approval_recent_outdated(&self) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.approvals_received_result.with_label_values(&["outdated"]).inc()
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn on_approval_invalid_block(&self) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.approvals_received_result.with_label_values(&["invalidblock"]).inc()
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn on_approval_unknown_assignment(&self) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics
|
||||
.approvals_received_result
|
||||
.with_label_values(&["unknownassignment"])
|
||||
.inc()
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn on_approval_duplicate(&self) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.approvals_received_result.with_label_values(&["duplicate"]).inc()
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn on_approval_out_of_view(&self) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.approvals_received_result.with_label_values(&["outofview"]).inc()
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn on_approval_good_known(&self) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.approvals_received_result.with_label_values(&["goodknown"]).inc()
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn on_approval_bad(&self) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.approvals_received_result.with_label_values(&["bad"]).inc()
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn on_approval_bug(&self) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.approvals_received_result.with_label_values(&["bug"]).inc()
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn on_assignment_recent_outdated(&self) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.assignments_received_result.with_label_values(&["outdated"]).inc()
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn on_assignment_invalid_block(&self) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.assignments_received_result.with_label_values(&["invalidblock"]).inc()
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn on_assignment_duplicate(&self) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.assignments_received_result.with_label_values(&["duplicate"]).inc()
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn on_assignment_out_of_view(&self) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.assignments_received_result.with_label_values(&["outofview"]).inc()
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn on_assignment_good_known(&self) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.assignments_received_result.with_label_values(&["goodknown"]).inc()
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn on_assignment_bad(&self) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.assignments_received_result.with_label_values(&["bad"]).inc()
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn on_assignment_far(&self) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.assignments_received_result.with_label_values(&["far"]).inc()
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn on_aggression_l1(&self) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.aggression_l1_messages_total.inc();
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn on_aggression_l2(&self) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.aggression_l2_messages_total.inc();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl MetricsTrait for Metrics {
|
||||
fn try_register(registry: &prometheus::Registry) -> Result<Self, prometheus::PrometheusError> {
|
||||
let metrics = MetricsInner {
|
||||
assignments_imported_total: prometheus::register(
|
||||
prometheus::CounterVec::new(
|
||||
prometheus::Opts::new(
|
||||
"pezkuwi_teyrchain_assignments_imported_total",
|
||||
"Number of valid assignments imported locally or from other peers.",
|
||||
),
|
||||
&["kind"],
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
approvals_imported_total: prometheus::register(
|
||||
prometheus::Counter::new(
|
||||
"pezkuwi_teyrchain_approvals_imported_total",
|
||||
"Number of valid approvals imported locally or from other peers.",
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
unified_with_peer_total: prometheus::register(
|
||||
prometheus::Counter::new(
|
||||
"pezkuwi_teyrchain_unified_with_peer_total",
|
||||
"Number of times `unify_with_peer` is called.",
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
aggression_l1_messages_total: prometheus::register(
|
||||
prometheus::Counter::new(
|
||||
"pezkuwi_teyrchain_approval_distribution_aggression_l1_messages_total",
|
||||
"Number of messages in approval distribution for which aggression L1 has been triggered",
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
aggression_l2_messages_total: prometheus::register(
|
||||
prometheus::Counter::new(
|
||||
"pezkuwi_teyrchain_approval_distribution_aggression_l2_messages_total",
|
||||
"Number of messages in approval distribution for which aggression L2 has been triggered",
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
time_unify_with_peer: prometheus::register(
|
||||
prometheus::Histogram::with_opts(
|
||||
prometheus::HistogramOpts::new(
|
||||
"pezkuwi_teyrchain_time_unify_with_peer",
|
||||
"Time spent within fn `unify_with_peer`.",
|
||||
)
|
||||
.buckets(vec![
|
||||
0.000625, 0.00125, 0.0025, 0.005, 0.0075, 0.01, 0.025, 0.05, 0.1, 0.25,
|
||||
0.5, 1.0, 2.5, 5.0, 10.0,
|
||||
]),
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
time_import_pending_now_known: prometheus::register(
|
||||
prometheus::Histogram::with_opts(prometheus::HistogramOpts::new(
|
||||
"pezkuwi_teyrchain_time_import_pending_now_known",
|
||||
"Time spent on importing pending assignments and approvals.",
|
||||
).buckets(vec![0.0001, 0.0004, 0.0016, 0.0064, 0.0256, 0.1024, 0.4096, 1.6384, 3.2768, 4.9152, 6.5536,]))?,
|
||||
registry,
|
||||
)?,
|
||||
assignments_received_result: prometheus::register(
|
||||
prometheus::CounterVec::new(
|
||||
prometheus::Opts::new(
|
||||
"pezkuwi_teyrchain_assignments_received_result",
|
||||
"Result of a processed assignment",
|
||||
),
|
||||
&["status"]
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
approvals_received_result: prometheus::register(
|
||||
prometheus::CounterVec::new(
|
||||
prometheus::Opts::new(
|
||||
"pezkuwi_teyrchain_approvals_received_result",
|
||||
"Result of a processed approval",
|
||||
),
|
||||
&["status"]
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
};
|
||||
Ok(Metrics(Some(metrics)))
|
||||
}
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,63 @@
|
||||
[package]
|
||||
name = "pezkuwi-availability-distribution"
|
||||
description = "The Availability Distribution subsystem. Requests the required availability data. Also distributes availability data and chunks to requesters."
|
||||
version = "7.0.0"
|
||||
authors.workspace = true
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
homepage.workspace = true
|
||||
repository.workspace = true
|
||||
|
||||
[lints]
|
||||
workspace = true
|
||||
|
||||
[[bench]]
|
||||
name = "availability-distribution-regression-bench"
|
||||
path = "benches/availability-distribution-regression-bench.rs"
|
||||
harness = false
|
||||
required-features = ["subsystem-benchmarks"]
|
||||
|
||||
[dependencies]
|
||||
codec = { features = ["std"], workspace = true, default-features = true }
|
||||
fatality = { workspace = true }
|
||||
futures = { workspace = true }
|
||||
gum = { workspace = true, default-features = true }
|
||||
pezkuwi-erasure-coding = { workspace = true, default-features = true }
|
||||
pezkuwi-node-network-protocol = { workspace = true, default-features = true }
|
||||
pezkuwi-node-primitives = { workspace = true, default-features = true }
|
||||
pezkuwi-node-subsystem = { workspace = true, default-features = true }
|
||||
pezkuwi-node-subsystem-util = { workspace = true, default-features = true }
|
||||
pezkuwi-primitives = { workspace = true, default-features = true }
|
||||
rand = { workspace = true, default-features = true }
|
||||
sc-network = { workspace = true, default-features = true }
|
||||
schnellru = { workspace = true }
|
||||
sp-core = { features = ["std"], workspace = true, default-features = true }
|
||||
sp-keystore = { workspace = true, default-features = true }
|
||||
thiserror = { workspace = true }
|
||||
|
||||
[dev-dependencies]
|
||||
assert_matches = { workspace = true }
|
||||
futures-timer = { workspace = true }
|
||||
pezkuwi-node-subsystem-test-helpers = { workspace = true }
|
||||
pezkuwi-primitives-test-helpers = { workspace = true }
|
||||
pezkuwi-subsystem-bench = { workspace = true }
|
||||
rstest = { workspace = true }
|
||||
sp-keyring = { workspace = true, default-features = true }
|
||||
sp-tracing = { workspace = true, default-features = true }
|
||||
|
||||
[features]
|
||||
subsystem-benchmarks = []
|
||||
runtime-benchmarks = [
|
||||
"gum/runtime-benchmarks",
|
||||
"pezkuwi-erasure-coding/runtime-benchmarks",
|
||||
"pezkuwi-node-network-protocol/runtime-benchmarks",
|
||||
"pezkuwi-node-primitives/runtime-benchmarks",
|
||||
"pezkuwi-node-subsystem-test-helpers/runtime-benchmarks",
|
||||
"pezkuwi-node-subsystem-util/runtime-benchmarks",
|
||||
"pezkuwi-node-subsystem/runtime-benchmarks",
|
||||
"pezkuwi-primitives-test-helpers/runtime-benchmarks",
|
||||
"pezkuwi-primitives/runtime-benchmarks",
|
||||
"pezkuwi-subsystem-bench/runtime-benchmarks",
|
||||
"sc-network/runtime-benchmarks",
|
||||
"sp-keyring/runtime-benchmarks",
|
||||
]
|
||||
+87
@@ -0,0 +1,87 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! availability-write regression tests
|
||||
//!
|
||||
//! Availability write benchmark based on Kusama parameters and scale.
|
||||
//!
|
||||
//! Subsystems involved:
|
||||
//! - availability-distribution
|
||||
//! - bitfield-distribution
|
||||
//! - availability-store
|
||||
|
||||
use pezkuwi_subsystem_bench::{
|
||||
availability::{benchmark_availability_write, prepare_test, TestState},
|
||||
configuration::TestConfiguration,
|
||||
usage::BenchmarkUsage,
|
||||
utils::save_to_file,
|
||||
};
|
||||
use std::io::Write;
|
||||
|
||||
const BENCH_COUNT: usize = 50;
|
||||
|
||||
fn main() -> Result<(), String> {
|
||||
let mut messages = vec![];
|
||||
let mut config = TestConfiguration::default();
|
||||
// A single node effort roughly
|
||||
config.n_cores = 10;
|
||||
config.n_validators = 500;
|
||||
config.num_blocks = 3;
|
||||
config.generate_pov_sizes();
|
||||
let state = TestState::new(&config);
|
||||
|
||||
println!("Benchmarking...");
|
||||
let usages: Vec<BenchmarkUsage> = (0..BENCH_COUNT)
|
||||
.map(|n| {
|
||||
print!("\r[{}{}]", "#".repeat(n), "_".repeat(BENCH_COUNT - n));
|
||||
std::io::stdout().flush().unwrap();
|
||||
let (mut env, _cfgs) = prepare_test(
|
||||
&state,
|
||||
pezkuwi_subsystem_bench::availability::TestDataAvailability::Write,
|
||||
false,
|
||||
);
|
||||
env.runtime().block_on(benchmark_availability_write(&mut env, &state))
|
||||
})
|
||||
.collect();
|
||||
println!("\rDone!{}", " ".repeat(BENCH_COUNT));
|
||||
|
||||
let average_usage = BenchmarkUsage::average(&usages);
|
||||
save_to_file(
|
||||
"charts/availability-distribution-regression-bench.json",
|
||||
average_usage.to_chart_json().map_err(|e| e.to_string())?,
|
||||
)
|
||||
.map_err(|e| e.to_string())?;
|
||||
println!("{}", average_usage);
|
||||
|
||||
// We expect no variance for received and sent
|
||||
// but use 0.001 because we operate with floats
|
||||
messages.extend(average_usage.check_network_usage(&[
|
||||
("Received from peers", 433.3333, 0.001),
|
||||
("Sent to peers", 18479.9000, 0.001),
|
||||
]));
|
||||
messages.extend(average_usage.check_cpu_usage(&[
|
||||
("availability-distribution", 0.0131, 0.1),
|
||||
("availability-store", 0.1576, 0.1),
|
||||
("bitfield-distribution", 0.0224, 0.1),
|
||||
]));
|
||||
|
||||
if messages.is_empty() {
|
||||
Ok(())
|
||||
} else {
|
||||
eprintln!("{}", messages.join("\n"));
|
||||
Err("Regressions found".to_string())
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,126 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
//
|
||||
|
||||
//! Error handling related code and Error/Result definitions.
|
||||
|
||||
use fatality::Nested;
|
||||
use pezkuwi_node_network_protocol::request_response::outgoing::RequestError;
|
||||
use pezkuwi_primitives::SessionIndex;
|
||||
|
||||
use futures::channel::oneshot;
|
||||
|
||||
use pezkuwi_node_subsystem::{ChainApiError, RuntimeApiError, SubsystemError};
|
||||
use pezkuwi_node_subsystem_util::runtime;
|
||||
|
||||
use crate::LOG_TARGET;
|
||||
|
||||
#[allow(missing_docs)]
|
||||
#[fatality::fatality(splitable)]
|
||||
pub enum Error {
|
||||
#[fatal]
|
||||
#[error("Spawning subsystem task failed: {0}")]
|
||||
SpawnTask(#[source] SubsystemError),
|
||||
|
||||
#[fatal]
|
||||
#[error("Erasure chunk requester stream exhausted")]
|
||||
RequesterExhausted,
|
||||
|
||||
#[fatal]
|
||||
#[error("Receive channel closed: {0}")]
|
||||
IncomingMessageChannel(#[source] SubsystemError),
|
||||
|
||||
#[fatal(forward)]
|
||||
#[error("Error while accessing runtime information: {0}")]
|
||||
Runtime(#[from] runtime::Error),
|
||||
|
||||
#[fatal]
|
||||
#[error("Oneshot for receiving response from Chain API got cancelled")]
|
||||
ChainApiSenderDropped(#[from] oneshot::Canceled),
|
||||
|
||||
#[fatal]
|
||||
#[error("Retrieving response from Chain API unexpectedly failed with error: {0}")]
|
||||
ChainApi(#[from] ChainApiError),
|
||||
|
||||
#[error("Failed to get node features from the runtime")]
|
||||
FailedNodeFeatures(#[source] RuntimeApiError),
|
||||
|
||||
// av-store will drop the sender on any error that happens.
|
||||
#[error("Response channel to obtain chunk failed")]
|
||||
QueryChunkResponseChannel(#[source] oneshot::Canceled),
|
||||
|
||||
// av-store will drop the sender on any error that happens.
|
||||
#[error("Response channel to obtain available data failed")]
|
||||
QueryAvailableDataResponseChannel(#[source] oneshot::Canceled),
|
||||
|
||||
// We tried accessing a session that was not cached.
|
||||
#[error("Session {missing_session} is not cached, cached sessions: {available_sessions:?}.")]
|
||||
NoSuchCachedSession { available_sessions: Vec<SessionIndex>, missing_session: SessionIndex },
|
||||
|
||||
// Sending request response failed (Can happen on timeouts for example).
|
||||
#[error("Sending a request's response failed.")]
|
||||
SendResponse,
|
||||
|
||||
#[error("FetchPoV request error: {0}")]
|
||||
FetchPoV(#[source] RequestError),
|
||||
|
||||
#[error("Fetched PoV does not match expected hash")]
|
||||
UnexpectedPoV,
|
||||
|
||||
#[error("Remote responded with `NoSuchPoV`")]
|
||||
NoSuchPoV,
|
||||
|
||||
#[error("Given validator index could not be found in current session")]
|
||||
InvalidValidatorIndex,
|
||||
|
||||
#[error("Erasure coding error: {0}")]
|
||||
ErasureCoding(#[from] pezkuwi_erasure_coding::Error),
|
||||
}
|
||||
|
||||
/// General result abbreviation type alias.
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
|
||||
/// Utility for eating top level errors and log them.
|
||||
///
|
||||
/// We basically always want to try and continue on error. This utility function is meant to
|
||||
/// consume top-level errors by simply logging them
|
||||
pub fn log_error(
|
||||
result: Result<()>,
|
||||
ctx: &'static str,
|
||||
warn_freq: &mut gum::Freq,
|
||||
) -> std::result::Result<(), FatalError> {
|
||||
match result.into_nested()? {
|
||||
Ok(()) => Ok(()),
|
||||
Err(jfyi) => {
|
||||
match jfyi {
|
||||
JfyiError::UnexpectedPoV |
|
||||
JfyiError::InvalidValidatorIndex |
|
||||
JfyiError::NoSuchCachedSession { .. } |
|
||||
JfyiError::QueryAvailableDataResponseChannel(_) |
|
||||
JfyiError::QueryChunkResponseChannel(_) |
|
||||
JfyiError::FailedNodeFeatures(_) |
|
||||
JfyiError::ErasureCoding(_) => gum::warn!(target: LOG_TARGET, error = %jfyi, ctx),
|
||||
JfyiError::FetchPoV(_) |
|
||||
JfyiError::SendResponse |
|
||||
JfyiError::NoSuchPoV |
|
||||
JfyiError::Runtime(_) => {
|
||||
gum::warn_if_frequent!(freq: warn_freq, max_rate: gum::Times::PerHour(100), target: LOG_TARGET, error = ?jfyi, ctx)
|
||||
},
|
||||
}
|
||||
Ok(())
|
||||
},
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,199 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use futures::{future::Either, FutureExt, StreamExt, TryFutureExt};
|
||||
|
||||
use sp_keystore::KeystorePtr;
|
||||
|
||||
use pezkuwi_node_network_protocol::request_response::{
|
||||
v1, v2, IncomingRequestReceiver, ReqProtocolNames,
|
||||
};
|
||||
use pezkuwi_node_subsystem::{
|
||||
messages::AvailabilityDistributionMessage, overseer, FromOrchestra, OverseerSignal,
|
||||
SpawnedSubsystem, SubsystemError,
|
||||
};
|
||||
|
||||
/// Error and [`Result`] type for this subsystem.
|
||||
mod error;
|
||||
use error::{log_error, FatalError, Result};
|
||||
|
||||
use pezkuwi_node_subsystem_util::runtime::RuntimeInfo;
|
||||
|
||||
/// `Requester` taking care of requesting chunks for candidates pending availability.
|
||||
mod requester;
|
||||
use requester::Requester;
|
||||
|
||||
/// Handling requests for PoVs during backing.
|
||||
mod pov_requester;
|
||||
|
||||
/// Responding to erasure chunk requests:
|
||||
mod responder;
|
||||
use responder::{run_chunk_receivers, run_pov_receiver};
|
||||
|
||||
mod metrics;
|
||||
/// Prometheus `Metrics` for availability distribution.
|
||||
pub use metrics::Metrics;
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
||||
/// Log target used by this crate's log statements.
// A `const` reference is implicitly `'static`, so the lifetime is not spelled out.
const LOG_TARGET: &str = "teyrchain::availability-distribution";
|
||||
|
||||
/// The availability distribution subsystem.
|
||||
pub struct AvailabilityDistributionSubsystem {
|
||||
/// Easy and efficient runtime access for this subsystem.
|
||||
runtime: RuntimeInfo,
|
||||
/// Receivers to receive messages from.
|
||||
recvs: IncomingRequestReceivers,
|
||||
/// Mapping of the req-response protocols to the full protocol names.
|
||||
req_protocol_names: ReqProtocolNames,
|
||||
/// Prometheus metrics.
|
||||
metrics: Metrics,
|
||||
}
|
||||
|
||||
/// Receivers to be passed into availability distribution.
|
||||
pub struct IncomingRequestReceivers {
|
||||
/// Receiver for incoming PoV requests.
|
||||
pub pov_req_receiver: IncomingRequestReceiver<v1::PoVFetchingRequest>,
|
||||
/// Receiver for incoming v1 availability chunk requests.
|
||||
pub chunk_req_v1_receiver: IncomingRequestReceiver<v1::ChunkFetchingRequest>,
|
||||
/// Receiver for incoming v2 availability chunk requests.
|
||||
pub chunk_req_v2_receiver: IncomingRequestReceiver<v2::ChunkFetchingRequest>,
|
||||
}
|
||||
|
||||
#[overseer::subsystem(AvailabilityDistribution, error=SubsystemError, prefix=self::overseer)]
|
||||
impl<Context> AvailabilityDistributionSubsystem {
|
||||
fn start(self, ctx: Context) -> SpawnedSubsystem {
|
||||
let future = self
|
||||
.run(ctx)
|
||||
.map_err(|e| SubsystemError::with_origin("availability-distribution", e))
|
||||
.boxed();
|
||||
|
||||
SpawnedSubsystem { name: "availability-distribution-subsystem", future }
|
||||
}
|
||||
}
|
||||
|
||||
#[overseer::contextbounds(AvailabilityDistribution, prefix = self::overseer)]
|
||||
impl AvailabilityDistributionSubsystem {
|
||||
/// Create a new instance of the availability distribution.
|
||||
pub fn new(
|
||||
keystore: KeystorePtr,
|
||||
recvs: IncomingRequestReceivers,
|
||||
req_protocol_names: ReqProtocolNames,
|
||||
metrics: Metrics,
|
||||
) -> Self {
|
||||
let runtime = RuntimeInfo::new(Some(keystore));
|
||||
Self { runtime, recvs, req_protocol_names, metrics }
|
||||
}
|
||||
|
||||
/// Start processing work as passed on from the Overseer.
|
||||
async fn run<Context>(self, mut ctx: Context) -> std::result::Result<(), FatalError> {
|
||||
let Self { mut runtime, recvs, metrics, req_protocol_names } = self;
|
||||
|
||||
let IncomingRequestReceivers {
|
||||
pov_req_receiver,
|
||||
chunk_req_v1_receiver,
|
||||
chunk_req_v2_receiver,
|
||||
} = recvs;
|
||||
let mut requester = Requester::new(req_protocol_names, metrics.clone()).fuse();
|
||||
let mut warn_freq = gum::Freq::new();
|
||||
|
||||
{
|
||||
let sender = ctx.sender().clone();
|
||||
ctx.spawn(
|
||||
"pov-receiver",
|
||||
run_pov_receiver(sender.clone(), pov_req_receiver, metrics.clone()).boxed(),
|
||||
)
|
||||
.map_err(FatalError::SpawnTask)?;
|
||||
|
||||
ctx.spawn(
|
||||
"chunk-receiver",
|
||||
run_chunk_receivers(
|
||||
sender,
|
||||
chunk_req_v1_receiver,
|
||||
chunk_req_v2_receiver,
|
||||
metrics.clone(),
|
||||
)
|
||||
.boxed(),
|
||||
)
|
||||
.map_err(FatalError::SpawnTask)?;
|
||||
}
|
||||
|
||||
loop {
|
||||
let action = {
|
||||
let mut subsystem_next = ctx.recv().fuse();
|
||||
futures::select! {
|
||||
subsystem_msg = subsystem_next => Either::Left(subsystem_msg),
|
||||
from_task = requester.next() => Either::Right(from_task),
|
||||
}
|
||||
};
|
||||
|
||||
// Handle task messages sending:
|
||||
let message = match action {
|
||||
Either::Left(subsystem_msg) =>
|
||||
subsystem_msg.map_err(|e| FatalError::IncomingMessageChannel(e))?,
|
||||
Either::Right(from_task) => {
|
||||
let from_task = from_task.ok_or(FatalError::RequesterExhausted)?;
|
||||
ctx.send_message(from_task).await;
|
||||
continue;
|
||||
},
|
||||
};
|
||||
match message {
|
||||
FromOrchestra::Signal(OverseerSignal::ActiveLeaves(update)) => {
|
||||
log_error(
|
||||
requester
|
||||
.get_mut()
|
||||
.update_fetching_heads(&mut ctx, &mut runtime, update)
|
||||
.await,
|
||||
"Error in Requester::update_fetching_heads",
|
||||
&mut warn_freq,
|
||||
)?;
|
||||
},
|
||||
FromOrchestra::Signal(OverseerSignal::BlockFinalized(_hash, _finalized_number)) => {
|
||||
},
|
||||
FromOrchestra::Signal(OverseerSignal::Conclude) => return Ok(()),
|
||||
FromOrchestra::Communication {
|
||||
msg:
|
||||
AvailabilityDistributionMessage::FetchPoV {
|
||||
relay_parent,
|
||||
from_validator,
|
||||
para_id,
|
||||
candidate_hash,
|
||||
pov_hash,
|
||||
tx,
|
||||
},
|
||||
} => {
|
||||
log_error(
|
||||
pov_requester::fetch_pov(
|
||||
&mut ctx,
|
||||
&mut runtime,
|
||||
relay_parent,
|
||||
from_validator,
|
||||
para_id,
|
||||
candidate_hash,
|
||||
pov_hash,
|
||||
tx,
|
||||
metrics.clone(),
|
||||
)
|
||||
.await,
|
||||
"pov_requester::fetch_pov",
|
||||
&mut warn_freq,
|
||||
)?;
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,156 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use pezkuwi_node_subsystem_util::{
|
||||
metrics,
|
||||
metrics::{
|
||||
prometheus,
|
||||
prometheus::{Counter, CounterVec, Opts, PrometheusError, Registry, U64},
|
||||
},
|
||||
};
|
||||
|
||||
/// Label for success counters.
pub const SUCCEEDED: &str = "succeeded";

/// Label for fail counters.
pub const FAILED: &str = "failed";

/// Label for chunks/PoVs that could not be served, because they were not available.
pub const NOT_FOUND: &str = "not-found";
|
||||
|
||||
/// Availability Distribution metrics.
|
||||
#[derive(Clone, Default)]
|
||||
pub struct Metrics(Option<MetricsInner>);
|
||||
|
||||
#[derive(Clone)]
|
||||
struct MetricsInner {
|
||||
/// Number of chunks fetched.
|
||||
///
|
||||
/// Note: The failed count gets incremented, when we were not able to fetch the chunk at all.
|
||||
/// For times, where we failed downloading, but succeeded on the next try (with different
|
||||
/// backers), see `retries`.
|
||||
fetched_chunks: CounterVec<U64>,
|
||||
|
||||
/// Number of chunks served.
|
||||
served_chunks: CounterVec<U64>,
|
||||
|
||||
/// Number of received fetch PoV responses.
|
||||
fetched_povs: CounterVec<U64>,
|
||||
|
||||
/// Number of PoVs served.
|
||||
served_povs: CounterVec<U64>,
|
||||
|
||||
/// Number of times our first set of validators did not provide the needed chunk and we had to
|
||||
/// query further validators.
|
||||
retries: Counter<U64>,
|
||||
}
|
||||
|
||||
impl Metrics {
|
||||
/// Create new dummy metrics, not reporting anything.
|
||||
pub fn new_dummy() -> Self {
|
||||
Metrics(None)
|
||||
}
|
||||
|
||||
/// Increment counter on fetched labels.
|
||||
pub fn on_fetch(&self, label: &'static str) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.fetched_chunks.with_label_values(&[label]).inc()
|
||||
}
|
||||
}
|
||||
|
||||
/// Increment counter on served chunks.
|
||||
pub fn on_served_chunk(&self, label: &'static str) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.served_chunks.with_label_values(&[label]).inc()
|
||||
}
|
||||
}
|
||||
|
||||
/// Increment counter on fetched PoVs.
|
||||
pub fn on_fetched_pov(&self, label: &'static str) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.fetched_povs.with_label_values(&[label]).inc()
|
||||
}
|
||||
}
|
||||
|
||||
/// Increment counter on served PoVs.
|
||||
pub fn on_served_pov(&self, label: &'static str) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.served_povs.with_label_values(&[label]).inc()
|
||||
}
|
||||
}
|
||||
|
||||
/// Increment retry counter.
|
||||
pub fn on_retry(&self) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.retries.inc()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl metrics::Metrics for Metrics {
|
||||
fn try_register(registry: &Registry) -> Result<Self, PrometheusError> {
|
||||
let metrics = MetricsInner {
|
||||
fetched_chunks: prometheus::register(
|
||||
CounterVec::new(
|
||||
Opts::new(
|
||||
"pezkuwi_teyrchain_fetched_chunks_total",
|
||||
"Total number of fetched chunks.",
|
||||
),
|
||||
&["success"]
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
served_chunks: prometheus::register(
|
||||
CounterVec::new(
|
||||
Opts::new(
|
||||
"pezkuwi_teyrchain_served_chunks_total",
|
||||
"Total number of chunks served by this backer.",
|
||||
),
|
||||
&["success"]
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
fetched_povs: prometheus::register(
|
||||
CounterVec::new(
|
||||
Opts::new(
|
||||
"pezkuwi_teyrchain_fetched_povs_total",
|
||||
"Total number of povs fetches by this backer.",
|
||||
),
|
||||
&["success"]
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
served_povs: prometheus::register(
|
||||
CounterVec::new(
|
||||
Opts::new(
|
||||
"pezkuwi_teyrchain_served_povs_total",
|
||||
"Total number of povs served by this backer.",
|
||||
),
|
||||
&["success"]
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
retries: prometheus::register(
|
||||
Counter::new(
|
||||
"pezkuwi_teyrchain_fetch_retries_total",
|
||||
"Number of times we did not succeed in fetching a chunk and needed to try more backers.",
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
};
|
||||
Ok(Metrics(Some(metrics)))
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,239 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! PoV requester takes care of requesting PoVs from validators of a backing group.
|
||||
|
||||
use futures::{channel::oneshot, future::BoxFuture, FutureExt};
|
||||
|
||||
use pezkuwi_node_network_protocol::request_response::{
|
||||
outgoing::{RequestError, Requests},
|
||||
v1::{PoVFetchingRequest, PoVFetchingResponse},
|
||||
OutgoingRequest, Recipient,
|
||||
};
|
||||
use pezkuwi_node_primitives::PoV;
|
||||
use pezkuwi_node_subsystem::{
|
||||
messages::{IfDisconnected, NetworkBridgeTxMessage},
|
||||
overseer,
|
||||
};
|
||||
use pezkuwi_node_subsystem_util::runtime::RuntimeInfo;
|
||||
use pezkuwi_primitives::{AuthorityDiscoveryId, CandidateHash, Hash, Id as ParaId, ValidatorIndex};
|
||||
|
||||
use crate::{
|
||||
error::{Error, FatalError, JfyiError, Result},
|
||||
metrics::{FAILED, NOT_FOUND, SUCCEEDED},
|
||||
Metrics, LOG_TARGET,
|
||||
};
|
||||
|
||||
/// Start background worker for taking care of fetching the requested `PoV` from the network.
|
||||
#[overseer::contextbounds(AvailabilityDistribution, prefix = self::overseer)]
|
||||
pub async fn fetch_pov<Context>(
|
||||
ctx: &mut Context,
|
||||
runtime: &mut RuntimeInfo,
|
||||
parent: Hash,
|
||||
from_validator: ValidatorIndex,
|
||||
para_id: ParaId,
|
||||
candidate_hash: CandidateHash,
|
||||
pov_hash: Hash,
|
||||
tx: oneshot::Sender<PoV>,
|
||||
metrics: Metrics,
|
||||
) -> Result<()> {
|
||||
let info = &runtime.get_session_info(ctx.sender(), parent).await?.session_info;
|
||||
let authority_id = info
|
||||
.discovery_keys
|
||||
.get(from_validator.0 as usize)
|
||||
.ok_or(JfyiError::InvalidValidatorIndex)?
|
||||
.clone();
|
||||
let (req, pending_response) = OutgoingRequest::new(
|
||||
Recipient::Authority(authority_id.clone()),
|
||||
PoVFetchingRequest { candidate_hash },
|
||||
);
|
||||
let full_req = Requests::PoVFetchingV1(req);
|
||||
|
||||
ctx.send_message(NetworkBridgeTxMessage::SendRequests(
|
||||
vec![full_req],
|
||||
IfDisconnected::ImmediateError,
|
||||
))
|
||||
.await;
|
||||
|
||||
ctx.spawn(
|
||||
"pov-fetcher",
|
||||
fetch_pov_job(para_id, pov_hash, authority_id, pending_response.boxed(), tx, metrics)
|
||||
.boxed(),
|
||||
)
|
||||
.map_err(|e| FatalError::SpawnTask(e))?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Future to be spawned for taking care of handling reception and sending of PoV.
|
||||
async fn fetch_pov_job(
|
||||
para_id: ParaId,
|
||||
pov_hash: Hash,
|
||||
authority_id: AuthorityDiscoveryId,
|
||||
pending_response: BoxFuture<'static, std::result::Result<PoVFetchingResponse, RequestError>>,
|
||||
tx: oneshot::Sender<PoV>,
|
||||
metrics: Metrics,
|
||||
) {
|
||||
if let Err(err) = do_fetch_pov(pov_hash, pending_response, tx, metrics).await {
|
||||
gum::warn!(target: LOG_TARGET, ?err, ?para_id, ?pov_hash, ?authority_id, "fetch_pov_job");
|
||||
}
|
||||
}
|
||||
|
||||
/// Do the actual work of waiting for the response.
|
||||
async fn do_fetch_pov(
|
||||
pov_hash: Hash,
|
||||
pending_response: BoxFuture<'static, std::result::Result<PoVFetchingResponse, RequestError>>,
|
||||
tx: oneshot::Sender<PoV>,
|
||||
metrics: Metrics,
|
||||
) -> Result<()> {
|
||||
let response = pending_response.await.map_err(Error::FetchPoV);
|
||||
let pov = match response {
|
||||
Ok(PoVFetchingResponse::PoV(pov)) => pov,
|
||||
Ok(PoVFetchingResponse::NoSuchPoV) => {
|
||||
metrics.on_fetched_pov(NOT_FOUND);
|
||||
return Err(Error::NoSuchPoV);
|
||||
},
|
||||
Err(err) => {
|
||||
metrics.on_fetched_pov(FAILED);
|
||||
return Err(err);
|
||||
},
|
||||
};
|
||||
if pov.hash() == pov_hash {
|
||||
metrics.on_fetched_pov(SUCCEEDED);
|
||||
tx.send(pov).map_err(|_| Error::SendResponse)
|
||||
} else {
|
||||
metrics.on_fetched_pov(FAILED);
|
||||
Err(Error::UnexpectedPoV)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use assert_matches::assert_matches;
    use futures::{executor, future};

    use codec::Encode;
    use sc_network::ProtocolName;
    use sp_core::testing::TaskExecutor;

    use pezkuwi_node_primitives::BlockData;
    use pezkuwi_node_subsystem::messages::{
        AllMessages, AvailabilityDistributionMessage, RuntimeApiMessage, RuntimeApiRequest,
    };
    use pezkuwi_node_subsystem_test_helpers as test_helpers;
    use pezkuwi_primitives::{CandidateHash, ExecutorParams, Hash, NodeFeatures, ValidatorIndex};
    use test_helpers::mock::make_ferdie_keystore;

    use super::*;
    use crate::{tests::mock::make_session_info, LOG_TARGET};

    /// A PoV whose hash does not match the requested one must not reach the requester.
    #[test]
    fn rejects_invalid_pov() {
        sp_tracing::try_init_simple();
        let pov = PoV { block_data: BlockData(vec![1, 2, 3, 4, 5, 6]) };
        test_run(Hash::default(), pov);
    }

    /// A PoV matching the requested hash is delivered to the requester.
    #[test]
    fn accepts_valid_pov() {
        sp_tracing::try_init_simple();
        let pov = PoV { block_data: BlockData(vec![1, 2, 3, 4, 5, 6]) };
        let expected_hash = pov.hash();
        test_run(expected_hash, pov);
    }

    /// Run `fetch_pov` against a mocked overseer that answers the PoV request with `pov`.
    ///
    /// The requester side is expected to receive `pov` if its hash equals `pov_hash`, and to
    /// see the channel canceled otherwise.
    fn test_run(pov_hash: Hash, pov: PoV) {
        let pool = TaskExecutor::new();
        let (mut context, mut virtual_overseer) = test_helpers::make_subsystem_context::<
            AvailabilityDistributionMessage,
            TaskExecutor,
        >(pool.clone());
        let keystore = make_ferdie_keystore();
        let mut runtime = pezkuwi_node_subsystem_util::runtime::RuntimeInfo::new(Some(keystore));

        let (tx, rx) = oneshot::channel();

        // Side under test: issue the fetch request.
        let testee = async {
            fetch_pov(
                &mut context,
                &mut runtime,
                Hash::default(),
                ValidatorIndex(0),
                ParaId::default(),
                CandidateHash::default(),
                pov_hash,
                tx,
                Metrics::new_dummy(),
            )
            .await
            .expect("Should succeed");
        };

        // Mocked environment: answer runtime queries until the network request shows up.
        let tester = async move {
            loop {
                match virtual_overseer.recv().await {
                    AllMessages::RuntimeApi(RuntimeApiMessage::Request(
                        _,
                        RuntimeApiRequest::SessionIndexForChild(reply),
                    )) => {
                        reply.send(Ok(0)).unwrap();
                    },
                    AllMessages::RuntimeApi(RuntimeApiMessage::Request(
                        _,
                        RuntimeApiRequest::SessionInfo(_, reply),
                    )) => {
                        reply.send(Ok(Some(make_session_info()))).unwrap();
                    },
                    AllMessages::RuntimeApi(RuntimeApiMessage::Request(
                        _,
                        RuntimeApiRequest::SessionExecutorParams(_, reply),
                    )) => {
                        reply.send(Ok(Some(ExecutorParams::default()))).unwrap();
                    },
                    AllMessages::RuntimeApi(RuntimeApiMessage::Request(
                        _,
                        RuntimeApiRequest::NodeFeatures(_, reply),
                    )) => {
                        reply.send(Ok(NodeFeatures::EMPTY)).unwrap();
                    },
                    AllMessages::NetworkBridgeTx(NetworkBridgeTxMessage::SendRequests(
                        mut requests,
                        _,
                    )) => {
                        let outgoing = assert_matches!(
                            requests.pop(),
                            Some(Requests::PoVFetchingV1(outgoing)) => {outgoing}
                        );
                        let encoded = PoVFetchingResponse::PoV(pov.clone()).encode();
                        outgoing
                            .pending_response
                            .send(Ok((encoded, ProtocolName::from(""))))
                            .unwrap();
                        break;
                    },
                    msg => gum::debug!(target: LOG_TARGET, msg = ?msg, "Received msg"),
                }
            }

            let expected = if pov.hash() == pov_hash { Ok(pov) } else { Err(oneshot::Canceled) };
            assert_eq!(rx.await, expected);
        };

        futures::pin_mut!(testee);
        futures::pin_mut!(tester);
        executor::block_on(future::join(testee, tester));
    }
}
|
||||
@@ -0,0 +1,560 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use std::collections::HashSet;
|
||||
|
||||
use futures::{
|
||||
channel::{mpsc, oneshot},
|
||||
future::select,
|
||||
FutureExt, SinkExt,
|
||||
};
|
||||
|
||||
use codec::Decode;
|
||||
use pezkuwi_erasure_coding::branch_hash;
|
||||
use pezkuwi_node_network_protocol::request_response::{
|
||||
outgoing::{OutgoingRequest, Recipient, RequestError, Requests},
|
||||
v1::{self, ChunkResponse},
|
||||
v2,
|
||||
};
|
||||
use pezkuwi_node_primitives::ErasureChunk;
|
||||
use pezkuwi_node_subsystem::{
|
||||
messages::{AvailabilityStoreMessage, IfDisconnected, NetworkBridgeTxMessage},
|
||||
overseer,
|
||||
};
|
||||
use pezkuwi_primitives::{
|
||||
AuthorityDiscoveryId, BlakeTwo256, CandidateHash, ChunkIndex, GroupIndex, Hash, HashT,
|
||||
OccupiedCore, SessionIndex,
|
||||
};
|
||||
use sc_network::ProtocolName;
|
||||
|
||||
use crate::{
|
||||
error::{FatalError, Result},
|
||||
metrics::{Metrics, FAILED, SUCCEEDED},
|
||||
requester::session_cache::{BadValidators, SessionInfo},
|
||||
LOG_TARGET,
|
||||
};
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
||||
/// Configuration for a `FetchTask`
|
||||
///
|
||||
/// This exists to separate preparation of a `FetchTask` from actual starting it, which is
|
||||
/// beneficial as this allows as for taking session info by reference.
|
||||
pub struct FetchTaskConfig {
|
||||
prepared_running: Option<RunningTask>,
|
||||
live_in: HashSet<Hash>,
|
||||
}
|
||||
|
||||
/// Information about a task fetching an erasure chunk.
|
||||
pub struct FetchTask {
|
||||
/// For what relay parents this task is relevant.
|
||||
///
|
||||
/// In other words, for which relay chain parents this candidate is considered live.
|
||||
/// This is updated on every `ActiveLeavesUpdate` and enables us to know when we can safely
|
||||
/// stop keeping track of that candidate/chunk.
|
||||
pub(crate) live_in: HashSet<Hash>,
|
||||
|
||||
/// We keep the task around in until `live_in` becomes empty, to make
|
||||
/// sure we won't re-fetch an already fetched candidate.
|
||||
state: FetchedState,
|
||||
}
|
||||
|
||||
/// State of a particular candidate chunk fetching process.
|
||||
enum FetchedState {
|
||||
/// Chunk fetch has started.
|
||||
///
|
||||
/// Once the contained `Sender` is dropped, any still running task will be canceled.
|
||||
Started(oneshot::Sender<()>),
|
||||
/// All relevant `live_in` have been removed, before we were able to get our chunk.
|
||||
Canceled,
|
||||
}
|
||||
|
||||
/// Messages sent from `FetchTask`s to be handled/forwarded.
|
||||
pub enum FromFetchTask {
|
||||
/// Message to other subsystem.
|
||||
Message(overseer::AvailabilityDistributionOutgoingMessages),
|
||||
|
||||
/// Concluded with result.
|
||||
///
|
||||
/// In case of `None` everything was fine, in case of `Some`, some validators in the group
|
||||
/// did not serve us our chunk as expected.
|
||||
Concluded(Option<BadValidators>),
|
||||
|
||||
/// We were not able to fetch the desired chunk for the given `CandidateHash`.
|
||||
Failed(CandidateHash),
|
||||
}
|
||||
|
||||
/// Information a running task needs.
|
||||
struct RunningTask {
|
||||
/// For what session we have been spawned.
|
||||
session_index: SessionIndex,
|
||||
|
||||
/// Index of validator group to fetch the chunk from.
|
||||
///
|
||||
/// Needed for reporting bad validators.
|
||||
group_index: GroupIndex,
|
||||
|
||||
/// Validators to request the chunk from.
|
||||
///
|
||||
/// This vector gets drained during execution of the task (it will be empty afterwards).
|
||||
group: Vec<AuthorityDiscoveryId>,
|
||||
|
||||
/// The request to send. We can store it as either v1 or v2, they have the same payload.
|
||||
request: v2::ChunkFetchingRequest,
|
||||
|
||||
/// Root hash, for verifying the chunks validity.
|
||||
erasure_root: Hash,
|
||||
|
||||
/// Relay parent of the candidate to fetch.
|
||||
relay_parent: Hash,
|
||||
|
||||
/// Sender for communicating with other subsystems and reporting results.
|
||||
sender: mpsc::Sender<FromFetchTask>,
|
||||
|
||||
/// Prometheus metrics for reporting results.
|
||||
metrics: Metrics,
|
||||
|
||||
/// Expected chunk index. We'll validate that the remote did send us the correct chunk (only
|
||||
/// important for v2 requests).
|
||||
chunk_index: ChunkIndex,
|
||||
|
||||
/// Full protocol name for ChunkFetchingV1.
|
||||
req_v1_protocol_name: ProtocolName,
|
||||
|
||||
/// Full protocol name for ChunkFetchingV2.
|
||||
req_v2_protocol_name: ProtocolName,
|
||||
}
|
||||
|
||||
impl FetchTaskConfig {
|
||||
/// Create a new configuration for a [`FetchTask`].
|
||||
///
|
||||
/// The result of this function can be passed into [`FetchTask::start`].
|
||||
pub fn new(
|
||||
leaf: Hash,
|
||||
core: &OccupiedCore,
|
||||
sender: mpsc::Sender<FromFetchTask>,
|
||||
metrics: Metrics,
|
||||
session_info: &SessionInfo,
|
||||
chunk_index: ChunkIndex,
|
||||
req_v1_protocol_name: ProtocolName,
|
||||
req_v2_protocol_name: ProtocolName,
|
||||
) -> Self {
|
||||
let live_in = vec![leaf].into_iter().collect();
|
||||
|
||||
// Don't run tasks for our backing group:
|
||||
if session_info.our_group == Some(core.group_responsible) {
|
||||
return FetchTaskConfig { live_in, prepared_running: None };
|
||||
}
|
||||
|
||||
let prepared_running = RunningTask {
|
||||
session_index: session_info.session_index,
|
||||
group_index: core.group_responsible,
|
||||
group: session_info.validator_groups.get(core.group_responsible.0 as usize)
|
||||
.expect("The responsible group of a candidate should be available in the corresponding session. qed.")
|
||||
.clone(),
|
||||
request: v2::ChunkFetchingRequest {
|
||||
candidate_hash: core.candidate_hash,
|
||||
index: session_info.our_index,
|
||||
},
|
||||
erasure_root: core.candidate_descriptor.erasure_root(),
|
||||
relay_parent: core.candidate_descriptor.relay_parent(),
|
||||
metrics,
|
||||
sender,
|
||||
chunk_index,
|
||||
req_v1_protocol_name,
|
||||
req_v2_protocol_name
|
||||
};
|
||||
FetchTaskConfig { live_in, prepared_running: Some(prepared_running) }
|
||||
}
|
||||
}
|
||||
|
||||
#[overseer::contextbounds(AvailabilityDistribution, prefix = self::overseer)]
|
||||
impl FetchTask {
|
||||
/// Start fetching a chunk.
|
||||
///
|
||||
/// A task handling the fetching of the configured chunk will be spawned.
|
||||
pub async fn start<Context>(config: FetchTaskConfig, ctx: &mut Context) -> Result<Self> {
|
||||
let FetchTaskConfig { prepared_running, live_in } = config;
|
||||
|
||||
if let Some(running) = prepared_running {
|
||||
let (handle, kill) = oneshot::channel();
|
||||
|
||||
ctx.spawn("chunk-fetcher", running.run(kill).boxed())
|
||||
.map_err(|e| FatalError::SpawnTask(e))?;
|
||||
|
||||
Ok(FetchTask { live_in, state: FetchedState::Started(handle) })
|
||||
} else {
|
||||
Ok(FetchTask { live_in, state: FetchedState::Canceled })
|
||||
}
|
||||
}
|
||||
|
||||
/// Add the given leaf to the relay parents which are making this task relevant.
|
||||
///
|
||||
/// This is for book keeping, so we know we are already fetching a given chunk.
|
||||
pub fn add_leaf(&mut self, leaf: Hash) {
|
||||
self.live_in.insert(leaf);
|
||||
}
|
||||
|
||||
/// Remove leaves and cancel the task, if it was the last one and the task has still been
|
||||
/// fetching.
|
||||
pub fn remove_leaves(&mut self, leaves: &HashSet<Hash>) {
|
||||
for leaf in leaves {
|
||||
self.live_in.remove(leaf);
|
||||
}
|
||||
if self.live_in.is_empty() && !self.is_finished() {
|
||||
self.state = FetchedState::Canceled
|
||||
}
|
||||
}
|
||||
|
||||
/// Whether there are still relay parents around with this candidate pending
|
||||
/// availability.
|
||||
pub fn is_live(&self) -> bool {
|
||||
!self.live_in.is_empty()
|
||||
}
|
||||
|
||||
/// Whether this task can be considered finished.
|
||||
///
|
||||
/// That is, it is either canceled, succeeded or failed.
|
||||
pub fn is_finished(&self) -> bool {
|
||||
match &self.state {
|
||||
FetchedState::Canceled => true,
|
||||
FetchedState::Started(sender) => sender.is_canceled(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Ways in which executing a task can fail.
#[derive(Debug)]
enum TaskError {
    /// For some reason the peer did not deliver a correct chunk (this has been reported as
    /// appropriate).
    PeerError,
    /// Sending a message failed, so this very node seems to be shutting down.
    ShuttingDown,
}
|
||||
|
||||
impl RunningTask {
|
||||
async fn run(self, kill: oneshot::Receiver<()>) {
|
||||
// Wait for completion/or cancel.
|
||||
let run_it = self.run_inner();
|
||||
futures::pin_mut!(run_it);
|
||||
let _ = select(run_it, kill).await;
|
||||
}
|
||||
|
||||
/// Fetch and store chunk.
|
||||
///
|
||||
/// Try validators in backing group in order.
|
||||
async fn run_inner(mut self) {
|
||||
let mut bad_validators = Vec::new();
|
||||
let mut succeeded = false;
|
||||
let mut count: u32 = 0;
|
||||
let mut network_error_freq = gum::Freq::new();
|
||||
let mut canceled_freq = gum::Freq::new();
|
||||
// Try validators in reverse order:
|
||||
while let Some(validator) = self.group.pop() {
|
||||
// Report retries:
|
||||
if count > 0 {
|
||||
self.metrics.on_retry();
|
||||
}
|
||||
count += 1;
|
||||
|
||||
// Send request:
|
||||
let resp = match self
|
||||
.do_request(&validator, &mut network_error_freq, &mut canceled_freq)
|
||||
.await
|
||||
{
|
||||
Ok(resp) => resp,
|
||||
Err(TaskError::ShuttingDown) => {
|
||||
gum::info!(
|
||||
target: LOG_TARGET,
|
||||
"Node seems to be shutting down, canceling fetch task"
|
||||
);
|
||||
self.metrics.on_fetch(FAILED);
|
||||
return;
|
||||
},
|
||||
Err(TaskError::PeerError) => {
|
||||
bad_validators.push(validator);
|
||||
continue;
|
||||
},
|
||||
};
|
||||
|
||||
let chunk = match resp {
|
||||
Some(chunk) => chunk,
|
||||
None => {
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
validator = ?validator,
|
||||
relay_parent = ?self.relay_parent,
|
||||
group_index = ?self.group_index,
|
||||
session_index = ?self.session_index,
|
||||
chunk_index = ?self.request.index,
|
||||
candidate_hash = ?self.request.candidate_hash,
|
||||
"Validator did not have our chunk"
|
||||
);
|
||||
bad_validators.push(validator);
|
||||
continue;
|
||||
},
|
||||
};
|
||||
|
||||
// Data genuine?
|
||||
if !self.validate_chunk(&validator, &chunk, self.chunk_index) {
|
||||
bad_validators.push(validator);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Ok, let's store it and be happy:
|
||||
self.store_chunk(chunk).await;
|
||||
succeeded = true;
|
||||
break;
|
||||
}
|
||||
if succeeded {
|
||||
self.metrics.on_fetch(SUCCEEDED);
|
||||
self.conclude(bad_validators).await;
|
||||
} else {
|
||||
self.metrics.on_fetch(FAILED);
|
||||
self.conclude_fail().await
|
||||
}
|
||||
}
|
||||
|
||||
/// Do request and return response, if successful.
|
||||
async fn do_request(
|
||||
&mut self,
|
||||
validator: &AuthorityDiscoveryId,
|
||||
network_error_freq: &mut gum::Freq,
|
||||
canceled_freq: &mut gum::Freq,
|
||||
) -> std::result::Result<Option<ErasureChunk>, TaskError> {
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
origin = ?validator,
|
||||
relay_parent = ?self.relay_parent,
|
||||
group_index = ?self.group_index,
|
||||
session_index = ?self.session_index,
|
||||
chunk_index = ?self.request.index,
|
||||
candidate_hash = ?self.request.candidate_hash,
|
||||
"Starting chunk request",
|
||||
);
|
||||
|
||||
let (full_request, response_recv) = OutgoingRequest::new_with_fallback(
|
||||
Recipient::Authority(validator.clone()),
|
||||
self.request,
|
||||
// Fallback to v1, for backwards compatibility.
|
||||
v1::ChunkFetchingRequest::from(self.request),
|
||||
);
|
||||
let requests = Requests::ChunkFetching(full_request);
|
||||
|
||||
self.sender
|
||||
.send(FromFetchTask::Message(
|
||||
NetworkBridgeTxMessage::SendRequests(
|
||||
vec![requests],
|
||||
IfDisconnected::ImmediateError,
|
||||
)
|
||||
.into(),
|
||||
))
|
||||
.await
|
||||
.map_err(|_| TaskError::ShuttingDown)?;
|
||||
|
||||
match response_recv.await {
|
||||
Ok((bytes, protocol)) => match protocol {
|
||||
_ if protocol == self.req_v2_protocol_name =>
|
||||
match v2::ChunkFetchingResponse::decode(&mut &bytes[..]) {
|
||||
Ok(chunk_response) => Ok(Option::<ErasureChunk>::from(chunk_response)),
|
||||
Err(e) => {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
origin = ?validator,
|
||||
relay_parent = ?self.relay_parent,
|
||||
group_index = ?self.group_index,
|
||||
session_index = ?self.session_index,
|
||||
chunk_index = ?self.request.index,
|
||||
candidate_hash = ?self.request.candidate_hash,
|
||||
err = ?e,
|
||||
"Peer sent us invalid erasure chunk data (v2)"
|
||||
);
|
||||
Err(TaskError::PeerError)
|
||||
},
|
||||
},
|
||||
_ if protocol == self.req_v1_protocol_name =>
|
||||
match v1::ChunkFetchingResponse::decode(&mut &bytes[..]) {
|
||||
Ok(chunk_response) => Ok(Option::<ChunkResponse>::from(chunk_response)
|
||||
.map(|c| c.recombine_into_chunk(&self.request.into()))),
|
||||
Err(e) => {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
origin = ?validator,
|
||||
relay_parent = ?self.relay_parent,
|
||||
group_index = ?self.group_index,
|
||||
session_index = ?self.session_index,
|
||||
chunk_index = ?self.request.index,
|
||||
candidate_hash = ?self.request.candidate_hash,
|
||||
err = ?e,
|
||||
"Peer sent us invalid erasure chunk data"
|
||||
);
|
||||
Err(TaskError::PeerError)
|
||||
},
|
||||
},
|
||||
_ => {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
origin = ?validator,
|
||||
relay_parent = ?self.relay_parent,
|
||||
group_index = ?self.group_index,
|
||||
session_index = ?self.session_index,
|
||||
chunk_index = ?self.request.index,
|
||||
candidate_hash = ?self.request.candidate_hash,
|
||||
"Peer sent us invalid erasure chunk data - unknown protocol"
|
||||
);
|
||||
Err(TaskError::PeerError)
|
||||
},
|
||||
},
|
||||
Err(RequestError::InvalidResponse(err)) => {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
origin = ?validator,
|
||||
relay_parent = ?self.relay_parent,
|
||||
group_index = ?self.group_index,
|
||||
session_index = ?self.session_index,
|
||||
chunk_index = ?self.request.index,
|
||||
candidate_hash = ?self.request.candidate_hash,
|
||||
err = ?err,
|
||||
"Peer sent us invalid erasure chunk data"
|
||||
);
|
||||
Err(TaskError::PeerError)
|
||||
},
|
||||
Err(RequestError::NetworkError(err)) => {
|
||||
gum::warn_if_frequent!(
|
||||
freq: network_error_freq,
|
||||
max_rate: gum::Times::PerHour(100),
|
||||
target: LOG_TARGET,
|
||||
origin = ?validator,
|
||||
relay_parent = ?self.relay_parent,
|
||||
group_index = ?self.group_index,
|
||||
session_index = ?self.session_index,
|
||||
chunk_index = ?self.request.index,
|
||||
candidate_hash = ?self.request.candidate_hash,
|
||||
err = ?err,
|
||||
"Some network error occurred when fetching erasure chunk"
|
||||
);
|
||||
Err(TaskError::PeerError)
|
||||
},
|
||||
Err(RequestError::Canceled(oneshot::Canceled)) => {
|
||||
gum::warn_if_frequent!(
|
||||
freq: canceled_freq,
|
||||
max_rate: gum::Times::PerHour(100),
|
||||
target: LOG_TARGET,
|
||||
origin = ?validator,
|
||||
relay_parent = ?self.relay_parent,
|
||||
group_index = ?self.group_index,
|
||||
session_index = ?self.session_index,
|
||||
chunk_index = ?self.request.index,
|
||||
candidate_hash = ?self.request.candidate_hash,
|
||||
"Erasure chunk request got canceled"
|
||||
);
|
||||
Err(TaskError::PeerError)
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
fn validate_chunk(
|
||||
&self,
|
||||
validator: &AuthorityDiscoveryId,
|
||||
chunk: &ErasureChunk,
|
||||
expected_chunk_index: ChunkIndex,
|
||||
) -> bool {
|
||||
if chunk.index != expected_chunk_index {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
candidate_hash = ?self.request.candidate_hash,
|
||||
origin = ?validator,
|
||||
chunk_index = ?chunk.index,
|
||||
expected_chunk_index = ?expected_chunk_index,
|
||||
"Validator sent the wrong chunk",
|
||||
);
|
||||
return false;
|
||||
}
|
||||
let anticipated_hash =
|
||||
match branch_hash(&self.erasure_root, chunk.proof(), chunk.index.0 as usize) {
|
||||
Ok(hash) => hash,
|
||||
Err(e) => {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
candidate_hash = ?self.request.candidate_hash,
|
||||
origin = ?validator,
|
||||
error = ?e,
|
||||
"Failed to calculate chunk merkle proof",
|
||||
);
|
||||
return false;
|
||||
},
|
||||
};
|
||||
let erasure_chunk_hash = BlakeTwo256::hash(&chunk.chunk);
|
||||
if anticipated_hash != erasure_chunk_hash {
|
||||
gum::warn!(target: LOG_TARGET, origin = ?validator, "Received chunk does not match merkle tree");
|
||||
return false;
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
/// Store given chunk and log any error.
|
||||
async fn store_chunk(&mut self, chunk: ErasureChunk) {
|
||||
let (tx, rx) = oneshot::channel();
|
||||
let r = self
|
||||
.sender
|
||||
.send(FromFetchTask::Message(
|
||||
AvailabilityStoreMessage::StoreChunk {
|
||||
candidate_hash: self.request.candidate_hash,
|
||||
chunk,
|
||||
validator_index: self.request.index,
|
||||
tx,
|
||||
}
|
||||
.into(),
|
||||
))
|
||||
.await;
|
||||
if let Err(err) = r {
|
||||
gum::error!(target: LOG_TARGET, err= ?err, "Storing erasure chunk failed, system shutting down?");
|
||||
}
|
||||
|
||||
if let Err(oneshot::Canceled) = rx.await {
|
||||
gum::error!(target: LOG_TARGET, "Storing erasure chunk failed");
|
||||
}
|
||||
}
|
||||
|
||||
/// Tell subsystem we are done.
|
||||
async fn conclude(&mut self, bad_validators: Vec<AuthorityDiscoveryId>) {
|
||||
let payload = if bad_validators.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(BadValidators {
|
||||
session_index: self.session_index,
|
||||
group_index: self.group_index,
|
||||
bad_validators,
|
||||
})
|
||||
};
|
||||
if let Err(err) = self.sender.send(FromFetchTask::Concluded(payload)).await {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
err= ?err,
|
||||
"Sending concluded message for task failed"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
async fn conclude_fail(&mut self) {
|
||||
if let Err(err) = self.sender.send(FromFetchTask::Failed(self.request.candidate_hash)).await
|
||||
{
|
||||
gum::warn!(target: LOG_TARGET, ?err, "Sending `Failed` message for task failed");
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,400 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use codec::Encode;
|
||||
|
||||
use futures::{
|
||||
channel::{mpsc, oneshot},
|
||||
executor, select,
|
||||
task::{noop_waker, Context, Poll},
|
||||
Future, FutureExt, StreamExt,
|
||||
};
|
||||
use rstest::rstest;
|
||||
|
||||
use sc_network::{self as network, ProtocolName};
|
||||
use sp_keyring::Sr25519Keyring;
|
||||
|
||||
use pezkuwi_node_network_protocol::request_response::{
|
||||
v1::{self, ChunkResponse},
|
||||
Protocol, Recipient, ReqProtocolNames,
|
||||
};
|
||||
use pezkuwi_node_primitives::{BlockData, PoV, Proof};
|
||||
use pezkuwi_node_subsystem::messages::AllMessages;
|
||||
use pezkuwi_primitives::{CandidateHash, ChunkIndex, ValidatorIndex};
|
||||
|
||||
use super::*;
|
||||
use crate::{metrics::Metrics, tests::mock::get_valid_chunk_data};
|
||||
|
||||
/// With the sending half of the kill channel already dropped, the task must be finished on its
/// very first poll.
#[test]
fn task_can_be_canceled() {
    let names = ReqProtocolNames::new(&Hash::repeat_byte(0xff), None);
    let (task, _rx) = get_test_running_task(&names, 0.into(), 0.into());

    let (handle, kill) = oneshot::channel();
    drop(handle);

    let running_task = task.run(kill);
    futures::pin_mut!(running_task);

    let waker = noop_waker();
    let mut ctx = Context::from_waker(&waker);
    let first_poll = running_task.poll(&mut ctx);
    assert!(matches!(first_poll, Poll::Ready(())), "Task is immediately finished");
}
|
||||
|
||||
/// Make sure task won't accept a chunk that has is invalid.
|
||||
#[rstest]
|
||||
#[case(Protocol::ChunkFetchingV1)]
|
||||
#[case(Protocol::ChunkFetchingV2)]
|
||||
fn task_does_not_accept_invalid_chunk(#[case] protocol: Protocol) {
|
||||
let req_protocol_names = ReqProtocolNames::new(&Hash::repeat_byte(0xff), None);
|
||||
let chunk_index = ChunkIndex(1);
|
||||
let validator_index = ValidatorIndex(0);
|
||||
let (mut task, rx) = get_test_running_task(&req_protocol_names, validator_index, chunk_index);
|
||||
let validators = vec![Sr25519Keyring::Alice.public().into()];
|
||||
task.group = validators;
|
||||
let protocol_name = req_protocol_names.get_name(protocol);
|
||||
let test = TestRun {
|
||||
chunk_responses: {
|
||||
[(
|
||||
Recipient::Authority(Sr25519Keyring::Alice.public().into()),
|
||||
get_response(
|
||||
protocol,
|
||||
protocol_name.clone(),
|
||||
Some((
|
||||
vec![1, 2, 3],
|
||||
Proof::try_from(vec![vec![9, 8, 2], vec![2, 3, 4]]).unwrap(),
|
||||
chunk_index,
|
||||
)),
|
||||
),
|
||||
)]
|
||||
.into_iter()
|
||||
.collect()
|
||||
},
|
||||
valid_chunks: HashSet::new(),
|
||||
req_protocol_names,
|
||||
};
|
||||
test.run(task, rx);
|
||||
}
|
||||
|
||||
#[rstest]
|
||||
#[case(Protocol::ChunkFetchingV1)]
|
||||
#[case(Protocol::ChunkFetchingV2)]
|
||||
fn task_stores_valid_chunk(#[case] protocol: Protocol) {
|
||||
let req_protocol_names = ReqProtocolNames::new(&Hash::repeat_byte(0xff), None);
|
||||
// In order for protocol version 1 to work, the chunk index needs to be equal to the validator
|
||||
// index.
|
||||
let chunk_index = ChunkIndex(0);
|
||||
let validator_index =
|
||||
if protocol == Protocol::ChunkFetchingV1 { ValidatorIndex(0) } else { ValidatorIndex(1) };
|
||||
let (mut task, rx) = get_test_running_task(&req_protocol_names, validator_index, chunk_index);
|
||||
let validators = vec![Sr25519Keyring::Alice.public().into()];
|
||||
let pov = PoV { block_data: BlockData(vec![45, 46, 47]) };
|
||||
let (root_hash, chunk) = get_valid_chunk_data(pov, 10, chunk_index);
|
||||
task.erasure_root = root_hash;
|
||||
task.group = validators;
|
||||
let protocol_name = req_protocol_names.get_name(protocol);
|
||||
|
||||
let test = TestRun {
|
||||
chunk_responses: {
|
||||
[(
|
||||
Recipient::Authority(Sr25519Keyring::Alice.public().into()),
|
||||
get_response(
|
||||
protocol,
|
||||
protocol_name.clone(),
|
||||
Some((chunk.chunk.clone(), chunk.proof, chunk_index)),
|
||||
),
|
||||
)]
|
||||
.into_iter()
|
||||
.collect()
|
||||
},
|
||||
valid_chunks: [(chunk.chunk)].into_iter().collect(),
|
||||
req_protocol_names,
|
||||
};
|
||||
test.run(task, rx);
|
||||
}
|
||||
|
||||
#[rstest]
|
||||
#[case(Protocol::ChunkFetchingV1)]
|
||||
#[case(Protocol::ChunkFetchingV2)]
|
||||
fn task_does_not_accept_wrongly_indexed_chunk(#[case] protocol: Protocol) {
|
||||
let req_protocol_names = ReqProtocolNames::new(&Hash::repeat_byte(0xff), None);
|
||||
// In order for protocol version 1 to work, the chunk index needs to be equal to the validator
|
||||
// index.
|
||||
let chunk_index = ChunkIndex(0);
|
||||
let validator_index =
|
||||
if protocol == Protocol::ChunkFetchingV1 { ValidatorIndex(0) } else { ValidatorIndex(1) };
|
||||
let (mut task, rx) = get_test_running_task(&req_protocol_names, validator_index, chunk_index);
|
||||
|
||||
let validators = vec![Sr25519Keyring::Alice.public().into()];
|
||||
let pov = PoV { block_data: BlockData(vec![45, 46, 47]) };
|
||||
let (_, other_chunk) = get_valid_chunk_data(pov.clone(), 10, ChunkIndex(3));
|
||||
let (root_hash, chunk) = get_valid_chunk_data(pov, 10, ChunkIndex(0));
|
||||
task.erasure_root = root_hash;
|
||||
task.request.index = chunk.index.into();
|
||||
task.group = validators;
|
||||
let protocol_name = req_protocol_names.get_name(protocol);
|
||||
|
||||
let test = TestRun {
|
||||
chunk_responses: {
|
||||
[(
|
||||
Recipient::Authority(Sr25519Keyring::Alice.public().into()),
|
||||
get_response(
|
||||
protocol,
|
||||
protocol_name.clone(),
|
||||
Some((other_chunk.chunk.clone(), chunk.proof, other_chunk.index)),
|
||||
),
|
||||
)]
|
||||
.into_iter()
|
||||
.collect()
|
||||
},
|
||||
valid_chunks: HashSet::new(),
|
||||
req_protocol_names,
|
||||
};
|
||||
test.run(task, rx);
|
||||
}
|
||||
|
||||
/// Task stores chunk, if there is at least one validator having a valid chunk.
|
||||
#[rstest]
|
||||
#[case(Protocol::ChunkFetchingV1)]
|
||||
#[case(Protocol::ChunkFetchingV2)]
|
||||
fn task_stores_valid_chunk_if_there_is_one(#[case] protocol: Protocol) {
|
||||
let req_protocol_names = ReqProtocolNames::new(&Hash::repeat_byte(0xff), None);
|
||||
// In order for protocol version 1 to work, the chunk index needs to be equal to the validator
|
||||
// index.
|
||||
let chunk_index = ChunkIndex(1);
|
||||
let validator_index =
|
||||
if protocol == Protocol::ChunkFetchingV1 { ValidatorIndex(1) } else { ValidatorIndex(2) };
|
||||
let (mut task, rx) = get_test_running_task(&req_protocol_names, validator_index, chunk_index);
|
||||
let pov = PoV { block_data: BlockData(vec![45, 46, 47]) };
|
||||
|
||||
let validators = [
|
||||
// Only Alice has valid chunk - should succeed, even though she is tried last.
|
||||
Sr25519Keyring::Alice,
|
||||
Sr25519Keyring::Bob,
|
||||
Sr25519Keyring::Charlie,
|
||||
Sr25519Keyring::Dave,
|
||||
Sr25519Keyring::Eve,
|
||||
]
|
||||
.iter()
|
||||
.map(|v| v.public().into())
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let (root_hash, chunk) = get_valid_chunk_data(pov, 10, chunk_index);
|
||||
task.erasure_root = root_hash;
|
||||
task.group = validators;
|
||||
let protocol_name = req_protocol_names.get_name(protocol);
|
||||
|
||||
let test = TestRun {
|
||||
chunk_responses: {
|
||||
[
|
||||
(
|
||||
Recipient::Authority(Sr25519Keyring::Alice.public().into()),
|
||||
get_response(
|
||||
protocol,
|
||||
protocol_name.clone(),
|
||||
Some((chunk.chunk.clone(), chunk.proof, chunk_index)),
|
||||
),
|
||||
),
|
||||
(
|
||||
Recipient::Authority(Sr25519Keyring::Bob.public().into()),
|
||||
get_response(protocol, protocol_name.clone(), None),
|
||||
),
|
||||
(
|
||||
Recipient::Authority(Sr25519Keyring::Charlie.public().into()),
|
||||
get_response(
|
||||
protocol,
|
||||
protocol_name.clone(),
|
||||
Some((
|
||||
vec![1, 2, 3],
|
||||
Proof::try_from(vec![vec![9, 8, 2], vec![2, 3, 4]]).unwrap(),
|
||||
chunk_index,
|
||||
)),
|
||||
),
|
||||
),
|
||||
]
|
||||
.into_iter()
|
||||
.collect()
|
||||
},
|
||||
valid_chunks: [(chunk.chunk)].into_iter().collect(),
|
||||
req_protocol_names,
|
||||
};
|
||||
test.run(task, rx);
|
||||
}
|
||||
|
||||
struct TestRun {
|
||||
/// Encoded response and protocol name to deliver, keyed by the recipient of the request.
|
||||
/// Recipients without an entry get no reply: their request is dropped unanswered.
|
||||
chunk_responses: HashMap<Recipient, (Vec<u8>, ProtocolName)>,
|
||||
/// Set of chunks (raw chunk bytes) that should be considered valid.
|
||||
valid_chunks: HashSet<Vec<u8>>,
|
||||
/// Request protocol names, used to tell v1 responses from v2 responses when decoding.
|
||||
req_protocol_names: ReqProtocolNames,
|
||||
}
|
||||
|
||||
impl TestRun {
|
||||
fn run(self, task: RunningTask, rx: mpsc::Receiver<FromFetchTask>) {
|
||||
sp_tracing::init_for_tests();
|
||||
let mut rx = rx.fuse();
|
||||
let task = task.run_inner().fuse();
|
||||
futures::pin_mut!(task);
|
||||
executor::block_on(async {
|
||||
let mut end_ok = false;
|
||||
loop {
|
||||
let msg = select!(
|
||||
from_task = rx.next() => {
|
||||
match from_task {
|
||||
Some(msg) => msg,
|
||||
None => break,
|
||||
}
|
||||
},
|
||||
() = task =>
|
||||
break,
|
||||
);
|
||||
match msg {
|
||||
FromFetchTask::Concluded(_) => break,
|
||||
FromFetchTask::Failed(_) => break,
|
||||
FromFetchTask::Message(msg) => end_ok = self.handle_message(msg).await,
|
||||
}
|
||||
}
|
||||
if !end_ok {
|
||||
panic!("Task ended prematurely (failed to store valid chunk)!");
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/// Returns true, if after processing of the given message it would be OK for the stream to
|
||||
/// end.
|
||||
async fn handle_message(
|
||||
&self,
|
||||
msg: overseer::AvailabilityDistributionOutgoingMessages,
|
||||
) -> bool {
|
||||
let msg = AllMessages::from(msg);
|
||||
match msg {
|
||||
AllMessages::NetworkBridgeTx(NetworkBridgeTxMessage::SendRequests(
|
||||
reqs,
|
||||
IfDisconnected::ImmediateError,
|
||||
)) => {
|
||||
let mut valid_responses = 0;
|
||||
for req in reqs {
|
||||
let req = match req {
|
||||
Requests::ChunkFetching(req) => req,
|
||||
_ => panic!("Unexpected request"),
|
||||
};
|
||||
let response =
|
||||
self.chunk_responses.get(&req.peer).ok_or(network::RequestFailure::Refused);
|
||||
|
||||
if let Ok((resp, protocol)) = response {
|
||||
let chunk = if protocol ==
|
||||
&self.req_protocol_names.get_name(Protocol::ChunkFetchingV1)
|
||||
{
|
||||
Into::<Option<v1::ChunkResponse>>::into(
|
||||
v1::ChunkFetchingResponse::decode(&mut &resp[..]).unwrap(),
|
||||
)
|
||||
.map(|c| c.chunk)
|
||||
} else if protocol ==
|
||||
&self.req_protocol_names.get_name(Protocol::ChunkFetchingV2)
|
||||
{
|
||||
Into::<Option<ErasureChunk>>::into(
|
||||
v2::ChunkFetchingResponse::decode(&mut &resp[..]).unwrap(),
|
||||
)
|
||||
.map(|c| c.chunk)
|
||||
} else {
|
||||
unreachable!()
|
||||
};
|
||||
|
||||
if let Some(chunk) = chunk {
|
||||
if self.valid_chunks.contains(&chunk) {
|
||||
valid_responses += 1;
|
||||
}
|
||||
}
|
||||
|
||||
req.pending_response
|
||||
.send(response.cloned())
|
||||
.expect("Sending response should succeed");
|
||||
}
|
||||
}
|
||||
return (valid_responses == 0) && self.valid_chunks.is_empty();
|
||||
},
|
||||
AllMessages::AvailabilityStore(AvailabilityStoreMessage::StoreChunk {
|
||||
chunk,
|
||||
tx,
|
||||
..
|
||||
}) => {
|
||||
assert!(self.valid_chunks.contains(&chunk.chunk));
|
||||
tx.send(Ok(())).expect("Answering fetching task should work");
|
||||
return true;
|
||||
},
|
||||
_ => {
|
||||
gum::debug!(target: LOG_TARGET, "Unexpected message");
|
||||
return false;
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Get a `RunningTask` filled with (mostly) dummy values.
|
||||
fn get_test_running_task(
|
||||
req_protocol_names: &ReqProtocolNames,
|
||||
validator_index: ValidatorIndex,
|
||||
chunk_index: ChunkIndex,
|
||||
) -> (RunningTask, mpsc::Receiver<FromFetchTask>) {
|
||||
let (tx, rx) = mpsc::channel(0);
|
||||
|
||||
(
|
||||
RunningTask {
|
||||
session_index: 0,
|
||||
group_index: GroupIndex(0),
|
||||
group: Vec::new(),
|
||||
request: v2::ChunkFetchingRequest {
|
||||
candidate_hash: CandidateHash([43u8; 32].into()),
|
||||
index: validator_index,
|
||||
},
|
||||
erasure_root: Hash::repeat_byte(99),
|
||||
relay_parent: Hash::repeat_byte(71),
|
||||
sender: tx,
|
||||
metrics: Metrics::new_dummy(),
|
||||
req_v1_protocol_name: req_protocol_names.get_name(Protocol::ChunkFetchingV1),
|
||||
req_v2_protocol_name: req_protocol_names.get_name(Protocol::ChunkFetchingV2),
|
||||
chunk_index,
|
||||
},
|
||||
rx,
|
||||
)
|
||||
}
|
||||
|
||||
/// Make a versioned ChunkFetchingResponse.
|
||||
fn get_response(
|
||||
protocol: Protocol,
|
||||
protocol_name: ProtocolName,
|
||||
chunk: Option<(Vec<u8>, Proof, ChunkIndex)>,
|
||||
) -> (Vec<u8>, ProtocolName) {
|
||||
(
|
||||
match protocol {
|
||||
Protocol::ChunkFetchingV1 => if let Some((chunk, proof, _)) = chunk {
|
||||
v1::ChunkFetchingResponse::Chunk(ChunkResponse { chunk, proof })
|
||||
} else {
|
||||
v1::ChunkFetchingResponse::NoSuchChunk
|
||||
}
|
||||
.encode(),
|
||||
Protocol::ChunkFetchingV2 => if let Some((chunk, proof, index)) = chunk {
|
||||
v2::ChunkFetchingResponse::Chunk(ErasureChunk { chunk, index, proof })
|
||||
} else {
|
||||
v2::ChunkFetchingResponse::NoSuchChunk
|
||||
}
|
||||
.encode(),
|
||||
_ => unreachable!(),
|
||||
},
|
||||
protocol_name,
|
||||
)
|
||||
}
|
||||
@@ -0,0 +1,349 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! Requester takes care of requesting erasure chunks for candidates that are pending
|
||||
//! availability.
|
||||
|
||||
use std::{
|
||||
collections::{hash_map::HashMap, hash_set::HashSet},
|
||||
iter::IntoIterator,
|
||||
pin::Pin,
|
||||
};
|
||||
|
||||
use futures::{
|
||||
channel::{mpsc, oneshot},
|
||||
task::{Context, Poll},
|
||||
Stream,
|
||||
};
|
||||
|
||||
use pezkuwi_node_network_protocol::request_response::{v1, v2, IsRequest, ReqProtocolNames};
|
||||
use pezkuwi_node_subsystem::{
|
||||
messages::{ChainApiMessage, RuntimeApiMessage},
|
||||
overseer, ActivatedLeaf, ActiveLeavesUpdate,
|
||||
};
|
||||
use pezkuwi_node_subsystem_util::{
|
||||
availability_chunks::availability_chunk_index,
|
||||
runtime::{get_occupied_cores, RuntimeInfo},
|
||||
};
|
||||
use pezkuwi_primitives::{CandidateHash, CoreIndex, Hash, OccupiedCore, SessionIndex};
|
||||
|
||||
use super::{FatalError, Metrics, Result, LOG_TARGET};
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
||||
/// Cache for session information.
|
||||
mod session_cache;
|
||||
use session_cache::SessionCache;
|
||||
|
||||
/// A task fetching a particular chunk.
|
||||
mod fetch_task;
|
||||
use fetch_task::{FetchTask, FetchTaskConfig, FromFetchTask};
|
||||
|
||||
/// Requester takes care of requesting erasure chunks from backing groups and stores them in the
|
||||
/// av store.
|
||||
///
|
||||
/// It implements a stream that needs to be advanced for it to make progress.
|
||||
pub struct Requester {
|
||||
/// Candidates we need to fetch our chunk for, keyed by candidate hash.
|
||||
///
|
||||
/// We keep those around as long as a candidate is pending availability on some leaf, so we
|
||||
/// won't fetch chunks multiple times.
|
||||
///
|
||||
/// We remove them on failure, so we get retries on the next block still pending availability.
|
||||
fetches: HashMap<CandidateHash, FetchTask>,
|
||||
|
||||
/// Localized information about sessions we are currently interested in.
|
||||
session_cache: SessionCache,
|
||||
|
||||
/// Sender to be cloned for `FetchTask`s.
|
||||
tx: mpsc::Sender<FromFetchTask>,
|
||||
|
||||
/// Receiving end for the messages sent by `FetchTask`s.
|
||||
rx: mpsc::Receiver<FromFetchTask>,
|
||||
|
||||
/// Prometheus metrics, cloned into every spawned fetch task.
|
||||
metrics: Metrics,
|
||||
|
||||
/// Mapping of the req-response protocols to the full protocol names.
|
||||
req_protocol_names: ReqProtocolNames,
|
||||
}
|
||||
|
||||
#[overseer::contextbounds(AvailabilityDistribution, prefix = self::overseer)]
|
||||
impl Requester {
|
||||
/// How many ancestors of the leaf should we consider along with it.
|
||||
pub(crate) const LEAF_ANCESTRY_LEN_WITHIN_SESSION: usize = 3;
|
||||
|
||||
/// Create a new `Requester`.
|
||||
///
|
||||
/// You must feed it with `ActiveLeavesUpdate` via `update_fetching_heads` and make it progress
|
||||
/// by advancing the stream.
|
||||
pub fn new(req_protocol_names: ReqProtocolNames, metrics: Metrics) -> Self {
|
||||
let (tx, rx) = mpsc::channel(1);
|
||||
Requester {
|
||||
fetches: HashMap::new(),
|
||||
session_cache: SessionCache::new(),
|
||||
tx,
|
||||
rx,
|
||||
metrics,
|
||||
req_protocol_names,
|
||||
}
|
||||
}
|
||||
|
||||
/// Update heads that need availability distribution.
|
||||
///
|
||||
/// For all active heads we will be fetching our chunks for availability distribution.
|
||||
pub async fn update_fetching_heads<Context>(
|
||||
&mut self,
|
||||
ctx: &mut Context,
|
||||
runtime: &mut RuntimeInfo,
|
||||
update: ActiveLeavesUpdate,
|
||||
) -> Result<()> {
|
||||
gum::trace!(target: LOG_TARGET, ?update, "Update fetching heads");
|
||||
let ActiveLeavesUpdate { activated, deactivated } = update;
|
||||
if let Some(leaf) = activated {
|
||||
// Order important! We need to handle activated, prior to deactivated, otherwise we
|
||||
// might cancel still needed jobs.
|
||||
self.start_requesting_chunks(ctx, runtime, leaf).await?;
|
||||
}
|
||||
|
||||
self.stop_requesting_chunks(deactivated.into_iter());
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Start requesting chunks for newly imported head.
|
||||
///
|
||||
/// This will also request [`SESSION_ANCESTRY_LEN`] leaf ancestors from the same session
|
||||
/// and start requesting chunks for them too.
|
||||
async fn start_requesting_chunks<Context>(
|
||||
&mut self,
|
||||
ctx: &mut Context,
|
||||
runtime: &mut RuntimeInfo,
|
||||
new_head: ActivatedLeaf,
|
||||
) -> Result<()> {
|
||||
let sender = &mut ctx.sender().clone();
|
||||
let ActivatedLeaf { hash: leaf, .. } = new_head;
|
||||
let (leaf_session_index, ancestors_in_session) = get_block_ancestors_in_same_session(
|
||||
sender,
|
||||
runtime,
|
||||
leaf,
|
||||
Self::LEAF_ANCESTRY_LEN_WITHIN_SESSION,
|
||||
)
|
||||
.await?;
|
||||
|
||||
// Also spawn or bump tasks for candidates in ancestry in the same session.
|
||||
for hash in std::iter::once(leaf).chain(ancestors_in_session) {
|
||||
let cores = get_occupied_cores(sender, hash).await?;
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
occupied_cores = ?cores,
|
||||
"Query occupied core"
|
||||
);
|
||||
// Important:
|
||||
// We mark the whole ancestry as live in the **leaf** hash, so we don't need to track
|
||||
// any tasks separately.
|
||||
//
|
||||
// The next time the subsystem receives leaf update, some of spawned task will be bumped
|
||||
// to be live in fresh relay parent, while some might get dropped due to the current
|
||||
// leaf being deactivated.
|
||||
self.add_cores(ctx, runtime, leaf, leaf_session_index, cores).await?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Stop requesting chunks for obsolete heads.
|
||||
fn stop_requesting_chunks(&mut self, obsolete_leaves: impl Iterator<Item = Hash>) {
|
||||
let obsolete_leaves: HashSet<_> = obsolete_leaves.collect();
|
||||
self.fetches.retain(|_, task| {
|
||||
task.remove_leaves(&obsolete_leaves);
|
||||
task.is_live()
|
||||
})
|
||||
}
|
||||
|
||||
/// Add candidates corresponding for a particular relay parent.
|
||||
///
|
||||
/// Starting requests where necessary.
|
||||
///
|
||||
/// Note: The passed in `leaf` is not the same as `CandidateDescriptor::relay_parent` in the
|
||||
/// given cores. The latter is the `relay_parent` this candidate considers its parent, while the
|
||||
/// passed in leaf might be some later block where the candidate is still pending availability.
|
||||
async fn add_cores<Context>(
|
||||
&mut self,
|
||||
context: &mut Context,
|
||||
runtime: &mut RuntimeInfo,
|
||||
leaf: Hash,
|
||||
leaf_session_index: SessionIndex,
|
||||
cores: impl IntoIterator<Item = (CoreIndex, OccupiedCore)>,
|
||||
) -> Result<()> {
|
||||
for (core_index, core) in cores {
|
||||
if let Some(e) = self.fetches.get_mut(&core.candidate_hash) {
|
||||
// Just book keeping - we are already requesting that chunk:
|
||||
e.add_leaf(leaf);
|
||||
} else {
|
||||
let tx = self.tx.clone();
|
||||
let metrics = self.metrics.clone();
|
||||
|
||||
let session_info = self
|
||||
.session_cache
|
||||
.get_session_info(
|
||||
context,
|
||||
runtime,
|
||||
// We use leaf here, the relay_parent must be in the same session as
|
||||
// the leaf. This is guaranteed by runtime which ensures that cores are
|
||||
// cleared at session boundaries. At the same time, only leaves are
|
||||
// guaranteed to be fetchable by the state trie.
|
||||
leaf,
|
||||
leaf_session_index,
|
||||
)
|
||||
.await
|
||||
.map_err(|err| {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
error = ?err,
|
||||
"Failed to spawn a fetch task"
|
||||
);
|
||||
err
|
||||
})?;
|
||||
|
||||
if let Some(session_info) = session_info {
|
||||
let n_validators =
|
||||
session_info.validator_groups.iter().fold(0usize, |mut acc, group| {
|
||||
acc = acc.saturating_add(group.len());
|
||||
acc
|
||||
});
|
||||
let chunk_index = availability_chunk_index(
|
||||
session_info.node_features.as_ref(),
|
||||
n_validators,
|
||||
core_index,
|
||||
session_info.our_index,
|
||||
)?;
|
||||
|
||||
let task_cfg = FetchTaskConfig::new(
|
||||
leaf,
|
||||
&core,
|
||||
tx,
|
||||
metrics,
|
||||
session_info,
|
||||
chunk_index,
|
||||
self.req_protocol_names.get_name(v1::ChunkFetchingRequest::PROTOCOL),
|
||||
self.req_protocol_names.get_name(v2::ChunkFetchingRequest::PROTOCOL),
|
||||
);
|
||||
|
||||
self.fetches
|
||||
.insert(core.candidate_hash, FetchTask::start(task_cfg, context).await?);
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl Stream for Requester {
|
||||
type Item = overseer::AvailabilityDistributionOutgoingMessages;
|
||||
|
||||
fn poll_next(mut self: Pin<&mut Self>, ctx: &mut Context) -> Poll<Option<Self::Item>> {
|
||||
loop {
|
||||
match Pin::new(&mut self.rx).poll_next(ctx) {
|
||||
Poll::Ready(Some(FromFetchTask::Message(m))) => return Poll::Ready(Some(m)),
|
||||
Poll::Ready(Some(FromFetchTask::Concluded(Some(bad_boys)))) => {
|
||||
self.session_cache.report_bad_log(bad_boys);
|
||||
continue;
|
||||
},
|
||||
Poll::Ready(Some(FromFetchTask::Concluded(None))) => continue,
|
||||
Poll::Ready(Some(FromFetchTask::Failed(candidate_hash))) => {
|
||||
// Make sure we retry on next block still pending availability.
|
||||
self.fetches.remove(&candidate_hash);
|
||||
},
|
||||
Poll::Ready(None) => return Poll::Ready(None),
|
||||
Poll::Pending => return Poll::Pending,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Requests up to `limit` ancestor hashes of relay parent in the same session.
|
||||
///
|
||||
/// Also returns session index of the `head`.
|
||||
async fn get_block_ancestors_in_same_session<Sender>(
|
||||
sender: &mut Sender,
|
||||
runtime: &mut RuntimeInfo,
|
||||
head: Hash,
|
||||
limit: usize,
|
||||
) -> Result<(SessionIndex, Vec<Hash>)>
|
||||
where
|
||||
Sender:
|
||||
overseer::SubsystemSender<RuntimeApiMessage> + overseer::SubsystemSender<ChainApiMessage>,
|
||||
{
|
||||
// The order is parent, grandparent, ...
|
||||
//
|
||||
// `limit + 1` since a session index for the last element in ancestry
|
||||
// is obtained through its parent. It always gets truncated because
|
||||
// `session_ancestry_len` can only be incremented `ancestors.len() - 1` times.
|
||||
let mut ancestors = get_block_ancestors(sender, head, limit + 1).await?;
|
||||
let mut ancestors_iter = ancestors.iter();
|
||||
|
||||
// `head` is the child of the first block in `ancestors`, request its session index.
|
||||
let head_session_index = match ancestors_iter.next() {
|
||||
Some(parent) => runtime.get_session_index_for_child(sender, *parent).await?,
|
||||
None => {
|
||||
// No first element, i.e. empty.
|
||||
return Ok((0, ancestors));
|
||||
},
|
||||
};
|
||||
|
||||
let mut session_ancestry_len = 0;
|
||||
// The first parent is skipped.
|
||||
for parent in ancestors_iter {
|
||||
// Parent is the i-th ancestor, request session index for its child -- (i-1)th element.
|
||||
let session_index = runtime.get_session_index_for_child(sender, *parent).await?;
|
||||
if session_index == head_session_index {
|
||||
session_ancestry_len += 1;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Drop the rest.
|
||||
ancestors.truncate(session_ancestry_len);
|
||||
|
||||
Ok((head_session_index, ancestors))
|
||||
}
|
||||
|
||||
/// Request up to `limit` ancestor hashes of relay parent from the Chain API.
|
||||
async fn get_block_ancestors<Sender>(
|
||||
sender: &mut Sender,
|
||||
relay_parent: Hash,
|
||||
limit: usize,
|
||||
) -> Result<Vec<Hash>>
|
||||
where
|
||||
Sender: overseer::SubsystemSender<ChainApiMessage>,
|
||||
{
|
||||
let (tx, rx) = oneshot::channel();
|
||||
sender
|
||||
.send_message(ChainApiMessage::Ancestors {
|
||||
hash: relay_parent,
|
||||
k: limit,
|
||||
response_channel: tx,
|
||||
})
|
||||
.await;
|
||||
|
||||
let ancestors = rx
|
||||
.await
|
||||
.map_err(FatalError::ChainApiSenderDropped)?
|
||||
.map_err(FatalError::ChainApi)?;
|
||||
Ok(ancestors)
|
||||
}
|
||||
@@ -0,0 +1,221 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use std::collections::HashSet;
|
||||
|
||||
use rand::{seq::SliceRandom, thread_rng};
|
||||
use schnellru::{ByLength, LruMap};
|
||||
|
||||
use pezkuwi_node_subsystem::overseer;
|
||||
use pezkuwi_node_subsystem_util::{request_node_features, runtime::RuntimeInfo};
|
||||
use pezkuwi_primitives::{
|
||||
AuthorityDiscoveryId, GroupIndex, Hash, NodeFeatures, SessionIndex, ValidatorIndex,
|
||||
};
|
||||
|
||||
use crate::{
|
||||
error::{Error, Result},
|
||||
LOG_TARGET,
|
||||
};
|
||||
|
||||
/// Caching of session info as needed by availability chunk distribution.
|
||||
///
|
||||
/// It should be ensured that a cached session stays live in the cache as long as we might need it.
|
||||
pub struct SessionCache {
|
||||
/// LRU map of cached sessions, looked up by `SessionIndex`.
|
||||
///
|
||||
/// Note: Performance of fetching is really secondary here, but we need to ensure we are going
|
||||
/// to get any existing cache entry, before fetching new information, as we should not mess up
|
||||
/// the order of validators in `SessionInfo::validator_groups`.
|
||||
session_info_cache: LruMap<SessionIndex, SessionInfo>,
|
||||
}
|
||||
|
||||
/// Localized session information, tailored for the needs of availability distribution.
|
||||
#[derive(Clone)]
|
||||
pub struct SessionInfo {
|
||||
/// The index of this session.
|
||||
pub session_index: SessionIndex,
|
||||
|
||||
/// Validator groups of this session.
|
||||
///
|
||||
/// Each group's order is randomized. This way we achieve load balancing when requesting
|
||||
/// chunks, as the validators in a group will be tried in that randomized order. Each node
|
||||
/// should arrive at a different order, therefore we distribute the load on individual
|
||||
/// validators.
|
||||
pub validator_groups: Vec<Vec<AuthorityDiscoveryId>>,
|
||||
|
||||
/// Information about ourselves: our own validator index.
|
||||
pub our_index: ValidatorIndex,
|
||||
|
||||
/// Remember to which group we belong, so we won't start fetching chunks for candidates with
|
||||
/// our group being responsible. (We should have that chunk already.)
|
||||
///
|
||||
/// `None`, if we are not in fact part of any group.
|
||||
pub our_group: Option<GroupIndex>,
|
||||
|
||||
/// Node features.
|
||||
pub node_features: NodeFeatures,
|
||||
}
|
||||
|
||||
/// Report of bad validators.
|
||||
///
|
||||
/// Fetching tasks will report back validators that did not respond as expected, so we can re-order
|
||||
/// them.
|
||||
pub struct BadValidators {
|
||||
/// The session index that was used; the report is applied to the cached session with this index.
|
||||
pub session_index: SessionIndex,
|
||||
/// The group the improperly responding validators belong to.
|
||||
pub group_index: GroupIndex,
|
||||
/// The list of bad validators.
|
||||
pub bad_validators: Vec<AuthorityDiscoveryId>,
|
||||
}
|
||||
|
||||
#[overseer::contextbounds(AvailabilityDistribution, prefix = self::overseer)]
|
||||
impl SessionCache {
|
||||
/// Create a new `SessionCache`.
|
||||
pub fn new() -> Self {
|
||||
SessionCache {
|
||||
// We need to cache the current and the last session the most:
|
||||
session_info_cache: LruMap::new(ByLength::new(2)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Tries to retrieve `SessionInfo`.
|
||||
/// If this node is not a validator, the function will return `None`.
|
||||
pub async fn get_session_info<'a, Context>(
|
||||
&'a mut self,
|
||||
ctx: &mut Context,
|
||||
runtime: &mut RuntimeInfo,
|
||||
parent: Hash,
|
||||
session_index: SessionIndex,
|
||||
) -> Result<Option<&'a SessionInfo>> {
|
||||
gum::trace!(target: LOG_TARGET, session_index, "Calling `get_session_info`");
|
||||
|
||||
if self.session_info_cache.get(&session_index).is_none() {
|
||||
if let Some(info) =
|
||||
Self::query_info_from_runtime(ctx, runtime, parent, session_index).await?
|
||||
{
|
||||
gum::trace!(target: LOG_TARGET, session_index, "Storing session info in lru!");
|
||||
self.session_info_cache.insert(session_index, info);
|
||||
} else {
|
||||
return Ok(None);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(self.session_info_cache.get(&session_index).map(|i| &*i))
|
||||
}
|
||||
|
||||
/// Variant of `report_bad` that never fails, but just logs errors.
|
||||
///
|
||||
/// Not being able to report bad validators is not fatal, so we should not shutdown the
|
||||
/// subsystem on this.
|
||||
pub fn report_bad_log(&mut self, report: BadValidators) {
|
||||
if let Err(err) = self.report_bad(report) {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
err = ?err,
|
||||
"Reporting bad validators failed with error"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// Make sure we try unresponsive or misbehaving validators last.
|
||||
///
|
||||
/// We assume validators in a group are tried in reverse order, so the reported bad validators
|
||||
/// will be put at the beginning of the group.
|
||||
pub fn report_bad(&mut self, report: BadValidators) -> Result<()> {
|
||||
let available_sessions = self.session_info_cache.iter().map(|(k, _)| *k).collect();
|
||||
let session = self.session_info_cache.get(&report.session_index).ok_or(
|
||||
Error::NoSuchCachedSession {
|
||||
available_sessions,
|
||||
missing_session: report.session_index,
|
||||
},
|
||||
)?;
|
||||
let group = session.validator_groups.get_mut(report.group_index.0 as usize).expect(
|
||||
"A bad validator report must contain a valid group for the reported session. qed.",
|
||||
);
|
||||
let bad_set = report.bad_validators.iter().collect::<HashSet<_>>();
|
||||
|
||||
// Get rid of bad boys:
|
||||
group.retain(|v| !bad_set.contains(v));
|
||||
|
||||
// We are trying validators in reverse order, so bad ones should be first:
|
||||
let mut new_group = report.bad_validators;
|
||||
new_group.append(group);
|
||||
*group = new_group;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Query needed information from runtime.
|
||||
///
|
||||
/// We need to pass in the relay parent for our call to `request_session_info`. We should
|
||||
/// actually don't need that: I suppose it is used for internal caching based on relay parents,
|
||||
/// which we don't use here. It should not do any harm though.
|
||||
///
|
||||
/// Returns: `None` if not a validator.
|
||||
async fn query_info_from_runtime<Context>(
|
||||
ctx: &mut Context,
|
||||
runtime: &mut RuntimeInfo,
|
||||
relay_parent: Hash,
|
||||
session_index: SessionIndex,
|
||||
) -> Result<Option<SessionInfo>> {
|
||||
let info = runtime
|
||||
.get_session_info_by_index(ctx.sender(), relay_parent, session_index)
|
||||
.await?;
|
||||
|
||||
let node_features = request_node_features(relay_parent, session_index, ctx.sender())
|
||||
.await
|
||||
.await?
|
||||
.map_err(Error::FailedNodeFeatures)?;
|
||||
|
||||
let discovery_keys = info.session_info.discovery_keys.clone();
|
||||
let mut validator_groups = info.session_info.validator_groups.clone();
|
||||
|
||||
if let Some(our_index) = info.validator_info.our_index {
|
||||
// Get our group index:
|
||||
let our_group = info.validator_info.our_group;
|
||||
|
||||
// Shuffle validators in groups:
|
||||
let mut rng = thread_rng();
|
||||
for g in validator_groups.iter_mut() {
|
||||
g.shuffle(&mut rng)
|
||||
}
|
||||
// Look up `AuthorityDiscoveryId`s right away:
|
||||
let validator_groups: Vec<Vec<_>> = validator_groups
|
||||
.into_iter()
|
||||
.map(|group| {
|
||||
group
|
||||
.into_iter()
|
||||
.map(|index| {
|
||||
discovery_keys.get(index.0 as usize)
|
||||
.expect("There should be a discovery key for each validator of each validator group. qed.")
|
||||
.clone()
|
||||
})
|
||||
.collect()
|
||||
})
|
||||
.collect();
|
||||
|
||||
let info = SessionInfo {
|
||||
validator_groups,
|
||||
our_index,
|
||||
session_index,
|
||||
our_group,
|
||||
node_features,
|
||||
};
|
||||
return Ok(Some(info));
|
||||
}
|
||||
return Ok(None);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,322 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use futures::FutureExt;
|
||||
use std::future::Future;
|
||||
|
||||
use pezkuwi_node_network_protocol::request_response::ReqProtocolNames;
|
||||
use pezkuwi_node_primitives::{BlockData, ErasureChunk, PoV};
|
||||
use pezkuwi_node_subsystem_util::runtime::RuntimeInfo;
|
||||
use pezkuwi_primitives::{
|
||||
BlockNumber, ChunkIndex, CoreState, ExecutorParams, GroupIndex, Hash, Id as ParaId,
|
||||
ScheduledCore, SessionIndex, SessionInfo,
|
||||
};
|
||||
use sp_core::{testing::TaskExecutor, traits::SpawnNamed};
|
||||
|
||||
use pezkuwi_node_subsystem::{
|
||||
messages::{
|
||||
AllMessages, AvailabilityDistributionMessage, AvailabilityStoreMessage, ChainApiMessage,
|
||||
NetworkBridgeTxMessage, RuntimeApiMessage, RuntimeApiRequest,
|
||||
},
|
||||
ActiveLeavesUpdate, SpawnGlue,
|
||||
};
|
||||
use pezkuwi_node_subsystem_test_helpers::{
|
||||
make_subsystem_context,
|
||||
mock::{make_ferdie_keystore, new_leaf},
|
||||
TestSubsystemContext, TestSubsystemContextHandle,
|
||||
};
|
||||
|
||||
use crate::tests::{
|
||||
mock::{get_valid_chunk_data, make_session_info, OccupiedCoreBuilder},
|
||||
node_features_with_mapping_enabled,
|
||||
};
|
||||
|
||||
use super::Requester;
|
||||
|
||||
fn get_erasure_chunk() -> ErasureChunk {
|
||||
let pov = PoV { block_data: BlockData(vec![45, 46, 47]) };
|
||||
get_valid_chunk_data(pov, 10, ChunkIndex(0)).1
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
struct TestState {
|
||||
/// Simulated relay chain heads. For each block except genesis
|
||||
/// there exists a single corresponding candidate, handled in [`spawn_virtual_overseer`].
|
||||
pub relay_chain: Vec<Hash>,
|
||||
pub session_info: SessionInfo,
|
||||
// Defines a way to compute a session index for the block with
|
||||
// a given number. Returns 1 for all blocks by default.
|
||||
pub session_index_for_block: fn(BlockNumber) -> SessionIndex,
|
||||
}
|
||||
|
||||
impl TestState {
|
||||
fn new() -> Self {
|
||||
let relay_chain: Vec<_> = (0u8..10).map(Hash::repeat_byte).collect();
|
||||
let session_info = make_session_info();
|
||||
let session_index_for_block = |_| 1;
|
||||
Self { relay_chain, session_info, session_index_for_block }
|
||||
}
|
||||
}
|
||||
|
||||
fn spawn_virtual_overseer(
|
||||
pool: TaskExecutor,
|
||||
test_state: TestState,
|
||||
mut ctx_handle: TestSubsystemContextHandle<AvailabilityDistributionMessage>,
|
||||
) {
|
||||
pool.spawn(
|
||||
"virtual-overseer",
|
||||
None,
|
||||
async move {
|
||||
loop {
|
||||
let msg = ctx_handle.try_recv().await;
|
||||
if msg.is_none() {
|
||||
break;
|
||||
}
|
||||
match msg.unwrap() {
|
||||
AllMessages::NetworkBridgeTx(NetworkBridgeTxMessage::SendRequests(..)) => {},
|
||||
AllMessages::AvailabilityStore(AvailabilityStoreMessage::QueryChunk(
|
||||
..,
|
||||
tx,
|
||||
)) => {
|
||||
let chunk = get_erasure_chunk();
|
||||
tx.send(Some(chunk)).expect("Receiver is expected to be alive");
|
||||
},
|
||||
AllMessages::AvailabilityStore(AvailabilityStoreMessage::StoreChunk {
|
||||
tx,
|
||||
..
|
||||
}) => {
|
||||
// Silently accept it.
|
||||
tx.send(Ok(())).expect("Receiver is expected to be alive");
|
||||
},
|
||||
AllMessages::RuntimeApi(RuntimeApiMessage::Request(hash, req)) => {
|
||||
match req {
|
||||
RuntimeApiRequest::SessionIndexForChild(tx) => {
|
||||
let chain = &test_state.relay_chain;
|
||||
let block_number = chain
|
||||
.iter()
|
||||
.position(|h| *h == hash)
|
||||
.expect("Invalid session index request");
|
||||
// Compute session index.
|
||||
let session_index_for_block = test_state.session_index_for_block;
|
||||
|
||||
tx.send(Ok(session_index_for_block(block_number as u32 + 1)))
|
||||
.expect("Receiver should still be alive");
|
||||
},
|
||||
RuntimeApiRequest::SessionInfo(_, tx) => {
|
||||
tx.send(Ok(Some(test_state.session_info.clone())))
|
||||
.expect("Receiver should be alive.");
|
||||
},
|
||||
RuntimeApiRequest::SessionExecutorParams(_, tx) => {
|
||||
tx.send(Ok(Some(ExecutorParams::default())))
|
||||
.expect("Receiver should be alive.");
|
||||
},
|
||||
RuntimeApiRequest::NodeFeatures(_, tx) => {
|
||||
tx.send(Ok(node_features_with_mapping_enabled()))
|
||||
.expect("Receiver should be alive.");
|
||||
},
|
||||
RuntimeApiRequest::AvailabilityCores(tx) => {
|
||||
let para_id = ParaId::from(1_u32);
|
||||
let maybe_block_position =
|
||||
test_state.relay_chain.iter().position(|h| *h == hash);
|
||||
let cores = match maybe_block_position {
|
||||
Some(block_num) => {
|
||||
let core = if block_num == 0 {
|
||||
CoreState::Scheduled(ScheduledCore {
|
||||
para_id,
|
||||
collator: None,
|
||||
})
|
||||
} else {
|
||||
CoreState::Occupied(
|
||||
OccupiedCoreBuilder {
|
||||
group_responsible: GroupIndex(1),
|
||||
para_id,
|
||||
relay_parent: hash,
|
||||
n_validators: 10,
|
||||
chunk_index: ChunkIndex(0),
|
||||
}
|
||||
.build()
|
||||
.0,
|
||||
)
|
||||
};
|
||||
vec![core]
|
||||
},
|
||||
None => Vec::new(),
|
||||
};
|
||||
tx.send(Ok(cores)).expect("Receiver should be alive.")
|
||||
},
|
||||
_ => {
|
||||
panic!("Unexpected runtime request: {:?}", req);
|
||||
},
|
||||
}
|
||||
},
|
||||
AllMessages::ChainApi(ChainApiMessage::Ancestors {
|
||||
hash,
|
||||
k,
|
||||
response_channel,
|
||||
}) => {
|
||||
let chain = &test_state.relay_chain;
|
||||
let maybe_block_position = chain.iter().position(|h| *h == hash);
|
||||
let ancestors = maybe_block_position
|
||||
.map(|idx| chain[..idx].iter().rev().take(k).copied().collect())
|
||||
.unwrap_or_default();
|
||||
response_channel
|
||||
.send(Ok(ancestors))
|
||||
.expect("Receiver is expected to be alive");
|
||||
},
|
||||
msg => panic!("Unexpected overseer message: {:?}", msg),
|
||||
}
|
||||
}
|
||||
}
|
||||
.boxed(),
|
||||
);
|
||||
}
|
||||
|
||||
fn test_harness<T: Future<Output = ()>>(
|
||||
test_state: TestState,
|
||||
test_fx: impl FnOnce(
|
||||
TestSubsystemContext<AvailabilityDistributionMessage, SpawnGlue<TaskExecutor>>,
|
||||
) -> T,
|
||||
) {
|
||||
let pool = TaskExecutor::new();
|
||||
let (ctx, ctx_handle) = make_subsystem_context(pool.clone());
|
||||
|
||||
spawn_virtual_overseer(pool, test_state, ctx_handle);
|
||||
|
||||
futures::executor::block_on(test_fx(ctx));
|
||||
}
|
||||
|
||||
#[test]
fn check_ancestry_lookup_in_same_session() {
    let test_state = TestState::new();
    let mut requester =
        Requester::new(ReqProtocolNames::new(&Hash::repeat_byte(0xff), None), Default::default());
    let keystore = make_ferdie_keystore();
    let mut runtime = RuntimeInfo::new(Some(keystore));

    test_harness(test_state.clone(), |mut ctx| async move {
        let chain = &test_state.relay_chain;
        // Builds the update activating block `number` and deactivating `deactivated`.
        let leaf_update = |number: usize, deactivated: Vec<Hash>| ActiveLeavesUpdate {
            activated: Some(new_leaf(chain[number], number as u32)),
            deactivated: deactivated.into(),
        };

        // Block 1 brings exactly one candidate.
        requester
            .update_fetching_heads(&mut ctx, &mut runtime, leaf_update(1, Vec::new()))
            .await
            .expect("Leaf processing failed");
        let fetch_tasks = &requester.fetches;
        assert_eq!(fetch_tasks.len(), 1);
        let block_1_candidate =
            *fetch_tasks.keys().next().expect("A task is checked to be present; qed");

        // Block 2 adds a second candidate while block 1 stays active.
        requester
            .update_fetching_heads(&mut ctx, &mut runtime, leaf_update(2, Vec::new()))
            .await
            .expect("Leaf processing failed");
        let fetch_tasks = &requester.fetches;
        assert_eq!(fetch_tasks.len(), 2);
        let block_1_task =
            fetch_tasks.get(&block_1_candidate).expect("Leaf hasn't been deactivated yet");
        // The task should be live in both blocks 1 and 2.
        assert_eq!(block_1_task.live_in.len(), 2);
        let block_2_candidate = *fetch_tasks
            .keys()
            .find(|hash| **hash != block_1_candidate)
            .expect("Two tasks are present, the first one corresponds to block 1 candidate; qed");

        // Deactivate both blocks but keep the second task as a
        // part of ancestry.
        let ancestry_len = Requester::LEAF_ANCESTRY_LEN_WITHIN_SESSION;
        let update = leaf_update(2 + ancestry_len, vec![chain[1], chain[2]]);
        requester
            .update_fetching_heads(&mut ctx, &mut runtime, update)
            .await
            .expect("Leaf processing failed");
        let fetch_tasks = &requester.fetches;
        // The leaf + K its ancestors.
        assert_eq!(fetch_tasks.len(), ancestry_len + 1);

        let block_2_task = fetch_tasks
            .get(&block_2_candidate)
            .expect("Expected to be live as a part of ancestry");
        assert_eq!(block_2_task.live_in.len(), 1);
    });
}
|
||||
|
||||
#[test]
fn check_ancestry_lookup_in_different_sessions() {
    let mut test_state = TestState::new();
    let mut requester =
        Requester::new(ReqProtocolNames::new(&Hash::repeat_byte(0xff), None), Default::default());
    let keystore = make_ferdie_keystore();
    let mut runtime = RuntimeInfo::new(Some(keystore));

    // Blocks up to number 3 are in session 1, all later ones in session 2.
    test_state.session_index_for_block = |block_number| match block_number {
        0..=3 => 1,
        _ => 2,
    };

    test_harness(test_state.clone(), |mut ctx| async move {
        let chain = &test_state.relay_chain;
        // Builds the update activating block `number` and deactivating `deactivated`.
        let leaf_update = |number: usize, deactivated: Vec<Hash>| ActiveLeavesUpdate {
            activated: Some(new_leaf(chain[number], number as u32)),
            deactivated: deactivated.into(),
        };
        let max_tasks_per_leaf = Requester::LEAF_ANCESTRY_LEN_WITHIN_SESSION + 1;

        requester
            .update_fetching_heads(&mut ctx, &mut runtime, leaf_update(3, Vec::new()))
            .await
            .expect("Leaf processing failed");
        assert_eq!(requester.fetches.len(), 3.min(max_tasks_per_leaf));

        let update = leaf_update(4, vec![chain[1], chain[2], chain[3]]);
        requester
            .update_fetching_heads(&mut ctx, &mut runtime, update)
            .await
            .expect("Leaf processing failed");
        assert_eq!(requester.fetches.len(), 1);

        let update = leaf_update(5, vec![chain[4]]);
        requester
            .update_fetching_heads(&mut ctx, &mut runtime, update)
            .await
            .expect("Leaf processing failed");
        assert_eq!(requester.fetches.len(), 2.min(max_tasks_per_leaf));
    });
}
|
||||
@@ -0,0 +1,296 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! Answer requests for availability chunks.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use futures::{channel::oneshot, select, FutureExt};
|
||||
|
||||
use codec::{Decode, Encode};
|
||||
use fatality::Nested;
|
||||
use pezkuwi_node_network_protocol::{
|
||||
request_response::{v1, v2, IncomingRequest, IncomingRequestReceiver, IsRequest},
|
||||
UnifiedReputationChange as Rep,
|
||||
};
|
||||
use pezkuwi_node_primitives::{AvailableData, ErasureChunk};
|
||||
use pezkuwi_node_subsystem::{messages::AvailabilityStoreMessage, SubsystemSender};
|
||||
use pezkuwi_primitives::{CandidateHash, ValidatorIndex};
|
||||
|
||||
use crate::{
|
||||
error::{JfyiError, Result},
|
||||
metrics::{Metrics, FAILED, NOT_FOUND, SUCCEEDED},
|
||||
LOG_TARGET,
|
||||
};
|
||||
|
||||
/// Reputation change handed to the request receivers for requests that could not be decoded.
const COST_INVALID_REQUEST: Rep = Rep::CostMajor("Received message could not be decoded.");
|
||||
|
||||
/// Receiver task to be forked as a separate task to handle PoV requests.
|
||||
pub async fn run_pov_receiver<Sender>(
|
||||
mut sender: Sender,
|
||||
mut receiver: IncomingRequestReceiver<v1::PoVFetchingRequest>,
|
||||
metrics: Metrics,
|
||||
) where
|
||||
Sender: SubsystemSender<AvailabilityStoreMessage>,
|
||||
{
|
||||
loop {
|
||||
match receiver.recv(|| vec![COST_INVALID_REQUEST]).await.into_nested() {
|
||||
Ok(Ok(msg)) => {
|
||||
answer_pov_request_log(&mut sender, msg, &metrics).await;
|
||||
},
|
||||
Err(fatal) => {
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
error = ?fatal,
|
||||
"Shutting down POV receiver."
|
||||
);
|
||||
return;
|
||||
},
|
||||
Ok(Err(jfyi)) => {
|
||||
gum::debug!(target: LOG_TARGET, error = ?jfyi, "Error decoding incoming PoV request.");
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Receiver task to be forked as a separate task to handle chunk requests.
|
||||
pub async fn run_chunk_receivers<Sender>(
|
||||
mut sender: Sender,
|
||||
mut receiver_v1: IncomingRequestReceiver<v1::ChunkFetchingRequest>,
|
||||
mut receiver_v2: IncomingRequestReceiver<v2::ChunkFetchingRequest>,
|
||||
metrics: Metrics,
|
||||
) where
|
||||
Sender: SubsystemSender<AvailabilityStoreMessage>,
|
||||
{
|
||||
let make_resp_v1 = |chunk: Option<ErasureChunk>| match chunk {
|
||||
None => v1::ChunkFetchingResponse::NoSuchChunk,
|
||||
Some(chunk) => v1::ChunkFetchingResponse::Chunk(chunk.into()),
|
||||
};
|
||||
|
||||
let make_resp_v2 = |chunk: Option<ErasureChunk>| match chunk {
|
||||
None => v2::ChunkFetchingResponse::NoSuchChunk,
|
||||
Some(chunk) => v2::ChunkFetchingResponse::Chunk(chunk.into()),
|
||||
};
|
||||
|
||||
loop {
|
||||
select! {
|
||||
res = receiver_v1.recv(|| vec![COST_INVALID_REQUEST]).fuse() => match res.into_nested() {
|
||||
Ok(Ok(msg)) => {
|
||||
answer_chunk_request_log(&mut sender, msg, make_resp_v1, &metrics).await;
|
||||
},
|
||||
Err(fatal) => {
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
error = ?fatal,
|
||||
"Shutting down chunk receiver."
|
||||
);
|
||||
return
|
||||
},
|
||||
Ok(Err(jfyi)) => {
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
error = ?jfyi,
|
||||
"Error decoding incoming chunk request."
|
||||
);
|
||||
}
|
||||
},
|
||||
res = receiver_v2.recv(|| vec![COST_INVALID_REQUEST]).fuse() => match res.into_nested() {
|
||||
Ok(Ok(msg)) => {
|
||||
answer_chunk_request_log(&mut sender, msg.into(), make_resp_v2, &metrics).await;
|
||||
},
|
||||
Err(fatal) => {
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
error = ?fatal,
|
||||
"Shutting down chunk receiver."
|
||||
);
|
||||
return
|
||||
},
|
||||
Ok(Err(jfyi)) => {
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
error = ?jfyi,
|
||||
"Error decoding incoming chunk request."
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Variant of `answer_pov_request` that does Prometheus metric and logging on errors.
|
||||
///
|
||||
/// Any errors of `answer_pov_request` will simply be logged.
|
||||
pub async fn answer_pov_request_log<Sender>(
|
||||
sender: &mut Sender,
|
||||
req: IncomingRequest<v1::PoVFetchingRequest>,
|
||||
metrics: &Metrics,
|
||||
) where
|
||||
Sender: SubsystemSender<AvailabilityStoreMessage>,
|
||||
{
|
||||
let res = answer_pov_request(sender, req).await;
|
||||
match res {
|
||||
Ok(result) => metrics.on_served_pov(if result { SUCCEEDED } else { NOT_FOUND }),
|
||||
Err(err) => {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
err= ?err,
|
||||
"Serving PoV failed with error"
|
||||
);
|
||||
metrics.on_served_pov(FAILED);
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Variant of `answer_chunk_request` that does Prometheus metric and logging on errors.
|
||||
///
|
||||
/// Any errors of `answer_request` will simply be logged.
|
||||
pub async fn answer_chunk_request_log<Sender, Req, MakeResp>(
|
||||
sender: &mut Sender,
|
||||
req: IncomingRequest<Req>,
|
||||
make_response: MakeResp,
|
||||
metrics: &Metrics,
|
||||
) where
|
||||
Req: IsRequest + Decode + Encode + Into<v1::ChunkFetchingRequest>,
|
||||
Req::Response: Encode,
|
||||
Sender: SubsystemSender<AvailabilityStoreMessage>,
|
||||
MakeResp: Fn(Option<ErasureChunk>) -> Req::Response,
|
||||
{
|
||||
let res = answer_chunk_request(sender, req, make_response).await;
|
||||
match res {
|
||||
Ok(result) => metrics.on_served_chunk(if result { SUCCEEDED } else { NOT_FOUND }),
|
||||
Err(err) => {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
err= ?err,
|
||||
"Serving chunk failed with error"
|
||||
);
|
||||
metrics.on_served_chunk(FAILED);
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Answer an incoming PoV fetch request by querying the av store.
|
||||
///
|
||||
/// Returns: `Ok(true)` if chunk was found and served.
|
||||
pub async fn answer_pov_request<Sender>(
|
||||
sender: &mut Sender,
|
||||
req: IncomingRequest<v1::PoVFetchingRequest>,
|
||||
) -> Result<bool>
|
||||
where
|
||||
Sender: SubsystemSender<AvailabilityStoreMessage>,
|
||||
{
|
||||
let av_data = query_available_data(sender, req.payload.candidate_hash).await?;
|
||||
|
||||
let result = av_data.is_some();
|
||||
|
||||
let response = match av_data {
|
||||
None => v1::PoVFetchingResponse::NoSuchPoV,
|
||||
Some(av_data) => {
|
||||
let pov = Arc::try_unwrap(av_data.pov).unwrap_or_else(|a| (&*a).clone());
|
||||
v1::PoVFetchingResponse::PoV(pov)
|
||||
},
|
||||
};
|
||||
|
||||
req.send_response(response).map_err(|_| JfyiError::SendResponse)?;
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
/// Answer an incoming chunk request by querying the av store.
|
||||
///
|
||||
/// Returns: `Ok(true)` if chunk was found and served.
|
||||
pub async fn answer_chunk_request<Sender, Req, MakeResp>(
|
||||
sender: &mut Sender,
|
||||
req: IncomingRequest<Req>,
|
||||
make_response: MakeResp,
|
||||
) -> Result<bool>
|
||||
where
|
||||
Sender: SubsystemSender<AvailabilityStoreMessage>,
|
||||
Req: IsRequest + Decode + Encode + Into<v1::ChunkFetchingRequest>,
|
||||
Req::Response: Encode,
|
||||
MakeResp: Fn(Option<ErasureChunk>) -> Req::Response,
|
||||
{
|
||||
// V1 and V2 requests have the same payload, so decoding into either one will work. It's the
|
||||
// responses that differ, hence the `MakeResp` generic.
|
||||
let payload: v1::ChunkFetchingRequest = req.payload.into();
|
||||
|
||||
let chunk = query_chunk(sender, payload.candidate_hash, payload.index).await?;
|
||||
|
||||
let result = chunk.is_some();
|
||||
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
hash = ?payload.candidate_hash,
|
||||
index = ?payload.index,
|
||||
peer = ?req.peer,
|
||||
has_data = ?chunk.is_some(),
|
||||
"Serving chunk",
|
||||
);
|
||||
|
||||
let response = make_response(chunk);
|
||||
|
||||
req.pending_response
|
||||
.send_response(response)
|
||||
.map_err(|_| JfyiError::SendResponse)?;
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
/// Query chunk from the availability store.
|
||||
async fn query_chunk<Sender>(
|
||||
sender: &mut Sender,
|
||||
candidate_hash: CandidateHash,
|
||||
validator_index: ValidatorIndex,
|
||||
) -> std::result::Result<Option<ErasureChunk>, JfyiError>
|
||||
where
|
||||
Sender: SubsystemSender<AvailabilityStoreMessage>,
|
||||
{
|
||||
let (tx, rx) = oneshot::channel();
|
||||
sender
|
||||
.send_message(
|
||||
AvailabilityStoreMessage::QueryChunk(candidate_hash, validator_index, tx).into(),
|
||||
)
|
||||
.await;
|
||||
|
||||
let result = rx.await.map_err(|e| {
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
?validator_index,
|
||||
?candidate_hash,
|
||||
error = ?e,
|
||||
"Error retrieving chunk",
|
||||
);
|
||||
JfyiError::QueryChunkResponseChannel(e)
|
||||
})?;
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
/// Query PoV from the availability store.
|
||||
async fn query_available_data<Sender>(
|
||||
sender: &mut Sender,
|
||||
candidate_hash: CandidateHash,
|
||||
) -> Result<Option<AvailableData>>
|
||||
where
|
||||
Sender: SubsystemSender<AvailabilityStoreMessage>,
|
||||
{
|
||||
let (tx, rx) = oneshot::channel();
|
||||
sender
|
||||
.send_message(AvailabilityStoreMessage::QueryAvailableData(candidate_hash, tx).into())
|
||||
.await;
|
||||
|
||||
let result = rx.await.map_err(JfyiError::QueryAvailableDataResponseChannel)?;
|
||||
Ok(result)
|
||||
}
|
||||
@@ -0,0 +1,166 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! Helper functions and tools to generate mock data useful for testing this subsystem.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use sp_keyring::Sr25519Keyring;
|
||||
|
||||
use pezkuwi_erasure_coding::{branches, obtain_chunks_v1 as obtain_chunks};
|
||||
use pezkuwi_node_primitives::{AvailableData, BlockData, ErasureChunk, PoV, Proof};
|
||||
use pezkuwi_primitives::{
|
||||
CandidateCommitments, CandidateHash, ChunkIndex, CommittedCandidateReceiptV2, GroupIndex, Hash,
|
||||
HeadData, Id as ParaId, IndexedVec, OccupiedCore, PersistedValidationData, SessionInfo,
|
||||
ValidatorIndex,
|
||||
};
|
||||
use pezkuwi_primitives_test_helpers::{
|
||||
dummy_collator, dummy_collator_signature, dummy_hash, dummy_validation_code,
|
||||
CandidateDescriptor, CommittedCandidateReceipt,
|
||||
};
|
||||
|
||||
/// Create dummy session info with two validator groups.
|
||||
pub fn make_session_info() -> SessionInfo {
|
||||
let validators = vec![
|
||||
Sr25519Keyring::Ferdie, // <- this node, role: validator
|
||||
Sr25519Keyring::Alice,
|
||||
Sr25519Keyring::Bob,
|
||||
Sr25519Keyring::Charlie,
|
||||
Sr25519Keyring::Dave,
|
||||
Sr25519Keyring::Eve,
|
||||
Sr25519Keyring::One,
|
||||
];
|
||||
|
||||
let validator_groups: IndexedVec<GroupIndex, Vec<ValidatorIndex>> =
|
||||
[vec![5, 0, 3], vec![1, 6, 2, 4]]
|
||||
.iter()
|
||||
.map(|g| g.into_iter().map(|v| ValidatorIndex(*v)).collect())
|
||||
.collect();
|
||||
|
||||
SessionInfo {
|
||||
discovery_keys: validators.iter().map(|k| k.public().into()).collect(),
|
||||
// Not used:
|
||||
n_cores: validator_groups.len() as u32,
|
||||
validator_groups,
|
||||
// Not used values:
|
||||
validators: validators.iter().map(|k| k.public().into()).collect(),
|
||||
assignment_keys: Vec::new(),
|
||||
zeroth_delay_tranche_width: 0,
|
||||
relay_vrf_modulo_samples: 0,
|
||||
n_delay_tranches: 0,
|
||||
no_show_slots: 0,
|
||||
needed_approvals: 0,
|
||||
active_validator_indices: Vec::new(),
|
||||
dispute_period: 6,
|
||||
random_seed: [0u8; 32],
|
||||
}
|
||||
}
|
||||
|
||||
/// Builder for constructing occupied cores.
|
||||
///
|
||||
/// Takes all the values we care about and fills the rest with dummy values on `build`.
|
||||
pub struct OccupiedCoreBuilder {
|
||||
pub group_responsible: GroupIndex,
|
||||
pub para_id: ParaId,
|
||||
pub relay_parent: Hash,
|
||||
pub n_validators: usize,
|
||||
pub chunk_index: ChunkIndex,
|
||||
}
|
||||
|
||||
impl OccupiedCoreBuilder {
|
||||
pub fn build(self) -> (OccupiedCore, (CandidateHash, ErasureChunk)) {
|
||||
let pov = PoV { block_data: BlockData(vec![45, 46, 47]) };
|
||||
let pov_hash = pov.hash();
|
||||
let (erasure_root, chunk) =
|
||||
get_valid_chunk_data(pov.clone(), self.n_validators, self.chunk_index);
|
||||
let candidate_receipt = TestCandidateBuilder {
|
||||
para_id: self.para_id,
|
||||
pov_hash,
|
||||
relay_parent: self.relay_parent,
|
||||
erasure_root,
|
||||
..Default::default()
|
||||
}
|
||||
.build();
|
||||
let core = OccupiedCore {
|
||||
next_up_on_available: None,
|
||||
occupied_since: 0,
|
||||
time_out_at: 0,
|
||||
next_up_on_time_out: None,
|
||||
availability: Default::default(),
|
||||
group_responsible: self.group_responsible,
|
||||
candidate_hash: candidate_receipt.hash(),
|
||||
candidate_descriptor: candidate_receipt.descriptor.clone(),
|
||||
};
|
||||
(core, (candidate_receipt.hash(), chunk))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct TestCandidateBuilder {
|
||||
para_id: ParaId,
|
||||
head_data: HeadData,
|
||||
pov_hash: Hash,
|
||||
relay_parent: Hash,
|
||||
erasure_root: Hash,
|
||||
}
|
||||
|
||||
impl TestCandidateBuilder {
|
||||
pub fn build(self) -> CommittedCandidateReceiptV2 {
|
||||
CommittedCandidateReceipt {
|
||||
descriptor: CandidateDescriptor {
|
||||
para_id: self.para_id,
|
||||
pov_hash: self.pov_hash,
|
||||
relay_parent: self.relay_parent,
|
||||
erasure_root: self.erasure_root,
|
||||
collator: dummy_collator(),
|
||||
persisted_validation_data_hash: dummy_hash(),
|
||||
signature: dummy_collator_signature(),
|
||||
para_head: dummy_hash(),
|
||||
validation_code_hash: dummy_validation_code().hash(),
|
||||
},
|
||||
commitments: CandidateCommitments { head_data: self.head_data, ..Default::default() },
|
||||
}
|
||||
.into()
|
||||
}
|
||||
}
|
||||
|
||||
// Get chunk for index 0
|
||||
pub fn get_valid_chunk_data(
|
||||
pov: PoV,
|
||||
n_validators: usize,
|
||||
chunk_index: ChunkIndex,
|
||||
) -> (Hash, ErasureChunk) {
|
||||
let persisted = PersistedValidationData {
|
||||
parent_head: HeadData(vec![7, 8, 9]),
|
||||
relay_parent_number: Default::default(),
|
||||
max_pov_size: 1024,
|
||||
relay_parent_storage_root: Default::default(),
|
||||
};
|
||||
let available_data = AvailableData { validation_data: persisted, pov: Arc::new(pov) };
|
||||
let chunks = obtain_chunks(n_validators, &available_data).unwrap();
|
||||
let branches = branches(chunks.as_ref());
|
||||
let root = branches.root();
|
||||
let chunk = branches
|
||||
.enumerate()
|
||||
.map(|(index, (proof, chunk))| ErasureChunk {
|
||||
chunk: chunk.to_vec(),
|
||||
index: ChunkIndex(index as _),
|
||||
proof: Proof::try_from(proof).unwrap(),
|
||||
})
|
||||
.nth(chunk_index.0 as usize)
|
||||
.expect("There really should be enough chunks.");
|
||||
(root, chunk)
|
||||
}
|
||||
@@ -0,0 +1,201 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use std::collections::HashSet;
|
||||
|
||||
use futures::{executor, future, Future};
|
||||
use rstest::rstest;
|
||||
|
||||
use pezkuwi_node_network_protocol::request_response::{
|
||||
IncomingRequest, Protocol, ReqProtocolNames,
|
||||
};
|
||||
use pezkuwi_primitives::{node_features, Block, CoreState, Hash, NodeFeatures};
|
||||
use sp_keystore::KeystorePtr;
|
||||
|
||||
use super::*;
|
||||
|
||||
mod state;
|
||||
/// State for test harnesses.
|
||||
use state::{TestHarness, TestState};
|
||||
|
||||
/// Mock data useful for testing.
|
||||
pub(crate) mod mock;
|
||||
|
||||
fn test_harness<T: Future<Output = ()>>(
|
||||
keystore: KeystorePtr,
|
||||
req_protocol_names: ReqProtocolNames,
|
||||
test_fx: impl FnOnce(TestHarness) -> T,
|
||||
) -> std::result::Result<(), FatalError> {
|
||||
sp_tracing::init_for_tests();
|
||||
|
||||
let pool = sp_core::testing::TaskExecutor::new();
|
||||
let (context, virtual_overseer) =
|
||||
pezkuwi_node_subsystem_test_helpers::make_subsystem_context(pool.clone());
|
||||
|
||||
let (pov_req_receiver, _pov_req_cfg) = IncomingRequest::get_config_receiver::<
|
||||
Block,
|
||||
sc_network::NetworkWorker<Block, Hash>,
|
||||
>(&req_protocol_names);
|
||||
let (chunk_req_v1_receiver, chunk_req_v1_cfg) = IncomingRequest::get_config_receiver::<
|
||||
Block,
|
||||
sc_network::NetworkWorker<Block, Hash>,
|
||||
>(&req_protocol_names);
|
||||
let (chunk_req_v2_receiver, chunk_req_v2_cfg) = IncomingRequest::get_config_receiver::<
|
||||
Block,
|
||||
sc_network::NetworkWorker<Block, Hash>,
|
||||
>(&req_protocol_names);
|
||||
let subsystem = AvailabilityDistributionSubsystem::new(
|
||||
keystore,
|
||||
IncomingRequestReceivers { pov_req_receiver, chunk_req_v1_receiver, chunk_req_v2_receiver },
|
||||
req_protocol_names,
|
||||
Default::default(),
|
||||
);
|
||||
let subsystem = subsystem.run(context);
|
||||
|
||||
let test_fut =
|
||||
test_fx(TestHarness { virtual_overseer, chunk_req_v1_cfg, chunk_req_v2_cfg, pool });
|
||||
|
||||
futures::pin_mut!(test_fut);
|
||||
futures::pin_mut!(subsystem);
|
||||
|
||||
executor::block_on(future::join(test_fut, subsystem)).1
|
||||
}
|
||||
|
||||
pub fn node_features_with_mapping_enabled() -> NodeFeatures {
|
||||
let mut node_features = NodeFeatures::new();
|
||||
node_features.resize(node_features::FeatureIndex::AvailabilityChunkMapping as usize + 1, false);
|
||||
node_features.set(node_features::FeatureIndex::AvailabilityChunkMapping as u8 as usize, true);
|
||||
node_features
|
||||
}
|
||||
|
||||
/// Simple basic check, whether the subsystem works as expected.
|
||||
///
|
||||
/// Exceptional cases are tested as unit tests in `fetch_task`.
|
||||
#[rstest]
|
||||
#[case(NodeFeatures::EMPTY, Protocol::ChunkFetchingV1)]
|
||||
#[case(NodeFeatures::EMPTY, Protocol::ChunkFetchingV2)]
|
||||
#[case(node_features_with_mapping_enabled(), Protocol::ChunkFetchingV1)]
|
||||
#[case(node_features_with_mapping_enabled(), Protocol::ChunkFetchingV2)]
|
||||
fn check_basic(#[case] node_features: NodeFeatures, #[case] chunk_resp_protocol: Protocol) {
|
||||
let req_protocol_names = ReqProtocolNames::new(&Hash::repeat_byte(0xff), None);
|
||||
let state =
|
||||
TestState::new(node_features.clone(), req_protocol_names.clone(), chunk_resp_protocol);
|
||||
|
||||
if node_features == node_features_with_mapping_enabled() &&
|
||||
chunk_resp_protocol == Protocol::ChunkFetchingV1
|
||||
{
|
||||
// For this specific case, chunk fetching is not possible, because the ValidatorIndex is not
|
||||
// equal to the ChunkIndex and the peer does not send back the actual ChunkIndex.
|
||||
let _ = test_harness(state.keystore.clone(), req_protocol_names, move |harness| {
|
||||
state.run_assert_timeout(harness)
|
||||
});
|
||||
} else {
|
||||
test_harness(state.keystore.clone(), req_protocol_names, move |harness| state.run(harness))
|
||||
.unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
/// Check whether requester tries all validators in group.
|
||||
#[rstest]
|
||||
#[case(NodeFeatures::EMPTY, Protocol::ChunkFetchingV1)]
|
||||
#[case(NodeFeatures::EMPTY, Protocol::ChunkFetchingV2)]
|
||||
#[case(node_features_with_mapping_enabled(), Protocol::ChunkFetchingV1)]
|
||||
#[case(node_features_with_mapping_enabled(), Protocol::ChunkFetchingV2)]
|
||||
fn check_fetch_tries_all(
|
||||
#[case] node_features: NodeFeatures,
|
||||
#[case] chunk_resp_protocol: Protocol,
|
||||
) {
|
||||
let req_protocol_names = ReqProtocolNames::new(&Hash::repeat_byte(0xff), None);
|
||||
let mut state =
|
||||
TestState::new(node_features.clone(), req_protocol_names.clone(), chunk_resp_protocol);
|
||||
for (_, v) in state.chunks.iter_mut() {
|
||||
// 4 validators in group, so this should still succeed:
|
||||
v.push(None);
|
||||
v.push(None);
|
||||
v.push(None);
|
||||
}
|
||||
|
||||
if node_features == node_features_with_mapping_enabled() &&
|
||||
chunk_resp_protocol == Protocol::ChunkFetchingV1
|
||||
{
|
||||
// For this specific case, chunk fetching is not possible, because the ValidatorIndex is not
|
||||
// equal to the ChunkIndex and the peer does not send back the actual ChunkIndex.
|
||||
let _ = test_harness(state.keystore.clone(), req_protocol_names, move |harness| {
|
||||
state.run_assert_timeout(harness)
|
||||
});
|
||||
} else {
|
||||
test_harness(state.keystore.clone(), req_protocol_names, move |harness| state.run(harness))
|
||||
.unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
/// Check whether requester tries all validators in group
|
||||
///
|
||||
/// Check that requester will retry the fetch on error on the next block still pending
|
||||
/// availability.
|
||||
#[rstest]
|
||||
#[case(NodeFeatures::EMPTY, Protocol::ChunkFetchingV1)]
|
||||
#[case(NodeFeatures::EMPTY, Protocol::ChunkFetchingV2)]
|
||||
#[case(node_features_with_mapping_enabled(), Protocol::ChunkFetchingV1)]
|
||||
#[case(node_features_with_mapping_enabled(), Protocol::ChunkFetchingV2)]
|
||||
fn check_fetch_retry(#[case] node_features: NodeFeatures, #[case] chunk_resp_protocol: Protocol) {
|
||||
let req_protocol_names = ReqProtocolNames::new(&Hash::repeat_byte(0xff), None);
|
||||
let mut state =
|
||||
TestState::new(node_features.clone(), req_protocol_names.clone(), chunk_resp_protocol);
|
||||
state
|
||||
.cores
|
||||
.insert(state.relay_chain[2], state.cores.get(&state.relay_chain[1]).unwrap().clone());
|
||||
// We only care about the first three blocks.
|
||||
// 1. scheduled
|
||||
// 2. occupied
|
||||
// 3. still occupied
|
||||
state.relay_chain.truncate(3);
|
||||
|
||||
// Get rid of unused valid chunks:
|
||||
let valid_candidate_hashes: HashSet<_> = state
|
||||
.cores
|
||||
.get(&state.relay_chain[1])
|
||||
.iter()
|
||||
.flat_map(|v| v.iter())
|
||||
.filter_map(|c| match c {
|
||||
CoreState::Occupied(core) => Some(core.candidate_hash),
|
||||
_ => None,
|
||||
})
|
||||
.collect();
|
||||
state.valid_chunks.retain(|(ch, _)| valid_candidate_hashes.contains(ch));
|
||||
|
||||
for (_, v) in state.chunks.iter_mut() {
|
||||
// This should still succeed as cores are still pending availability on next block.
|
||||
v.push(None);
|
||||
v.push(None);
|
||||
v.push(None);
|
||||
v.push(None);
|
||||
v.push(None);
|
||||
}
|
||||
|
||||
if node_features == node_features_with_mapping_enabled() &&
|
||||
chunk_resp_protocol == Protocol::ChunkFetchingV1
|
||||
{
|
||||
// For this specific case, chunk fetching is not possible, because the ValidatorIndex is not
|
||||
// equal to the ChunkIndex and the peer does not send back the actual ChunkIndex.
|
||||
let _ = test_harness(state.keystore.clone(), req_protocol_names, move |harness| {
|
||||
state.run_assert_timeout(harness)
|
||||
});
|
||||
} else {
|
||||
test_harness(state.keystore.clone(), req_protocol_names, move |harness| state.run(harness))
|
||||
.unwrap();
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,450 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use std::{
|
||||
collections::{HashMap, HashSet},
|
||||
time::Duration,
|
||||
};
|
||||
|
||||
use network::{request_responses::OutgoingResponse, ProtocolName, RequestFailure};
|
||||
use pezkuwi_node_subsystem_test_helpers::TestSubsystemContextHandle;
|
||||
use pezkuwi_node_subsystem_util::{availability_chunks::availability_chunk_index, TimeoutExt};
|
||||
|
||||
use futures::{
|
||||
channel::{mpsc, oneshot},
|
||||
FutureExt, SinkExt, StreamExt,
|
||||
};
|
||||
use futures_timer::Delay;
|
||||
|
||||
use sc_network as network;
|
||||
use sc_network::{config as netconfig, config::RequestResponseConfig, IfDisconnected};
|
||||
use sp_core::{testing::TaskExecutor, traits::SpawnNamed};
|
||||
use sp_keystore::KeystorePtr;
|
||||
|
||||
use pezkuwi_node_network_protocol::request_response::{
|
||||
v1, v2, IncomingRequest, OutgoingRequest, Protocol, ReqProtocolNames, Requests,
|
||||
};
|
||||
use pezkuwi_node_primitives::ErasureChunk;
|
||||
use pezkuwi_node_subsystem::{
|
||||
messages::{
|
||||
AllMessages, AvailabilityDistributionMessage, AvailabilityStoreMessage, ChainApiMessage,
|
||||
NetworkBridgeTxMessage, RuntimeApiMessage, RuntimeApiRequest,
|
||||
},
|
||||
ActiveLeavesUpdate, FromOrchestra, OverseerSignal,
|
||||
};
|
||||
use pezkuwi_node_subsystem_test_helpers as test_helpers;
|
||||
use pezkuwi_primitives::{
|
||||
CandidateHash, ChunkIndex, CoreIndex, CoreState, ExecutorParams, GroupIndex, Hash,
|
||||
Id as ParaId, NodeFeatures, ScheduledCore, SessionInfo, ValidatorIndex,
|
||||
};
|
||||
use test_helpers::mock::{make_ferdie_keystore, new_leaf};
|
||||
|
||||
use super::mock::{make_session_info, OccupiedCoreBuilder};
|
||||
use crate::LOG_TARGET;
|
||||
|
||||
type VirtualOverseer = pezkuwi_node_subsystem_test_helpers::TestSubsystemContextHandle<
|
||||
AvailabilityDistributionMessage,
|
||||
>;
|
||||
pub struct TestHarness {
|
||||
pub virtual_overseer: VirtualOverseer,
|
||||
pub chunk_req_v1_cfg: RequestResponseConfig,
|
||||
pub chunk_req_v2_cfg: RequestResponseConfig,
|
||||
pub pool: TaskExecutor,
|
||||
}
|
||||
|
||||
/// `TestState` for mocking execution of this subsystem.
|
||||
///
|
||||
/// The `Default` instance provides data, which makes the system succeed by providing a couple of
|
||||
/// valid occupied cores. You can tune the data before calling `TestState::run`. E.g. modify some
|
||||
/// chunks to be invalid, the test will then still pass if you remove that chunk from
|
||||
/// `valid_chunks`.
|
||||
#[derive(Clone)]
|
||||
pub struct TestState {
|
||||
/// Simulated relay chain heads:
|
||||
pub relay_chain: Vec<Hash>,
|
||||
/// Whenever the subsystem tries to fetch an erasure chunk one item of the given vec will be
|
||||
/// popped. So you can experiment with serving invalid chunks or no chunks on request and see
|
||||
/// whether the subsystem still succeeds with its goal.
|
||||
pub chunks: HashMap<(CandidateHash, ValidatorIndex), Vec<Option<ErasureChunk>>>,
|
||||
/// All chunks that are valid and should be accepted.
|
||||
pub valid_chunks: HashSet<(CandidateHash, ValidatorIndex)>,
|
||||
pub session_info: SessionInfo,
|
||||
/// Cores per relay chain block.
|
||||
pub cores: HashMap<Hash, Vec<CoreState>>,
|
||||
pub keystore: KeystorePtr,
|
||||
pub node_features: NodeFeatures,
|
||||
pub chunk_response_protocol: Protocol,
|
||||
pub req_protocol_names: ReqProtocolNames,
|
||||
pub our_chunk_index: ChunkIndex,
|
||||
}
|
||||
|
||||
impl TestState {
|
||||
/// Initialize a default test state.
|
||||
pub fn new(
|
||||
node_features: NodeFeatures,
|
||||
req_protocol_names: ReqProtocolNames,
|
||||
chunk_response_protocol: Protocol,
|
||||
) -> Self {
|
||||
let relay_chain: Vec<_> = (1u8..10).map(Hash::repeat_byte).collect();
|
||||
let chain_a = ParaId::from(1);
|
||||
let chain_b = ParaId::from(2);
|
||||
|
||||
let chain_ids = vec![chain_a, chain_b];
|
||||
|
||||
let keystore = make_ferdie_keystore();
|
||||
|
||||
let session_info = make_session_info();
|
||||
|
||||
let our_chunk_index = availability_chunk_index(
|
||||
&node_features,
|
||||
session_info.validators.len(),
|
||||
CoreIndex(1),
|
||||
ValidatorIndex(0),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let (cores, chunks) = {
|
||||
let mut cores = HashMap::new();
|
||||
let mut chunks = HashMap::new();
|
||||
|
||||
cores.insert(
|
||||
relay_chain[0],
|
||||
vec![
|
||||
CoreState::Scheduled(ScheduledCore { para_id: chain_ids[0], collator: None }),
|
||||
CoreState::Scheduled(ScheduledCore { para_id: chain_ids[1], collator: None }),
|
||||
],
|
||||
);
|
||||
|
||||
let heads = {
|
||||
let mut advanced = relay_chain.iter();
|
||||
advanced.next();
|
||||
relay_chain.iter().zip(advanced)
|
||||
};
|
||||
for (relay_parent, relay_child) in heads {
|
||||
let (p_cores, p_chunks): (Vec<_>, Vec<_>) = chain_ids
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(i, para_id)| {
|
||||
let (core, chunk) = OccupiedCoreBuilder {
|
||||
group_responsible: GroupIndex(i as _),
|
||||
para_id: *para_id,
|
||||
relay_parent: *relay_parent,
|
||||
n_validators: session_info.validators.len(),
|
||||
chunk_index: our_chunk_index,
|
||||
}
|
||||
.build();
|
||||
(CoreState::Occupied(core), chunk)
|
||||
})
|
||||
.unzip();
|
||||
cores.insert(*relay_child, p_cores);
|
||||
// Skip chunks for our own group (won't get fetched):
|
||||
let mut chunks_other_groups = p_chunks.into_iter();
|
||||
chunks_other_groups.next();
|
||||
for (candidate, chunk) in chunks_other_groups {
|
||||
chunks.insert((candidate, ValidatorIndex(0)), vec![Some(chunk)]);
|
||||
}
|
||||
}
|
||||
(cores, chunks)
|
||||
};
|
||||
Self {
|
||||
relay_chain,
|
||||
valid_chunks: chunks.clone().keys().map(Clone::clone).collect(),
|
||||
chunks,
|
||||
session_info,
|
||||
cores,
|
||||
keystore,
|
||||
node_features,
|
||||
chunk_response_protocol,
|
||||
req_protocol_names,
|
||||
our_chunk_index,
|
||||
}
|
||||
}
|
||||
|
||||
/// Run, but fail after some timeout.
|
||||
pub async fn run(self, harness: TestHarness) {
|
||||
// Make sure test won't run forever.
|
||||
let f = self.run_inner(harness).timeout(Duration::from_secs(5));
|
||||
assert!(f.await.is_some(), "Test ran into timeout");
|
||||
}
|
||||
|
||||
/// Run, and assert an expected timeout.
|
||||
pub async fn run_assert_timeout(self, harness: TestHarness) {
|
||||
// Make sure test won't run forever.
|
||||
let f = self.run_inner(harness).timeout(Duration::from_secs(5));
|
||||
assert!(f.await.is_none(), "Test should have run into timeout");
|
||||
}
|
||||
|
||||
/// Run tests with the given mock values in `TestState`.
|
||||
///
|
||||
/// This will simply advance through the simulated chain and examines whether the subsystem
|
||||
/// behaves as expected: It will succeed if all valid chunks of other backing groups get stored
|
||||
/// and no other.
|
||||
///
|
||||
/// We try to be as agnostic about details as possible, how the subsystem achieves those goals
|
||||
/// should not be a matter to this test suite.
|
||||
async fn run_inner(mut self, mut harness: TestHarness) {
|
||||
// We skip genesis here (in reality ActiveLeavesUpdate can also skip a block):
|
||||
let updates = {
|
||||
let mut advanced = self.relay_chain.iter();
|
||||
advanced.next();
|
||||
self.relay_chain
|
||||
.iter()
|
||||
.zip(advanced)
|
||||
.map(|(old, new)| ActiveLeavesUpdate {
|
||||
activated: Some(new_leaf(*new, 1)),
|
||||
deactivated: vec![*old].into(),
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
};
|
||||
|
||||
// We should be storing all valid chunks during execution:
|
||||
//
|
||||
// Test will fail if this does not happen until timeout.
|
||||
let mut remaining_stores = self.valid_chunks.len();
|
||||
|
||||
let TestSubsystemContextHandle { tx, mut rx, .. } = harness.virtual_overseer;
|
||||
|
||||
// Spawning necessary as incoming queue can only hold a single item, we don't want to dead
|
||||
// lock ;-)
|
||||
let update_tx = tx.clone();
|
||||
harness.pool.spawn(
|
||||
"sending-active-leaves-updates",
|
||||
None,
|
||||
async move {
|
||||
for update in updates {
|
||||
overseer_signal(update_tx.clone(), OverseerSignal::ActiveLeaves(update)).await;
|
||||
// We need to give the subsystem a little time to do its job, otherwise it will
|
||||
// cancel jobs as obsolete:
|
||||
Delay::new(Duration::from_millis(100)).await;
|
||||
}
|
||||
}
|
||||
.boxed(),
|
||||
);
|
||||
|
||||
while remaining_stores > 0 {
|
||||
gum::trace!(target: LOG_TARGET, remaining_stores, "Stores left to go");
|
||||
let msg = overseer_recv(&mut rx).await;
|
||||
match msg {
|
||||
AllMessages::NetworkBridgeTx(NetworkBridgeTxMessage::SendRequests(
|
||||
reqs,
|
||||
IfDisconnected::ImmediateError,
|
||||
)) => {
|
||||
for req in reqs {
|
||||
// Forward requests:
|
||||
match self.chunk_response_protocol {
|
||||
Protocol::ChunkFetchingV1 => {
|
||||
let in_req = to_incoming_req_v1(
|
||||
&harness.pool,
|
||||
req,
|
||||
self.req_protocol_names.get_name(Protocol::ChunkFetchingV1),
|
||||
);
|
||||
|
||||
harness
|
||||
.chunk_req_v1_cfg
|
||||
.inbound_queue
|
||||
.as_mut()
|
||||
.unwrap()
|
||||
.send(in_req.into_raw())
|
||||
.await
|
||||
.unwrap();
|
||||
},
|
||||
Protocol::ChunkFetchingV2 => {
|
||||
let in_req = to_incoming_req_v2(
|
||||
&harness.pool,
|
||||
req,
|
||||
self.req_protocol_names.get_name(Protocol::ChunkFetchingV2),
|
||||
);
|
||||
|
||||
harness
|
||||
.chunk_req_v2_cfg
|
||||
.inbound_queue
|
||||
.as_mut()
|
||||
.unwrap()
|
||||
.send(in_req.into_raw())
|
||||
.await
|
||||
.unwrap();
|
||||
},
|
||||
_ => panic!("Unexpected protocol"),
|
||||
}
|
||||
}
|
||||
},
|
||||
AllMessages::AvailabilityStore(AvailabilityStoreMessage::QueryChunk(
|
||||
candidate_hash,
|
||||
validator_index,
|
||||
tx,
|
||||
)) => {
|
||||
let chunk = self
|
||||
.chunks
|
||||
.get_mut(&(candidate_hash, validator_index))
|
||||
.and_then(Vec::pop)
|
||||
.flatten();
|
||||
tx.send(chunk).expect("Receiver is expected to be alive");
|
||||
},
|
||||
AllMessages::AvailabilityStore(AvailabilityStoreMessage::StoreChunk {
|
||||
candidate_hash,
|
||||
chunk,
|
||||
validator_index,
|
||||
tx,
|
||||
..
|
||||
}) => {
|
||||
assert!(
|
||||
self.valid_chunks.contains(&(candidate_hash, validator_index)),
|
||||
"Only valid chunks should ever get stored."
|
||||
);
|
||||
assert_eq!(self.our_chunk_index, chunk.index);
|
||||
|
||||
tx.send(Ok(())).expect("Receiver is expected to be alive");
|
||||
gum::trace!(target: LOG_TARGET, "'Stored' fetched chunk.");
|
||||
remaining_stores -= 1;
|
||||
},
|
||||
AllMessages::RuntimeApi(RuntimeApiMessage::Request(hash, req)) => {
|
||||
match req {
|
||||
RuntimeApiRequest::SessionIndexForChild(tx) => {
|
||||
// Always session index 1 for now:
|
||||
tx.send(Ok(1)).expect("Receiver should still be alive");
|
||||
},
|
||||
RuntimeApiRequest::SessionInfo(_, tx) => {
|
||||
tx.send(Ok(Some(self.session_info.clone())))
|
||||
.expect("Receiver should be alive.");
|
||||
},
|
||||
RuntimeApiRequest::SessionExecutorParams(_, tx) => {
|
||||
tx.send(Ok(Some(ExecutorParams::default())))
|
||||
.expect("Receiver should be alive.");
|
||||
},
|
||||
RuntimeApiRequest::AvailabilityCores(tx) => {
|
||||
gum::trace!(target: LOG_TARGET, cores= ?self.cores[&hash], hash = ?hash, "Sending out cores for hash");
|
||||
tx.send(Ok(self.cores[&hash].clone()))
|
||||
.expect("Receiver should still be alive");
|
||||
},
|
||||
RuntimeApiRequest::NodeFeatures(_, tx) => {
|
||||
tx.send(Ok(self.node_features.clone()))
|
||||
.expect("Receiver should still be alive");
|
||||
},
|
||||
_ => {
|
||||
panic!("Unexpected runtime request: {:?}", req);
|
||||
},
|
||||
}
|
||||
},
|
||||
AllMessages::ChainApi(ChainApiMessage::Ancestors { hash, k, response_channel }) => {
|
||||
let chain = &self.relay_chain;
|
||||
let maybe_block_position = chain.iter().position(|h| *h == hash);
|
||||
let ancestors = maybe_block_position
|
||||
.map(|idx| chain[..idx].iter().rev().take(k).copied().collect())
|
||||
.unwrap_or_default();
|
||||
response_channel.send(Ok(ancestors)).expect("Receiver is expected to be alive");
|
||||
},
|
||||
|
||||
_ => {
|
||||
panic!("Received unexpected message")
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
overseer_signal(tx, OverseerSignal::Conclude).await;
|
||||
}
|
||||
}
|
||||
|
||||
async fn overseer_signal(
|
||||
mut tx: mpsc::Sender<FromOrchestra<AvailabilityDistributionMessage>>,
|
||||
msg: impl Into<OverseerSignal>,
|
||||
) {
|
||||
let msg = msg.into();
|
||||
gum::trace!(target: LOG_TARGET, msg = ?msg, "sending message");
|
||||
tx.send(FromOrchestra::Signal(msg))
|
||||
.await
|
||||
.expect("Test subsystem no longer live");
|
||||
}
|
||||
|
||||
async fn overseer_recv(rx: &mut mpsc::UnboundedReceiver<AllMessages>) -> AllMessages {
|
||||
gum::trace!(target: LOG_TARGET, "waiting for message ...");
|
||||
rx.next().await.expect("Test subsystem no longer live")
|
||||
}
|
||||
|
||||
fn to_incoming_req_v1(
|
||||
executor: &TaskExecutor,
|
||||
outgoing: Requests,
|
||||
protocol_name: ProtocolName,
|
||||
) -> IncomingRequest<v1::ChunkFetchingRequest> {
|
||||
match outgoing {
|
||||
Requests::ChunkFetching(OutgoingRequest {
|
||||
pending_response,
|
||||
fallback_request: Some((fallback_request, fallback_protocol)),
|
||||
..
|
||||
}) => {
|
||||
assert_eq!(fallback_protocol, Protocol::ChunkFetchingV1);
|
||||
|
||||
let tx = spawn_message_forwarding(executor, protocol_name, pending_response);
|
||||
|
||||
IncomingRequest::new(
|
||||
// We don't really care:
|
||||
network::PeerId::random().into(),
|
||||
fallback_request,
|
||||
tx,
|
||||
)
|
||||
},
|
||||
_ => panic!("Unexpected request!"),
|
||||
}
|
||||
}
|
||||
|
||||
fn to_incoming_req_v2(
|
||||
executor: &TaskExecutor,
|
||||
outgoing: Requests,
|
||||
protocol_name: ProtocolName,
|
||||
) -> IncomingRequest<v2::ChunkFetchingRequest> {
|
||||
match outgoing {
|
||||
Requests::ChunkFetching(OutgoingRequest {
|
||||
payload,
|
||||
pending_response,
|
||||
fallback_request: Some((_, fallback_protocol)),
|
||||
..
|
||||
}) => {
|
||||
assert_eq!(fallback_protocol, Protocol::ChunkFetchingV1);
|
||||
|
||||
let tx = spawn_message_forwarding(executor, protocol_name, pending_response);
|
||||
|
||||
IncomingRequest::new(
|
||||
// We don't really care:
|
||||
network::PeerId::random().into(),
|
||||
payload,
|
||||
tx,
|
||||
)
|
||||
},
|
||||
_ => panic!("Unexpected request!"),
|
||||
}
|
||||
}
|
||||
|
||||
fn spawn_message_forwarding(
|
||||
executor: &TaskExecutor,
|
||||
protocol_name: ProtocolName,
|
||||
pending_response: oneshot::Sender<Result<(Vec<u8>, ProtocolName), RequestFailure>>,
|
||||
) -> oneshot::Sender<OutgoingResponse> {
|
||||
let (tx, rx): (oneshot::Sender<netconfig::OutgoingResponse>, oneshot::Receiver<_>) =
|
||||
oneshot::channel();
|
||||
executor.spawn(
|
||||
"message-forwarding",
|
||||
None,
|
||||
async {
|
||||
let response = rx.await;
|
||||
let payload = response.expect("Unexpected canceled request").result;
|
||||
pending_response
|
||||
.send(payload.map_err(|_| RequestFailure::Refused).map(|r| (r, protocol_name)))
|
||||
.expect("Sending response is expected to work");
|
||||
}
|
||||
.boxed(),
|
||||
);
|
||||
|
||||
tx
|
||||
}
|
||||
@@ -0,0 +1,67 @@
|
||||
[package]
|
||||
name = "pezkuwi-availability-recovery"
|
||||
description = "The Availability Recovery subsystem. Handles requests for recovering the availability data of included candidates."
|
||||
version = "7.0.0"
|
||||
authors.workspace = true
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
homepage.workspace = true
|
||||
repository.workspace = true
|
||||
|
||||
[lints]
|
||||
workspace = true
|
||||
|
||||
[[bench]]
|
||||
name = "availability-recovery-regression-bench"
|
||||
path = "benches/availability-recovery-regression-bench.rs"
|
||||
harness = false
|
||||
required-features = ["subsystem-benchmarks"]
|
||||
|
||||
[dependencies]
|
||||
async-trait = { workspace = true }
|
||||
fatality = { workspace = true }
|
||||
futures = { workspace = true }
|
||||
gum = { workspace = true, default-features = true }
|
||||
rand = { workspace = true, default-features = true }
|
||||
schnellru = { workspace = true }
|
||||
thiserror = { workspace = true }
|
||||
tokio = { workspace = true, default-features = true }
|
||||
|
||||
codec = { features = ["derive"], workspace = true }
|
||||
pezkuwi-erasure-coding = { workspace = true, default-features = true }
|
||||
pezkuwi-node-network-protocol = { workspace = true, default-features = true }
|
||||
pezkuwi-node-primitives = { workspace = true, default-features = true }
|
||||
pezkuwi-node-subsystem = { workspace = true, default-features = true }
|
||||
pezkuwi-node-subsystem-util = { workspace = true, default-features = true }
|
||||
pezkuwi-primitives = { workspace = true, default-features = true }
|
||||
sc-network = { workspace = true, default-features = true }
|
||||
|
||||
[dev-dependencies]
|
||||
assert_matches = { workspace = true }
|
||||
futures-timer = { workspace = true }
|
||||
rstest = { workspace = true }
|
||||
|
||||
sp-core = { workspace = true, default-features = true }
|
||||
sp-keyring = { workspace = true, default-features = true }
|
||||
sp-tracing = { workspace = true, default-features = true }
|
||||
|
||||
pezkuwi-node-subsystem-test-helpers = { workspace = true }
|
||||
pezkuwi-primitives-test-helpers = { workspace = true }
|
||||
pezkuwi-subsystem-bench = { workspace = true }
|
||||
|
||||
[features]
|
||||
subsystem-benchmarks = []
|
||||
runtime-benchmarks = [
|
||||
"gum/runtime-benchmarks",
|
||||
"pezkuwi-erasure-coding/runtime-benchmarks",
|
||||
"pezkuwi-node-network-protocol/runtime-benchmarks",
|
||||
"pezkuwi-node-primitives/runtime-benchmarks",
|
||||
"pezkuwi-node-subsystem-test-helpers/runtime-benchmarks",
|
||||
"pezkuwi-node-subsystem-util/runtime-benchmarks",
|
||||
"pezkuwi-node-subsystem/runtime-benchmarks",
|
||||
"pezkuwi-primitives-test-helpers/runtime-benchmarks",
|
||||
"pezkuwi-primitives/runtime-benchmarks",
|
||||
"pezkuwi-subsystem-bench/runtime-benchmarks",
|
||||
"sc-network/runtime-benchmarks",
|
||||
"sp-keyring/runtime-benchmarks",
|
||||
]
|
||||
+81
@@ -0,0 +1,81 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! availability-read regression tests
|
||||
//!
|
||||
//! Availability read benchmark based on Kusama parameters and scale.
|
||||
//!
|
||||
//! Subsystems involved:
|
||||
//! - availability-recovery
|
||||
|
||||
use pezkuwi_subsystem_bench::{
|
||||
availability::{
|
||||
benchmark_availability_read, prepare_test, DataAvailabilityReadOptions, Strategy,
|
||||
TestDataAvailability, TestState,
|
||||
},
|
||||
configuration::TestConfiguration,
|
||||
usage::BenchmarkUsage,
|
||||
utils::save_to_file,
|
||||
};
|
||||
use std::io::Write;
|
||||
|
||||
/// Number of benchmark runs whose usages get averaged into the final result.
const BENCH_COUNT: usize = 10;
|
||||
|
||||
fn main() -> Result<(), String> {
|
||||
let mut messages = vec![];
|
||||
|
||||
let options = DataAvailabilityReadOptions { strategy: Strategy::FullFromBackers };
|
||||
let mut config = TestConfiguration::default();
|
||||
config.num_blocks = 3;
|
||||
config.generate_pov_sizes();
|
||||
|
||||
let state = TestState::new(&config);
|
||||
|
||||
println!("Benchmarking...");
|
||||
let usages: Vec<BenchmarkUsage> = (0..BENCH_COUNT)
|
||||
.map(|n| {
|
||||
print!("\r[{}{}]", "#".repeat(n), "_".repeat(BENCH_COUNT - n));
|
||||
std::io::stdout().flush().unwrap();
|
||||
let (mut env, _cfgs) =
|
||||
prepare_test(&state, TestDataAvailability::Read(options.clone()), false);
|
||||
env.runtime().block_on(benchmark_availability_read(&mut env, &state))
|
||||
})
|
||||
.collect();
|
||||
println!("\rDone!{}", " ".repeat(BENCH_COUNT));
|
||||
|
||||
let average_usage = BenchmarkUsage::average(&usages);
|
||||
save_to_file(
|
||||
"charts/availability-recovery-regression-bench.json",
|
||||
average_usage.to_chart_json().map_err(|e| e.to_string())?,
|
||||
)
|
||||
.map_err(|e| e.to_string())?;
|
||||
println!("{}", average_usage);
|
||||
|
||||
// We expect no variance for received and sent
|
||||
// but use 0.001 because we operate with floats
|
||||
messages.extend(average_usage.check_network_usage(&[
|
||||
("Received from peers", 307203.0000, 0.001),
|
||||
("Sent to peers", 1.6667, 0.001),
|
||||
]));
|
||||
messages.extend(average_usage.check_cpu_usage(&[("availability-recovery", 11.2758, 0.1)]));
|
||||
|
||||
if messages.is_empty() {
|
||||
Ok(())
|
||||
} else {
|
||||
eprintln!("{}", messages.join("\n"));
|
||||
Err("Regressions found".to_string())
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,91 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! The `Error` and `Result` types used by the subsystem.
|
||||
|
||||
use crate::LOG_TARGET;
|
||||
use fatality::{fatality, Nested};
|
||||
use futures::channel::oneshot;
|
||||
use pezkuwi_node_network_protocol::request_response::incoming;
|
||||
use pezkuwi_node_subsystem::{RecoveryError, SubsystemError};
|
||||
use pezkuwi_primitives::Hash;
|
||||
|
||||
/// Error type used by the Availability Recovery subsystem.
|
||||
#[fatality(splitable)]
|
||||
pub enum Error {
|
||||
#[fatal]
|
||||
#[error("Spawning subsystem task failed: {0}")]
|
||||
SpawnTask(#[source] SubsystemError),
|
||||
|
||||
/// Receiving subsystem message from overseer failed.
|
||||
#[fatal]
|
||||
#[error("Receiving message from overseer failed: {0}")]
|
||||
SubsystemReceive(#[source] SubsystemError),
|
||||
|
||||
#[fatal]
|
||||
#[error("failed to query full data from store")]
|
||||
CanceledQueryFullData(#[source] oneshot::Canceled),
|
||||
|
||||
#[error("`SessionInfo` is `None` at {0}")]
|
||||
SessionInfoUnavailable(Hash),
|
||||
|
||||
#[error("failed to query node features from runtime")]
|
||||
RequestNodeFeatures(#[source] pezkuwi_node_subsystem_util::runtime::Error),
|
||||
|
||||
#[error("failed to send response")]
|
||||
CanceledResponseSender,
|
||||
|
||||
#[error(transparent)]
|
||||
Runtime(#[from] pezkuwi_node_subsystem::errors::RuntimeApiError),
|
||||
|
||||
#[error(transparent)]
|
||||
Erasure(#[from] pezkuwi_erasure_coding::Error),
|
||||
|
||||
#[fatal]
|
||||
#[error(transparent)]
|
||||
Oneshot(#[from] oneshot::Canceled),
|
||||
|
||||
#[fatal(forward)]
|
||||
#[error("Error during recovery: {0}")]
|
||||
Recovery(#[from] RecoveryError),
|
||||
|
||||
#[fatal(forward)]
|
||||
#[error("Retrieving next incoming request failed: {0}")]
|
||||
IncomingRequest(#[from] incoming::Error),
|
||||
}
|
||||
|
||||
/// `Result` with the error type fixed to this module's [`Error`].
pub type Result<T> = std::result::Result<T, Error>;
|
||||
|
||||
/// Utility for eating top level errors and log them.
|
||||
///
|
||||
/// We basically always want to try and continue on error, unless the error is fatal for the entire
|
||||
/// subsystem.
|
||||
pub fn log_error(result: Result<()>) -> std::result::Result<(), FatalError> {
|
||||
match result.into_nested()? {
|
||||
Ok(()) => Ok(()),
|
||||
Err(jfyi) => {
|
||||
jfyi.log();
|
||||
Ok(())
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
impl JfyiError {
|
||||
/// Log a `JfyiError`.
|
||||
pub fn log(self) {
|
||||
gum::warn!(target: LOG_TARGET, "{}", self);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,236 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! FuturesUndead: A `FuturesUnordered` with support for semi canceled futures. Those undead
|
||||
//! futures will still get polled, but will not count towards length. So length will only count
|
||||
//! futures, which are still considered live.
|
||||
//!
|
||||
//! Use case: If futures take longer than we would like them to, we may be able to request the data
|
||||
//! from somewhere else as well. We don't really want to cancel the old future, because maybe it
|
||||
//! was almost done, thus we would have wasted time with our impatience. By simply making them
|
||||
//! not count towards length, we can make sure to have enough "live" requests ongoing, while at the
|
||||
//! same time taking advantage of some maybe "late" response from the undead.
|
||||
|
||||
use std::{
|
||||
pin::Pin,
|
||||
task::{Context, Poll},
|
||||
time::Duration,
|
||||
};
|
||||
|
||||
use futures::{future::BoxFuture, stream::FuturesUnordered, Future, Stream, StreamExt};
|
||||
use pezkuwi_node_subsystem_util::TimeoutExt;
|
||||
|
||||
/// FuturesUndead - `FuturesUnordered` with semi canceled (undead) futures.
|
||||
///
|
||||
/// Limitations: Keeps track of undead futures by means of a counter, which is limited to 64
|
||||
/// bits, so after `1.8*10^19` pushed futures, this implementation will panic.
|
||||
pub struct FuturesUndead<Output> {
|
||||
/// Actual `FuturesUnordered`.
|
||||
inner: FuturesUnordered<Undead<Output>>,
|
||||
/// Next sequence number to assign to the next future that gets pushed.
|
||||
next_sequence: SequenceNumber,
|
||||
/// Sequence number of first future considered live.
|
||||
first_live: Option<SequenceNumber>,
|
||||
/// How many undead are there right now.
|
||||
undead: usize,
|
||||
}
|
||||
|
||||
/// Number assigned to each pushed future; comparing it against the first live sequence number
/// tells live and undead futures apart.
#[derive(Eq, PartialEq, Copy, Clone, Debug, PartialOrd)]
struct SequenceNumber(usize);
|
||||
|
||||
struct Undead<Output> {
|
||||
inner: BoxFuture<'static, Output>,
|
||||
our_sequence: SequenceNumber,
|
||||
}
|
||||
|
||||
impl<Output> FuturesUndead<Output> {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
inner: FuturesUnordered::new(),
|
||||
next_sequence: SequenceNumber(0),
|
||||
first_live: None,
|
||||
undead: 0,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn push(&mut self, f: BoxFuture<'static, Output>) {
|
||||
self.inner.push(Undead { inner: f, our_sequence: self.next_sequence });
|
||||
self.next_sequence.inc();
|
||||
}
|
||||
|
||||
/// Make all contained futures undead.
|
||||
///
|
||||
/// They will no longer be counted on a call to `len`.
|
||||
pub fn soft_cancel(&mut self) {
|
||||
self.undead = self.inner.len();
|
||||
self.first_live = Some(self.next_sequence);
|
||||
}
|
||||
|
||||
/// Number of contained futures minus undead.
|
||||
pub fn len(&self) -> usize {
|
||||
self.inner.len() - self.undead
|
||||
}
|
||||
|
||||
/// Total number of futures, including undead.
|
||||
pub fn total_len(&self) -> usize {
|
||||
self.inner.len()
|
||||
}
|
||||
|
||||
/// Wait for next future to return with timeout.
|
||||
///
|
||||
/// When timeout passes, return `None` and make all currently contained futures undead.
|
||||
pub async fn next_with_timeout(&mut self, timeout: Duration) -> Option<Output> {
|
||||
match self.next().timeout(timeout).await {
|
||||
// Timeout:
|
||||
None => {
|
||||
self.soft_cancel();
|
||||
None
|
||||
},
|
||||
Some(inner) => inner,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<Output> Stream for FuturesUndead<Output> {
|
||||
type Item = Output;
|
||||
|
||||
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
|
||||
match self.inner.poll_next_unpin(cx) {
|
||||
Poll::Pending => Poll::Pending,
|
||||
Poll::Ready(None) => Poll::Ready(None),
|
||||
Poll::Ready(Some((sequence, v))) => {
|
||||
// Cleanup in case we became completely empty:
|
||||
if self.inner.len() == 0 {
|
||||
*self = Self::new();
|
||||
return Poll::Ready(Some(v));
|
||||
}
|
||||
|
||||
let first_live = match self.first_live {
|
||||
None => return Poll::Ready(Some(v)),
|
||||
Some(first_live) => first_live,
|
||||
};
|
||||
// An undead came back:
|
||||
if sequence < first_live {
|
||||
self.undead = self.undead.saturating_sub(1);
|
||||
}
|
||||
Poll::Ready(Some(v))
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl SequenceNumber {
|
||||
pub fn inc(&mut self) {
|
||||
self.0 = self.0.checked_add(1).expect(
|
||||
"We don't expect an `UndeadFuture` to live long enough for 2^64 entries ever getting inserted."
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Future for Undead<T> {
|
||||
type Output = (SequenceNumber, T);
|
||||
fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
|
||||
match self.inner.as_mut().poll(cx) {
|
||||
Poll::Pending => Poll::Pending,
|
||||
Poll::Ready(v) => Poll::Ready((self.our_sequence, v)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use futures::{executor, pending, FutureExt};

    /// A soft cancel makes every contained future stop counting towards `len`.
    #[test]
    fn cancel_sets_len_to_zero() {
        let mut undead = FuturesUndead::new();
        undead.push((async { () }).boxed());
        assert_eq!(undead.len(), 1);

        undead.soft_cancel();
        assert_eq!(undead.len(), 0);
    }

    /// An undead future that completes must not alter the number of live futures.
    #[test]
    fn finished_undead_does_not_change_len() {
        executor::block_on(async {
            let mut undead = FuturesUndead::new();
            undead.push(async { 1_i32 }.boxed());
            undead.push(async { 2_i32 }.boxed());
            assert_eq!(undead.len(), 2);

            undead.soft_cancel();
            assert_eq!(undead.len(), 0);

            // A single live future which is not ready on its first poll.
            undead.push(
                async {
                    pending!();
                    0_i32
                }
                .boxed(),
            );

            undead.next().await;
            assert_eq!(undead.len(), 1);

            undead.push(async { 9_i32 }.boxed());
            undead.soft_cancel();
            assert_eq!(undead.len(), 0);
        });
    }

    /// Live futures that complete reduce `len`, while pending undead ones stay uncounted.
    #[test]
    fn len_stays_correct_when_live_future_ends() {
        executor::block_on(async {
            let mut undead = FuturesUndead::new();
            undead.push(
                async {
                    pending!();
                    1_i32
                }
                .boxed(),
            );
            undead.push(
                async {
                    pending!();
                    2_i32
                }
                .boxed(),
            );
            assert_eq!(undead.len(), 2);

            undead.soft_cancel();
            assert_eq!(undead.len(), 0);

            undead.push(async { 0_i32 }.boxed());
            undead.push(async { 1_i32 }.boxed());

            undead.next().await;
            assert_eq!(undead.len(), 1);

            undead.next().await;
            assert_eq!(undead.len(), 0);

            undead.push(async { 9_i32 }.boxed());
            assert_eq!(undead.len(), 1);
        });
    }

    /// Once the collection has been drained completely, the soft cancel state is reset.
    #[test]
    fn cleanup_works() {
        executor::block_on(async {
            let mut undead = FuturesUndead::new();
            undead.push(async { 1_i32 }.boxed());
            undead.soft_cancel();
            undead.push(async { 2_i32 }.boxed());

            undead.next().await;
            undead.next().await;
            assert_eq!(undead.first_live, None);
        });
    }
}
|
||||
@@ -0,0 +1,925 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! Availability Recovery Subsystem of Pezkuwi.
|
||||
|
||||
#![warn(missing_docs)]
|
||||
|
||||
use std::{
|
||||
collections::{BTreeMap, VecDeque},
|
||||
iter::Iterator,
|
||||
num::NonZeroUsize,
|
||||
pin::Pin,
|
||||
};
|
||||
|
||||
use futures::{
|
||||
channel::oneshot,
|
||||
future::{Future, FutureExt, RemoteHandle},
|
||||
pin_mut,
|
||||
prelude::*,
|
||||
sink::SinkExt,
|
||||
stream::{FuturesUnordered, StreamExt},
|
||||
task::{Context, Poll},
|
||||
};
|
||||
use sc_network::ProtocolName;
|
||||
use schnellru::{ByLength, LruMap};
|
||||
use task::{
|
||||
FetchChunks, FetchChunksParams, FetchFull, FetchFullParams, FetchSystematicChunks,
|
||||
FetchSystematicChunksParams,
|
||||
};
|
||||
|
||||
use pezkuwi_erasure_coding::{
|
||||
branches, obtain_chunks_v1, recovery_threshold, systematic_recovery_threshold,
|
||||
Error as ErasureEncodingError,
|
||||
};
|
||||
use task::{RecoveryParams, RecoveryStrategy, RecoveryTask};
|
||||
|
||||
use error::{log_error, Error, FatalError, Result};
|
||||
use pezkuwi_node_network_protocol::{
|
||||
request_response::{
|
||||
v1 as request_v1, v2 as request_v2, IncomingRequestReceiver, IsRequest, ReqProtocolNames,
|
||||
},
|
||||
UnifiedReputationChange as Rep,
|
||||
};
|
||||
use pezkuwi_node_primitives::AvailableData;
|
||||
use pezkuwi_node_subsystem::{
|
||||
errors::RecoveryError,
|
||||
messages::{AvailabilityRecoveryMessage, AvailabilityStoreMessage},
|
||||
overseer, ActiveLeavesUpdate, FromOrchestra, OverseerSignal, SpawnedSubsystem,
|
||||
SubsystemContext, SubsystemError,
|
||||
};
|
||||
use pezkuwi_node_subsystem_util::{
|
||||
availability_chunks::availability_chunk_indices,
|
||||
runtime::{ExtendedSessionInfo, RuntimeInfo},
|
||||
};
|
||||
use pezkuwi_primitives::{
|
||||
node_features, BlockNumber, CandidateHash, CandidateReceiptV2 as CandidateReceipt, ChunkIndex,
|
||||
CoreIndex, GroupIndex, Hash, SessionIndex, ValidatorIndex,
|
||||
};
|
||||
|
||||
mod error;
|
||||
mod futures_undead;
|
||||
mod metrics;
|
||||
mod task;
|
||||
pub use metrics::Metrics;
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
||||
type RecoveryResult = std::result::Result<AvailableData, RecoveryError>;
|
||||
|
||||
const LOG_TARGET: &str = "teyrchain::availability-recovery";
|
||||
|
||||
// Size of the LRU cache where we keep recovered data.
|
||||
const LRU_SIZE: u32 = 16;
|
||||
|
||||
const COST_INVALID_REQUEST: Rep = Rep::CostMajor("Peer sent unparsable request");
|
||||
|
||||
/// PoV size limit in bytes (1 MiB) below which we prefer fetching from backers. (conservative,
/// Pezkuwi for now)
pub(crate) const CONSERVATIVE_FETCH_CHUNKS_THRESHOLD: usize = 1024 * 1024;
/// PoV size limit in bytes (4 MiB) below which we prefer fetching from backers. (Kusama and all
/// testnets)
pub const FETCH_CHUNKS_THRESHOLD: usize = 4 * 1024 * 1024;
|
||||
|
||||
/// The strategy we use to recover the PoV.
///
/// The last four variants are only helpful for integration tests.
#[derive(Clone, PartialEq)]
pub enum RecoveryStrategyKind {
    /// Try the backing group first if the PoV size is lower than the given number of bytes,
    /// then fall back to validator chunks.
    BackersFirstIfSizeLower(usize),
    /// Try the backing group first if the PoV size is lower than the given number of bytes,
    /// then fall back to systematic chunks. Regular chunk recovery as a last resort.
    BackersFirstIfSizeLowerThenSystematicChunks(usize),

    /// Always try the backing group first, then fall back to validator chunks.
    ///
    /// Only helpful for integration tests.
    #[allow(dead_code)]
    BackersFirstAlways,
    /// Always recover using validator chunks.
    ///
    /// Only helpful for integration tests.
    #[allow(dead_code)]
    ChunksAlways,
    /// First try the backing group. Then systematic chunks.
    ///
    /// Only helpful for integration tests.
    #[allow(dead_code)]
    BackersThenSystematicChunks,
    /// Always recover using systematic chunks, fall back to regular chunks.
    ///
    /// Only helpful for integration tests.
    #[allow(dead_code)]
    SystematicChunks,
}
|
||||
|
||||
/// The Availability Recovery Subsystem.
|
||||
pub struct AvailabilityRecoverySubsystem {
|
||||
/// PoV recovery strategy to use.
|
||||
recovery_strategy_kind: RecoveryStrategyKind,
|
||||
// If this is true, do not request data from the availability store.
|
||||
/// This is the useful for nodes where the
|
||||
/// availability-store subsystem is not expected to run,
|
||||
/// such as collators.
|
||||
bypass_availability_store: bool,
|
||||
/// Receiver for available data requests.
|
||||
req_receiver: IncomingRequestReceiver<request_v1::AvailableDataFetchingRequest>,
|
||||
/// Metrics for this subsystem.
|
||||
metrics: Metrics,
|
||||
/// The type of check to perform after available data was recovered.
|
||||
post_recovery_check: PostRecoveryCheck,
|
||||
/// Full protocol name for ChunkFetchingV1.
|
||||
req_v1_protocol_name: ProtocolName,
|
||||
/// Full protocol name for ChunkFetchingV2.
|
||||
req_v2_protocol_name: ProtocolName,
|
||||
}
|
||||
|
||||
/// Which verification to run once available data has been recovered.
#[derive(Clone, PartialEq, Debug)]
enum PostRecoveryCheck {
    /// Reencode the data and check erasure root. For validators.
    Reencode,
    /// Only check the pov hash. For collators only.
    PovHash,
}
|
||||
|
||||
/// Expensive erasure coding computations that we want to run on a blocking thread.
|
||||
enum ErasureTask {
|
||||
/// Reconstructs `AvailableData` from chunks given `n_validators`.
|
||||
Reconstruct(
|
||||
usize,
|
||||
BTreeMap<ChunkIndex, Vec<u8>>,
|
||||
oneshot::Sender<std::result::Result<AvailableData, ErasureEncodingError>>,
|
||||
),
|
||||
/// Re-encode `AvailableData` into erasure chunks in order to verify the provided root hash of
|
||||
/// the Merkle tree.
|
||||
Reencode(usize, Hash, AvailableData, oneshot::Sender<Option<AvailableData>>),
|
||||
}
|
||||
|
||||
/// Re-encode the data into erasure chunks in order to verify
|
||||
/// the root hash of the provided Merkle tree, which is built
|
||||
/// on-top of the encoded chunks.
|
||||
///
|
||||
/// This (expensive) check is necessary, as otherwise we can't be sure that some chunks won't have
|
||||
/// been tampered with by the backers, which would result in some validators considering the data
|
||||
/// valid and some invalid as having fetched different set of chunks. The checking of the Merkle
|
||||
/// proof for individual chunks only gives us guarantees, that we have fetched a chunk belonging to
|
||||
/// a set the backers have committed to.
|
||||
///
|
||||
/// NOTE: It is fine to do this check with already decoded data, because if the decoding failed for
|
||||
/// some validators, we can be sure that chunks have been tampered with (by the backers) or the
|
||||
/// data was invalid to begin with. In the former case, validators fetching valid chunks will see
|
||||
/// invalid data as well, because the root won't match. In the latter case the situation is the
|
||||
/// same for anyone anyways.
|
||||
fn reconstructed_data_matches_root(
|
||||
n_validators: usize,
|
||||
expected_root: &Hash,
|
||||
data: &AvailableData,
|
||||
metrics: &Metrics,
|
||||
) -> bool {
|
||||
let _timer = metrics.time_reencode_chunks();
|
||||
|
||||
let chunks = match obtain_chunks_v1(n_validators, data) {
|
||||
Ok(chunks) => chunks,
|
||||
Err(e) => {
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
err = ?e,
|
||||
"Failed to obtain chunks",
|
||||
);
|
||||
return false;
|
||||
},
|
||||
};
|
||||
|
||||
let branches = branches(&chunks);
|
||||
|
||||
branches.root() == *expected_root
|
||||
}
|
||||
|
||||
/// Accumulate all awaiting sides for some particular `AvailableData`.
|
||||
struct RecoveryHandle {
|
||||
candidate_hash: CandidateHash,
|
||||
remote: RemoteHandle<RecoveryResult>,
|
||||
awaiting: Vec<oneshot::Sender<RecoveryResult>>,
|
||||
}
|
||||
|
||||
impl Future for RecoveryHandle {
|
||||
type Output = Option<(CandidateHash, RecoveryResult)>;
|
||||
|
||||
fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
|
||||
let mut indices_to_remove = Vec::new();
|
||||
for (i, awaiting) in self.awaiting.iter_mut().enumerate().rev() {
|
||||
if let Poll::Ready(()) = awaiting.poll_canceled(cx) {
|
||||
indices_to_remove.push(i);
|
||||
}
|
||||
}
|
||||
|
||||
// these are reverse order, so remove is fine.
|
||||
for index in indices_to_remove {
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
candidate_hash = ?self.candidate_hash,
|
||||
"Receiver for available data dropped.",
|
||||
);
|
||||
|
||||
self.awaiting.swap_remove(index);
|
||||
}
|
||||
|
||||
if self.awaiting.is_empty() {
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
candidate_hash = ?self.candidate_hash,
|
||||
"All receivers for available data dropped.",
|
||||
);
|
||||
|
||||
return Poll::Ready(None);
|
||||
}
|
||||
|
||||
let remote = &mut self.remote;
|
||||
futures::pin_mut!(remote);
|
||||
let result = futures::ready!(remote.poll(cx));
|
||||
|
||||
for awaiting in self.awaiting.drain(..) {
|
||||
let _ = awaiting.send(result.clone());
|
||||
}
|
||||
|
||||
Poll::Ready(Some((self.candidate_hash, result)))
|
||||
}
|
||||
}
|
||||
|
||||
/// Cached result of an availability recovery operation.
|
||||
#[derive(Debug, Clone)]
|
||||
enum CachedRecovery {
|
||||
/// Availability was successfully retrieved before.
|
||||
Valid(AvailableData),
|
||||
/// Availability was successfully retrieved before, but was found to be invalid.
|
||||
Invalid,
|
||||
}
|
||||
|
||||
impl CachedRecovery {
|
||||
/// Convert back to `Result` to deliver responses.
|
||||
fn into_result(self) -> RecoveryResult {
|
||||
match self {
|
||||
Self::Valid(d) => Ok(d),
|
||||
Self::Invalid => Err(RecoveryError::Invalid),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<RecoveryResult> for CachedRecovery {
|
||||
type Error = ();
|
||||
fn try_from(o: RecoveryResult) -> std::result::Result<CachedRecovery, Self::Error> {
|
||||
match o {
|
||||
Ok(d) => Ok(Self::Valid(d)),
|
||||
Err(RecoveryError::Invalid) => Ok(Self::Invalid),
|
||||
// We don't want to cache unavailable state, as that state might change, so if
|
||||
// requested again we want to try again!
|
||||
Err(RecoveryError::Unavailable) => Err(()),
|
||||
Err(RecoveryError::ChannelClosed) => Err(()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct State {
|
||||
/// Each recovery task is implemented as its own async task,
|
||||
/// and these handles are for communicating with them.
|
||||
ongoing_recoveries: FuturesUnordered<RecoveryHandle>,
|
||||
|
||||
/// A recent block hash for which state should be available.
|
||||
live_block: (BlockNumber, Hash),
|
||||
|
||||
/// An LRU cache of recently recovered data.
|
||||
availability_lru: LruMap<CandidateHash, CachedRecovery>,
|
||||
|
||||
/// Cached runtime info.
|
||||
runtime_info: RuntimeInfo,
|
||||
}
|
||||
|
||||
impl Default for State {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
ongoing_recoveries: FuturesUnordered::new(),
|
||||
live_block: (0, Hash::default()),
|
||||
availability_lru: LruMap::new(ByLength::new(LRU_SIZE)),
|
||||
runtime_info: RuntimeInfo::new(None),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[overseer::subsystem(AvailabilityRecovery, error=SubsystemError, prefix=self::overseer)]
|
||||
impl<Context> AvailabilityRecoverySubsystem {
|
||||
fn start(self, ctx: Context) -> SpawnedSubsystem {
|
||||
let future = self
|
||||
.run(ctx)
|
||||
.map_err(|e| SubsystemError::with_origin("availability-recovery", e))
|
||||
.boxed();
|
||||
SpawnedSubsystem { name: "availability-recovery-subsystem", future }
|
||||
}
|
||||
}
|
||||
|
||||
/// Handles a signal from the overseer.
|
||||
/// Returns true if subsystem receives a deadly signal.
|
||||
async fn handle_signal(state: &mut State, signal: OverseerSignal) -> bool {
|
||||
match signal {
|
||||
OverseerSignal::Conclude => true,
|
||||
OverseerSignal::ActiveLeaves(ActiveLeavesUpdate { activated, .. }) => {
|
||||
// if activated is non-empty, set state.live_block to the highest block in `activated`
|
||||
if let Some(activated) = activated {
|
||||
if activated.number > state.live_block.0 {
|
||||
state.live_block = (activated.number, activated.hash)
|
||||
}
|
||||
}
|
||||
|
||||
false
|
||||
},
|
||||
OverseerSignal::BlockFinalized(_, _) => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Machinery around launching recovery tasks into the background.
|
||||
#[overseer::contextbounds(AvailabilityRecovery, prefix = self::overseer)]
|
||||
async fn launch_recovery_task<Context>(
|
||||
state: &mut State,
|
||||
ctx: &mut Context,
|
||||
response_sender: oneshot::Sender<RecoveryResult>,
|
||||
recovery_strategies: VecDeque<Box<dyn RecoveryStrategy<<Context as SubsystemContext>::Sender>>>,
|
||||
params: RecoveryParams,
|
||||
) -> Result<()> {
|
||||
let candidate_hash = params.candidate_hash;
|
||||
let recovery_task = RecoveryTask::new(ctx.sender().clone(), params, recovery_strategies);
|
||||
|
||||
let (remote, remote_handle) = recovery_task.run().remote_handle();
|
||||
|
||||
state.ongoing_recoveries.push(RecoveryHandle {
|
||||
candidate_hash,
|
||||
remote: remote_handle,
|
||||
awaiting: vec![response_sender],
|
||||
});
|
||||
|
||||
ctx.spawn("recovery-task", Box::pin(remote))
|
||||
.map_err(|err| Error::SpawnTask(err))
|
||||
}
|
||||
|
||||
/// Handles an availability recovery request.
|
||||
#[overseer::contextbounds(AvailabilityRecovery, prefix = self::overseer)]
|
||||
async fn handle_recover<Context>(
|
||||
state: &mut State,
|
||||
ctx: &mut Context,
|
||||
receipt: CandidateReceipt,
|
||||
session_index: SessionIndex,
|
||||
backing_group: Option<GroupIndex>,
|
||||
response_sender: oneshot::Sender<RecoveryResult>,
|
||||
metrics: &Metrics,
|
||||
erasure_task_tx: futures::channel::mpsc::Sender<ErasureTask>,
|
||||
recovery_strategy_kind: RecoveryStrategyKind,
|
||||
bypass_availability_store: bool,
|
||||
post_recovery_check: PostRecoveryCheck,
|
||||
maybe_core_index: Option<CoreIndex>,
|
||||
req_v1_protocol_name: ProtocolName,
|
||||
req_v2_protocol_name: ProtocolName,
|
||||
) -> Result<()> {
|
||||
let candidate_hash = receipt.hash();
|
||||
|
||||
if let Some(result) =
|
||||
state.availability_lru.get(&candidate_hash).cloned().map(|v| v.into_result())
|
||||
{
|
||||
return response_sender.send(result).map_err(|_| Error::CanceledResponseSender);
|
||||
}
|
||||
|
||||
if let Some(i) =
|
||||
state.ongoing_recoveries.iter_mut().find(|i| i.candidate_hash == candidate_hash)
|
||||
{
|
||||
i.awaiting.push(response_sender);
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let session_info_res = state
|
||||
.runtime_info
|
||||
.get_session_info_by_index(ctx.sender(), state.live_block.1, session_index)
|
||||
.await;
|
||||
|
||||
match session_info_res {
|
||||
Ok(ExtendedSessionInfo { session_info, node_features, .. }) => {
|
||||
let mut backer_group = None;
|
||||
let n_validators = session_info.validators.len();
|
||||
let systematic_threshold = systematic_recovery_threshold(n_validators)?;
|
||||
let mut recovery_strategies: VecDeque<
|
||||
Box<dyn RecoveryStrategy<<Context as SubsystemContext>::Sender>>,
|
||||
> = VecDeque::with_capacity(3);
|
||||
|
||||
if let Some(backing_group) = backing_group {
|
||||
if let Some(backing_validators) = session_info.validator_groups.get(backing_group) {
|
||||
let mut small_pov_size = true;
|
||||
|
||||
match recovery_strategy_kind {
|
||||
RecoveryStrategyKind::BackersFirstIfSizeLower(fetch_chunks_threshold) |
|
||||
RecoveryStrategyKind::BackersFirstIfSizeLowerThenSystematicChunks(
|
||||
fetch_chunks_threshold,
|
||||
) => {
|
||||
// Get our own chunk size to get an estimate of the PoV size.
|
||||
let chunk_size: Result<Option<usize>> =
|
||||
query_chunk_size(ctx, candidate_hash).await;
|
||||
if let Ok(Some(chunk_size)) = chunk_size {
|
||||
let pov_size_estimate = chunk_size * systematic_threshold;
|
||||
small_pov_size = pov_size_estimate < fetch_chunks_threshold;
|
||||
|
||||
if small_pov_size {
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
?candidate_hash,
|
||||
pov_size_estimate,
|
||||
fetch_chunks_threshold,
|
||||
"Prefer fetch from backing group",
|
||||
);
|
||||
}
|
||||
} else {
|
||||
// we have a POV limit but were not able to query the chunk size, so
|
||||
// don't use the backing group.
|
||||
small_pov_size = false;
|
||||
}
|
||||
},
|
||||
_ => {},
|
||||
};
|
||||
|
||||
match (&recovery_strategy_kind, small_pov_size) {
|
||||
(RecoveryStrategyKind::BackersFirstAlways, _) |
|
||||
(RecoveryStrategyKind::BackersFirstIfSizeLower(_), true) |
|
||||
(
|
||||
RecoveryStrategyKind::BackersFirstIfSizeLowerThenSystematicChunks(_),
|
||||
true,
|
||||
) |
|
||||
(RecoveryStrategyKind::BackersThenSystematicChunks, _) =>
|
||||
recovery_strategies.push_back(Box::new(FetchFull::new(
|
||||
FetchFullParams { validators: backing_validators.to_vec() },
|
||||
))),
|
||||
_ => {},
|
||||
};
|
||||
|
||||
backer_group = Some(backing_validators);
|
||||
}
|
||||
}
|
||||
|
||||
let chunk_mapping_enabled = if let Some(&true) = node_features
|
||||
.get(usize::from(node_features::FeatureIndex::AvailabilityChunkMapping as u8))
|
||||
.as_deref()
|
||||
{
|
||||
true
|
||||
} else {
|
||||
false
|
||||
};
|
||||
|
||||
// We can only attempt systematic recovery if we received the core index of the
|
||||
// candidate and chunk mapping is enabled.
|
||||
if let Some(core_index) = maybe_core_index {
|
||||
if matches!(
|
||||
recovery_strategy_kind,
|
||||
RecoveryStrategyKind::BackersThenSystematicChunks |
|
||||
RecoveryStrategyKind::SystematicChunks |
|
||||
RecoveryStrategyKind::BackersFirstIfSizeLowerThenSystematicChunks(_)
|
||||
) && chunk_mapping_enabled
|
||||
{
|
||||
let chunk_indices =
|
||||
availability_chunk_indices(node_features, n_validators, core_index)?;
|
||||
|
||||
let chunk_indices: VecDeque<_> = chunk_indices
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(v_index, c_index)| {
|
||||
(
|
||||
*c_index,
|
||||
ValidatorIndex(
|
||||
u32::try_from(v_index)
|
||||
.expect("validator count should not exceed u32"),
|
||||
),
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Only get the validators according to the threshold.
|
||||
let validators = chunk_indices
|
||||
.clone()
|
||||
.into_iter()
|
||||
.filter(|(c_index, _)| {
|
||||
usize::try_from(c_index.0)
|
||||
.expect("usize is at least u32 bytes on all modern targets.") <
|
||||
systematic_threshold
|
||||
})
|
||||
.collect();
|
||||
|
||||
recovery_strategies.push_back(Box::new(FetchSystematicChunks::new(
|
||||
FetchSystematicChunksParams {
|
||||
validators,
|
||||
backers: backer_group.map(|v| v.to_vec()).unwrap_or_else(|| vec![]),
|
||||
},
|
||||
)));
|
||||
}
|
||||
}
|
||||
|
||||
recovery_strategies.push_back(Box::new(FetchChunks::new(FetchChunksParams {
|
||||
n_validators: session_info.validators.len(),
|
||||
})));
|
||||
|
||||
let session_info = session_info.clone();
|
||||
|
||||
let n_validators = session_info.validators.len();
|
||||
|
||||
launch_recovery_task(
|
||||
state,
|
||||
ctx,
|
||||
response_sender,
|
||||
recovery_strategies,
|
||||
RecoveryParams {
|
||||
validator_authority_keys: session_info.discovery_keys.clone(),
|
||||
n_validators,
|
||||
threshold: recovery_threshold(n_validators)?,
|
||||
systematic_threshold,
|
||||
candidate_hash,
|
||||
erasure_root: receipt.descriptor.erasure_root(),
|
||||
metrics: metrics.clone(),
|
||||
bypass_availability_store,
|
||||
post_recovery_check,
|
||||
pov_hash: receipt.descriptor.pov_hash(),
|
||||
req_v1_protocol_name,
|
||||
req_v2_protocol_name,
|
||||
chunk_mapping_enabled,
|
||||
erasure_task_tx,
|
||||
},
|
||||
)
|
||||
.await
|
||||
},
|
||||
Err(_) => {
|
||||
response_sender
|
||||
.send(Err(RecoveryError::Unavailable))
|
||||
.map_err(|_| Error::CanceledResponseSender)?;
|
||||
|
||||
Err(Error::SessionInfoUnavailable(state.live_block.1))
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Queries the full `AvailableData` from av-store.
|
||||
#[overseer::contextbounds(AvailabilityRecovery, prefix = self::overseer)]
|
||||
async fn query_full_data<Context>(
|
||||
ctx: &mut Context,
|
||||
candidate_hash: CandidateHash,
|
||||
) -> Result<Option<AvailableData>> {
|
||||
let (tx, rx) = oneshot::channel();
|
||||
ctx.send_message(AvailabilityStoreMessage::QueryAvailableData(candidate_hash, tx))
|
||||
.await;
|
||||
|
||||
rx.await.map_err(Error::CanceledQueryFullData)
|
||||
}
|
||||
|
||||
/// Queries a chunk from av-store.
|
||||
#[overseer::contextbounds(AvailabilityRecovery, prefix = self::overseer)]
|
||||
async fn query_chunk_size<Context>(
|
||||
ctx: &mut Context,
|
||||
candidate_hash: CandidateHash,
|
||||
) -> Result<Option<usize>> {
|
||||
let (tx, rx) = oneshot::channel();
|
||||
ctx.send_message(AvailabilityStoreMessage::QueryChunkSize(candidate_hash, tx))
|
||||
.await;
|
||||
|
||||
rx.await.map_err(Error::CanceledQueryFullData)
|
||||
}
|
||||
|
||||
#[overseer::contextbounds(AvailabilityRecovery, prefix = self::overseer)]
|
||||
impl AvailabilityRecoverySubsystem {
|
||||
/// Create a new instance of `AvailabilityRecoverySubsystem` suitable for collator nodes,
|
||||
/// which never requests the `AvailabilityStoreSubsystem` subsystem and only checks the POV hash
|
||||
/// instead of reencoding the available data.
|
||||
pub fn for_collator(
|
||||
fetch_chunks_threshold: Option<usize>,
|
||||
req_receiver: IncomingRequestReceiver<request_v1::AvailableDataFetchingRequest>,
|
||||
req_protocol_names: &ReqProtocolNames,
|
||||
metrics: Metrics,
|
||||
) -> Self {
|
||||
Self {
|
||||
recovery_strategy_kind: RecoveryStrategyKind::BackersFirstIfSizeLower(
|
||||
fetch_chunks_threshold.unwrap_or(CONSERVATIVE_FETCH_CHUNKS_THRESHOLD),
|
||||
),
|
||||
bypass_availability_store: true,
|
||||
post_recovery_check: PostRecoveryCheck::PovHash,
|
||||
req_receiver,
|
||||
metrics,
|
||||
req_v1_protocol_name: req_protocol_names
|
||||
.get_name(request_v1::ChunkFetchingRequest::PROTOCOL),
|
||||
req_v2_protocol_name: req_protocol_names
|
||||
.get_name(request_v2::ChunkFetchingRequest::PROTOCOL),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create an optimised new instance of `AvailabilityRecoverySubsystem` suitable for validator
|
||||
/// nodes, which:
|
||||
/// - for small POVs (over the `fetch_chunks_threshold` or the
|
||||
/// `CONSERVATIVE_FETCH_CHUNKS_THRESHOLD`), it attempts full recovery from backers, if backing
|
||||
/// group supplied.
|
||||
/// - for large POVs, attempts systematic recovery, if core_index supplied and
|
||||
/// AvailabilityChunkMapping node feature is enabled.
|
||||
/// - as a last resort, attempt regular chunk recovery from all validators.
|
||||
pub fn for_validator(
|
||||
fetch_chunks_threshold: Option<usize>,
|
||||
req_receiver: IncomingRequestReceiver<request_v1::AvailableDataFetchingRequest>,
|
||||
req_protocol_names: &ReqProtocolNames,
|
||||
metrics: Metrics,
|
||||
) -> Self {
|
||||
Self {
|
||||
recovery_strategy_kind:
|
||||
RecoveryStrategyKind::BackersFirstIfSizeLowerThenSystematicChunks(
|
||||
fetch_chunks_threshold.unwrap_or(CONSERVATIVE_FETCH_CHUNKS_THRESHOLD),
|
||||
),
|
||||
bypass_availability_store: false,
|
||||
post_recovery_check: PostRecoveryCheck::Reencode,
|
||||
req_receiver,
|
||||
metrics,
|
||||
req_v1_protocol_name: req_protocol_names
|
||||
.get_name(request_v1::ChunkFetchingRequest::PROTOCOL),
|
||||
req_v2_protocol_name: req_protocol_names
|
||||
.get_name(request_v2::ChunkFetchingRequest::PROTOCOL),
|
||||
}
|
||||
}
|
||||
|
||||
/// Customise the recovery strategy kind
|
||||
/// Currently only useful for tests.
|
||||
#[cfg(any(test, feature = "subsystem-benchmarks"))]
|
||||
pub fn with_recovery_strategy_kind(
|
||||
req_receiver: IncomingRequestReceiver<request_v1::AvailableDataFetchingRequest>,
|
||||
req_protocol_names: &ReqProtocolNames,
|
||||
metrics: Metrics,
|
||||
recovery_strategy_kind: RecoveryStrategyKind,
|
||||
) -> Self {
|
||||
Self {
|
||||
recovery_strategy_kind,
|
||||
bypass_availability_store: false,
|
||||
post_recovery_check: PostRecoveryCheck::Reencode,
|
||||
req_receiver,
|
||||
metrics,
|
||||
req_v1_protocol_name: req_protocol_names
|
||||
.get_name(request_v1::ChunkFetchingRequest::PROTOCOL),
|
||||
req_v2_protocol_name: req_protocol_names
|
||||
.get_name(request_v2::ChunkFetchingRequest::PROTOCOL),
|
||||
}
|
||||
}
|
||||
|
||||
/// Starts the inner subsystem loop.
|
||||
pub async fn run<Context>(self, mut ctx: Context) -> std::result::Result<(), FatalError> {
|
||||
let mut state = State::default();
|
||||
let Self {
|
||||
mut req_receiver,
|
||||
metrics,
|
||||
recovery_strategy_kind,
|
||||
bypass_availability_store,
|
||||
post_recovery_check,
|
||||
req_v1_protocol_name,
|
||||
req_v2_protocol_name,
|
||||
} = self;
|
||||
|
||||
let (erasure_task_tx, erasure_task_rx) = futures::channel::mpsc::channel(16);
|
||||
let mut erasure_task_rx = erasure_task_rx.fuse();
|
||||
|
||||
// `ThreadPoolBuilder` spawns the tasks using `spawn_blocking`. For each worker there will
|
||||
// be a `mpsc` channel created. Each of these workers take the `Receiver` and poll it in an
|
||||
// infinite loop. All of the sender ends of the channel are sent as a vec which we then use
|
||||
// to create a `Cycle` iterator. We use this iterator to assign work in a round-robin
|
||||
// fashion to the workers in the pool.
|
||||
//
|
||||
// How work is dispatched to the pool from the recovery tasks:
|
||||
// - Once a recovery task finishes retrieving the availability data, it needs to reconstruct
|
||||
// from chunks and/or
|
||||
// re-encode the data which are heavy CPU computations.
|
||||
// To do so it sends an `ErasureTask` to the main loop via the `erasure_task` channel, and
|
||||
// waits for the results over a `oneshot` channel.
|
||||
// - In the subsystem main loop we poll the `erasure_task_rx` receiver.
|
||||
// - We forward the received `ErasureTask` to the `next()` sender yielded by the `Cycle`
|
||||
// iterator.
|
||||
// - Some worker thread handles it and sends the response over the `oneshot` channel.
|
||||
|
||||
// Create a thread pool with 2 workers.
|
||||
let mut to_pool = ThreadPoolBuilder::build(
|
||||
// Pool is guaranteed to have at least 1 worker thread.
|
||||
NonZeroUsize::new(2).expect("There are 2 threads; qed"),
|
||||
metrics.clone(),
|
||||
&mut ctx,
|
||||
)
|
||||
.into_iter()
|
||||
.cycle();
|
||||
|
||||
loop {
|
||||
let recv_req = req_receiver.recv(|| vec![COST_INVALID_REQUEST]).fuse();
|
||||
pin_mut!(recv_req);
|
||||
let res = futures::select! {
|
||||
erasure_task = erasure_task_rx.next() => {
|
||||
match erasure_task {
|
||||
Some(task) => {
|
||||
to_pool
|
||||
.next()
|
||||
.expect("Pool size is `NonZeroUsize`; qed")
|
||||
.send(task)
|
||||
.await
|
||||
.map_err(|_| RecoveryError::ChannelClosed)
|
||||
},
|
||||
None => {
|
||||
Err(RecoveryError::ChannelClosed)
|
||||
}
|
||||
}.map_err(Into::into)
|
||||
}
|
||||
signal = ctx.recv().fuse() => {
|
||||
match signal {
|
||||
Ok(signal) => {
|
||||
match signal {
|
||||
FromOrchestra::Signal(signal) => if handle_signal(
|
||||
&mut state,
|
||||
signal,
|
||||
).await {
|
||||
gum::debug!(target: LOG_TARGET, "subsystem concluded");
|
||||
return Ok(());
|
||||
} else {
|
||||
Ok(())
|
||||
},
|
||||
FromOrchestra::Communication {
|
||||
msg: AvailabilityRecoveryMessage::RecoverAvailableData(
|
||||
receipt,
|
||||
session_index,
|
||||
maybe_backing_group,
|
||||
maybe_core_index,
|
||||
response_sender,
|
||||
)
|
||||
} => handle_recover(
|
||||
&mut state,
|
||||
&mut ctx,
|
||||
receipt,
|
||||
session_index,
|
||||
maybe_backing_group,
|
||||
response_sender,
|
||||
&metrics,
|
||||
erasure_task_tx.clone(),
|
||||
recovery_strategy_kind.clone(),
|
||||
bypass_availability_store,
|
||||
post_recovery_check.clone(),
|
||||
maybe_core_index,
|
||||
req_v1_protocol_name.clone(),
|
||||
req_v2_protocol_name.clone(),
|
||||
).await
|
||||
}
|
||||
},
|
||||
Err(e) => Err(Error::SubsystemReceive(e))
|
||||
}
|
||||
}
|
||||
in_req = recv_req => {
|
||||
match in_req {
|
||||
Ok(req) => {
|
||||
if bypass_availability_store {
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
"Skipping request to availability-store.",
|
||||
);
|
||||
let _ = req.send_response(None.into());
|
||||
Ok(())
|
||||
} else {
|
||||
match query_full_data(&mut ctx, req.payload.candidate_hash).await {
|
||||
Ok(res) => {
|
||||
let _ = req.send_response(res.into());
|
||||
Ok(())
|
||||
}
|
||||
Err(e) => {
|
||||
let _ = req.send_response(None.into());
|
||||
Err(e)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => Err(Error::IncomingRequest(e))
|
||||
}
|
||||
}
|
||||
output = state.ongoing_recoveries.select_next_some() => {
|
||||
let mut res = Ok(());
|
||||
if let Some((candidate_hash, result)) = output {
|
||||
if let Err(ref e) = result {
|
||||
res = Err(Error::Recovery(e.clone()));
|
||||
}
|
||||
|
||||
if let Ok(recovery) = CachedRecovery::try_from(result) {
|
||||
state.availability_lru.insert(candidate_hash, recovery);
|
||||
}
|
||||
}
|
||||
|
||||
res
|
||||
}
|
||||
};
|
||||
|
||||
// Only bubble up fatal errors, but log all of them.
|
||||
if let Err(e) = res {
|
||||
log_error(Err(e))?;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// A simple thread pool implementation using `spawn_blocking` threads.
//
// The type carries no data; workers are created through `ThreadPoolBuilder::build`.
struct ThreadPoolBuilder;
|
||||
|
||||
// Upper bound on the number of workers `ThreadPoolBuilder::build` will create.
// Evaluated at compile time; the `panic!` branch can only fire if the literal is changed to 0.
const MAX_THREADS: NonZeroUsize = {
    if let Some(limit) = NonZeroUsize::new(4) {
        limit
    } else {
        panic!("MAX_THREADS must be non-zero")
    }
};
|
||||
|
||||
impl ThreadPoolBuilder {
|
||||
// Creates a pool of `size` workers, where 1 <= `size` <= `MAX_THREADS`.
|
||||
//
|
||||
// Each worker is created by `spawn_blocking` and takes the receiver side of a channel
|
||||
// while all of the senders are returned to the caller. Each worker runs `erasure_task_thread`
|
||||
// that polls the `Receiver` for an `ErasureTask` which is expected to be CPU intensive. The
|
||||
// larger the input (more or larger chunks/availability data), the more CPU cycles will be
|
||||
// spent.
|
||||
//
|
||||
// For example, for 32KB PoVs, we'd expect re-encode to eat as much as 90ms and 500ms for
|
||||
// 2.5MiB.
|
||||
//
|
||||
// After executing such a task, the worker sends the response via a provided `oneshot` sender.
|
||||
//
|
||||
// The caller is responsible for routing work to the workers.
|
||||
#[overseer::contextbounds(AvailabilityRecovery, prefix = self::overseer)]
|
||||
pub fn build<Context>(
|
||||
size: NonZeroUsize,
|
||||
metrics: Metrics,
|
||||
ctx: &mut Context,
|
||||
) -> Vec<futures::channel::mpsc::Sender<ErasureTask>> {
|
||||
// At least 1 task, at most `MAX_THREADS.
|
||||
let size = std::cmp::min(size, MAX_THREADS);
|
||||
let mut senders = Vec::new();
|
||||
|
||||
for index in 0..size.into() {
|
||||
let (tx, rx) = futures::channel::mpsc::channel(8);
|
||||
senders.push(tx);
|
||||
|
||||
if let Err(e) = ctx
|
||||
.spawn_blocking("erasure-task", Box::pin(erasure_task_thread(metrics.clone(), rx)))
|
||||
{
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
err = ?e,
|
||||
index,
|
||||
"Failed to spawn a erasure task",
|
||||
);
|
||||
}
|
||||
}
|
||||
senders
|
||||
}
|
||||
}
|
||||
|
||||
// Handles CPU intensive operation on a dedicated blocking thread.
|
||||
async fn erasure_task_thread(
|
||||
metrics: Metrics,
|
||||
mut ingress: futures::channel::mpsc::Receiver<ErasureTask>,
|
||||
) {
|
||||
loop {
|
||||
match ingress.next().await {
|
||||
Some(ErasureTask::Reconstruct(n_validators, chunks, sender)) => {
|
||||
let _ = sender.send(pezkuwi_erasure_coding::reconstruct_v1(
|
||||
n_validators,
|
||||
chunks.iter().map(|(c_index, chunk)| {
|
||||
(
|
||||
&chunk[..],
|
||||
usize::try_from(c_index.0)
|
||||
.expect("usize is at least u32 bytes on all modern targets."),
|
||||
)
|
||||
}),
|
||||
));
|
||||
},
|
||||
Some(ErasureTask::Reencode(n_validators, root, available_data, sender)) => {
|
||||
let metrics = metrics.clone();
|
||||
|
||||
let maybe_data = if reconstructed_data_matches_root(
|
||||
n_validators,
|
||||
&root,
|
||||
&available_data,
|
||||
&metrics,
|
||||
) {
|
||||
Some(available_data)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let _ = sender.send(maybe_data);
|
||||
},
|
||||
None => {
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
"Erasure task channel closed. Node shutting down ?",
|
||||
);
|
||||
break;
|
||||
},
|
||||
}
|
||||
|
||||
// In benchmarks this is a very hot loop not yielding at all.
|
||||
// To update CPU metrics for the task we need to yield.
|
||||
#[cfg(feature = "subsystem-benchmarks")]
|
||||
tokio::task::yield_now().await;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,409 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use pezkuwi_node_subsystem::prometheus::HistogramVec;
|
||||
use pezkuwi_node_subsystem_util::metrics::{
|
||||
self,
|
||||
prometheus::{
|
||||
self, prometheus::HistogramTimer, Counter, CounterVec, Histogram, Opts, PrometheusError,
|
||||
Registry, U64,
|
||||
},
|
||||
};
|
||||
|
||||
/// Availability Recovery metrics.
///
/// Wraps an optional `MetricsInner`. When the inner value is `None` (the `Default` and
/// `new_dummy` case) every reporting method is a no-op.
#[derive(Clone, Default)]
pub struct Metrics(Option<MetricsInner>);
|
||||
|
||||
#[derive(Clone)]
struct MetricsInner {
    /// Number of sent chunk requests.
    ///
    /// Gets incremented on each sent chunk request.
    ///
    /// Split by chunk type:
    /// - `regular_chunks`
    /// - `systematic_chunks`
    chunk_requests_issued: CounterVec<U64>,

    /// Total number of bytes recovered.
    ///
    /// Gets incremented on each successful recovery.
    recovered_bytes_total: Counter<U64>,

    /// A counter for finished chunk requests.
    ///
    /// Split by the chunk type (`regular_chunks` or `systematic_chunks`)
    ///
    /// Also split by result:
    /// - `no_such_chunk` ... peer did not have the requested chunk
    /// - `timeout` ... request timed out.
    /// - `error` ... Some networking issue except timeout
    /// - `invalid` ... Chunk was received, but not valid.
    /// - `success`
    chunk_requests_finished: CounterVec<U64>,

    /// A counter for successful chunk requests, split by the network protocol version.
    chunk_request_protocols: CounterVec<U64>,

    /// Number of sent available data requests.
    full_data_requests_issued: Counter<U64>,

    /// Counter for finished available data requests.
    ///
    /// Split by the result type:
    ///
    /// - `no_such_data` ... peer did not have the requested data
    /// - `timeout` ... request timed out.
    /// - `error` ... Some networking issue except timeout
    /// - `invalid` ... data was received, but not valid.
    /// - `success`
    full_data_requests_finished: CounterVec<U64>,

    /// The duration of request to response.
    ///
    /// Split by chunk type (`regular_chunks` or `systematic_chunks`).
    time_chunk_request: HistogramVec,

    /// The duration between the pure recovery and verification.
    ///
    /// Split by recovery type (`regular_chunks`, `systematic_chunks` or `full_from_backers`).
    time_erasure_recovery: HistogramVec,

    /// How much time it takes to reconstruct the available data from chunks.
    ///
    /// Split by chunk type (`regular_chunks` or `systematic_chunks`), as the algorithms are
    /// different.
    time_erasure_reconstruct: HistogramVec,

    /// How much time it takes to re-encode the data into erasure chunks in order to verify
    /// the root hash of the provided Merkle tree. See `reconstructed_data_matches_root`.
    time_reencode_chunks: Histogram,

    /// Time of a full recovery, including erasure decoding or until we gave
    /// up.
    time_full_recovery: Histogram,

    /// Number of full recoveries that have been finished one way or the other.
    ///
    /// Split by recovery `strategy_type` (`full_from_backers, systematic_chunks, regular_chunks,
    /// all`). `all` is used for failed recoveries that tried all available strategies.
    /// Also split by `result` type.
    full_recoveries_finished: CounterVec<U64>,

    /// Number of full recoveries that have been started on this subsystem.
    ///
    /// Note: Those are only recoveries which could not get served locally already - so in other
    /// words: Only real recoveries.
    full_recoveries_started: Counter<U64>,
}
|
||||
|
||||
impl Metrics {
|
||||
/// Create new dummy metrics, not reporting anything.
|
||||
pub fn new_dummy() -> Self {
|
||||
Metrics(None)
|
||||
}
|
||||
|
||||
/// Increment counter for chunk requests.
|
||||
pub fn on_chunk_request_issued(&self, chunk_type: &str) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.chunk_requests_issued.with_label_values(&[chunk_type]).inc()
|
||||
}
|
||||
}
|
||||
|
||||
/// Increment counter for full data requests.
|
||||
pub fn on_full_request_issued(&self) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.full_data_requests_issued.inc()
|
||||
}
|
||||
}
|
||||
|
||||
/// A chunk request timed out.
|
||||
pub fn on_chunk_request_timeout(&self, chunk_type: &str) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics
|
||||
.chunk_requests_finished
|
||||
.with_label_values(&[chunk_type, "timeout"])
|
||||
.inc()
|
||||
}
|
||||
}
|
||||
|
||||
/// A full data request timed out.
|
||||
pub fn on_full_request_timeout(&self) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.full_data_requests_finished.with_label_values(&["timeout"]).inc()
|
||||
}
|
||||
}
|
||||
|
||||
/// A chunk request failed because validator did not have its chunk.
|
||||
pub fn on_chunk_request_no_such_chunk(&self, chunk_type: &str) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics
|
||||
.chunk_requests_finished
|
||||
.with_label_values(&[chunk_type, "no_such_chunk"])
|
||||
.inc()
|
||||
}
|
||||
}
|
||||
|
||||
/// A full data request failed because the validator did not have it.
|
||||
pub fn on_full_request_no_such_data(&self) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.full_data_requests_finished.with_label_values(&["no_such_data"]).inc()
|
||||
}
|
||||
}
|
||||
|
||||
/// A chunk request failed for some non timeout related network error.
|
||||
pub fn on_chunk_request_error(&self, chunk_type: &str) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.chunk_requests_finished.with_label_values(&[chunk_type, "error"]).inc()
|
||||
}
|
||||
}
|
||||
|
||||
/// A full data request failed for some non timeout related network error.
|
||||
pub fn on_full_request_error(&self) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.full_data_requests_finished.with_label_values(&["error"]).inc()
|
||||
}
|
||||
}
|
||||
|
||||
/// A chunk request succeeded, but was not valid.
|
||||
pub fn on_chunk_request_invalid(&self, chunk_type: &str) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics
|
||||
.chunk_requests_finished
|
||||
.with_label_values(&[chunk_type, "invalid"])
|
||||
.inc()
|
||||
}
|
||||
}
|
||||
|
||||
/// A full data request succeeded, but was not valid.
|
||||
pub fn on_full_request_invalid(&self) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.full_data_requests_finished.with_label_values(&["invalid"]).inc()
|
||||
}
|
||||
}
|
||||
|
||||
/// A chunk request succeeded.
|
||||
pub fn on_chunk_request_succeeded(&self, chunk_type: &str) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics
|
||||
.chunk_requests_finished
|
||||
.with_label_values(&[chunk_type, "success"])
|
||||
.inc()
|
||||
}
|
||||
}
|
||||
|
||||
/// A chunk response was received on the v1 protocol.
|
||||
pub fn on_chunk_response_v1(&self) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.chunk_request_protocols.with_label_values(&["v1"]).inc()
|
||||
}
|
||||
}
|
||||
|
||||
/// A chunk response was received on the v2 protocol.
|
||||
pub fn on_chunk_response_v2(&self) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.chunk_request_protocols.with_label_values(&["v2"]).inc()
|
||||
}
|
||||
}
|
||||
|
||||
/// A full data request succeeded.
|
||||
pub fn on_full_request_succeeded(&self) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.full_data_requests_finished.with_label_values(&["success"]).inc()
|
||||
}
|
||||
}
|
||||
|
||||
/// Get a timer to time request/response duration.
|
||||
pub fn time_chunk_request(&self, chunk_type: &str) -> Option<HistogramTimer> {
|
||||
self.0.as_ref().map(|metrics| {
|
||||
metrics.time_chunk_request.with_label_values(&[chunk_type]).start_timer()
|
||||
})
|
||||
}
|
||||
|
||||
/// Get a timer to time erasure code recover.
|
||||
pub fn time_erasure_recovery(&self, chunk_type: &str) -> Option<HistogramTimer> {
|
||||
self.0.as_ref().map(|metrics| {
|
||||
metrics.time_erasure_recovery.with_label_values(&[chunk_type]).start_timer()
|
||||
})
|
||||
}
|
||||
|
||||
/// Get a timer for available data reconstruction.
|
||||
pub fn time_erasure_reconstruct(&self, chunk_type: &str) -> Option<HistogramTimer> {
|
||||
self.0.as_ref().map(|metrics| {
|
||||
metrics.time_erasure_reconstruct.with_label_values(&[chunk_type]).start_timer()
|
||||
})
|
||||
}
|
||||
|
||||
/// Get a timer to time chunk encoding.
|
||||
pub fn time_reencode_chunks(&self) -> Option<HistogramTimer> {
|
||||
self.0.as_ref().map(|metrics| metrics.time_reencode_chunks.start_timer())
|
||||
}
|
||||
|
||||
/// Get a timer to measure the time of the complete recovery process.
|
||||
pub fn time_full_recovery(&self) -> Option<HistogramTimer> {
|
||||
self.0.as_ref().map(|metrics| metrics.time_full_recovery.start_timer())
|
||||
}
|
||||
|
||||
/// A full recovery succeeded.
|
||||
pub fn on_recovery_succeeded(&self, strategy_type: &str, bytes: usize) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics
|
||||
.full_recoveries_finished
|
||||
.with_label_values(&["success", strategy_type])
|
||||
.inc();
|
||||
metrics.recovered_bytes_total.inc_by(bytes as u64)
|
||||
}
|
||||
}
|
||||
|
||||
/// A full recovery failed (data not available).
|
||||
pub fn on_recovery_failed(&self, strategy_type: &str) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics
|
||||
.full_recoveries_finished
|
||||
.with_label_values(&["failure", strategy_type])
|
||||
.inc()
|
||||
}
|
||||
}
|
||||
|
||||
/// A full recovery failed (data was recovered, but invalid).
|
||||
pub fn on_recovery_invalid(&self, strategy_type: &str) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics
|
||||
.full_recoveries_finished
|
||||
.with_label_values(&["invalid", strategy_type])
|
||||
.inc()
|
||||
}
|
||||
}
|
||||
|
||||
/// A recover was started.
|
||||
pub fn on_recovery_started(&self) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.full_recoveries_started.inc()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl metrics::Metrics for Metrics {
|
||||
fn try_register(registry: &Registry) -> Result<Self, PrometheusError> {
|
||||
let metrics = MetricsInner {
|
||||
chunk_requests_issued: prometheus::register(
|
||||
CounterVec::new(
|
||||
Opts::new("pezkuwi_teyrchain_availability_recovery_chunk_requests_issued",
|
||||
"Total number of issued chunk requests."),
|
||||
&["type"]
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
full_data_requests_issued: prometheus::register(
|
||||
Counter::new(
|
||||
"pezkuwi_teyrchain_availability_recovery_full_data_requests_issued",
|
||||
"Total number of issued full data requests.",
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
recovered_bytes_total: prometheus::register(
|
||||
Counter::new(
|
||||
"pezkuwi_teyrchain_availability_recovery_bytes_total",
|
||||
"Total number of bytes recovered",
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
chunk_requests_finished: prometheus::register(
|
||||
CounterVec::new(
|
||||
Opts::new(
|
||||
"pezkuwi_teyrchain_availability_recovery_chunk_requests_finished",
|
||||
"Total number of chunk requests finished.",
|
||||
),
|
||||
&["result", "type"],
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
chunk_request_protocols: prometheus::register(
|
||||
CounterVec::new(
|
||||
Opts::new(
|
||||
"pezkuwi_teyrchain_availability_recovery_chunk_request_protocols",
|
||||
"Total number of successful chunk requests, mapped by the protocol version (v1 or v2).",
|
||||
),
|
||||
&["protocol"],
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
full_data_requests_finished: prometheus::register(
|
||||
CounterVec::new(
|
||||
Opts::new(
|
||||
"pezkuwi_teyrchain_availability_recovery_full_data_requests_finished",
|
||||
"Total number of full data requests finished.",
|
||||
),
|
||||
&["result"],
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
time_chunk_request: prometheus::register(
|
||||
prometheus::HistogramVec::new(prometheus::HistogramOpts::new(
|
||||
"pezkuwi_teyrchain_availability_recovery_time_chunk_request",
|
||||
"Time spent waiting for a response to a chunk request",
|
||||
), &["type"])?,
|
||||
registry,
|
||||
)?,
|
||||
time_erasure_recovery: prometheus::register(
|
||||
prometheus::HistogramVec::new(prometheus::HistogramOpts::new(
|
||||
"pezkuwi_teyrchain_availability_recovery_time_erasure_recovery",
|
||||
"Time spent to recover the erasure code and verify the merkle root by re-encoding as erasure chunks",
|
||||
), &["type"])?,
|
||||
registry,
|
||||
)?,
|
||||
time_erasure_reconstruct: prometheus::register(
|
||||
prometheus::HistogramVec::new(prometheus::HistogramOpts::new(
|
||||
"pezkuwi_teyrchain_availability_recovery_time_erasure_reconstruct",
|
||||
"Time spent to reconstruct the data from chunks",
|
||||
), &["type"])?,
|
||||
registry,
|
||||
)?,
|
||||
time_reencode_chunks: prometheus::register(
|
||||
prometheus::Histogram::with_opts(prometheus::HistogramOpts::new(
|
||||
"pezkuwi_teyrchain_availability_reencode_chunks",
|
||||
"Time spent re-encoding the data as erasure chunks",
|
||||
))?,
|
||||
registry,
|
||||
)?,
|
||||
time_full_recovery: prometheus::register(
|
||||
prometheus::Histogram::with_opts(prometheus::HistogramOpts::new(
|
||||
"pezkuwi_teyrchain_availability_recovery_time_total",
|
||||
"Time a full recovery process took, either until failure or successful erasure decoding.",
|
||||
))?,
|
||||
registry,
|
||||
)?,
|
||||
full_recoveries_finished: prometheus::register(
|
||||
CounterVec::new(
|
||||
Opts::new(
|
||||
"pezkuwi_teyrchain_availability_recovery_recoveries_finished",
|
||||
"Total number of recoveries that finished.",
|
||||
),
|
||||
&["result", "strategy_type"],
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
full_recoveries_started: prometheus::register(
|
||||
Counter::new(
|
||||
"pezkuwi_teyrchain_availability_recovery_recoveries_started",
|
||||
"Total number of started recoveries.",
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
};
|
||||
Ok(Metrics(Some(metrics)))
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,197 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! Main recovery task logic. Runs recovery strategies.
|
||||
|
||||
#![warn(missing_docs)]
|
||||
|
||||
mod strategy;
|
||||
|
||||
pub use self::strategy::{
|
||||
FetchChunks, FetchChunksParams, FetchFull, FetchFullParams, FetchSystematicChunks,
|
||||
FetchSystematicChunksParams, RecoveryStrategy, State,
|
||||
};
|
||||
|
||||
#[cfg(test)]
|
||||
pub use self::strategy::{REGULAR_CHUNKS_REQ_RETRY_LIMIT, SYSTEMATIC_CHUNKS_REQ_RETRY_LIMIT};
|
||||
|
||||
use crate::{metrics::Metrics, ErasureTask, PostRecoveryCheck, LOG_TARGET};
|
||||
|
||||
use codec::Encode;
|
||||
use pezkuwi_node_primitives::AvailableData;
|
||||
use pezkuwi_node_subsystem::{messages::AvailabilityStoreMessage, overseer, RecoveryError};
|
||||
use pezkuwi_primitives::{AuthorityDiscoveryId, CandidateHash, Hash};
|
||||
use sc_network::ProtocolName;
|
||||
|
||||
use futures::channel::{mpsc, oneshot};
|
||||
use std::collections::VecDeque;
|
||||
|
||||
/// Recovery parameters common to all strategies in a `RecoveryTask`.
///
/// A shared reference to this struct is passed to every strategy that the task runs.
#[derive(Clone)]
pub struct RecoveryParams {
    /// Discovery ids of `validators`.
    pub validator_authority_keys: Vec<AuthorityDiscoveryId>,

    /// Number of validators.
    pub n_validators: usize,

    /// The number of regular chunks needed.
    pub threshold: usize,

    /// The number of systematic chunks needed.
    pub systematic_threshold: usize,

    /// A hash of the relevant candidate.
    pub candidate_hash: CandidateHash,

    /// The root of the erasure encoding of the candidate.
    pub erasure_root: Hash,

    /// Metrics to report.
    pub metrics: Metrics,

    /// Do not request data from availability-store. Useful for collators.
    pub bypass_availability_store: bool,

    /// The type of check to perform after available data was recovered.
    pub post_recovery_check: PostRecoveryCheck,

    /// The blake2-256 hash of the PoV.
    pub pov_hash: Hash,

    /// Protocol name for ChunkFetchingV1.
    pub req_v1_protocol_name: ProtocolName,

    /// Protocol name for ChunkFetchingV2.
    pub req_v2_protocol_name: ProtocolName,

    /// Whether or not chunk mapping is enabled.
    pub chunk_mapping_enabled: bool,

    /// Channel to the erasure task handler.
    pub erasure_task_tx: mpsc::Sender<ErasureTask>,
}
|
||||
|
||||
/// A stateful reconstruction of availability data in reference to
/// a candidate hash.
pub struct RecoveryTask<Sender: overseer::AvailabilityRecoverySenderTrait> {
    /// Handle used to send messages (e.g. availability-store queries) to other subsystems.
    sender: Sender,
    /// Parameters shared by every strategy of this task.
    params: RecoveryParams,
    /// Strategies still to be tried; `run` consumes them from the front.
    strategies: VecDeque<Box<dyn RecoveryStrategy<Sender>>>,
    /// Recovery state passed mutably to each strategy that is run.
    state: State,
}
|
||||
|
||||
impl<Sender> RecoveryTask<Sender>
|
||||
where
|
||||
Sender: overseer::AvailabilityRecoverySenderTrait,
|
||||
{
|
||||
/// Instantiate a new recovery task.
|
||||
pub fn new(
|
||||
sender: Sender,
|
||||
params: RecoveryParams,
|
||||
strategies: VecDeque<Box<dyn RecoveryStrategy<Sender>>>,
|
||||
) -> Self {
|
||||
Self { sender, params, strategies, state: State::new() }
|
||||
}
|
||||
|
||||
async fn in_availability_store(&mut self) -> Option<AvailableData> {
|
||||
if !self.params.bypass_availability_store {
|
||||
let (tx, rx) = oneshot::channel();
|
||||
self.sender
|
||||
.send_message(AvailabilityStoreMessage::QueryAvailableData(
|
||||
self.params.candidate_hash,
|
||||
tx,
|
||||
))
|
||||
.await;
|
||||
|
||||
match rx.await {
|
||||
Ok(Some(data)) => return Some(data),
|
||||
Ok(None) => {},
|
||||
Err(oneshot::Canceled) => {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
candidate_hash = ?self.params.candidate_hash,
|
||||
"Failed to reach the availability store",
|
||||
)
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// Run this recovery task to completion. It will loop through the configured strategies
|
||||
/// in-order and return whenever the first one recovers the full `AvailableData`.
|
||||
pub async fn run(mut self) -> Result<AvailableData, RecoveryError> {
|
||||
if let Some(data) = self.in_availability_store().await {
|
||||
return Ok(data);
|
||||
}
|
||||
|
||||
self.params.metrics.on_recovery_started();
|
||||
|
||||
let _timer = self.params.metrics.time_full_recovery();
|
||||
|
||||
while let Some(current_strategy) = self.strategies.pop_front() {
|
||||
let display_name = current_strategy.display_name();
|
||||
let strategy_type = current_strategy.strategy_type();
|
||||
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
candidate_hash = ?self.params.candidate_hash,
|
||||
"Starting `{}` strategy",
|
||||
display_name
|
||||
);
|
||||
|
||||
let res = current_strategy.run(&mut self.state, &mut self.sender, &self.params).await;
|
||||
|
||||
match res {
|
||||
Err(RecoveryError::Unavailable) =>
|
||||
if self.strategies.front().is_some() {
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
candidate_hash = ?self.params.candidate_hash,
|
||||
"Recovery strategy `{}` did not conclude. Trying the next one.",
|
||||
display_name
|
||||
);
|
||||
continue;
|
||||
},
|
||||
Err(err) => {
|
||||
match &err {
|
||||
RecoveryError::Invalid =>
|
||||
self.params.metrics.on_recovery_invalid(strategy_type),
|
||||
_ => self.params.metrics.on_recovery_failed(strategy_type),
|
||||
}
|
||||
return Err(err);
|
||||
},
|
||||
Ok(data) => {
|
||||
self.params.metrics.on_recovery_succeeded(strategy_type, data.encoded_size());
|
||||
return Ok(data);
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// We have no other strategies to try.
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
candidate_hash = ?self.params.candidate_hash,
|
||||
"Recovery of available data failed.",
|
||||
);
|
||||
|
||||
self.params.metrics.on_recovery_failed("all");
|
||||
|
||||
Err(RecoveryError::Unavailable)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,334 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use crate::{
|
||||
futures_undead::FuturesUndead,
|
||||
task::{
|
||||
strategy::{
|
||||
do_post_recovery_check, is_unavailable, OngoingRequests, N_PARALLEL,
|
||||
REGULAR_CHUNKS_REQ_RETRY_LIMIT,
|
||||
},
|
||||
RecoveryParams, State,
|
||||
},
|
||||
ErasureTask, RecoveryStrategy, LOG_TARGET,
|
||||
};
|
||||
|
||||
use pezkuwi_node_primitives::AvailableData;
|
||||
use pezkuwi_node_subsystem::{overseer, RecoveryError};
|
||||
use pezkuwi_primitives::ValidatorIndex;
|
||||
|
||||
use futures::{channel::oneshot, SinkExt};
|
||||
use rand::seq::SliceRandom;
|
||||
use std::collections::VecDeque;
|
||||
|
||||
/// Configuration consumed by `FetchChunks::new`.
pub struct FetchChunksParams {
    /// Number of validators. `FetchChunks::new` builds its request queue from the validator
    /// indices `0..n_validators`.
    pub n_validators: usize,
}
|
||||
|
||||
/// `RecoveryStrategy` that requests chunks from validators, in parallel.
|
||||
pub struct FetchChunks {
|
||||
/// How many requests have been unsuccessful so far.
|
||||
error_count: usize,
|
||||
/// Total number of responses that have been received, including failed ones.
|
||||
total_received_responses: usize,
|
||||
/// A shuffled array of validator indices.
|
||||
validators: VecDeque<ValidatorIndex>,
|
||||
/// Collection of in-flight requests.
|
||||
requesting_chunks: OngoingRequests,
|
||||
}
|
||||
|
||||
impl FetchChunks {
|
||||
/// Instantiate a new strategy.
|
||||
pub fn new(params: FetchChunksParams) -> Self {
|
||||
// Shuffle the validators to make sure that we don't request chunks from the same
|
||||
// validators over and over.
|
||||
let mut validators: VecDeque<ValidatorIndex> =
|
||||
(0..params.n_validators).map(|i| ValidatorIndex(i as u32)).collect();
|
||||
validators.make_contiguous().shuffle(&mut rand::thread_rng());
|
||||
|
||||
Self {
|
||||
error_count: 0,
|
||||
total_received_responses: 0,
|
||||
validators,
|
||||
requesting_chunks: FuturesUndead::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn is_unavailable(
|
||||
unrequested_validators: usize,
|
||||
in_flight_requests: usize,
|
||||
chunk_count: usize,
|
||||
threshold: usize,
|
||||
) -> bool {
|
||||
is_unavailable(chunk_count, in_flight_requests, unrequested_validators, threshold)
|
||||
}
|
||||
|
||||
/// Desired number of parallel requests.
|
||||
///
|
||||
/// For the given threshold (total required number of chunks) get the desired number of
|
||||
/// requests we want to have running in parallel at this time.
|
||||
fn get_desired_request_count(&self, chunk_count: usize, threshold: usize) -> usize {
|
||||
// Upper bound for parallel requests.
|
||||
// We want to limit this, so requests can be processed within the timeout and we limit the
|
||||
// following feedback loop:
|
||||
// 1. Requests fail due to timeout
|
||||
// 2. We request more chunks to make up for it
|
||||
// 3. Bandwidth is spread out even more, so we get even more timeouts
|
||||
// 4. We request more chunks to make up for it ...
|
||||
let max_requests_boundary = std::cmp::min(N_PARALLEL, threshold);
|
||||
// How many chunks are still needed?
|
||||
let remaining_chunks = threshold.saturating_sub(chunk_count);
|
||||
// What is the current error rate, so we can make up for it?
|
||||
let inv_error_rate =
|
||||
self.total_received_responses.checked_div(self.error_count).unwrap_or(0);
|
||||
// Actual number of requests we want to have in flight in parallel:
|
||||
std::cmp::min(
|
||||
max_requests_boundary,
|
||||
remaining_chunks + remaining_chunks.checked_div(inv_error_rate).unwrap_or(0),
|
||||
)
|
||||
}
|
||||
|
||||
async fn attempt_recovery<Sender: overseer::AvailabilityRecoverySenderTrait>(
|
||||
&mut self,
|
||||
state: &mut State,
|
||||
common_params: &RecoveryParams,
|
||||
) -> Result<AvailableData, RecoveryError> {
|
||||
let recovery_duration =
|
||||
common_params
|
||||
.metrics
|
||||
.time_erasure_recovery(RecoveryStrategy::<Sender>::strategy_type(self));
|
||||
|
||||
// Send request to reconstruct available data from chunks.
|
||||
let (avilable_data_tx, available_data_rx) = oneshot::channel();
|
||||
|
||||
let mut erasure_task_tx = common_params.erasure_task_tx.clone();
|
||||
erasure_task_tx
|
||||
.send(ErasureTask::Reconstruct(
|
||||
common_params.n_validators,
|
||||
// Safe to leave an empty vec in place, as we're stopping the recovery process if
|
||||
// this reconstruct fails.
|
||||
std::mem::take(&mut state.received_chunks)
|
||||
.into_iter()
|
||||
.map(|(c_index, chunk)| (c_index, chunk.chunk))
|
||||
.collect(),
|
||||
avilable_data_tx,
|
||||
))
|
||||
.await
|
||||
.map_err(|_| RecoveryError::ChannelClosed)?;
|
||||
|
||||
let available_data_response =
|
||||
available_data_rx.await.map_err(|_| RecoveryError::ChannelClosed)?;
|
||||
|
||||
match available_data_response {
|
||||
// Attempt post-recovery check.
|
||||
Ok(data) => do_post_recovery_check(common_params, data)
|
||||
.await
|
||||
.inspect_err(|_| {
|
||||
recovery_duration.map(|rd| rd.stop_and_discard());
|
||||
})
|
||||
.inspect(|_| {
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
candidate_hash = ?common_params.candidate_hash,
|
||||
erasure_root = ?common_params.erasure_root,
|
||||
"Data recovery from chunks complete",
|
||||
);
|
||||
}),
|
||||
Err(err) => {
|
||||
recovery_duration.map(|rd| rd.stop_and_discard());
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
candidate_hash = ?common_params.candidate_hash,
|
||||
erasure_root = ?common_params.erasure_root,
|
||||
?err,
|
||||
"Data recovery error",
|
||||
);
|
||||
|
||||
Err(RecoveryError::Invalid)
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl<Sender: overseer::AvailabilityRecoverySenderTrait> RecoveryStrategy<Sender> for FetchChunks {
|
||||
fn display_name(&self) -> &'static str {
|
||||
"Fetch chunks"
|
||||
}
|
||||
|
||||
fn strategy_type(&self) -> &'static str {
|
||||
"regular_chunks"
|
||||
}
|
||||
|
||||
async fn run(
|
||||
mut self: Box<Self>,
|
||||
state: &mut State,
|
||||
sender: &mut Sender,
|
||||
common_params: &RecoveryParams,
|
||||
) -> Result<AvailableData, RecoveryError> {
|
||||
// First query the store for any chunks we've got.
|
||||
if !common_params.bypass_availability_store {
|
||||
let local_chunk_indices = state.populate_from_av_store(common_params, sender).await;
|
||||
self.validators.retain(|validator_index| {
|
||||
!local_chunk_indices.iter().any(|(v_index, _)| v_index == validator_index)
|
||||
});
|
||||
}
|
||||
|
||||
// No need to query the validators that have the chunks we already received or that we know
|
||||
// don't have the data from previous strategies.
|
||||
self.validators.retain(|v_index| {
|
||||
!state.received_chunks.values().any(|c| v_index == &c.validator_index) &&
|
||||
state.can_retry_request(
|
||||
&(common_params.validator_authority_keys[v_index.0 as usize].clone(), *v_index),
|
||||
REGULAR_CHUNKS_REQ_RETRY_LIMIT,
|
||||
)
|
||||
});
|
||||
|
||||
// Safe to `take` here, as we're consuming `self` anyway and we're not using the
|
||||
// `validators` field in other methods.
|
||||
let mut validators_queue: VecDeque<_> = std::mem::take(&mut self.validators)
|
||||
.into_iter()
|
||||
.map(|validator_index| {
|
||||
(
|
||||
common_params.validator_authority_keys[validator_index.0 as usize].clone(),
|
||||
validator_index,
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
|
||||
loop {
|
||||
// If received_chunks has more than threshold entries, attempt to recover the data.
|
||||
// If that fails, or a re-encoding of it doesn't match the expected erasure root,
|
||||
// return Err(RecoveryError::Invalid).
|
||||
// Do this before requesting any chunks because we may have enough of them coming from
|
||||
// past RecoveryStrategies.
|
||||
if state.chunk_count() >= common_params.threshold {
|
||||
return self.attempt_recovery::<Sender>(state, common_params).await;
|
||||
}
|
||||
|
||||
if Self::is_unavailable(
|
||||
validators_queue.len(),
|
||||
self.requesting_chunks.total_len(),
|
||||
state.chunk_count(),
|
||||
common_params.threshold,
|
||||
) {
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
candidate_hash = ?common_params.candidate_hash,
|
||||
erasure_root = ?common_params.erasure_root,
|
||||
received = %state.chunk_count(),
|
||||
requesting = %self.requesting_chunks.len(),
|
||||
total_requesting = %self.requesting_chunks.total_len(),
|
||||
n_validators = %common_params.n_validators,
|
||||
"Data recovery from chunks is not possible",
|
||||
);
|
||||
|
||||
return Err(RecoveryError::Unavailable);
|
||||
}
|
||||
|
||||
let desired_requests_count =
|
||||
self.get_desired_request_count(state.chunk_count(), common_params.threshold);
|
||||
let already_requesting_count = self.requesting_chunks.len();
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
?common_params.candidate_hash,
|
||||
?desired_requests_count,
|
||||
error_count= ?self.error_count,
|
||||
total_received = ?self.total_received_responses,
|
||||
threshold = ?common_params.threshold,
|
||||
?already_requesting_count,
|
||||
"Requesting availability chunks for a candidate",
|
||||
);
|
||||
|
||||
let strategy_type = RecoveryStrategy::<Sender>::strategy_type(&*self);
|
||||
|
||||
state
|
||||
.launch_parallel_chunk_requests(
|
||||
strategy_type,
|
||||
common_params,
|
||||
sender,
|
||||
desired_requests_count,
|
||||
&mut validators_queue,
|
||||
&mut self.requesting_chunks,
|
||||
)
|
||||
.await;
|
||||
|
||||
let (total_responses, error_count) = state
|
||||
.wait_for_chunks(
|
||||
strategy_type,
|
||||
common_params,
|
||||
REGULAR_CHUNKS_REQ_RETRY_LIMIT,
|
||||
&mut validators_queue,
|
||||
&mut self.requesting_chunks,
|
||||
&mut vec![],
|
||||
|unrequested_validators,
|
||||
in_flight_reqs,
|
||||
chunk_count,
|
||||
_systematic_chunk_count| {
|
||||
chunk_count >= common_params.threshold ||
|
||||
Self::is_unavailable(
|
||||
unrequested_validators,
|
||||
in_flight_reqs,
|
||||
chunk_count,
|
||||
common_params.threshold,
|
||||
)
|
||||
},
|
||||
)
|
||||
.await;
|
||||
|
||||
self.total_received_responses += total_responses;
|
||||
self.error_count += error_count;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use pezkuwi_erasure_coding::recovery_threshold;

    /// Exercises `FetchChunks::get_desired_request_count` for different error rates and for
    /// both upper bounds (`threshold` and `N_PARALLEL`).
    #[test]
    fn test_get_desired_request_count() {
        let n_validators = 100;
        let threshold = recovery_threshold(n_validators).unwrap();

        let mut fetch_chunks_task = FetchChunks::new(FetchChunksParams { n_validators });
        assert_eq!(fetch_chunks_task.get_desired_request_count(0, threshold), threshold);
        fetch_chunks_task.error_count = 1;
        fetch_chunks_task.total_received_responses = 1;
        // We saturate at threshold (34):
        assert_eq!(fetch_chunks_task.get_desired_request_count(0, threshold), threshold);

        // We saturate at the parallel limit.
        assert_eq!(fetch_chunks_task.get_desired_request_count(0, N_PARALLEL + 2), N_PARALLEL);

        fetch_chunks_task.total_received_responses = 2;
        // With given error rate - still saturating:
        assert_eq!(fetch_chunks_task.get_desired_request_count(1, threshold), threshold);
        fetch_chunks_task.total_received_responses = 10;
        // error rate: 1/10, i.e. an inverse error rate of 10
        // remaining chunks needed: threshold (34) - 9 = 25
        // expected: 25 + 25 / 10 (integer division) = 25 + 2 = 27
        assert_eq!(fetch_chunks_task.get_desired_request_count(9, threshold), 27);
        // We saturate at the parallel limit.
        assert_eq!(fetch_chunks_task.get_desired_request_count(9, N_PARALLEL + 9), N_PARALLEL);

        fetch_chunks_task.error_count = 0;
        // With error count zero - we should fetch exactly as needed:
        assert_eq!(fetch_chunks_task.get_desired_request_count(10, threshold), threshold - 10);
    }
}
|
||||
@@ -0,0 +1,174 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use crate::{
|
||||
task::{RecoveryParams, RecoveryStrategy, State},
|
||||
ErasureTask, PostRecoveryCheck, LOG_TARGET,
|
||||
};
|
||||
|
||||
use pezkuwi_node_network_protocol::request_response::{
|
||||
self as req_res, outgoing::RequestError, OutgoingRequest, Recipient, Requests,
|
||||
};
|
||||
use pezkuwi_node_primitives::AvailableData;
|
||||
use pezkuwi_node_subsystem::{messages::NetworkBridgeTxMessage, overseer, RecoveryError};
|
||||
use pezkuwi_primitives::ValidatorIndex;
|
||||
use sc_network::{IfDisconnected, OutboundFailure, RequestFailure};
|
||||
|
||||
use futures::{channel::oneshot, SinkExt};
|
||||
use rand::seq::SliceRandom;
|
||||
|
||||
/// Parameters specific to the `FetchFull` strategy.
|
||||
pub struct FetchFullParams {
|
||||
/// Validators that will be used for fetching the data.
|
||||
pub validators: Vec<ValidatorIndex>,
|
||||
}
|
||||
|
||||
/// `RecoveryStrategy` that sequentially tries to fetch the full `AvailableData` from
|
||||
/// already-connected validators in the configured validator set.
|
||||
pub struct FetchFull {
|
||||
params: FetchFullParams,
|
||||
}
|
||||
|
||||
impl FetchFull {
|
||||
/// Create a new `FetchFull` recovery strategy.
|
||||
pub fn new(mut params: FetchFullParams) -> Self {
|
||||
params.validators.shuffle(&mut rand::thread_rng());
|
||||
Self { params }
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl<Sender: overseer::AvailabilityRecoverySenderTrait> RecoveryStrategy<Sender> for FetchFull {
|
||||
fn display_name(&self) -> &'static str {
|
||||
"Full recovery from backers"
|
||||
}
|
||||
|
||||
fn strategy_type(&self) -> &'static str {
|
||||
"full_from_backers"
|
||||
}
|
||||
|
||||
async fn run(
|
||||
mut self: Box<Self>,
|
||||
_: &mut State,
|
||||
sender: &mut Sender,
|
||||
common_params: &RecoveryParams,
|
||||
) -> Result<AvailableData, RecoveryError> {
|
||||
let strategy_type = RecoveryStrategy::<Sender>::strategy_type(&*self);
|
||||
|
||||
loop {
|
||||
// Pop the next validator.
|
||||
let validator_index =
|
||||
self.params.validators.pop().ok_or_else(|| RecoveryError::Unavailable)?;
|
||||
|
||||
// Request data.
|
||||
let (req, response) = OutgoingRequest::new(
|
||||
Recipient::Authority(
|
||||
common_params.validator_authority_keys[validator_index.0 as usize].clone(),
|
||||
),
|
||||
req_res::v1::AvailableDataFetchingRequest {
|
||||
candidate_hash: common_params.candidate_hash,
|
||||
},
|
||||
);
|
||||
|
||||
sender
|
||||
.send_message(NetworkBridgeTxMessage::SendRequests(
|
||||
vec![Requests::AvailableDataFetchingV1(req)],
|
||||
IfDisconnected::ImmediateError,
|
||||
))
|
||||
.await;
|
||||
|
||||
common_params.metrics.on_full_request_issued();
|
||||
|
||||
match response.await {
|
||||
Ok(req_res::v1::AvailableDataFetchingResponse::AvailableData(data)) => {
|
||||
let recovery_duration =
|
||||
common_params.metrics.time_erasure_recovery(strategy_type);
|
||||
let maybe_data = match common_params.post_recovery_check {
|
||||
PostRecoveryCheck::Reencode => {
|
||||
let (reencode_tx, reencode_rx) = oneshot::channel();
|
||||
let mut erasure_task_tx = common_params.erasure_task_tx.clone();
|
||||
|
||||
erasure_task_tx
|
||||
.send(ErasureTask::Reencode(
|
||||
common_params.n_validators,
|
||||
common_params.erasure_root,
|
||||
data,
|
||||
reencode_tx,
|
||||
))
|
||||
.await
|
||||
.map_err(|_| RecoveryError::ChannelClosed)?;
|
||||
|
||||
reencode_rx.await.map_err(|_| RecoveryError::ChannelClosed)?
|
||||
},
|
||||
PostRecoveryCheck::PovHash =>
|
||||
(data.pov.hash() == common_params.pov_hash).then_some(data),
|
||||
};
|
||||
|
||||
match maybe_data {
|
||||
Some(data) => {
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
candidate_hash = ?common_params.candidate_hash,
|
||||
"Received full data",
|
||||
);
|
||||
|
||||
common_params.metrics.on_full_request_succeeded();
|
||||
return Ok(data);
|
||||
},
|
||||
None => {
|
||||
common_params.metrics.on_full_request_invalid();
|
||||
recovery_duration.map(|rd| rd.stop_and_discard());
|
||||
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
candidate_hash = ?common_params.candidate_hash,
|
||||
?validator_index,
|
||||
"Invalid data response",
|
||||
);
|
||||
|
||||
// it doesn't help to report the peer with req/res.
|
||||
// we'll try the next backer.
|
||||
},
|
||||
}
|
||||
},
|
||||
Ok(req_res::v1::AvailableDataFetchingResponse::NoSuchData) => {
|
||||
common_params.metrics.on_full_request_no_such_data();
|
||||
},
|
||||
Err(e) => {
|
||||
match &e {
|
||||
RequestError::Canceled(_) => common_params.metrics.on_full_request_error(),
|
||||
RequestError::InvalidResponse(_) =>
|
||||
common_params.metrics.on_full_request_invalid(),
|
||||
RequestError::NetworkError(req_failure) => {
|
||||
if let RequestFailure::Network(OutboundFailure::Timeout) = req_failure {
|
||||
common_params.metrics.on_full_request_timeout();
|
||||
} else {
|
||||
common_params.metrics.on_full_request_error();
|
||||
}
|
||||
},
|
||||
};
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
candidate_hash = ?common_params.candidate_hash,
|
||||
?validator_index,
|
||||
err = ?e,
|
||||
"Error fetching full available data."
|
||||
);
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,341 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use crate::{
|
||||
futures_undead::FuturesUndead,
|
||||
task::{
|
||||
strategy::{
|
||||
do_post_recovery_check, is_unavailable, OngoingRequests, N_PARALLEL,
|
||||
SYSTEMATIC_CHUNKS_REQ_RETRY_LIMIT,
|
||||
},
|
||||
RecoveryParams, RecoveryStrategy, State,
|
||||
},
|
||||
LOG_TARGET,
|
||||
};
|
||||
|
||||
use pezkuwi_node_primitives::AvailableData;
|
||||
use pezkuwi_node_subsystem::{overseer, RecoveryError};
|
||||
use pezkuwi_primitives::{ChunkIndex, ValidatorIndex};
|
||||
|
||||
use std::collections::VecDeque;
|
||||
|
||||
/// Parameters needed for fetching systematic chunks.
|
||||
pub struct FetchSystematicChunksParams {
|
||||
/// Validators that hold the systematic chunks.
|
||||
pub validators: Vec<(ChunkIndex, ValidatorIndex)>,
|
||||
/// Validators in the backing group, to be used as a backup for requesting systematic chunks.
|
||||
pub backers: Vec<ValidatorIndex>,
|
||||
}
|
||||
|
||||
/// `RecoveryStrategy` that attempts to recover the systematic chunks from the validators that
|
||||
/// hold them, in order to bypass the erasure code reconstruction step, which is costly.
|
||||
pub struct FetchSystematicChunks {
|
||||
/// Systematic recovery threshold.
|
||||
threshold: usize,
|
||||
/// Validators that hold the systematic chunks.
|
||||
validators: Vec<(ChunkIndex, ValidatorIndex)>,
|
||||
/// Backers to be used as a backup.
|
||||
backers: Vec<ValidatorIndex>,
|
||||
/// Collection of in-flight requests.
|
||||
requesting_chunks: OngoingRequests,
|
||||
}
|
||||
|
||||
impl FetchSystematicChunks {
|
||||
/// Instantiate a new systematic chunks strategy.
|
||||
pub fn new(params: FetchSystematicChunksParams) -> Self {
|
||||
Self {
|
||||
threshold: params.validators.len(),
|
||||
validators: params.validators,
|
||||
backers: params.backers,
|
||||
requesting_chunks: FuturesUndead::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn is_unavailable(
|
||||
unrequested_validators: usize,
|
||||
in_flight_requests: usize,
|
||||
systematic_chunk_count: usize,
|
||||
threshold: usize,
|
||||
) -> bool {
|
||||
is_unavailable(
|
||||
systematic_chunk_count,
|
||||
in_flight_requests,
|
||||
unrequested_validators,
|
||||
threshold,
|
||||
)
|
||||
}
|
||||
|
||||
/// Desired number of parallel requests.
|
||||
///
|
||||
/// For the given threshold (total required number of chunks) get the desired number of
|
||||
/// requests we want to have running in parallel at this time.
|
||||
fn get_desired_request_count(&self, chunk_count: usize, threshold: usize) -> usize {
|
||||
// Upper bound for parallel requests.
|
||||
let max_requests_boundary = std::cmp::min(N_PARALLEL, threshold);
|
||||
// How many chunks are still needed?
|
||||
let remaining_chunks = threshold.saturating_sub(chunk_count);
|
||||
// Actual number of requests we want to have in flight in parallel:
|
||||
// We don't have to make up for any error rate, as an error fetching a systematic chunk
|
||||
// results in failure of the entire strategy.
|
||||
std::cmp::min(max_requests_boundary, remaining_chunks)
|
||||
}
|
||||
|
||||
async fn attempt_systematic_recovery<Sender: overseer::AvailabilityRecoverySenderTrait>(
|
||||
&mut self,
|
||||
state: &mut State,
|
||||
common_params: &RecoveryParams,
|
||||
) -> Result<AvailableData, RecoveryError> {
|
||||
let strategy_type = RecoveryStrategy::<Sender>::strategy_type(self);
|
||||
let recovery_duration = common_params.metrics.time_erasure_recovery(strategy_type);
|
||||
let reconstruct_duration = common_params.metrics.time_erasure_reconstruct(strategy_type);
|
||||
let chunks = state
|
||||
.received_chunks
|
||||
.range(
|
||||
ChunkIndex(0)..
|
||||
ChunkIndex(
|
||||
u32::try_from(self.threshold)
|
||||
.expect("validator count should not exceed u32"),
|
||||
),
|
||||
)
|
||||
.map(|(_, chunk)| chunk.chunk.clone())
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let available_data = pezkuwi_erasure_coding::reconstruct_from_systematic_v1(
|
||||
common_params.n_validators,
|
||||
chunks,
|
||||
);
|
||||
|
||||
match available_data {
|
||||
Ok(data) => {
|
||||
drop(reconstruct_duration);
|
||||
|
||||
// Attempt post-recovery check.
|
||||
do_post_recovery_check(common_params, data)
|
||||
.await
|
||||
.inspect_err(|_| {
|
||||
recovery_duration.map(|rd| rd.stop_and_discard());
|
||||
})
|
||||
.inspect(|_| {
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
candidate_hash = ?common_params.candidate_hash,
|
||||
erasure_root = ?common_params.erasure_root,
|
||||
"Data recovery from systematic chunks complete",
|
||||
);
|
||||
})
|
||||
},
|
||||
Err(err) => {
|
||||
reconstruct_duration.map(|rd| rd.stop_and_discard());
|
||||
recovery_duration.map(|rd| rd.stop_and_discard());
|
||||
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
candidate_hash = ?common_params.candidate_hash,
|
||||
erasure_root = ?common_params.erasure_root,
|
||||
?err,
|
||||
"Systematic data recovery error",
|
||||
);
|
||||
|
||||
Err(RecoveryError::Invalid)
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl<Sender: overseer::AvailabilityRecoverySenderTrait> RecoveryStrategy<Sender>
|
||||
for FetchSystematicChunks
|
||||
{
|
||||
fn display_name(&self) -> &'static str {
|
||||
"Fetch systematic chunks"
|
||||
}
|
||||
|
||||
fn strategy_type(&self) -> &'static str {
|
||||
"systematic_chunks"
|
||||
}
|
||||
|
||||
async fn run(
|
||||
mut self: Box<Self>,
|
||||
state: &mut State,
|
||||
sender: &mut Sender,
|
||||
common_params: &RecoveryParams,
|
||||
) -> Result<AvailableData, RecoveryError> {
|
||||
// First query the store for any chunks we've got.
|
||||
if !common_params.bypass_availability_store {
|
||||
let local_chunk_indices = state.populate_from_av_store(common_params, sender).await;
|
||||
|
||||
for (_, our_c_index) in &local_chunk_indices {
|
||||
// If we are among the systematic validators but hold an invalid chunk, we cannot
|
||||
// perform the systematic recovery. Fall through to the next strategy.
|
||||
if self.validators.iter().any(|(c_index, _)| c_index == our_c_index) &&
|
||||
!state.received_chunks.contains_key(our_c_index)
|
||||
{
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
candidate_hash = ?common_params.candidate_hash,
|
||||
erasure_root = ?common_params.erasure_root,
|
||||
requesting = %self.requesting_chunks.len(),
|
||||
total_requesting = %self.requesting_chunks.total_len(),
|
||||
n_validators = %common_params.n_validators,
|
||||
chunk_index = ?our_c_index,
|
||||
"Systematic chunk recovery is not possible. We are among the systematic validators but hold an invalid chunk",
|
||||
);
|
||||
return Err(RecoveryError::Unavailable);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// No need to query the validators that have the chunks we already received or that we know
|
||||
// don't have the data from previous strategies.
|
||||
self.validators.retain(|(c_index, v_index)| {
|
||||
!state.received_chunks.contains_key(c_index) &&
|
||||
state.can_retry_request(
|
||||
&(common_params.validator_authority_keys[v_index.0 as usize].clone(), *v_index),
|
||||
SYSTEMATIC_CHUNKS_REQ_RETRY_LIMIT,
|
||||
)
|
||||
});
|
||||
|
||||
let mut systematic_chunk_count = state
|
||||
.received_chunks
|
||||
.range(ChunkIndex(0)..ChunkIndex(self.threshold as u32))
|
||||
.count();
|
||||
|
||||
// Safe to `take` here, as we're consuming `self` anyway and we're not using the
|
||||
// `validators` or `backers` fields in other methods.
|
||||
let mut validators_queue: VecDeque<_> = std::mem::take(&mut self.validators)
|
||||
.into_iter()
|
||||
.map(|(_, validator_index)| {
|
||||
(
|
||||
common_params.validator_authority_keys[validator_index.0 as usize].clone(),
|
||||
validator_index,
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
let mut backers: Vec<_> = std::mem::take(&mut self.backers)
|
||||
.into_iter()
|
||||
.map(|validator_index| {
|
||||
common_params.validator_authority_keys[validator_index.0 as usize].clone()
|
||||
})
|
||||
.collect();
|
||||
|
||||
loop {
|
||||
// If received_chunks has `systematic_chunk_threshold` entries, attempt to recover the
|
||||
// data.
|
||||
if systematic_chunk_count >= self.threshold {
|
||||
return self.attempt_systematic_recovery::<Sender>(state, common_params).await;
|
||||
}
|
||||
|
||||
if Self::is_unavailable(
|
||||
validators_queue.len(),
|
||||
self.requesting_chunks.total_len(),
|
||||
systematic_chunk_count,
|
||||
self.threshold,
|
||||
) {
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
candidate_hash = ?common_params.candidate_hash,
|
||||
erasure_root = ?common_params.erasure_root,
|
||||
%systematic_chunk_count,
|
||||
requesting = %self.requesting_chunks.len(),
|
||||
total_requesting = %self.requesting_chunks.total_len(),
|
||||
n_validators = %common_params.n_validators,
|
||||
systematic_threshold = ?self.threshold,
|
||||
"Data recovery from systematic chunks is not possible",
|
||||
);
|
||||
|
||||
return Err(RecoveryError::Unavailable);
|
||||
}
|
||||
|
||||
let desired_requests_count =
|
||||
self.get_desired_request_count(systematic_chunk_count, self.threshold);
|
||||
let already_requesting_count = self.requesting_chunks.len();
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
?common_params.candidate_hash,
|
||||
?desired_requests_count,
|
||||
total_received = ?systematic_chunk_count,
|
||||
systematic_threshold = ?self.threshold,
|
||||
?already_requesting_count,
|
||||
"Requesting systematic availability chunks for a candidate",
|
||||
);
|
||||
|
||||
let strategy_type = RecoveryStrategy::<Sender>::strategy_type(&*self);
|
||||
|
||||
state
|
||||
.launch_parallel_chunk_requests(
|
||||
strategy_type,
|
||||
common_params,
|
||||
sender,
|
||||
desired_requests_count,
|
||||
&mut validators_queue,
|
||||
&mut self.requesting_chunks,
|
||||
)
|
||||
.await;
|
||||
|
||||
let _ = state
|
||||
.wait_for_chunks(
|
||||
strategy_type,
|
||||
common_params,
|
||||
SYSTEMATIC_CHUNKS_REQ_RETRY_LIMIT,
|
||||
&mut validators_queue,
|
||||
&mut self.requesting_chunks,
|
||||
&mut backers,
|
||||
|unrequested_validators,
|
||||
in_flight_reqs,
|
||||
// Don't use this chunk count, as it may contain non-systematic chunks.
|
||||
_chunk_count,
|
||||
new_systematic_chunk_count| {
|
||||
systematic_chunk_count = new_systematic_chunk_count;
|
||||
|
||||
let is_unavailable = Self::is_unavailable(
|
||||
unrequested_validators,
|
||||
in_flight_reqs,
|
||||
systematic_chunk_count,
|
||||
self.threshold,
|
||||
);
|
||||
|
||||
systematic_chunk_count >= self.threshold || is_unavailable
|
||||
},
|
||||
)
|
||||
.await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use pezkuwi_erasure_coding::systematic_recovery_threshold;

    /// Exercises `FetchSystematicChunks::get_desired_request_count`: the result is the number
    /// of missing chunks, bounded by `N_PARALLEL`, and zero once enough chunks are held.
    #[test]
    fn test_get_desired_request_count() {
        let num_validators = 100;
        let threshold = systematic_recovery_threshold(num_validators).unwrap();

        let params = FetchSystematicChunksParams {
            validators: vec![(1.into(), 1.into()); num_validators],
            backers: vec![],
        };
        let task = FetchSystematicChunks::new(params);

        // Nothing held yet: everything up to the threshold is wanted.
        assert_eq!(task.get_desired_request_count(0, threshold), threshold);
        assert_eq!(task.get_desired_request_count(5, threshold), threshold - 5);
        // More chunks than needed: nothing is wanted.
        assert_eq!(task.get_desired_request_count(num_validators * 2, threshold), 0);
        // The parallel limit caps the result.
        assert_eq!(task.get_desired_request_count(0, N_PARALLEL * 2), N_PARALLEL);
        assert_eq!(task.get_desired_request_count(N_PARALLEL, N_PARALLEL + 2), 2);
    }
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,47 @@
|
||||
[package]
|
||||
name = "pezkuwi-availability-bitfield-distribution"
|
||||
version = "7.0.0"
|
||||
description = "Pezkuwi Bitfield Distribution subsystem, which gossips signed availability bitfields used to compactly determine which backed candidates are available or not based on a 2/3+ quorum."
|
||||
authors.workspace = true
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
homepage.workspace = true
|
||||
repository.workspace = true
|
||||
|
||||
[lints]
|
||||
workspace = true
|
||||
|
||||
[dependencies]
|
||||
futures = { workspace = true }
|
||||
futures-timer = { workspace = true }
|
||||
gum = { workspace = true, default-features = true }
|
||||
pezkuwi-node-network-protocol = { workspace = true, default-features = true }
|
||||
pezkuwi-node-subsystem = { workspace = true, default-features = true }
|
||||
pezkuwi-node-subsystem-util = { workspace = true, default-features = true }
|
||||
pezkuwi-primitives = { workspace = true, default-features = true }
|
||||
rand = { workspace = true, default-features = true }
|
||||
|
||||
[dev-dependencies]
|
||||
assert_matches = { workspace = true }
|
||||
bitvec = { features = ["alloc"], workspace = true }
|
||||
maplit = { workspace = true }
|
||||
pezkuwi-node-subsystem-test-helpers = { workspace = true }
|
||||
rand_chacha = { workspace = true, default-features = true }
|
||||
sp-application-crypto = { workspace = true, default-features = true }
|
||||
sp-authority-discovery = { workspace = true, default-features = true }
|
||||
sp-core = { workspace = true, default-features = true }
|
||||
sp-keyring = { workspace = true, default-features = true }
|
||||
sp-keystore = { workspace = true, default-features = true }
|
||||
sp-tracing = { workspace = true }
|
||||
|
||||
[features]
|
||||
runtime-benchmarks = [
|
||||
"gum/runtime-benchmarks",
|
||||
"pezkuwi-node-network-protocol/runtime-benchmarks",
|
||||
"pezkuwi-node-subsystem-test-helpers/runtime-benchmarks",
|
||||
"pezkuwi-node-subsystem-util/runtime-benchmarks",
|
||||
"pezkuwi-node-subsystem/runtime-benchmarks",
|
||||
"pezkuwi-primitives/runtime-benchmarks",
|
||||
"sp-authority-discovery/runtime-benchmarks",
|
||||
"sp-keyring/runtime-benchmarks",
|
||||
]
|
||||
@@ -0,0 +1,919 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! The bitfield distribution
|
||||
//!
|
||||
//! In case this node is a validator, gossips its own signed availability bitfield
|
||||
//! for a particular relay parent.
|
||||
//! Independently of that, gossips on received messages from peers to other interested peers.
|
||||
|
||||
#![deny(unused_crate_dependencies)]
|
||||
|
||||
use futures::{channel::oneshot, FutureExt};
|
||||
|
||||
use net_protocol::filter_by_peer_version;
|
||||
use pezkuwi_node_network_protocol::{
|
||||
self as net_protocol,
|
||||
grid_topology::{
|
||||
GridNeighbors, RandomRouting, RequiredRouting, SessionBoundGridTopologyStorage,
|
||||
},
|
||||
peer_set::{ProtocolVersion, ValidationVersion},
|
||||
v3 as protocol_v3, OurView, PeerId, UnifiedReputationChange as Rep, ValidationProtocols, View,
|
||||
};
|
||||
use pezkuwi_node_subsystem::{
|
||||
messages::*, overseer, ActiveLeavesUpdate, FromOrchestra, OverseerSignal, SpawnedSubsystem,
|
||||
SubsystemError, SubsystemResult,
|
||||
};
|
||||
use pezkuwi_node_subsystem_util::{
|
||||
self as util,
|
||||
reputation::{ReputationAggregator, REPUTATION_CHANGE_INTERVAL},
|
||||
};
|
||||
|
||||
use futures::select;
|
||||
use pezkuwi_primitives::{Hash, SignedAvailabilityBitfield, SigningContext, ValidatorId};
|
||||
use rand::{CryptoRng, Rng, SeedableRng};
|
||||
use std::{
|
||||
collections::{HashMap, HashSet},
|
||||
time::Duration,
|
||||
};
|
||||
|
||||
use self::metrics::Metrics;
|
||||
|
||||
mod metrics;
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
||||
const COST_SIGNATURE_INVALID: Rep = Rep::CostMajor("Bitfield signature invalid");
|
||||
const COST_VALIDATOR_INDEX_INVALID: Rep = Rep::CostMajor("Bitfield validator index invalid");
|
||||
const COST_MISSING_PEER_SESSION_KEY: Rep = Rep::CostMinor("Missing peer session key");
|
||||
const COST_NOT_IN_VIEW: Rep = Rep::CostMinor("Not interested in that parent hash");
|
||||
const COST_PEER_DUPLICATE_MESSAGE: Rep =
|
||||
Rep::CostMinorRepeated("Peer sent the same message multiple times");
|
||||
const BENEFIT_VALID_MESSAGE_FIRST: Rep =
|
||||
Rep::BenefitMinorFirst("Valid message with new information");
|
||||
const BENEFIT_VALID_MESSAGE: Rep = Rep::BenefitMinor("Valid message");
|
||||
|
||||
/// Checked signed availability bitfield that is distributed
|
||||
/// to other peers.
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
struct BitfieldGossipMessage {
|
||||
/// The relay parent this message is relative to.
|
||||
relay_parent: Hash,
|
||||
/// The actual signed availability bitfield.
|
||||
signed_availability: SignedAvailabilityBitfield,
|
||||
}
|
||||
|
||||
impl BitfieldGossipMessage {
|
||||
fn into_validation_protocol(
|
||||
self,
|
||||
recipient_version: ProtocolVersion,
|
||||
) -> net_protocol::VersionedValidationProtocol {
|
||||
self.into_network_message(recipient_version).into()
|
||||
}
|
||||
|
||||
fn into_network_message(
|
||||
self,
|
||||
recipient_version: ProtocolVersion,
|
||||
) -> net_protocol::BitfieldDistributionMessage {
|
||||
match ValidationVersion::try_from(recipient_version).ok() {
|
||||
Some(ValidationVersion::V3) =>
|
||||
ValidationProtocols::V3(protocol_v3::BitfieldDistributionMessage::Bitfield(
|
||||
self.relay_parent,
|
||||
self.signed_availability.into(),
|
||||
)),
|
||||
None => {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
version = ?recipient_version,
|
||||
"Unknown protocol version provided for message recipient"
|
||||
);
|
||||
|
||||
// fall back to v3 to avoid
|
||||
ValidationProtocols::V3(protocol_v3::BitfieldDistributionMessage::Bitfield(
|
||||
self.relay_parent,
|
||||
self.signed_availability.into(),
|
||||
))
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Data stored on a per-peer basis.
|
||||
#[derive(Debug)]
|
||||
pub struct PeerData {
|
||||
/// The peer's view.
|
||||
view: View,
|
||||
/// The peer's protocol version.
|
||||
version: ProtocolVersion,
|
||||
}
|
||||
|
||||
/// Data used to track information of peers and relay parents the
|
||||
/// overseer ordered us to work on.
|
||||
#[derive(Default)]
|
||||
struct ProtocolState {
|
||||
/// Track all active peer views and protocol versions
|
||||
/// to determine what is relevant to them.
|
||||
peer_data: HashMap<PeerId, PeerData>,
|
||||
|
||||
/// The current and previous gossip topologies
|
||||
topologies: SessionBoundGridTopologyStorage,
|
||||
|
||||
/// Our current view.
|
||||
view: OurView,
|
||||
|
||||
/// Additional data particular to a relay parent.
|
||||
per_relay_parent: HashMap<Hash, PerRelayParentData>,
|
||||
|
||||
/// Aggregated reputation change
|
||||
reputation: ReputationAggregator,
|
||||
}
|
||||
|
||||
/// Data for a particular relay parent.
|
||||
#[derive(Debug)]
|
||||
struct PerRelayParentData {
|
||||
/// Signing context for a particular relay parent.
|
||||
signing_context: SigningContext,
|
||||
|
||||
/// Set of validators for a particular relay parent.
|
||||
validator_set: Vec<ValidatorId>,
|
||||
|
||||
/// Set of validators for a particular relay parent for which we
|
||||
/// received a valid `BitfieldGossipMessage`.
|
||||
/// Also serves as the list of known messages for peers connecting
|
||||
/// after bitfield gossips were already received.
|
||||
one_per_validator: HashMap<ValidatorId, BitfieldGossipMessage>,
|
||||
|
||||
/// Avoid duplicate message transmission to our peers.
|
||||
message_sent_to_peer: HashMap<PeerId, HashSet<ValidatorId>>,
|
||||
|
||||
/// Track messages that were already received by a peer
|
||||
/// to prevent flooding.
|
||||
message_received_from_peer: HashMap<PeerId, HashSet<ValidatorId>>,
|
||||
}
|
||||
|
||||
impl PerRelayParentData {
|
||||
/// Create a new instance.
|
||||
fn new(signing_context: SigningContext, validator_set: Vec<ValidatorId>) -> Self {
|
||||
Self {
|
||||
signing_context,
|
||||
validator_set,
|
||||
one_per_validator: Default::default(),
|
||||
message_sent_to_peer: Default::default(),
|
||||
message_received_from_peer: Default::default(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Determines if that particular message signed by a
|
||||
/// validator is needed by the given peer.
|
||||
fn message_from_validator_needed_by_peer(
|
||||
&self,
|
||||
peer: &PeerId,
|
||||
signed_by: &ValidatorId,
|
||||
) -> bool {
|
||||
self.message_sent_to_peer
|
||||
.get(peer)
|
||||
.map(|pubkeys| !pubkeys.contains(signed_by))
|
||||
.unwrap_or(true) &&
|
||||
self.message_received_from_peer
|
||||
.get(peer)
|
||||
.map(|pubkeys| !pubkeys.contains(signed_by))
|
||||
.unwrap_or(true)
|
||||
}
|
||||
}
|
||||
|
||||
/// Log target attached to every log line emitted by this subsystem.
const LOG_TARGET: &str = "teyrchain::bitfield-distribution";
|
||||
|
||||
/// The bitfield distribution subsystem.
|
||||
pub struct BitfieldDistribution {
|
||||
metrics: Metrics,
|
||||
}
|
||||
|
||||
#[overseer::contextbounds(BitfieldDistribution, prefix = self::overseer)]
|
||||
impl BitfieldDistribution {
|
||||
/// Create a new instance of the `BitfieldDistribution` subsystem.
|
||||
pub fn new(metrics: Metrics) -> Self {
|
||||
Self { metrics }
|
||||
}
|
||||
|
||||
/// Start processing work as passed on from the Overseer.
|
||||
async fn run<Context>(self, ctx: Context) {
|
||||
let mut state = ProtocolState::default();
|
||||
let mut rng = rand::rngs::StdRng::from_entropy();
|
||||
self.run_inner(ctx, &mut state, REPUTATION_CHANGE_INTERVAL, &mut rng).await
|
||||
}
|
||||
|
||||
async fn run_inner<Context>(
|
||||
self,
|
||||
mut ctx: Context,
|
||||
state: &mut ProtocolState,
|
||||
reputation_interval: Duration,
|
||||
rng: &mut (impl CryptoRng + Rng),
|
||||
) {
|
||||
// work: process incoming messages from the overseer and process accordingly.
|
||||
|
||||
let new_reputation_delay = || futures_timer::Delay::new(reputation_interval).fuse();
|
||||
let mut reputation_delay = new_reputation_delay();
|
||||
|
||||
loop {
|
||||
select! {
|
||||
_ = reputation_delay => {
|
||||
state.reputation.send(ctx.sender()).await;
|
||||
reputation_delay = new_reputation_delay();
|
||||
},
|
||||
message = ctx.recv().fuse() => {
|
||||
let message = match message {
|
||||
Ok(message) => message,
|
||||
Err(err) => {
|
||||
gum::error!(
|
||||
target: LOG_TARGET,
|
||||
?err,
|
||||
"Failed to receive a message from Overseer, exiting"
|
||||
);
|
||||
return
|
||||
},
|
||||
};
|
||||
match message {
|
||||
FromOrchestra::Communication {
|
||||
msg:
|
||||
BitfieldDistributionMessage::DistributeBitfield(
|
||||
relay_parent,
|
||||
signed_availability,
|
||||
),
|
||||
} => {
|
||||
gum::trace!(target: LOG_TARGET, ?relay_parent, "Processing DistributeBitfield");
|
||||
handle_bitfield_distribution(
|
||||
&mut ctx,
|
||||
state,
|
||||
&self.metrics,
|
||||
relay_parent,
|
||||
signed_availability,
|
||||
rng,
|
||||
)
|
||||
.await;
|
||||
},
|
||||
FromOrchestra::Communication {
|
||||
msg: BitfieldDistributionMessage::NetworkBridgeUpdate(event),
|
||||
} => {
|
||||
gum::trace!(target: LOG_TARGET, "Processing NetworkMessage");
|
||||
// a network message was received
|
||||
handle_network_msg(&mut ctx, state, &self.metrics, event, rng).await;
|
||||
},
|
||||
FromOrchestra::Signal(OverseerSignal::ActiveLeaves(ActiveLeavesUpdate {
|
||||
activated,
|
||||
..
|
||||
})) => {
|
||||
let _timer = self.metrics.time_active_leaves_update();
|
||||
|
||||
if let Some(activated) = activated {
|
||||
let relay_parent = activated.hash;
|
||||
|
||||
gum::trace!(target: LOG_TARGET, ?relay_parent, "activated");
|
||||
|
||||
// query validator set and signing context per relay_parent once only
|
||||
match query_basics(&mut ctx, relay_parent).await {
|
||||
Ok(Some((validator_set, signing_context))) => {
|
||||
// If our runtime API fails, we don't take down the node,
|
||||
// but we might alter peers' reputations erroneously as a result
|
||||
// of not having the correct bookkeeping. If we have lost a race
|
||||
// with state pruning, it is unlikely that peers will be sending
|
||||
// us anything to do with this relay-parent anyway.
|
||||
let _ = state.per_relay_parent.insert(
|
||||
relay_parent,
|
||||
PerRelayParentData::new(signing_context, validator_set),
|
||||
);
|
||||
},
|
||||
Err(err) => {
|
||||
gum::warn!(target: LOG_TARGET, ?err, "query_basics has failed");
|
||||
},
|
||||
_ => {},
|
||||
}
|
||||
}
|
||||
},
|
||||
FromOrchestra::Signal(OverseerSignal::BlockFinalized(hash, number)) => {
|
||||
gum::trace!(target: LOG_TARGET, ?hash, %number, "block finalized");
|
||||
},
|
||||
FromOrchestra::Signal(OverseerSignal::Conclude) => {
|
||||
gum::info!(target: LOG_TARGET, "Conclude");
|
||||
return
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Modify the reputation of a peer based on its behavior.
|
||||
async fn modify_reputation(
|
||||
reputation: &mut ReputationAggregator,
|
||||
sender: &mut impl overseer::BitfieldDistributionSenderTrait,
|
||||
relay_parent: Hash,
|
||||
peer: PeerId,
|
||||
rep: Rep,
|
||||
) {
|
||||
gum::trace!(target: LOG_TARGET, ?relay_parent, ?rep, %peer, "reputation change");
|
||||
|
||||
reputation.modify(sender, peer, rep).await;
|
||||
}
|
||||
/// Distribute a given valid and signature checked bitfield message.
|
||||
///
|
||||
/// For this variant the source is this node.
|
||||
#[overseer::contextbounds(BitfieldDistribution, prefix=self::overseer)]
|
||||
async fn handle_bitfield_distribution<Context>(
|
||||
ctx: &mut Context,
|
||||
state: &mut ProtocolState,
|
||||
metrics: &Metrics,
|
||||
relay_parent: Hash,
|
||||
signed_availability: SignedAvailabilityBitfield,
|
||||
rng: &mut (impl CryptoRng + Rng),
|
||||
) {
|
||||
let _timer = metrics.time_handle_bitfield_distribution();
|
||||
|
||||
// Ignore anything the overseer did not tell this subsystem to work on
|
||||
let mut job_data = state.per_relay_parent.get_mut(&relay_parent);
|
||||
let job_data: &mut _ = if let Some(ref mut job_data) = job_data {
|
||||
job_data
|
||||
} else {
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
?relay_parent,
|
||||
"Not supposed to work on relay parent related data",
|
||||
);
|
||||
|
||||
return;
|
||||
};
|
||||
|
||||
let session_idx = job_data.signing_context.session_index;
|
||||
let validator_set = &job_data.validator_set;
|
||||
if validator_set.is_empty() {
|
||||
gum::debug!(target: LOG_TARGET, ?relay_parent, "validator set is empty");
|
||||
return;
|
||||
}
|
||||
|
||||
let validator_index = signed_availability.validator_index();
|
||||
let validator = if let Some(validator) = validator_set.get(validator_index.0 as usize) {
|
||||
validator.clone()
|
||||
} else {
|
||||
gum::debug!(target: LOG_TARGET, validator_index = ?validator_index.0, "Could not find a validator for index");
|
||||
return;
|
||||
};
|
||||
|
||||
let msg = BitfieldGossipMessage { relay_parent, signed_availability };
|
||||
let topology = state.topologies.get_topology_or_fallback(session_idx).local_grid_neighbors();
|
||||
let required_routing = topology.required_routing_by_index(validator_index, true);
|
||||
|
||||
relay_message(
|
||||
ctx,
|
||||
job_data,
|
||||
topology,
|
||||
&mut state.peer_data,
|
||||
validator,
|
||||
msg,
|
||||
required_routing,
|
||||
rng,
|
||||
)
|
||||
.await;
|
||||
|
||||
metrics.on_own_bitfield_sent();
|
||||
}
|
||||
|
||||
/// Distribute a given valid and signature checked bitfield message.
|
||||
///
|
||||
/// Can be originated by another subsystem or received via network from another peer.
|
||||
#[overseer::contextbounds(BitfieldDistribution, prefix=self::overseer)]
|
||||
async fn relay_message<Context>(
|
||||
ctx: &mut Context,
|
||||
job_data: &mut PerRelayParentData,
|
||||
topology_neighbors: &GridNeighbors,
|
||||
peers: &mut HashMap<PeerId, PeerData>,
|
||||
validator: ValidatorId,
|
||||
message: BitfieldGossipMessage,
|
||||
required_routing: RequiredRouting,
|
||||
rng: &mut (impl CryptoRng + Rng),
|
||||
) {
|
||||
let relay_parent = message.relay_parent;
|
||||
|
||||
// notify the overseer about a new and valid signed bitfield
|
||||
ctx.send_message(ProvisionerMessage::ProvisionableData(
|
||||
relay_parent,
|
||||
ProvisionableData::Bitfield(relay_parent, message.signed_availability.clone()),
|
||||
))
|
||||
.await;
|
||||
|
||||
let total_peers = peers.len();
|
||||
let mut random_routing: RandomRouting = Default::default();
|
||||
|
||||
// pass on the bitfield distribution to all interested peers
|
||||
let interested_peers = peers
|
||||
.iter()
|
||||
.filter_map(|(peer, data)| {
|
||||
// check interest in the peer in this message's relay parent
|
||||
if data.view.contains(&message.relay_parent) {
|
||||
let message_needed =
|
||||
job_data.message_from_validator_needed_by_peer(&peer, &validator);
|
||||
if message_needed {
|
||||
let in_topology = topology_neighbors.route_to_peer(required_routing, &peer);
|
||||
let need_routing = in_topology || {
|
||||
let route_random = random_routing.sample(total_peers, rng);
|
||||
if route_random {
|
||||
random_routing.inc_sent();
|
||||
}
|
||||
|
||||
route_random
|
||||
};
|
||||
|
||||
if need_routing {
|
||||
Some((*peer, data.version))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
} else {
|
||||
None
|
||||
}
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect::<Vec<(PeerId, ProtocolVersion)>>();
|
||||
|
||||
interested_peers.iter().for_each(|(peer, _)| {
|
||||
// track the message as sent for this peer
|
||||
job_data
|
||||
.message_sent_to_peer
|
||||
.entry(*peer)
|
||||
.or_default()
|
||||
.insert(validator.clone());
|
||||
});
|
||||
|
||||
if interested_peers.is_empty() {
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
?relay_parent,
|
||||
"no peers are interested in gossip for relay parent",
|
||||
);
|
||||
} else {
|
||||
let v3_interested_peers =
|
||||
filter_by_peer_version(&interested_peers, ValidationVersion::V3.into());
|
||||
|
||||
if !v3_interested_peers.is_empty() {
|
||||
ctx.send_message(NetworkBridgeTxMessage::SendValidationMessage(
|
||||
v3_interested_peers,
|
||||
message.into_validation_protocol(ValidationVersion::V3.into()),
|
||||
))
|
||||
.await
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Handle an incoming message from a peer.
|
||||
#[overseer::contextbounds(BitfieldDistribution, prefix=self::overseer)]
|
||||
async fn process_incoming_peer_message<Context>(
|
||||
ctx: &mut Context,
|
||||
state: &mut ProtocolState,
|
||||
metrics: &Metrics,
|
||||
origin: PeerId,
|
||||
message: net_protocol::BitfieldDistributionMessage,
|
||||
rng: &mut (impl CryptoRng + Rng),
|
||||
) {
|
||||
let (relay_parent, bitfield) = match message {
|
||||
ValidationProtocols::V3(protocol_v3::BitfieldDistributionMessage::Bitfield(
|
||||
relay_parent,
|
||||
bitfield,
|
||||
)) => (relay_parent, bitfield),
|
||||
};
|
||||
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
peer = %origin,
|
||||
?relay_parent,
|
||||
"received bitfield gossip from peer"
|
||||
);
|
||||
// we don't care about this, not part of our view.
|
||||
if !state.view.contains(&relay_parent) {
|
||||
modify_reputation(
|
||||
&mut state.reputation,
|
||||
ctx.sender(),
|
||||
relay_parent,
|
||||
origin,
|
||||
COST_NOT_IN_VIEW,
|
||||
)
|
||||
.await;
|
||||
return;
|
||||
}
|
||||
|
||||
// Ignore anything the overseer did not tell this subsystem to work on.
|
||||
let mut job_data = state.per_relay_parent.get_mut(&relay_parent);
|
||||
let job_data: &mut _ = if let Some(ref mut job_data) = job_data {
|
||||
job_data
|
||||
} else {
|
||||
modify_reputation(
|
||||
&mut state.reputation,
|
||||
ctx.sender(),
|
||||
relay_parent,
|
||||
origin,
|
||||
COST_NOT_IN_VIEW,
|
||||
)
|
||||
.await;
|
||||
return;
|
||||
};
|
||||
|
||||
let validator_index = bitfield.unchecked_validator_index();
|
||||
|
||||
let validator_set = &job_data.validator_set;
|
||||
if validator_set.is_empty() {
|
||||
gum::trace!(target: LOG_TARGET, ?relay_parent, ?origin, "Validator set is empty",);
|
||||
modify_reputation(
|
||||
&mut state.reputation,
|
||||
ctx.sender(),
|
||||
relay_parent,
|
||||
origin,
|
||||
COST_MISSING_PEER_SESSION_KEY,
|
||||
)
|
||||
.await;
|
||||
return;
|
||||
}
|
||||
|
||||
// Use the (untrusted) validator index provided by the signed payload
|
||||
// and see if that one actually signed the availability bitset.
|
||||
let signing_context = job_data.signing_context.clone();
|
||||
let validator = if let Some(validator) = validator_set.get(validator_index.0 as usize) {
|
||||
validator.clone()
|
||||
} else {
|
||||
modify_reputation(
|
||||
&mut state.reputation,
|
||||
ctx.sender(),
|
||||
relay_parent,
|
||||
origin,
|
||||
COST_VALIDATOR_INDEX_INVALID,
|
||||
)
|
||||
.await;
|
||||
return;
|
||||
};
|
||||
|
||||
// Check if the peer already sent us a message for the validator denoted in the message earlier.
|
||||
// Must be done after validator index verification, in order to avoid storing an unbounded
|
||||
// number of set entries.
|
||||
let received_set = job_data.message_received_from_peer.entry(origin).or_default();
|
||||
|
||||
if !received_set.contains(&validator) {
|
||||
received_set.insert(validator.clone());
|
||||
} else {
|
||||
gum::trace!(target: LOG_TARGET, ?validator_index, ?origin, "Duplicate message");
|
||||
modify_reputation(
|
||||
&mut state.reputation,
|
||||
ctx.sender(),
|
||||
relay_parent,
|
||||
origin,
|
||||
COST_PEER_DUPLICATE_MESSAGE,
|
||||
)
|
||||
.await;
|
||||
return;
|
||||
};
|
||||
|
||||
let one_per_validator = &mut (job_data.one_per_validator);
|
||||
|
||||
// relay a message received from a validator at most _once_
|
||||
if let Some(old_message) = one_per_validator.get(&validator) {
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
?validator_index,
|
||||
"already received a message for validator",
|
||||
);
|
||||
if old_message.signed_availability.as_unchecked() == &bitfield {
|
||||
modify_reputation(
|
||||
&mut state.reputation,
|
||||
ctx.sender(),
|
||||
relay_parent,
|
||||
origin,
|
||||
BENEFIT_VALID_MESSAGE,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
return;
|
||||
}
|
||||
let signed_availability = match bitfield.try_into_checked(&signing_context, &validator) {
|
||||
Err(_) => {
|
||||
modify_reputation(
|
||||
&mut state.reputation,
|
||||
ctx.sender(),
|
||||
relay_parent,
|
||||
origin,
|
||||
COST_SIGNATURE_INVALID,
|
||||
)
|
||||
.await;
|
||||
return;
|
||||
},
|
||||
Ok(bitfield) => bitfield,
|
||||
};
|
||||
|
||||
let message = BitfieldGossipMessage { relay_parent, signed_availability };
|
||||
|
||||
let topology = state
|
||||
.topologies
|
||||
.get_topology_or_fallback(job_data.signing_context.session_index)
|
||||
.local_grid_neighbors();
|
||||
let required_routing = topology.required_routing_by_index(validator_index, false);
|
||||
|
||||
metrics.on_bitfield_received();
|
||||
one_per_validator.insert(validator.clone(), message.clone());
|
||||
|
||||
relay_message(
|
||||
ctx,
|
||||
job_data,
|
||||
topology,
|
||||
&mut state.peer_data,
|
||||
validator,
|
||||
message,
|
||||
required_routing,
|
||||
rng,
|
||||
)
|
||||
.await;
|
||||
|
||||
modify_reputation(
|
||||
&mut state.reputation,
|
||||
ctx.sender(),
|
||||
relay_parent,
|
||||
origin,
|
||||
BENEFIT_VALID_MESSAGE_FIRST,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Deal with network bridge updates and track what needs to be tracked
|
||||
/// which depends on the message type received.
|
||||
#[overseer::contextbounds(BitfieldDistribution, prefix=self::overseer)]
|
||||
async fn handle_network_msg<Context>(
|
||||
ctx: &mut Context,
|
||||
state: &mut ProtocolState,
|
||||
metrics: &Metrics,
|
||||
bridge_message: NetworkBridgeEvent<net_protocol::BitfieldDistributionMessage>,
|
||||
rng: &mut (impl CryptoRng + Rng),
|
||||
) {
|
||||
let _timer = metrics.time_handle_network_msg();
|
||||
|
||||
match bridge_message {
|
||||
NetworkBridgeEvent::PeerConnected(peer, role, version, _) => {
|
||||
gum::trace!(target: LOG_TARGET, ?peer, ?role, "Peer connected");
|
||||
// insert if none already present
|
||||
state
|
||||
.peer_data
|
||||
.entry(peer)
|
||||
.or_insert_with(|| PeerData { view: View::default(), version });
|
||||
},
|
||||
NetworkBridgeEvent::PeerDisconnected(peer) => {
|
||||
gum::trace!(target: LOG_TARGET, ?peer, "Peer disconnected");
|
||||
// get rid of superfluous data
|
||||
state.peer_data.remove(&peer);
|
||||
},
|
||||
NetworkBridgeEvent::NewGossipTopology(gossip_topology) => {
|
||||
let session_index = gossip_topology.session;
|
||||
let new_topology = gossip_topology.topology;
|
||||
let prev_neighbors =
|
||||
state.topologies.get_current_topology().local_grid_neighbors().clone();
|
||||
|
||||
state.topologies.update_topology(
|
||||
session_index,
|
||||
new_topology,
|
||||
gossip_topology.local_index,
|
||||
);
|
||||
let current_topology = state.topologies.get_current_topology();
|
||||
|
||||
let newly_added = current_topology.local_grid_neighbors().peers_diff(&prev_neighbors);
|
||||
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
?session_index,
|
||||
newly_added_peers = ?newly_added.len(),
|
||||
"New gossip topology received",
|
||||
);
|
||||
|
||||
for new_peer in newly_added {
|
||||
let old_view = match state.peer_data.get_mut(&new_peer) {
|
||||
Some(d) => {
|
||||
// in case we already knew that peer in the past
|
||||
// it might have had an existing view, we use to initialize
|
||||
// and minimize the delta on `PeerViewChange` to be sent
|
||||
std::mem::replace(&mut d.view, Default::default())
|
||||
},
|
||||
None => {
|
||||
// For peers which are currently unknown, we'll send topology-related
|
||||
// messages to them when they connect and send their first view update.
|
||||
continue;
|
||||
},
|
||||
};
|
||||
|
||||
handle_peer_view_change(ctx, state, new_peer, old_view, rng).await;
|
||||
}
|
||||
},
|
||||
NetworkBridgeEvent::PeerViewChange(peer_id, new_view) => {
|
||||
gum::trace!(target: LOG_TARGET, ?peer_id, ?new_view, "Peer view change");
|
||||
if state.peer_data.get(&peer_id).is_some() {
|
||||
handle_peer_view_change(ctx, state, peer_id, new_view, rng).await;
|
||||
}
|
||||
},
|
||||
NetworkBridgeEvent::OurViewChange(new_view) => {
|
||||
gum::trace!(target: LOG_TARGET, ?new_view, "Our view change");
|
||||
handle_our_view_change(state, new_view);
|
||||
},
|
||||
NetworkBridgeEvent::PeerMessage(remote, message) =>
|
||||
process_incoming_peer_message(ctx, state, metrics, remote, message, rng).await,
|
||||
NetworkBridgeEvent::UpdatedAuthorityIds(peer_id, authority_ids) => {
|
||||
state
|
||||
.topologies
|
||||
.get_current_topology_mut()
|
||||
.update_authority_ids(peer_id, &authority_ids);
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Handle the changes necessary when our view changes.
|
||||
fn handle_our_view_change(state: &mut ProtocolState, view: OurView) {
|
||||
let old_view = std::mem::replace(&mut (state.view), view);
|
||||
|
||||
for added in state.view.difference(&old_view) {
|
||||
if !state.per_relay_parent.contains_key(&added) {
|
||||
// Is guaranteed to be handled in `ActiveHead` update
|
||||
// so this should never happen.
|
||||
gum::error!(
|
||||
target: LOG_TARGET,
|
||||
%added,
|
||||
"Our view contains {}, but not in active heads",
|
||||
&added
|
||||
);
|
||||
}
|
||||
}
|
||||
for removed in old_view.difference(&state.view) {
|
||||
// cleanup relay parents we are not interested in any more
|
||||
let _ = state.per_relay_parent.remove(&removed);
|
||||
}
|
||||
}
|
||||
|
||||
// Send the difference between two views which were not sent
|
||||
// to that particular peer.
|
||||
//
|
||||
// This requires that there is an entry in the `peer_data` field for the
|
||||
// peer.
|
||||
#[overseer::contextbounds(BitfieldDistribution, prefix=self::overseer)]
|
||||
async fn handle_peer_view_change<Context>(
|
||||
ctx: &mut Context,
|
||||
state: &mut ProtocolState,
|
||||
origin: PeerId,
|
||||
view: View,
|
||||
rng: &mut (impl CryptoRng + Rng),
|
||||
) {
|
||||
let peer_data = match state.peer_data.get_mut(&origin) {
|
||||
None => {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
peer = ?origin,
|
||||
"Attempted to update peer view for unknown peer."
|
||||
);
|
||||
|
||||
return;
|
||||
},
|
||||
Some(pd) => pd,
|
||||
};
|
||||
|
||||
let added = peer_data.view.replace_difference(view).cloned().collect::<Vec<_>>();
|
||||
let current_session_index = state.topologies.get_current_session_index();
|
||||
|
||||
let topology = state.topologies.get_current_topology().local_grid_neighbors();
|
||||
let is_gossip_peer = topology.route_to_peer(RequiredRouting::GridXY, &origin);
|
||||
|
||||
let lucky = is_gossip_peer ||
|
||||
util::gen_ratio_rng(
|
||||
util::MIN_GOSSIP_PEERS.saturating_sub(topology.len()),
|
||||
util::MIN_GOSSIP_PEERS,
|
||||
rng,
|
||||
);
|
||||
|
||||
if !lucky {
|
||||
gum::trace!(target: LOG_TARGET, ?origin, "Peer view change is ignored");
|
||||
return;
|
||||
}
|
||||
|
||||
// Send all messages we've seen before and the peer is now interested
|
||||
// in to that peer.
|
||||
let delta_set: Vec<(ValidatorId, BitfieldGossipMessage)> = added
|
||||
.into_iter()
|
||||
.filter_map(|new_relay_parent_interest| {
|
||||
if let Some(job_data) = state
|
||||
.per_relay_parent
|
||||
.get(&new_relay_parent_interest)
|
||||
.filter(|job_data| job_data.signing_context.session_index == current_session_index)
|
||||
{
|
||||
// Send all jointly known messages for a validator (given the current relay parent)
|
||||
// to the peer `origin`...
|
||||
let one_per_validator = job_data.one_per_validator.clone();
|
||||
Some(one_per_validator.into_iter().filter(move |(validator, _message)| {
|
||||
// ..except for the ones the peer already has.
|
||||
job_data.message_from_validator_needed_by_peer(&origin, validator)
|
||||
}))
|
||||
} else {
|
||||
// A relay parent is in the peers view, which is not in ours, ignore those.
|
||||
None
|
||||
}
|
||||
})
|
||||
.flatten()
|
||||
.collect();
|
||||
|
||||
for (validator, message) in delta_set.into_iter() {
|
||||
send_tracked_gossip_message(ctx, state, origin, validator, message).await;
|
||||
}
|
||||
}
|
||||
|
||||
/// Send a gossip message and track it in the per relay parent data.
|
||||
#[overseer::contextbounds(BitfieldDistribution, prefix=self::overseer)]
|
||||
async fn send_tracked_gossip_message<Context>(
|
||||
ctx: &mut Context,
|
||||
state: &mut ProtocolState,
|
||||
dest: PeerId,
|
||||
validator: ValidatorId,
|
||||
message: BitfieldGossipMessage,
|
||||
) {
|
||||
let job_data = if let Some(job_data) = state.per_relay_parent.get_mut(&message.relay_parent) {
|
||||
job_data
|
||||
} else {
|
||||
return;
|
||||
};
|
||||
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
?dest,
|
||||
?validator,
|
||||
relay_parent = ?message.relay_parent,
|
||||
"Sending gossip message"
|
||||
);
|
||||
|
||||
let version =
|
||||
if let Some(peer_data) = state.peer_data.get(&dest) { peer_data.version } else { return };
|
||||
|
||||
job_data.message_sent_to_peer.entry(dest).or_default().insert(validator.clone());
|
||||
|
||||
ctx.send_message(NetworkBridgeTxMessage::SendValidationMessage(
|
||||
vec![dest],
|
||||
message.into_validation_protocol(version),
|
||||
))
|
||||
.await;
|
||||
}
|
||||
|
||||
#[overseer::subsystem(BitfieldDistribution, error=SubsystemError, prefix=self::overseer)]
|
||||
impl<Context> BitfieldDistribution {
|
||||
fn start(self, ctx: Context) -> SpawnedSubsystem {
|
||||
let future = self.run(ctx).map(|_| Ok(())).boxed();
|
||||
|
||||
SpawnedSubsystem { name: "bitfield-distribution-subsystem", future }
|
||||
}
|
||||
}
|
||||
|
||||
/// Query our validator set and signing context for a particular relay parent.
|
||||
#[overseer::contextbounds(BitfieldDistribution, prefix=self::overseer)]
|
||||
async fn query_basics<Context>(
|
||||
ctx: &mut Context,
|
||||
relay_parent: Hash,
|
||||
) -> SubsystemResult<Option<(Vec<ValidatorId>, SigningContext)>> {
|
||||
let (validators_tx, validators_rx) = oneshot::channel();
|
||||
let (session_tx, session_rx) = oneshot::channel();
|
||||
|
||||
// query validators
|
||||
ctx.send_message(RuntimeApiMessage::Request(
|
||||
relay_parent,
|
||||
RuntimeApiRequest::Validators(validators_tx),
|
||||
))
|
||||
.await;
|
||||
|
||||
// query signing context
|
||||
ctx.send_message(RuntimeApiMessage::Request(
|
||||
relay_parent,
|
||||
RuntimeApiRequest::SessionIndexForChild(session_tx),
|
||||
))
|
||||
.await;
|
||||
|
||||
match (validators_rx.await?, session_rx.await?) {
|
||||
(Ok(validators), Ok(session_index)) =>
|
||||
Ok(Some((validators, SigningContext { parent_hash: relay_parent, session_index }))),
|
||||
(Err(err), _) | (_, Err(err)) => {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
?relay_parent,
|
||||
?err,
|
||||
"Failed to fetch basics from runtime API"
|
||||
);
|
||||
Ok(None)
|
||||
},
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,108 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use pezkuwi_node_subsystem_util::metrics::{prometheus, Metrics as MetricsTrait};
|
||||
|
||||
#[derive(Clone)]
|
||||
struct MetricsInner {
|
||||
sent_own_availability_bitfields: prometheus::Counter<prometheus::U64>,
|
||||
received_availability_bitfields: prometheus::Counter<prometheus::U64>,
|
||||
active_leaves_update: prometheus::Histogram,
|
||||
handle_bitfield_distribution: prometheus::Histogram,
|
||||
handle_network_msg: prometheus::Histogram,
|
||||
}
|
||||
|
||||
/// Bitfield Distribution metrics.
|
||||
#[derive(Default, Clone)]
|
||||
pub struct Metrics(Option<MetricsInner>);
|
||||
|
||||
impl Metrics {
|
||||
pub(crate) fn on_own_bitfield_sent(&self) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.sent_own_availability_bitfields.inc();
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn on_bitfield_received(&self) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.received_availability_bitfields.inc();
|
||||
}
|
||||
}
|
||||
|
||||
/// Provide a timer for `active_leaves_update` which observes on drop.
|
||||
pub(crate) fn time_active_leaves_update(
|
||||
&self,
|
||||
) -> Option<prometheus::prometheus::HistogramTimer> {
|
||||
self.0.as_ref().map(|metrics| metrics.active_leaves_update.start_timer())
|
||||
}
|
||||
|
||||
/// Provide a timer for `handle_bitfield_distribution` which observes on drop.
|
||||
pub(crate) fn time_handle_bitfield_distribution(
|
||||
&self,
|
||||
) -> Option<prometheus::prometheus::HistogramTimer> {
|
||||
self.0
|
||||
.as_ref()
|
||||
.map(|metrics| metrics.handle_bitfield_distribution.start_timer())
|
||||
}
|
||||
|
||||
/// Provide a timer for `handle_network_msg` which observes on drop.
|
||||
pub(crate) fn time_handle_network_msg(&self) -> Option<prometheus::prometheus::HistogramTimer> {
|
||||
self.0.as_ref().map(|metrics| metrics.handle_network_msg.start_timer())
|
||||
}
|
||||
}
|
||||
|
||||
impl MetricsTrait for Metrics {
|
||||
fn try_register(registry: &prometheus::Registry) -> Result<Self, prometheus::PrometheusError> {
|
||||
let metrics = MetricsInner {
|
||||
sent_own_availability_bitfields: prometheus::register(
|
||||
prometheus::Counter::new(
|
||||
"pezkuwi_teyrchain_sent_own_availability_bitfields_total",
|
||||
"Number of own availability bitfields sent to other peers.",
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
received_availability_bitfields: prometheus::register(
|
||||
prometheus::Counter::new(
|
||||
"pezkuwi_teyrchain_received_availability_bitfields_total",
|
||||
"Number of valid availability bitfields received from other peers.",
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
active_leaves_update: prometheus::register(
|
||||
prometheus::Histogram::with_opts(prometheus::HistogramOpts::new(
|
||||
"pezkuwi_teyrchain_bitfield_distribution_active_leaves_update",
|
||||
"Time spent within `bitfield_distribution::active_leaves_update`",
|
||||
))?,
|
||||
registry,
|
||||
)?,
|
||||
handle_bitfield_distribution: prometheus::register(
|
||||
prometheus::Histogram::with_opts(prometheus::HistogramOpts::new(
|
||||
"pezkuwi_teyrchain_bitfield_distribution_handle_bitfield_distribution",
|
||||
"Time spent within `bitfield_distribution::handle_bitfield_distribution`",
|
||||
))?,
|
||||
registry,
|
||||
)?,
|
||||
handle_network_msg: prometheus::register(
|
||||
prometheus::Histogram::with_opts(prometheus::HistogramOpts::new(
|
||||
"pezkuwi_teyrchain_bitfield_distribution_handle_network_msg",
|
||||
"Time spent within `bitfield_distribution::handle_network_msg`",
|
||||
))?,
|
||||
registry,
|
||||
)?,
|
||||
};
|
||||
Ok(Metrics(Some(metrics)))
|
||||
}
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,55 @@
|
||||
[package]
|
||||
name = "pezkuwi-network-bridge"
|
||||
version = "7.0.0"
|
||||
description = "The Network Bridge Subsystem — protocol multiplexer for Pezkuwi."
|
||||
authors.workspace = true
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
homepage.workspace = true
|
||||
repository.workspace = true
|
||||
|
||||
[lints]
|
||||
workspace = true
|
||||
|
||||
[dependencies]
|
||||
always-assert = { workspace = true }
|
||||
async-trait = { workspace = true }
|
||||
bytes = { workspace = true, default-features = true }
|
||||
codec = { features = ["derive"], workspace = true }
|
||||
fatality = { workspace = true }
|
||||
futures = { workspace = true }
|
||||
gum = { workspace = true, default-features = true }
|
||||
parking_lot = { workspace = true, default-features = true }
|
||||
pezkuwi-node-metrics = { workspace = true, default-features = true }
|
||||
pezkuwi-node-network-protocol = { workspace = true, default-features = true }
|
||||
pezkuwi-node-subsystem = { workspace = true, default-features = true }
|
||||
pezkuwi-overseer = { workspace = true, default-features = true }
|
||||
pezkuwi-primitives = { workspace = true, default-features = true }
|
||||
sc-network = { workspace = true, default-features = true }
|
||||
sp-consensus = { workspace = true, default-features = true }
|
||||
thiserror = { workspace = true }
|
||||
|
||||
[dev-dependencies]
|
||||
assert_matches = { workspace = true }
|
||||
futures-timer = { workspace = true }
|
||||
pezkuwi-node-subsystem-test-helpers = { workspace = true }
|
||||
pezkuwi-node-subsystem-util = { workspace = true, default-features = true }
|
||||
pezkuwi-primitives-test-helpers = { workspace = true }
|
||||
sp-core = { workspace = true, default-features = true }
|
||||
sp-keyring = { workspace = true, default-features = true }
|
||||
|
||||
[features]
|
||||
runtime-benchmarks = [
|
||||
"gum/runtime-benchmarks",
|
||||
"pezkuwi-node-metrics/runtime-benchmarks",
|
||||
"pezkuwi-node-network-protocol/runtime-benchmarks",
|
||||
"pezkuwi-node-subsystem-test-helpers/runtime-benchmarks",
|
||||
"pezkuwi-node-subsystem-util/runtime-benchmarks",
|
||||
"pezkuwi-node-subsystem/runtime-benchmarks",
|
||||
"pezkuwi-overseer/runtime-benchmarks",
|
||||
"pezkuwi-primitives-test-helpers/runtime-benchmarks",
|
||||
"pezkuwi-primitives/runtime-benchmarks",
|
||||
"sc-network/runtime-benchmarks",
|
||||
"sp-consensus/runtime-benchmarks",
|
||||
"sp-keyring/runtime-benchmarks",
|
||||
]
|
||||
@@ -0,0 +1,36 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use pezkuwi_node_subsystem::SubsystemError;
|
||||
pub(crate) use pezkuwi_overseer::OverseerError;
|
||||
|
||||
#[fatality::fatality(splitable)]
|
||||
pub(crate) enum Error {
|
||||
/// Received error from overseer:
|
||||
#[fatal]
|
||||
#[error(transparent)]
|
||||
SubsystemError(#[from] SubsystemError),
|
||||
/// The stream of incoming events concluded.
|
||||
#[fatal]
|
||||
#[error("Event stream closed unexpectedly")]
|
||||
EventStreamConcluded,
|
||||
}
|
||||
|
||||
impl From<OverseerError> for Error {
|
||||
fn from(e: OverseerError) -> Self {
|
||||
Error::SubsystemError(SubsystemError::from(e))
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,132 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! The Network Bridge Subsystem - protocol multiplexer for Pezkuwi.
|
||||
//!
|
||||
//! Split into incoming (`..In`) and outgoing (`..Out`) subsystems.
|
||||
|
||||
#![deny(unused_crate_dependencies)]
|
||||
#![warn(missing_docs)]
|
||||
|
||||
use codec::{Decode, Encode};
|
||||
use futures::prelude::*;
|
||||
use parking_lot::Mutex;
|
||||
|
||||
use sp_consensus::SyncOracle;
|
||||
|
||||
use pezkuwi_node_network_protocol::{
|
||||
peer_set::{PeerSet, ProtocolVersion},
|
||||
PeerId, UnifiedReputationChange as Rep, View,
|
||||
};
|
||||
|
||||
/// Peer set info for network initialization.
|
||||
///
|
||||
/// To be passed to `FullNetworkConfiguration::add_notification_protocol`.
|
||||
pub use pezkuwi_node_network_protocol::peer_set::{peer_sets_info, IsAuthority};
|
||||
|
||||
use std::{collections::HashMap, sync::Arc};
|
||||
|
||||
mod validator_discovery;
|
||||
|
||||
/// Actual interfacing to the network based on the `Network` trait.
|
||||
///
|
||||
/// Defines the `Network` trait with an implementation for an `Arc<NetworkService>`.
|
||||
mod network;
|
||||
use self::network::Network;
|
||||
|
||||
mod metrics;
|
||||
pub use self::metrics::Metrics;
|
||||
|
||||
mod errors;
|
||||
pub(crate) use self::errors::Error;
|
||||
|
||||
mod tx;
|
||||
pub use self::tx::*;
|
||||
|
||||
mod rx;
|
||||
pub use self::rx::*;
|
||||
|
||||
/// The maximum amount of heads a peer is allowed to have in their view at any time.
|
||||
///
|
||||
/// We use the same limit to compute the view sent to peers locally.
|
||||
pub(crate) const MAX_VIEW_HEADS: usize = 5;
|
||||
|
||||
pub(crate) const MALFORMED_MESSAGE_COST: Rep = Rep::CostMajor("Malformed Network-bridge message");
|
||||
pub(crate) const UNCONNECTED_PEERSET_COST: Rep =
|
||||
Rep::CostMinor("Message sent to un-connected peer-set");
|
||||
pub(crate) const MALFORMED_VIEW_COST: Rep = Rep::CostMajor("Malformed view");
|
||||
pub(crate) const EMPTY_VIEW_COST: Rep = Rep::CostMajor("Peer sent us an empty view");
|
||||
|
||||
/// Messages from and to the network.
|
||||
///
|
||||
/// As transmitted to and received from subsystems.
|
||||
#[derive(Debug, Encode, Decode, Clone)]
|
||||
pub(crate) enum WireMessage<M> {
|
||||
/// A message from a peer on a specific protocol.
|
||||
#[codec(index = 1)]
|
||||
ProtocolMessage(M),
|
||||
/// A view update from a peer.
|
||||
#[codec(index = 2)]
|
||||
ViewUpdate(View),
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct PeerData {
|
||||
/// The Latest view sent by the peer.
|
||||
view: View,
|
||||
version: ProtocolVersion,
|
||||
}
|
||||
|
||||
/// Shared state between incoming and outgoing.
|
||||
|
||||
#[derive(Default, Clone)]
|
||||
pub(crate) struct Shared(Arc<Mutex<SharedInner>>);
|
||||
|
||||
#[derive(Default)]
|
||||
struct SharedInner {
|
||||
local_view: Option<View>,
|
||||
validation_peers: HashMap<PeerId, PeerData>,
|
||||
collation_peers: HashMap<PeerId, PeerData>,
|
||||
}
|
||||
|
||||
// Counts the number of peers that are connectioned using `version`
|
||||
fn count_peers_by_version(peers: &HashMap<PeerId, PeerData>) -> HashMap<ProtocolVersion, usize> {
|
||||
let mut by_version_count = HashMap::new();
|
||||
for peer in peers.values() {
|
||||
*(by_version_count.entry(peer.version).or_default()) += 1;
|
||||
}
|
||||
by_version_count
|
||||
}
|
||||
|
||||
// Notes the peer count
|
||||
fn note_peers_count(metrics: &Metrics, shared: &Shared) {
|
||||
let guard = shared.0.lock();
|
||||
let validation_stats = count_peers_by_version(&guard.validation_peers);
|
||||
let collation_stats = count_peers_by_version(&guard.collation_peers);
|
||||
|
||||
for (version, count) in validation_stats {
|
||||
metrics.note_peer_count(PeerSet::Validation, version, count)
|
||||
}
|
||||
|
||||
for (version, count) in collation_stats {
|
||||
metrics.note_peer_count(PeerSet::Collation, version, count)
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) enum Mode {
|
||||
Syncing(Box<dyn SyncOracle + Send>),
|
||||
Active,
|
||||
}
|
||||
@@ -0,0 +1,287 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use super::{PeerSet, ProtocolVersion};
|
||||
use pezkuwi_node_metrics::metrics::{self, prometheus};
|
||||
|
||||
/// Metrics for the network bridge.
|
||||
#[derive(Clone, Default)]
|
||||
pub struct Metrics(pub(crate) Option<MetricsInner>);
|
||||
|
||||
/// Looks up the protocol label of `peer_set` at `version`.
///
/// Falls back to `"<internal error>"` when no label exists. Higher level code
/// is meant to protect against this ever happening.
fn peer_set_label(peer_set: PeerSet, version: ProtocolVersion) -> &'static str {
    match peer_set.get_protocol_label(version) {
        Some(label) => label,
        None => "<internal error>",
    }
}
|
||||
|
||||
#[allow(missing_docs)]
|
||||
impl Metrics {
|
||||
pub fn on_peer_connected(&self, peer_set: PeerSet, version: ProtocolVersion) {
|
||||
self.0.as_ref().map(|metrics| {
|
||||
metrics
|
||||
.connected_events
|
||||
.with_label_values(&[peer_set_label(peer_set, version)])
|
||||
.inc()
|
||||
});
|
||||
}
|
||||
|
||||
pub fn on_peer_disconnected(&self, peer_set: PeerSet, version: ProtocolVersion) {
|
||||
self.0.as_ref().map(|metrics| {
|
||||
metrics
|
||||
.disconnected_events
|
||||
.with_label_values(&[peer_set_label(peer_set, version)])
|
||||
.inc()
|
||||
});
|
||||
}
|
||||
|
||||
pub fn note_peer_count(&self, peer_set: PeerSet, version: ProtocolVersion, count: usize) {
|
||||
if let Some(metrics) = self.0.as_ref() {
|
||||
let label = peer_set_label(peer_set, version);
|
||||
metrics.peer_count.with_label_values(&[label]).set(count as u64);
|
||||
metrics.peer_connectivity.with_label_values(&[label]).observe(count as f64);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn on_notification_received(
|
||||
&self,
|
||||
peer_set: PeerSet,
|
||||
version: ProtocolVersion,
|
||||
size: usize,
|
||||
) {
|
||||
if let Some(metrics) = self.0.as_ref() {
|
||||
metrics
|
||||
.notifications_received
|
||||
.with_label_values(&[peer_set_label(peer_set, version)])
|
||||
.inc();
|
||||
|
||||
metrics
|
||||
.bytes_received
|
||||
.with_label_values(&[peer_set_label(peer_set, version)])
|
||||
.inc_by(size as u64);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn on_notification_sent(
|
||||
&self,
|
||||
peer_set: PeerSet,
|
||||
version: ProtocolVersion,
|
||||
size: usize,
|
||||
to_peers: usize,
|
||||
) {
|
||||
if let Some(metrics) = self.0.as_ref() {
|
||||
metrics
|
||||
.notifications_sent
|
||||
.with_label_values(&[peer_set_label(peer_set, version)])
|
||||
.inc_by(to_peers as u64);
|
||||
|
||||
metrics
|
||||
.bytes_sent
|
||||
.with_label_values(&[peer_set_label(peer_set, version)])
|
||||
.inc_by((size * to_peers) as u64);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn note_desired_peer_count(&self, peer_set: PeerSet, size: usize) {
|
||||
self.0.as_ref().map(|metrics| {
|
||||
metrics
|
||||
.desired_peer_count
|
||||
.with_label_values(&[peer_set.get_label()])
|
||||
.set(size as u64)
|
||||
});
|
||||
}
|
||||
|
||||
pub fn on_report_event(&self) {
|
||||
if let Some(metrics) = self.0.as_ref() {
|
||||
self.on_message("report_peer");
|
||||
metrics.report_events.inc()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn on_message(&self, message_type: &'static str) {
|
||||
if let Some(metrics) = self.0.as_ref() {
|
||||
metrics.messages_sent.with_label_values(&[message_type]).inc()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn on_delayed_rx_queue(&self, queue_size: usize) {
|
||||
if let Some(metrics) = self.0.as_ref() {
|
||||
metrics.rx_delayed_processing.observe(queue_size as f64);
|
||||
}
|
||||
}
|
||||
pub fn time_delayed_rx_events(
|
||||
&self,
|
||||
) -> Option<metrics::prometheus::prometheus::HistogramTimer> {
|
||||
self.0.as_ref().map(|metrics| metrics.rx_delayed_processing_time.start_timer())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub(crate) struct MetricsInner {
|
||||
peer_count: prometheus::GaugeVec<prometheus::U64>,
|
||||
peer_connectivity: prometheus::HistogramVec,
|
||||
connected_events: prometheus::CounterVec<prometheus::U64>,
|
||||
disconnected_events: prometheus::CounterVec<prometheus::U64>,
|
||||
desired_peer_count: prometheus::GaugeVec<prometheus::U64>,
|
||||
report_events: prometheus::Counter<prometheus::U64>,
|
||||
|
||||
notifications_received: prometheus::CounterVec<prometheus::U64>,
|
||||
notifications_sent: prometheus::CounterVec<prometheus::U64>,
|
||||
|
||||
bytes_received: prometheus::CounterVec<prometheus::U64>,
|
||||
bytes_sent: prometheus::CounterVec<prometheus::U64>,
|
||||
|
||||
messages_sent: prometheus::CounterVec<prometheus::U64>,
|
||||
// The reason why a `Histogram` is used to track a queue size is that
|
||||
// we need not only an average size of the queue (that will be 0 normally), but
|
||||
// we also need a dynamics for this queue size in case of messages delays.
|
||||
rx_delayed_processing: prometheus::Histogram,
|
||||
rx_delayed_processing_time: prometheus::Histogram,
|
||||
}
|
||||
|
||||
impl metrics::Metrics for Metrics {
|
||||
fn try_register(
|
||||
registry: &prometheus::Registry,
|
||||
) -> std::result::Result<Self, prometheus::PrometheusError> {
|
||||
let metrics = MetricsInner {
|
||||
peer_count: prometheus::register(
|
||||
prometheus::GaugeVec::new(
|
||||
prometheus::Opts::new(
|
||||
"pezkuwi_teyrchain_peer_count",
|
||||
"The number of peers on a teyrchain-related peer-set",
|
||||
),
|
||||
&["protocol"]
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
peer_connectivity: prometheus::register(
|
||||
prometheus::HistogramVec::new(
|
||||
prometheus::HistogramOpts::new(
|
||||
"pezkuwi_teyrchain_peer_connectivity",
|
||||
"Histogram of peer counts on a teyrchain-related peer-set to track connectivity patterns",
|
||||
).buckets(vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 10.0, 15.0, 20.0, 25.0, 30.0, 40.0, 50.0, 100.0, 250.0, 500.0, 1000.0]),
|
||||
&["protocol"]
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
connected_events: prometheus::register(
|
||||
prometheus::CounterVec::new(
|
||||
prometheus::Opts::new(
|
||||
"pezkuwi_teyrchain_peer_connect_events_total",
|
||||
"The number of peer connect events on a teyrchain notifications protocol",
|
||||
),
|
||||
&["protocol"]
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
disconnected_events: prometheus::register(
|
||||
prometheus::CounterVec::new(
|
||||
prometheus::Opts::new(
|
||||
"pezkuwi_teyrchain_peer_disconnect_events_total",
|
||||
"The number of peer disconnect events on a teyrchain notifications protocol",
|
||||
),
|
||||
&["protocol"]
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
desired_peer_count: prometheus::register(
|
||||
prometheus::GaugeVec::new(
|
||||
prometheus::Opts::new(
|
||||
"pezkuwi_teyrchain_desired_peer_count",
|
||||
"The number of peers that the local node is expected to connect to on a teyrchain-related peer-set (either including or not including unresolvable authorities, depending on whether `ConnectToValidators` or `ConnectToValidatorsResolved` was used.)",
|
||||
),
|
||||
&["protocol"]
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
report_events: prometheus::register(
|
||||
prometheus::Counter::new(
|
||||
"pezkuwi_teyrchain_network_report_events_total",
|
||||
"The amount of reputation changes issued by subsystems",
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
notifications_received: prometheus::register(
|
||||
prometheus::CounterVec::new(
|
||||
prometheus::Opts::new(
|
||||
"pezkuwi_teyrchain_notifications_received_total",
|
||||
"The number of notifications received on a teyrchain protocol",
|
||||
),
|
||||
&["protocol"]
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
notifications_sent: prometheus::register(
|
||||
prometheus::CounterVec::new(
|
||||
prometheus::Opts::new(
|
||||
"pezkuwi_teyrchain_notifications_sent_total",
|
||||
"The number of notifications sent on a teyrchain protocol",
|
||||
),
|
||||
&["protocol"]
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
bytes_received: prometheus::register(
|
||||
prometheus::CounterVec::new(
|
||||
prometheus::Opts::new(
|
||||
"pezkuwi_teyrchain_notification_bytes_received_total",
|
||||
"The number of bytes received on a teyrchain notification protocol",
|
||||
),
|
||||
&["protocol"]
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
bytes_sent: prometheus::register(
|
||||
prometheus::CounterVec::new(
|
||||
prometheus::Opts::new(
|
||||
"pezkuwi_teyrchain_notification_bytes_sent_total",
|
||||
"The number of bytes sent on a teyrchain notification protocol",
|
||||
),
|
||||
&["protocol"]
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
messages_sent: prometheus::register(
|
||||
prometheus::CounterVec::new(
|
||||
prometheus::Opts::new(
|
||||
"pezkuwi_teyrchain_messages_sent_total",
|
||||
"The number of messages sent via network bridge",
|
||||
),
|
||||
&["type"]
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
rx_delayed_processing: prometheus::register(
|
||||
prometheus::Histogram::with_opts(
|
||||
prometheus::HistogramOpts::new(
|
||||
"pezkuwi_teyrchain_network_bridge_rx_delayed",
|
||||
"Number of events being delayed while broadcasting from the network bridge",
|
||||
).buckets(vec![0.0, 1.0, 2.0, 8.0, 16.0]),
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
rx_delayed_processing_time: prometheus::register(
|
||||
prometheus::Histogram::with_opts(
|
||||
prometheus::HistogramOpts::new(
|
||||
"pezkuwi_teyrchain_network_bridge_rx_delayed_time",
|
||||
"Time spent for waiting of the delayed events",
|
||||
),
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
};
|
||||
|
||||
Ok(Metrics(Some(metrics)))
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,329 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use std::{
|
||||
collections::{HashMap, HashSet},
|
||||
sync::Arc,
|
||||
};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use parking_lot::Mutex;
|
||||
|
||||
use codec::Encode;
|
||||
|
||||
use sc_network::{
|
||||
config::parse_addr, multiaddr::Multiaddr, service::traits::NetworkService, types::ProtocolName,
|
||||
IfDisconnected, MessageSink, OutboundFailure, ReputationChange, RequestFailure,
|
||||
};
|
||||
|
||||
use pezkuwi_node_network_protocol::{
|
||||
peer_set::{CollationVersion, PeerSet, ProtocolVersion, ValidationVersion},
|
||||
request_response::{OutgoingRequest, Recipient, ReqProtocolNames, Requests},
|
||||
v1 as protocol_v1, v2 as protocol_v2, v3 as protocol_v3, PeerId,
|
||||
};
|
||||
use pezkuwi_primitives::AuthorityDiscoveryId;
|
||||
|
||||
use crate::{metrics::Metrics, validator_discovery::AuthorityDiscovery, WireMessage};
|
||||
|
||||
// network bridge network abstraction log target
|
||||
const LOG_TARGET: &'static str = "teyrchain::network-bridge-net";
|
||||
|
||||
// Helper function to send a validation v3 message to a list of peers.
|
||||
// Messages are always sent via the main protocol, even legacy protocol messages.
|
||||
pub(crate) fn send_validation_message_v3(
|
||||
peers: Vec<PeerId>,
|
||||
message: WireMessage<protocol_v3::ValidationProtocol>,
|
||||
metrics: &Metrics,
|
||||
notification_sinks: &Arc<Mutex<HashMap<(PeerSet, PeerId), Box<dyn MessageSink>>>>,
|
||||
) {
|
||||
gum::trace!(target: LOG_TARGET, ?peers, ?message, "Sending validation v3 message to peers",);
|
||||
|
||||
send_message(
|
||||
peers,
|
||||
PeerSet::Validation,
|
||||
ValidationVersion::V3.into(),
|
||||
message,
|
||||
metrics,
|
||||
notification_sinks,
|
||||
);
|
||||
}
|
||||
|
||||
// Helper function to send a collation v1 message to a list of peers.
|
||||
// Messages are always sent via the main protocol, even legacy protocol messages.
|
||||
pub(crate) fn send_collation_message_v1(
|
||||
peers: Vec<PeerId>,
|
||||
message: WireMessage<protocol_v1::CollationProtocol>,
|
||||
metrics: &Metrics,
|
||||
notification_sinks: &Arc<Mutex<HashMap<(PeerSet, PeerId), Box<dyn MessageSink>>>>,
|
||||
) {
|
||||
send_message(
|
||||
peers,
|
||||
PeerSet::Collation,
|
||||
CollationVersion::V1.into(),
|
||||
message,
|
||||
metrics,
|
||||
notification_sinks,
|
||||
);
|
||||
}
|
||||
|
||||
// Helper function to send a collation v2 message to a list of peers.
|
||||
// Messages are always sent via the main protocol, even legacy protocol messages.
|
||||
pub(crate) fn send_collation_message_v2(
|
||||
peers: Vec<PeerId>,
|
||||
message: WireMessage<protocol_v2::CollationProtocol>,
|
||||
metrics: &Metrics,
|
||||
notification_sinks: &Arc<Mutex<HashMap<(PeerSet, PeerId), Box<dyn MessageSink>>>>,
|
||||
) {
|
||||
send_message(
|
||||
peers,
|
||||
PeerSet::Collation,
|
||||
CollationVersion::V2.into(),
|
||||
message,
|
||||
metrics,
|
||||
notification_sinks,
|
||||
);
|
||||
}
|
||||
|
||||
/// Lower level function that sends a message to the network using the main protocol version.
|
||||
///
|
||||
/// This function is only used internally by the network-bridge, which is responsible to only send
|
||||
/// messages that are compatible with the passed peer set, as that is currently not enforced by
|
||||
/// this function. These are messages of type `WireMessage` parameterized on the matching type.
|
||||
fn send_message<M>(
|
||||
mut peers: Vec<PeerId>,
|
||||
peer_set: PeerSet,
|
||||
version: ProtocolVersion,
|
||||
message: M,
|
||||
metrics: &super::Metrics,
|
||||
network_notification_sinks: &Arc<Mutex<HashMap<(PeerSet, PeerId), Box<dyn MessageSink>>>>,
|
||||
) where
|
||||
M: Encode + Clone,
|
||||
{
|
||||
if peers.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
let message = {
|
||||
let encoded = message.encode();
|
||||
metrics.on_notification_sent(peer_set, version, encoded.len(), peers.len());
|
||||
metrics.on_message(std::any::type_name::<M>());
|
||||
encoded
|
||||
};
|
||||
|
||||
let notification_sinks = network_notification_sinks.lock();
|
||||
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
?peers,
|
||||
?peer_set,
|
||||
?version,
|
||||
?message,
|
||||
"Sending message to peers",
|
||||
);
|
||||
|
||||
// optimization: avoid cloning the message for the last peer in the
|
||||
// list. The message payload can be quite large. If the underlying
|
||||
// network used `Bytes` this would not be necessary.
|
||||
//
|
||||
// peer may have gotten disconnect by the time `send_message()` is called
|
||||
// at which point the sink is not available.
|
||||
let last_peer = peers.pop();
|
||||
peers.into_iter().for_each(|peer| {
|
||||
if let Some(sink) = notification_sinks.get(&(peer_set, peer)) {
|
||||
sink.send_sync_notification(message.clone());
|
||||
}
|
||||
});
|
||||
|
||||
if let Some(peer) = last_peer {
|
||||
if let Some(sink) = notification_sinks.get(&(peer_set, peer)) {
|
||||
sink.send_sync_notification(message.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// An abstraction over networking for the purposes of this subsystem.
|
||||
#[async_trait]
|
||||
pub trait Network: Clone + Send + 'static {
|
||||
/// Ask the network to keep a substream open with these nodes and not disconnect from them
|
||||
/// until removed from the protocol's peer set.
|
||||
/// Note that `out_peers` setting has no effect on this.
|
||||
async fn set_reserved_peers(
|
||||
&mut self,
|
||||
protocol: ProtocolName,
|
||||
multiaddresses: HashSet<Multiaddr>,
|
||||
) -> Result<(), String>;
|
||||
|
||||
/// Ask the network to extend the reserved set with these nodes.
|
||||
async fn add_peers_to_reserved_set(
|
||||
&mut self,
|
||||
protocol: ProtocolName,
|
||||
multiaddresses: HashSet<Multiaddr>,
|
||||
) -> Result<(), String>;
|
||||
|
||||
/// Removes the peers for the protocol's peer set (both reserved and non-reserved).
|
||||
async fn remove_from_peers_set(
|
||||
&mut self,
|
||||
protocol: ProtocolName,
|
||||
peers: Vec<PeerId>,
|
||||
) -> Result<(), String>;
|
||||
|
||||
/// Send a request to a remote peer.
|
||||
async fn start_request<AD: AuthorityDiscovery>(
|
||||
&self,
|
||||
authority_discovery: &mut AD,
|
||||
req: Requests,
|
||||
req_protocol_names: &ReqProtocolNames,
|
||||
if_disconnected: IfDisconnected,
|
||||
);
|
||||
|
||||
/// Report a given peer as either beneficial (+) or costly (-) according to the given scalar.
|
||||
fn report_peer(&self, who: PeerId, rep: ReputationChange);
|
||||
|
||||
/// Disconnect a given peer from the protocol specified without harming reputation.
|
||||
fn disconnect_peer(&self, who: PeerId, protocol: ProtocolName);
|
||||
|
||||
/// Get peer role.
|
||||
fn peer_role(&self, who: PeerId, handshake: Vec<u8>) -> Option<sc_network::ObservedRole>;
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Network for Arc<dyn NetworkService> {
|
||||
async fn set_reserved_peers(
|
||||
&mut self,
|
||||
protocol: ProtocolName,
|
||||
multiaddresses: HashSet<Multiaddr>,
|
||||
) -> Result<(), String> {
|
||||
<dyn NetworkService>::set_reserved_peers(&**self, protocol, multiaddresses)
|
||||
}
|
||||
|
||||
async fn add_peers_to_reserved_set(
|
||||
&mut self,
|
||||
protocol: ProtocolName,
|
||||
multiaddresses: HashSet<Multiaddr>,
|
||||
) -> Result<(), String> {
|
||||
<dyn NetworkService>::add_peers_to_reserved_set(&**self, protocol, multiaddresses)
|
||||
}
|
||||
|
||||
async fn remove_from_peers_set(
|
||||
&mut self,
|
||||
protocol: ProtocolName,
|
||||
peers: Vec<PeerId>,
|
||||
) -> Result<(), String> {
|
||||
<dyn NetworkService>::remove_peers_from_reserved_set(&**self, protocol, peers)
|
||||
}
|
||||
|
||||
fn report_peer(&self, who: PeerId, rep: ReputationChange) {
|
||||
<dyn NetworkService>::report_peer(&**self, who, rep);
|
||||
}
|
||||
|
||||
fn disconnect_peer(&self, who: PeerId, protocol: ProtocolName) {
|
||||
<dyn NetworkService>::disconnect_peer(&**self, who, protocol);
|
||||
}
|
||||
|
||||
async fn start_request<AD: AuthorityDiscovery>(
|
||||
&self,
|
||||
authority_discovery: &mut AD,
|
||||
req: Requests,
|
||||
req_protocol_names: &ReqProtocolNames,
|
||||
if_disconnected: IfDisconnected,
|
||||
) {
|
||||
let (protocol, OutgoingRequest { peer, payload, pending_response, fallback_request }) =
|
||||
req.encode_request();
|
||||
|
||||
let peer_id = match peer {
|
||||
Recipient::Peer(peer_id) => Some(peer_id),
|
||||
Recipient::Authority(authority) => {
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
?authority,
|
||||
"Searching for peer id to connect to authority",
|
||||
);
|
||||
|
||||
let mut found_peer_id = None;
|
||||
// Note: `get_addresses_by_authority_id` searched in a cache, and it thus expected
|
||||
// to be very quick.
|
||||
for addr in authority_discovery
|
||||
.get_addresses_by_authority_id(authority)
|
||||
.await
|
||||
.into_iter()
|
||||
.flat_map(|list| list.into_iter())
|
||||
{
|
||||
let (peer_id, addr) = match parse_addr(addr) {
|
||||
Ok(v) => v,
|
||||
Err(_) => continue,
|
||||
};
|
||||
<dyn NetworkService>::add_known_address(&**self, peer_id, addr);
|
||||
found_peer_id = Some(peer_id);
|
||||
}
|
||||
found_peer_id
|
||||
},
|
||||
};
|
||||
|
||||
let peer_id = match peer_id {
|
||||
None => {
|
||||
gum::debug!(target: LOG_TARGET, "Discovering authority failed");
|
||||
match pending_response
|
||||
.send(Err(RequestFailure::Network(OutboundFailure::DialFailure)))
|
||||
{
|
||||
Err(_) => {
|
||||
gum::debug!(target: LOG_TARGET, "Sending failed request response failed.")
|
||||
},
|
||||
Ok(_) => {},
|
||||
}
|
||||
return;
|
||||
},
|
||||
Some(peer_id) => peer_id,
|
||||
};
|
||||
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
%peer_id,
|
||||
protocol = %req_protocol_names.get_name(protocol),
|
||||
fallback_protocol = ?fallback_request.as_ref().map(|(_, p)| req_protocol_names.get_name(*p)),
|
||||
?if_disconnected,
|
||||
"Starting request",
|
||||
);
|
||||
|
||||
<dyn NetworkService>::start_request(
|
||||
&**self,
|
||||
peer_id,
|
||||
req_protocol_names.get_name(protocol),
|
||||
payload,
|
||||
fallback_request.map(|(r, p)| (r, req_protocol_names.get_name(p))),
|
||||
pending_response,
|
||||
if_disconnected,
|
||||
);
|
||||
}
|
||||
|
||||
fn peer_role(&self, who: PeerId, handshake: Vec<u8>) -> Option<sc_network::ObservedRole> {
|
||||
<dyn NetworkService>::peer_role(&**self, who, handshake)
|
||||
}
|
||||
}
|
||||
|
||||
/// We assume one `peer_id` per `authority_id`.
|
||||
pub async fn get_peer_id_by_authority_id<AD: AuthorityDiscovery>(
|
||||
authority_discovery: &mut AD,
|
||||
authority: AuthorityDiscoveryId,
|
||||
) -> Option<PeerId> {
|
||||
// Note: `get_addresses_by_authority_id` searched in a cache, and it thus expected
|
||||
// to be very quick.
|
||||
authority_discovery
|
||||
.get_addresses_by_authority_id(authority)
|
||||
.await
|
||||
.into_iter()
|
||||
.flat_map(|list| list.into_iter())
|
||||
.find_map(|addr| parse_addr(addr).ok().map(|(p, _)| p))
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,400 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! The Network Bridge Subsystem - handles _outgoing_ messages, from subsystem to the network.
|
||||
use super::*;
|
||||
|
||||
use pezkuwi_node_network_protocol::{
|
||||
peer_set::PeerSetProtocolNames, request_response::ReqProtocolNames, CollationProtocols,
|
||||
ValidationProtocols,
|
||||
};
|
||||
|
||||
use pezkuwi_node_subsystem::{
|
||||
errors::SubsystemError,
|
||||
messages::{NetworkBridgeTxMessage, ReportPeerMessage},
|
||||
overseer, FromOrchestra, OverseerSignal, SpawnedSubsystem,
|
||||
};
|
||||
|
||||
use pezkuwi_node_network_protocol::request_response::Requests;
|
||||
use sc_network::{MessageSink, ReputationChange};
|
||||
|
||||
use crate::validator_discovery;
|
||||
|
||||
/// Actual interfacing to the network based on the `Network` trait.
|
||||
///
|
||||
/// Defines the `Network` trait with an implementation for an `Arc<NetworkService>`.
|
||||
use crate::network::{
|
||||
send_collation_message_v1, send_collation_message_v2, send_validation_message_v3, Network,
|
||||
};
|
||||
|
||||
use crate::metrics::Metrics;
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
||||
/// Log target used by the outgoing (tx) half of the network bridge.
// `'static` is implied for references in `const` items, so it is not spelled out.
const LOG_TARGET: &str = "teyrchain::network-bridge-tx";
|
||||
|
||||
/// The network bridge subsystem.
|
||||
pub struct NetworkBridgeTx<N, AD> {
|
||||
/// `Network` trait implementing type.
|
||||
network_service: N,
|
||||
authority_discovery_service: AD,
|
||||
metrics: Metrics,
|
||||
req_protocol_names: ReqProtocolNames,
|
||||
peerset_protocol_names: PeerSetProtocolNames,
|
||||
notification_sinks: Arc<Mutex<HashMap<(PeerSet, PeerId), Box<dyn MessageSink>>>>,
|
||||
}
|
||||
|
||||
impl<N, AD> NetworkBridgeTx<N, AD> {
|
||||
/// Create a new network bridge subsystem with underlying network service and authority
|
||||
/// discovery service.
|
||||
///
|
||||
/// This assumes that the network service has had the notifications protocol for the network
|
||||
/// bridge already registered. See [`peer_sets_info`].
|
||||
pub fn new(
|
||||
network_service: N,
|
||||
authority_discovery_service: AD,
|
||||
metrics: Metrics,
|
||||
req_protocol_names: ReqProtocolNames,
|
||||
peerset_protocol_names: PeerSetProtocolNames,
|
||||
notification_sinks: Arc<Mutex<HashMap<(PeerSet, PeerId), Box<dyn MessageSink>>>>,
|
||||
) -> Self {
|
||||
Self {
|
||||
network_service,
|
||||
authority_discovery_service,
|
||||
metrics,
|
||||
req_protocol_names,
|
||||
peerset_protocol_names,
|
||||
notification_sinks,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[overseer::subsystem(NetworkBridgeTx, error = SubsystemError, prefix = self::overseer)]
|
||||
impl<Net, AD, Context> NetworkBridgeTx<Net, AD>
|
||||
where
|
||||
Net: Network + Sync,
|
||||
AD: validator_discovery::AuthorityDiscovery + Clone + Sync,
|
||||
{
|
||||
fn start(self, ctx: Context) -> SpawnedSubsystem {
|
||||
let future = run_network_out(self, ctx)
|
||||
.map_err(|e| SubsystemError::with_origin("network-bridge", e))
|
||||
.boxed();
|
||||
SpawnedSubsystem { name: "network-bridge-tx-subsystem", future }
|
||||
}
|
||||
}
|
||||
|
||||
#[overseer::contextbounds(NetworkBridgeTx, prefix = self::overseer)]
|
||||
async fn handle_subsystem_messages<Context, N, AD>(
|
||||
mut ctx: Context,
|
||||
mut network_service: N,
|
||||
mut authority_discovery_service: AD,
|
||||
metrics: Metrics,
|
||||
req_protocol_names: ReqProtocolNames,
|
||||
peerset_protocol_names: PeerSetProtocolNames,
|
||||
notification_sinks: Arc<Mutex<HashMap<(PeerSet, PeerId), Box<dyn MessageSink>>>>,
|
||||
) -> Result<(), Error>
|
||||
where
|
||||
N: Network,
|
||||
AD: validator_discovery::AuthorityDiscovery + Clone,
|
||||
{
|
||||
let mut validator_discovery =
|
||||
validator_discovery::Service::<N, AD>::new(peerset_protocol_names.clone());
|
||||
|
||||
loop {
|
||||
match ctx.recv().fuse().await? {
|
||||
FromOrchestra::Signal(OverseerSignal::Conclude) => return Ok(()),
|
||||
FromOrchestra::Signal(_) => { /* handled by incoming */ },
|
||||
FromOrchestra::Communication { msg } => {
|
||||
(network_service, authority_discovery_service) =
|
||||
handle_incoming_subsystem_communication(
|
||||
&mut ctx,
|
||||
network_service,
|
||||
&mut validator_discovery,
|
||||
authority_discovery_service.clone(),
|
||||
msg,
|
||||
&metrics,
|
||||
&req_protocol_names,
|
||||
&peerset_protocol_names,
|
||||
¬ification_sinks,
|
||||
)
|
||||
.await;
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[overseer::contextbounds(NetworkBridgeTx, prefix = self::overseer)]
|
||||
async fn handle_incoming_subsystem_communication<Context, N, AD>(
|
||||
_ctx: &mut Context,
|
||||
network_service: N,
|
||||
validator_discovery: &mut validator_discovery::Service<N, AD>,
|
||||
mut authority_discovery_service: AD,
|
||||
msg: NetworkBridgeTxMessage,
|
||||
metrics: &Metrics,
|
||||
req_protocol_names: &ReqProtocolNames,
|
||||
peerset_protocol_names: &PeerSetProtocolNames,
|
||||
notification_sinks: &Arc<Mutex<HashMap<(PeerSet, PeerId), Box<dyn MessageSink>>>>,
|
||||
) -> (N, AD)
|
||||
where
|
||||
N: Network,
|
||||
AD: validator_discovery::AuthorityDiscovery + Clone,
|
||||
{
|
||||
match msg {
|
||||
NetworkBridgeTxMessage::ReportPeer(ReportPeerMessage::Single(peer, rep)) => {
|
||||
if !rep.value.is_positive() {
|
||||
gum::debug!(target: LOG_TARGET, ?peer, ?rep, action = "ReportPeer");
|
||||
}
|
||||
|
||||
metrics.on_report_event();
|
||||
network_service.report_peer(peer, rep);
|
||||
},
|
||||
NetworkBridgeTxMessage::ReportPeer(ReportPeerMessage::Batch(batch)) => {
|
||||
for (peer, score) in batch {
|
||||
let rep = ReputationChange::new(score, "Aggregated reputation change");
|
||||
if !rep.value.is_positive() {
|
||||
gum::debug!(target: LOG_TARGET, ?peer, ?rep, action = "ReportPeer");
|
||||
}
|
||||
|
||||
metrics.on_report_event();
|
||||
network_service.report_peer(peer, rep);
|
||||
}
|
||||
},
|
||||
NetworkBridgeTxMessage::DisconnectPeers(peers, peer_set) => {
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
action = "DisconnectPeers",
|
||||
?peers,
|
||||
peer_set = ?peer_set,
|
||||
);
|
||||
|
||||
// [`NetworkService`] keeps track of the protocols by their main name.
|
||||
let protocol = peerset_protocol_names.get_main_name(peer_set);
|
||||
for peer in peers {
|
||||
network_service.disconnect_peer(peer, protocol.clone());
|
||||
}
|
||||
},
|
||||
NetworkBridgeTxMessage::SendValidationMessage(peers, msg) => {
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
action = "SendValidationMessages",
|
||||
?msg,
|
||||
num_messages = 1usize,
|
||||
);
|
||||
|
||||
match msg {
|
||||
ValidationProtocols::V3(msg) => send_validation_message_v3(
|
||||
peers,
|
||||
WireMessage::ProtocolMessage(msg),
|
||||
&metrics,
|
||||
notification_sinks,
|
||||
),
|
||||
}
|
||||
},
|
||||
NetworkBridgeTxMessage::SendValidationMessages(msgs) => {
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
action = "SendValidationMessages",
|
||||
num_messages = %msgs.len(),
|
||||
?msgs,
|
||||
);
|
||||
|
||||
for (peers, msg) in msgs {
|
||||
match msg {
|
||||
ValidationProtocols::V3(msg) => send_validation_message_v3(
|
||||
peers,
|
||||
WireMessage::ProtocolMessage(msg),
|
||||
&metrics,
|
||||
notification_sinks,
|
||||
),
|
||||
}
|
||||
}
|
||||
},
|
||||
NetworkBridgeTxMessage::SendCollationMessage(peers, msg) => {
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
action = "SendCollationMessages",
|
||||
num_messages = 1usize,
|
||||
);
|
||||
|
||||
match msg {
|
||||
CollationProtocols::V1(msg) => send_collation_message_v1(
|
||||
peers,
|
||||
WireMessage::ProtocolMessage(msg),
|
||||
&metrics,
|
||||
notification_sinks,
|
||||
),
|
||||
CollationProtocols::V2(msg) => send_collation_message_v2(
|
||||
peers,
|
||||
WireMessage::ProtocolMessage(msg),
|
||||
&metrics,
|
||||
notification_sinks,
|
||||
),
|
||||
}
|
||||
},
|
||||
NetworkBridgeTxMessage::SendCollationMessages(msgs) => {
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
action = "SendCollationMessages",
|
||||
num_messages = %msgs.len(),
|
||||
);
|
||||
|
||||
for (peers, msg) in msgs {
|
||||
match msg {
|
||||
CollationProtocols::V1(msg) => send_collation_message_v1(
|
||||
peers,
|
||||
WireMessage::ProtocolMessage(msg),
|
||||
&metrics,
|
||||
notification_sinks,
|
||||
),
|
||||
CollationProtocols::V2(msg) => send_collation_message_v2(
|
||||
peers,
|
||||
WireMessage::ProtocolMessage(msg),
|
||||
&metrics,
|
||||
notification_sinks,
|
||||
),
|
||||
}
|
||||
}
|
||||
},
|
||||
NetworkBridgeTxMessage::SendRequests(reqs, if_disconnected) => {
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
action = "SendRequests",
|
||||
num_requests = %reqs.len(),
|
||||
);
|
||||
|
||||
for req in reqs {
|
||||
match req {
|
||||
Requests::ChunkFetching(ref req) => {
|
||||
// This is not the actual request that will succeed, as we don't know yet
|
||||
// what that will be. It's only the primary request we tried.
|
||||
if req.fallback_request.is_some() {
|
||||
metrics.on_message("chunk_fetching_v2")
|
||||
} else {
|
||||
metrics.on_message("chunk_fetching_v1")
|
||||
}
|
||||
},
|
||||
Requests::AvailableDataFetchingV1(_) =>
|
||||
metrics.on_message("available_data_fetching_v1"),
|
||||
Requests::CollationFetchingV1(_) => metrics.on_message("collation_fetching_v1"),
|
||||
Requests::CollationFetchingV2(_) => metrics.on_message("collation_fetching_v2"),
|
||||
Requests::PoVFetchingV1(_) => metrics.on_message("pov_fetching_v1"),
|
||||
Requests::DisputeSendingV1(_) => metrics.on_message("dispute_sending_v1"),
|
||||
Requests::AttestedCandidateV2(_) => metrics.on_message("attested_candidate_v2"),
|
||||
}
|
||||
|
||||
network_service
|
||||
.start_request(
|
||||
&mut authority_discovery_service,
|
||||
req,
|
||||
req_protocol_names,
|
||||
if_disconnected,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
},
|
||||
NetworkBridgeTxMessage::ConnectToValidators { validator_ids, peer_set, failed } => {
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
action = "ConnectToValidators",
|
||||
peer_set = ?peer_set,
|
||||
ids = ?validator_ids,
|
||||
"Received a validator connection request",
|
||||
);
|
||||
|
||||
metrics.note_desired_peer_count(peer_set, validator_ids.len());
|
||||
|
||||
let (network_service, ads) = validator_discovery
|
||||
.on_request(
|
||||
validator_ids,
|
||||
peer_set,
|
||||
failed,
|
||||
network_service,
|
||||
authority_discovery_service,
|
||||
)
|
||||
.await;
|
||||
|
||||
return (network_service, ads);
|
||||
},
|
||||
NetworkBridgeTxMessage::ConnectToResolvedValidators { validator_addrs, peer_set } => {
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
action = "ConnectToPeers",
|
||||
peer_set = ?peer_set,
|
||||
?validator_addrs,
|
||||
"Received a resolved validator connection request",
|
||||
);
|
||||
|
||||
metrics.note_desired_peer_count(peer_set, validator_addrs.len());
|
||||
|
||||
let all_addrs = validator_addrs.into_iter().flatten().collect();
|
||||
let network_service = validator_discovery
|
||||
.on_resolved_request(all_addrs, peer_set, network_service)
|
||||
.await;
|
||||
return (network_service, authority_discovery_service);
|
||||
},
|
||||
|
||||
NetworkBridgeTxMessage::AddToResolvedValidators { validator_addrs, peer_set } => {
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
action = "AddToResolvedValidators",
|
||||
peer_set = ?peer_set,
|
||||
?validator_addrs,
|
||||
"Received a resolved validator connection request",
|
||||
);
|
||||
|
||||
let all_addrs = validator_addrs.into_iter().flatten().collect();
|
||||
let network_service = validator_discovery
|
||||
.on_add_to_resolved_request(all_addrs, peer_set, network_service)
|
||||
.await;
|
||||
return (network_service, authority_discovery_service);
|
||||
},
|
||||
}
|
||||
(network_service, authority_discovery_service)
|
||||
}
|
||||
|
||||
#[overseer::contextbounds(NetworkBridgeTx, prefix = self::overseer)]
|
||||
async fn run_network_out<N, AD, Context>(
|
||||
bridge: NetworkBridgeTx<N, AD>,
|
||||
ctx: Context,
|
||||
) -> Result<(), Error>
|
||||
where
|
||||
N: Network,
|
||||
AD: validator_discovery::AuthorityDiscovery + Clone + Sync,
|
||||
{
|
||||
let NetworkBridgeTx {
|
||||
network_service,
|
||||
authority_discovery_service,
|
||||
metrics,
|
||||
req_protocol_names,
|
||||
peerset_protocol_names,
|
||||
notification_sinks,
|
||||
} = bridge;
|
||||
|
||||
handle_subsystem_messages(
|
||||
ctx,
|
||||
network_service,
|
||||
authority_discovery_service,
|
||||
metrics,
|
||||
req_protocol_names,
|
||||
peerset_protocol_names,
|
||||
notification_sinks,
|
||||
)
|
||||
.await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -0,0 +1,371 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use super::*;
|
||||
use futures::executor;
|
||||
use pezkuwi_node_subsystem_util::TimeoutExt;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use parking_lot::Mutex;
|
||||
use std::collections::HashSet;
|
||||
|
||||
use sc_network::{
|
||||
IfDisconnected, ObservedRole as SubstrateObservedRole, ProtocolName, ReputationChange, Roles,
|
||||
};
|
||||
|
||||
use codec::DecodeAll;
|
||||
use pezkuwi_node_network_protocol::{
|
||||
peer_set::PeerSetProtocolNames,
|
||||
request_response::{outgoing::Requests, ReqProtocolNames},
|
||||
v1 as protocol_v1, v3 as protocol_v3, CollationProtocols, ObservedRole, ValidationProtocols,
|
||||
};
|
||||
use pezkuwi_node_subsystem::{FromOrchestra, OverseerSignal};
|
||||
use pezkuwi_node_subsystem_test_helpers::TestSubsystemContextHandle;
|
||||
use pezkuwi_node_subsystem_util::metered;
|
||||
use pezkuwi_primitives::{AuthorityDiscoveryId, Hash};
|
||||
use pezkuwi_primitives_test_helpers::dummy_collator_signature;
|
||||
use sc_network::Multiaddr;
|
||||
use sp_keyring::Sr25519Keyring;
|
||||
|
||||
const TIMEOUT: std::time::Duration = pezkuwi_node_subsystem_test_helpers::TestSubsystemContextHandle::<NetworkBridgeTxMessage>::TIMEOUT;
|
||||
|
||||
use crate::{network::Network, validator_discovery::AuthorityDiscovery};
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum NetworkAction {
|
||||
/// Note a change in reputation for a peer.
|
||||
ReputationChange(PeerId, ReputationChange),
|
||||
/// Disconnect a peer from the given peer-set.
|
||||
DisconnectPeer(PeerId, PeerSet),
|
||||
/// Write a notification to a given peer on the given peer-set.
|
||||
WriteNotification(PeerId, PeerSet, Vec<u8>),
|
||||
}
|
||||
|
||||
// The subsystem's view of the network.
|
||||
#[derive(Clone)]
|
||||
struct TestNetwork {
|
||||
action_tx: Arc<Mutex<metered::UnboundedMeteredSender<NetworkAction>>>,
|
||||
peerset_protocol_names: Arc<PeerSetProtocolNames>,
|
||||
}
|
||||
|
||||
// Authority discovery stand-in used by the tests in this module.
#[derive(Debug, Clone)]
struct TestAuthorityDiscovery;
|
||||
|
||||
// The test's view of the network. This receives updates from the subsystem in the form
|
||||
// of `NetworkAction`s.
|
||||
struct TestNetworkHandle {
|
||||
action_rx: metered::UnboundedMeteredReceiver<NetworkAction>,
|
||||
_peerset_protocol_names: PeerSetProtocolNames,
|
||||
notification_sinks: Arc<Mutex<HashMap<(PeerSet, PeerId), Box<dyn MessageSink>>>>,
|
||||
action_tx: Arc<Mutex<metered::UnboundedMeteredSender<NetworkAction>>>,
|
||||
}
|
||||
|
||||
struct TestMessageSink {
|
||||
peer: PeerId,
|
||||
peer_set: PeerSet,
|
||||
action_tx: Arc<Mutex<metered::UnboundedMeteredSender<NetworkAction>>>,
|
||||
}
|
||||
|
||||
impl TestMessageSink {
|
||||
fn new(
|
||||
peer: PeerId,
|
||||
peer_set: PeerSet,
|
||||
action_tx: Arc<Mutex<metered::UnboundedMeteredSender<NetworkAction>>>,
|
||||
) -> TestMessageSink {
|
||||
Self { peer, peer_set, action_tx }
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl MessageSink for TestMessageSink {
|
||||
fn send_sync_notification(&self, notification: Vec<u8>) {
|
||||
self.action_tx
|
||||
.lock()
|
||||
.unbounded_send(NetworkAction::WriteNotification(
|
||||
self.peer,
|
||||
self.peer_set,
|
||||
notification,
|
||||
))
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
async fn send_async_notification(
|
||||
&self,
|
||||
_notification: Vec<u8>,
|
||||
) -> Result<(), sc_network::error::Error> {
|
||||
unimplemented!();
|
||||
}
|
||||
}
|
||||
|
||||
fn new_test_network(
|
||||
peerset_protocol_names: PeerSetProtocolNames,
|
||||
) -> (
|
||||
TestNetwork,
|
||||
TestNetworkHandle,
|
||||
TestAuthorityDiscovery,
|
||||
Arc<Mutex<HashMap<(PeerSet, PeerId), Box<dyn MessageSink>>>>,
|
||||
) {
|
||||
let (action_tx, action_rx) = metered::unbounded();
|
||||
let notification_sinks = Arc::new(Mutex::new(HashMap::new()));
|
||||
let action_tx = Arc::new(Mutex::new(action_tx));
|
||||
|
||||
(
|
||||
TestNetwork {
|
||||
action_tx: action_tx.clone(),
|
||||
peerset_protocol_names: Arc::new(peerset_protocol_names.clone()),
|
||||
},
|
||||
TestNetworkHandle {
|
||||
action_rx,
|
||||
_peerset_protocol_names: peerset_protocol_names,
|
||||
action_tx,
|
||||
notification_sinks: notification_sinks.clone(),
|
||||
},
|
||||
TestAuthorityDiscovery,
|
||||
notification_sinks,
|
||||
)
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Network for TestNetwork {
|
||||
async fn set_reserved_peers(
|
||||
&mut self,
|
||||
_protocol: ProtocolName,
|
||||
_: HashSet<Multiaddr>,
|
||||
) -> Result<(), String> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn add_peers_to_reserved_set(
|
||||
&mut self,
|
||||
_protocol: ProtocolName,
|
||||
_: HashSet<Multiaddr>,
|
||||
) -> Result<(), String> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn remove_from_peers_set(
|
||||
&mut self,
|
||||
_protocol: ProtocolName,
|
||||
_: Vec<PeerId>,
|
||||
) -> Result<(), String> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn start_request<AD: AuthorityDiscovery>(
|
||||
&self,
|
||||
_: &mut AD,
|
||||
_: Requests,
|
||||
_: &ReqProtocolNames,
|
||||
_: IfDisconnected,
|
||||
) {
|
||||
}
|
||||
|
||||
fn report_peer(&self, who: PeerId, rep: ReputationChange) {
|
||||
self.action_tx
|
||||
.lock()
|
||||
.unbounded_send(NetworkAction::ReputationChange(who, rep))
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
fn disconnect_peer(&self, who: PeerId, protocol: ProtocolName) {
|
||||
let (peer_set, version) = self.peerset_protocol_names.try_get_protocol(&protocol).unwrap();
|
||||
assert_eq!(version, peer_set.get_main_version());
|
||||
|
||||
self.action_tx
|
||||
.lock()
|
||||
.unbounded_send(NetworkAction::DisconnectPeer(who, peer_set))
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
fn peer_role(&self, _peer_id: PeerId, handshake: Vec<u8>) -> Option<SubstrateObservedRole> {
|
||||
Roles::decode_all(&mut &handshake[..])
|
||||
.ok()
|
||||
.and_then(|role| Some(SubstrateObservedRole::from(role)))
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl validator_discovery::AuthorityDiscovery for TestAuthorityDiscovery {
|
||||
async fn get_addresses_by_authority_id(
|
||||
&mut self,
|
||||
_authority: AuthorityDiscoveryId,
|
||||
) -> Option<HashSet<Multiaddr>> {
|
||||
None
|
||||
}
|
||||
|
||||
async fn get_authority_ids_by_peer_id(
|
||||
&mut self,
|
||||
_peer_id: PeerId,
|
||||
) -> Option<HashSet<AuthorityDiscoveryId>> {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
impl TestNetworkHandle {
|
||||
// Get the next network action.
|
||||
async fn next_network_action(&mut self) -> NetworkAction {
|
||||
self.action_rx.next().await.expect("subsystem concluded early")
|
||||
}
|
||||
|
||||
async fn connect_peer(&mut self, peer: PeerId, peer_set: PeerSet, _role: ObservedRole) {
|
||||
self.notification_sinks.lock().insert(
|
||||
(peer_set, peer),
|
||||
Box::new(TestMessageSink::new(peer, peer_set, self.action_tx.clone())),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Handle through which a test plays the overseer towards the subsystem under test.
type VirtualOverseer = TestSubsystemContextHandle<NetworkBridgeTxMessage>;
|
||||
|
||||
struct TestHarness {
|
||||
network_handle: TestNetworkHandle,
|
||||
virtual_overseer: VirtualOverseer,
|
||||
}
|
||||
|
||||
fn test_harness<T: Future<Output = VirtualOverseer>>(test: impl FnOnce(TestHarness) -> T) {
|
||||
let genesis_hash = Hash::repeat_byte(0xff);
|
||||
let fork_id = None;
|
||||
let req_protocol_names = ReqProtocolNames::new(genesis_hash, fork_id);
|
||||
let peerset_protocol_names = PeerSetProtocolNames::new(genesis_hash, fork_id);
|
||||
|
||||
let pool = sp_core::testing::TaskExecutor::new();
|
||||
let (network, network_handle, discovery, network_notification_sinks) =
|
||||
new_test_network(peerset_protocol_names.clone());
|
||||
|
||||
let (context, virtual_overseer) =
|
||||
pezkuwi_node_subsystem_test_helpers::make_subsystem_context(pool);
|
||||
|
||||
let bridge_out = NetworkBridgeTx::new(
|
||||
network,
|
||||
discovery,
|
||||
Metrics(None),
|
||||
req_protocol_names,
|
||||
peerset_protocol_names,
|
||||
network_notification_sinks,
|
||||
);
|
||||
|
||||
let network_bridge_out_fut = run_network_out(bridge_out, context)
|
||||
.map_err(|e| panic!("bridge-out subsystem execution failed {:?}", e))
|
||||
.map(|_| ());
|
||||
|
||||
let test_fut = test(TestHarness { network_handle, virtual_overseer });
|
||||
|
||||
futures::pin_mut!(test_fut);
|
||||
futures::pin_mut!(network_bridge_out_fut);
|
||||
|
||||
let _ = executor::block_on(future::join(
|
||||
async move {
|
||||
let mut virtual_overseer = test_fut.await;
|
||||
virtual_overseer.send(FromOrchestra::Signal(OverseerSignal::Conclude)).await;
|
||||
},
|
||||
network_bridge_out_fut,
|
||||
));
|
||||
}
|
||||
|
||||
// A validation message and a collation message sent through the bridge must each arrive,
// wire-encoded, at the sink of the addressed peer on the matching peer set.
#[test]
fn send_messages_to_peers() {
	test_harness(|test_harness| async move {
		let TestHarness { mut network_handle, mut virtual_overseer } = test_harness;

		let peer = PeerId::random();

		// Register a notification sink for the peer on both peer sets.
		network_handle
			.connect_peer(peer, PeerSet::Validation, ObservedRole::Full)
			.timeout(TIMEOUT)
			.await
			.expect("Timeout does not occur");

		network_handle
			.connect_peer(peer, PeerSet::Collation, ObservedRole::Full)
			.timeout(TIMEOUT)
			.await
			.expect("Timeout does not occur");

		// send a validation protocol message.

		{
			let message_v3 = protocol_v3::ValidationProtocol::ApprovalDistribution(
				protocol_v3::ApprovalDistributionMessage::Approvals(Vec::new()),
			);

			virtual_overseer
				.send(FromOrchestra::Communication {
					msg: NetworkBridgeTxMessage::SendValidationMessage(
						vec![peer],
						ValidationProtocols::V3(message_v3.clone()),
					),
				})
				.timeout(TIMEOUT)
				.await
				.expect("Timeout does not occur");

			assert_eq!(
				network_handle
					.next_network_action()
					.timeout(TIMEOUT)
					.await
					.expect("Timeout does not occur"),
				NetworkAction::WriteNotification(
					peer,
					PeerSet::Validation,
					WireMessage::ProtocolMessage(message_v3).encode(),
				)
			);
		}

		// send a collation protocol message.

		{
			let message_v1 = protocol_v1::CollationProtocol::CollatorProtocol(
				protocol_v1::CollatorProtocolMessage::Declare(
					Sr25519Keyring::Alice.public().into(),
					0_u32.into(),
					dummy_collator_signature(),
				),
			);

			virtual_overseer
				.send(FromOrchestra::Communication {
					msg: NetworkBridgeTxMessage::SendCollationMessage(
						vec![peer],
						CollationProtocols::V1(message_v1.clone()),
					),
				})
				.timeout(TIMEOUT)
				.await
				.expect("Timeout does not occur");

			assert_eq!(
				network_handle
					.next_network_action()
					.timeout(TIMEOUT)
					.await
					.expect("Timeout does not occur"),
				NetworkAction::WriteNotification(
					peer,
					PeerSet::Collation,
					WireMessage::ProtocolMessage(message_v1).encode(),
				)
			);
		}
		virtual_overseer
	});
}
|
||||
@@ -0,0 +1,413 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! A validator discovery service for the Network Bridge.
|
||||
|
||||
use crate::Network;
|
||||
|
||||
use core::marker::PhantomData;
|
||||
use std::collections::HashSet;
|
||||
|
||||
use futures::channel::oneshot;
|
||||
|
||||
use sc_network::multiaddr::{self, Multiaddr};
|
||||
|
||||
pub use pezkuwi_node_network_protocol::authority_discovery::AuthorityDiscovery;
|
||||
use pezkuwi_node_network_protocol::{
|
||||
peer_set::{PeerSet, PeerSetProtocolNames, PerPeerSet},
|
||||
PeerId,
|
||||
};
|
||||
use pezkuwi_primitives::AuthorityDiscoveryId;
|
||||
|
||||
const LOG_TARGET: &str = "teyrchain::validator-discovery";
|
||||
|
||||
pub(super) struct Service<N, AD> {
|
||||
state: PerPeerSet<StatePerPeerSet>,
|
||||
peerset_protocol_names: PeerSetProtocolNames,
|
||||
// PhantomData used to make the struct generic instead of having generic methods
|
||||
_phantom: PhantomData<(N, AD)>,
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
struct StatePerPeerSet {
|
||||
previously_requested: HashSet<PeerId>,
|
||||
}
|
||||
|
||||
impl<N: Network, AD: AuthorityDiscovery> Service<N, AD> {
|
||||
pub fn new(peerset_protocol_names: PeerSetProtocolNames) -> Self {
|
||||
Self { state: Default::default(), peerset_protocol_names, _phantom: PhantomData }
|
||||
}
|
||||
|
||||
/// Connect to already resolved addresses.
|
||||
pub async fn on_resolved_request(
|
||||
&mut self,
|
||||
newly_requested: HashSet<Multiaddr>,
|
||||
peer_set: PeerSet,
|
||||
mut network_service: N,
|
||||
) -> N {
|
||||
let state = &mut self.state[peer_set];
|
||||
let new_peer_ids: HashSet<PeerId> = extract_peer_ids(newly_requested.iter().cloned());
|
||||
let num_peers = new_peer_ids.len();
|
||||
|
||||
let peers_to_remove: Vec<PeerId> =
|
||||
state.previously_requested.difference(&new_peer_ids).cloned().collect();
|
||||
let removed = peers_to_remove.len();
|
||||
state.previously_requested = new_peer_ids;
|
||||
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
?peer_set,
|
||||
?num_peers,
|
||||
?removed,
|
||||
"New ConnectToValidators resolved request",
|
||||
);
|
||||
// ask the network to connect to these nodes and not disconnect
|
||||
// from them until removed from the set
|
||||
//
|
||||
// for peer-set management, the main protocol name should be used regardless of
|
||||
// the negotiated version.
|
||||
if let Err(e) = network_service
|
||||
.set_reserved_peers(
|
||||
self.peerset_protocol_names.get_main_name(peer_set),
|
||||
newly_requested,
|
||||
)
|
||||
.await
|
||||
{
|
||||
gum::warn!(target: LOG_TARGET, err = ?e, "AuthorityDiscoveryService returned an invalid multiaddress");
|
||||
}
|
||||
|
||||
network_service
|
||||
}
|
||||
|
||||
/// Connect to already resolved addresses.
|
||||
pub async fn on_add_to_resolved_request(
|
||||
&mut self,
|
||||
newly_requested: HashSet<Multiaddr>,
|
||||
peer_set: PeerSet,
|
||||
mut network_service: N,
|
||||
) -> N {
|
||||
let state = &mut self.state[peer_set];
|
||||
let new_peer_ids: HashSet<PeerId> = extract_peer_ids(newly_requested.iter().cloned());
|
||||
let num_peers = new_peer_ids.len();
|
||||
|
||||
state.previously_requested.extend(new_peer_ids);
|
||||
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
?peer_set,
|
||||
?num_peers,
|
||||
"New add to resolved validators request",
|
||||
);
|
||||
|
||||
// ask the network to connect to these nodes and not disconnect
|
||||
// from them until they are removed from the set.
|
||||
//
|
||||
// for peer-set management, the main protocol name should be used regardless of
|
||||
// the negotiated version.
|
||||
if let Err(e) = network_service
|
||||
.add_peers_to_reserved_set(
|
||||
self.peerset_protocol_names.get_main_name(peer_set),
|
||||
newly_requested,
|
||||
)
|
||||
.await
|
||||
{
|
||||
gum::warn!(target: LOG_TARGET, err = ?e, "AuthorityDiscoveryService returned an invalid multiaddress");
|
||||
}
|
||||
|
||||
network_service
|
||||
}
|
||||
|
||||
/// On a new connection request, a peer set update will be issued.
|
||||
/// It will ask the network to connect to the validators and not disconnect
|
||||
/// from them at least until the next request is issued for the same peer set.
|
||||
///
|
||||
/// This method will also disconnect from previously connected validators not in the
|
||||
/// `validator_ids` set. it takes `network_service` and `authority_discovery_service` by value
|
||||
/// and returns them as a workaround for the Future: Send requirement imposed by async function
|
||||
/// implementation.
|
||||
pub async fn on_request(
|
||||
&mut self,
|
||||
validator_ids: Vec<AuthorityDiscoveryId>,
|
||||
peer_set: PeerSet,
|
||||
failed: oneshot::Sender<usize>,
|
||||
network_service: N,
|
||||
mut authority_discovery_service: AD,
|
||||
) -> (N, AD) {
|
||||
// collect multiaddress of validators
|
||||
let mut failed_to_resolve: usize = 0;
|
||||
let mut newly_requested = HashSet::new();
|
||||
let requested = validator_ids.len();
|
||||
for authority in validator_ids.into_iter() {
|
||||
let result = authority_discovery_service
|
||||
.get_addresses_by_authority_id(authority.clone())
|
||||
.await;
|
||||
if let Some(addresses) = result {
|
||||
newly_requested.extend(addresses);
|
||||
} else {
|
||||
failed_to_resolve += 1;
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
"Authority Discovery couldn't resolve {:?}",
|
||||
authority
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
?peer_set,
|
||||
?requested,
|
||||
?failed_to_resolve,
|
||||
"New ConnectToValidators request",
|
||||
);
|
||||
|
||||
let r = self.on_resolved_request(newly_requested, peer_set, network_service).await;
|
||||
|
||||
let _ = failed.send(failed_to_resolve);
|
||||
|
||||
(r, authority_discovery_service)
|
||||
}
|
||||
}
|
||||
|
||||
fn extract_peer_ids(multiaddr: impl Iterator<Item = Multiaddr>) -> HashSet<PeerId> {
|
||||
multiaddr
|
||||
.filter_map(|mut addr| match addr.pop() {
|
||||
Some(multiaddr::Protocol::P2p(key)) => PeerId::from_multihash(key).ok(),
|
||||
_ => None,
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::network::Network;

    use async_trait::async_trait;
    use pezkuwi_node_network_protocol::{
        request_response::{outgoing::Requests, ReqProtocolNames},
        PeerId,
    };
    use pezkuwi_primitives::Hash;
    use sc_network::{IfDisconnected, ProtocolName, ReputationChange};
    use sp_keyring::Sr25519Keyring;
    use std::collections::{HashMap, HashSet};

    /// Build a discovery service over the test doubles, with fixed protocol names.
    fn new_service() -> Service<TestNetwork, TestAuthorityDiscovery> {
        Service::new(PeerSetProtocolNames::new(Hash::repeat_byte(0xff), None))
    }

    /// Build a fresh pair of test doubles.
    fn new_network() -> (TestNetwork, TestAuthorityDiscovery) {
        (TestNetwork::default(), TestAuthorityDiscovery::new())
    }

    /// Network double that only records the reserved peer set.
    #[derive(Default, Clone)]
    struct TestNetwork {
        peers_set: HashSet<PeerId>,
    }

    /// Authority discovery double backed by two fixed lookup tables.
    #[derive(Default, Clone, Debug)]
    struct TestAuthorityDiscovery {
        by_authority_id: HashMap<AuthorityDiscoveryId, HashSet<Multiaddr>>,
        by_peer_id: HashMap<PeerId, HashSet<AuthorityDiscoveryId>>,
    }

    impl TestAuthorityDiscovery {
        /// Pair up the known authorities, addresses and (random) peer ids one-to-one.
        fn new() -> Self {
            let mut by_authority_id = HashMap::new();
            let mut by_peer_id = HashMap::new();

            let entries =
                known_authorities().into_iter().zip(known_multiaddr()).zip(known_peer_ids());
            for ((authority, mut addr), peer_id) in entries {
                // Each address ends in the `P2p` component of its peer.
                addr.push(multiaddr::Protocol::P2p(peer_id.into()));
                by_authority_id.insert(authority.clone(), HashSet::from([addr]));
                by_peer_id.insert(peer_id, HashSet::from([authority]));
            }

            Self { by_authority_id, by_peer_id }
        }
    }

    #[async_trait]
    impl Network for TestNetwork {
        async fn set_reserved_peers(
            &mut self,
            _protocol: ProtocolName,
            multiaddresses: HashSet<Multiaddr>,
        ) -> Result<(), String> {
            let peers = extract_peer_ids(multiaddresses.into_iter());
            self.peers_set = peers;
            Ok(())
        }

        async fn add_peers_to_reserved_set(
            &mut self,
            _protocol: ProtocolName,
            multiaddresses: HashSet<Multiaddr>,
        ) -> Result<(), String> {
            let peers = extract_peer_ids(multiaddresses.into_iter());
            self.peers_set.extend(peers);
            Ok(())
        }

        async fn remove_from_peers_set(
            &mut self,
            _protocol: ProtocolName,
            peers: Vec<PeerId>,
        ) -> Result<(), String> {
            for peer in &peers {
                self.peers_set.remove(peer);
            }
            Ok(())
        }

        async fn start_request<AD: AuthorityDiscovery>(
            &self,
            _: &mut AD,
            _: Requests,
            _: &ReqProtocolNames,
            _: IfDisconnected,
        ) {
        }

        // The remaining methods are not expected to be reached by these tests.
        fn report_peer(&self, _: PeerId, _: ReputationChange) {
            panic!()
        }

        fn disconnect_peer(&self, _: PeerId, _: ProtocolName) {
            panic!()
        }

        fn peer_role(
            &self,
            _peer_id: PeerId,
            _handshake: Vec<u8>,
        ) -> Option<sc_network::ObservedRole> {
            panic!()
        }
    }

    #[async_trait]
    impl AuthorityDiscovery for TestAuthorityDiscovery {
        async fn get_addresses_by_authority_id(
            &mut self,
            authority: AuthorityDiscoveryId,
        ) -> Option<HashSet<Multiaddr>> {
            let addresses = self.by_authority_id.get(&authority)?;
            Some(addresses.clone())
        }

        async fn get_authority_ids_by_peer_id(
            &mut self,
            peer_id: PeerId,
        ) -> Option<HashSet<AuthorityDiscoveryId>> {
            let authorities = self.by_peer_id.get(&peer_id)?;
            Some(authorities.clone())
        }
    }

    /// Authorities known to the discovery double.
    fn known_authorities() -> Vec<AuthorityDiscoveryId> {
        let keyrings = [Sr25519Keyring::Alice, Sr25519Keyring::Bob, Sr25519Keyring::Charlie];
        keyrings.iter().map(|keyring| keyring.public().into()).collect()
    }

    /// Three random peer ids, one per known authority.
    fn known_peer_ids() -> Vec<PeerId> {
        std::iter::repeat_with(PeerId::random).take(3).collect()
    }

    /// Base addresses (without the `P2p` component), one per known authority.
    fn known_multiaddr() -> Vec<Multiaddr> {
        ["/ip4/127.0.0.1/tcp/1234", "/ip4/127.0.0.1/tcp/1235", "/ip4/127.0.0.1/tcp/1236"]
            .iter()
            .map(|addr| addr.parse().unwrap())
            .collect()
    }

    // Test cleanup works.
    #[test]
    fn old_multiaddrs_are_removed_on_new_request() {
        let mut service = new_service();
        let (ns, ads) = new_network();

        let authority_ids: Vec<_> = ads.by_peer_id.values().flatten().cloned().collect();

        futures::executor::block_on(async move {
            // First request: only authority 0.
            let (failed, _) = oneshot::channel();
            let first = vec![authority_ids[0].clone()];
            let (ns, ads) = service.on_request(first, PeerSet::Validation, failed, ns, ads).await;

            // Second request: only authority 1, which must replace the first one.
            let (failed, _) = oneshot::channel();
            let second = vec![authority_ids[1].clone()];
            let (_, ads) = service.on_request(second, PeerSet::Validation, failed, ns, ads).await;

            let state = &service.state[PeerSet::Validation];
            assert_eq!(state.previously_requested.len(), 1);

            let addresses = ads.by_authority_id[&authority_ids[1]].iter().cloned();
            let peer_1 = extract_peer_ids(addresses).into_iter().next().unwrap();
            assert!(state.previously_requested.contains(&peer_1));
        });
    }

    #[test]
    fn failed_resolution_is_reported_properly() {
        let mut service = new_service();
        let (ns, ads) = new_network();

        let authority_ids: Vec<_> = ads.by_peer_id.values().flatten().cloned().collect();

        futures::executor::block_on(async move {
            let (failed, failed_rx) = oneshot::channel();
            // An authority the discovery double has no record of.
            let unknown = Sr25519Keyring::Ferdie.public().into();
            let request = vec![authority_ids[0].clone(), unknown];
            let (_, ads) =
                service.on_request(request, PeerSet::Validation, failed, ns, ads).await;

            let state = &service.state[PeerSet::Validation];
            assert_eq!(state.previously_requested.len(), 1);

            let addresses = ads.by_authority_id[&authority_ids[0]].iter().cloned();
            let peer_0 = extract_peer_ids(addresses).into_iter().next().unwrap();
            assert!(state.previously_requested.contains(&peer_0));

            let failed = failed_rx.await.unwrap();
            assert_eq!(failed, 1);
        });
    }
}
|
||||
@@ -0,0 +1,71 @@
|
||||
[package]
|
||||
name = "pezkuwi-collator-protocol"
|
||||
version = "7.0.0"
|
||||
description = "Pezkuwi Collator Protocol subsystem. Allows collators and validators to talk to each other."
|
||||
authors.workspace = true
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
homepage.workspace = true
|
||||
repository.workspace = true
|
||||
|
||||
[lints]
|
||||
workspace = true
|
||||
|
||||
[dependencies]
|
||||
async-trait = { workspace = true, optional = true }
|
||||
bitvec = { features = ["alloc"], workspace = true }
|
||||
futures = { workspace = true }
|
||||
futures-timer = { workspace = true }
|
||||
gum = { workspace = true, default-features = true }
|
||||
schnellru = { workspace = true }
|
||||
|
||||
sp-core = { workspace = true, default-features = true }
|
||||
sp-keystore = { workspace = true, default-features = true }
|
||||
sp-runtime = { workspace = true, default-features = true }
|
||||
|
||||
fatality = { workspace = true }
|
||||
pezkuwi-node-network-protocol = { workspace = true, default-features = true }
|
||||
pezkuwi-node-primitives = { workspace = true, default-features = true }
|
||||
pezkuwi-node-subsystem = { workspace = true, default-features = true }
|
||||
pezkuwi-node-subsystem-util = { workspace = true, default-features = true }
|
||||
pezkuwi-primitives = { workspace = true, default-features = true }
|
||||
thiserror = { workspace = true }
|
||||
tokio-util = { workspace = true }
|
||||
# This should have really been a dev-dependency but clippy is complaining that it's not used with
|
||||
# experimental-collator-protocol disabled, while the rust compiler claims that having optional
|
||||
# dev-dependencies is not possible.
|
||||
tokio = { features = [
|
||||
"macros",
|
||||
], workspace = true, default-features = true, optional = true }
|
||||
|
||||
[dev-dependencies]
|
||||
assert_matches = { workspace = true }
|
||||
rstest = { workspace = true }
|
||||
sp-tracing = { workspace = true }
|
||||
|
||||
codec = { features = ["std"], workspace = true, default-features = true }
|
||||
sc-keystore = { workspace = true, default-features = true }
|
||||
sc-network = { workspace = true, default-features = true }
|
||||
sp-core = { features = ["std"], workspace = true, default-features = true }
|
||||
sp-keyring = { workspace = true, default-features = true }
|
||||
|
||||
itertools = { workspace = true }
|
||||
pezkuwi-node-subsystem-test-helpers = { workspace = true }
|
||||
pezkuwi-primitives-test-helpers = { workspace = true }
|
||||
|
||||
[features]
|
||||
default = []
|
||||
experimental-collator-protocol = ["async-trait", "tokio"]
|
||||
runtime-benchmarks = [
|
||||
"gum/runtime-benchmarks",
|
||||
"pezkuwi-node-network-protocol/runtime-benchmarks",
|
||||
"pezkuwi-node-primitives/runtime-benchmarks",
|
||||
"pezkuwi-node-subsystem-test-helpers/runtime-benchmarks",
|
||||
"pezkuwi-node-subsystem-util/runtime-benchmarks",
|
||||
"pezkuwi-node-subsystem/runtime-benchmarks",
|
||||
"pezkuwi-primitives-test-helpers/runtime-benchmarks",
|
||||
"pezkuwi-primitives/runtime-benchmarks",
|
||||
"sc-network/runtime-benchmarks",
|
||||
"sp-keyring/runtime-benchmarks",
|
||||
"sp-runtime/runtime-benchmarks",
|
||||
]
|
||||
@@ -0,0 +1,165 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! Primitives for tracking collations-related data.
|
||||
|
||||
use std::collections::{HashSet, VecDeque};
|
||||
|
||||
use futures::{future::BoxFuture, stream::FuturesUnordered};
|
||||
|
||||
use pezkuwi_node_network_protocol::{
|
||||
request_response::{incoming::OutgoingResponse, v2 as protocol_v2, IncomingRequest},
|
||||
PeerId,
|
||||
};
|
||||
use pezkuwi_node_primitives::PoV;
|
||||
use pezkuwi_primitives::{
|
||||
CandidateHash, CandidateReceiptV2 as CandidateReceipt, Hash, HeadData, Id as ParaId,
|
||||
};
|
||||
|
||||
/// Lifecycle stage of a collation, from the collator's point of view.
#[derive(Clone, Debug, PartialEq)]
pub enum CollationStatus {
    /// The collation exists but has not been advertised to any validator yet.
    Created,
    /// At least one validator has been told about the collation.
    Advertised,
    /// At least one validator has requested the collation.
    Requested,
}

impl CollationStatus {
    /// Move to [`Self::Advertised`].
    ///
    /// A status that is already [`Self::Requested`] is left untouched, so the status
    /// never falls back from "requested" to "advertised".
    pub fn advance_to_advertised(&mut self) {
        match self {
            Self::Requested => {},
            Self::Created | Self::Advertised => *self = Self::Advertised,
        }
    }

    /// Move to [`Self::Requested`], whatever the current status is.
    pub fn advance_to_requested(&mut self) {
        *self = Self::Requested;
    }

    /// Label identifying this status in metrics.
    pub fn label(&self) -> &'static str {
        match *self {
            Self::Created => "created",
            Self::Advertised => "advertised",
            Self::Requested => "requested",
        }
    }
}
|
||||
|
||||
/// A collation built by the collator.
|
||||
pub struct Collation {
|
||||
/// Candidate receipt.
|
||||
pub receipt: CandidateReceipt,
|
||||
/// Proof to verify the state transition of the teyrchain.
|
||||
pub pov: PoV,
|
||||
/// Parent head-data
|
||||
pub parent_head_data: HeadData,
|
||||
/// Collation status.
|
||||
pub status: CollationStatus,
|
||||
}
|
||||
|
||||
/// Stores the state for waiting collation fetches per relay parent.
|
||||
#[derive(Default)]
|
||||
pub struct WaitingCollationFetches {
|
||||
/// A flag indicating that we have an ongoing request.
|
||||
/// This limits the number of collations being sent at any moment
|
||||
/// of time to 1 for each relay parent.
|
||||
///
|
||||
/// If set to `true`, any new request will be queued.
|
||||
pub collation_fetch_active: bool,
|
||||
/// The collation fetches waiting to be fulfilled.
|
||||
pub req_queue: VecDeque<VersionedCollationRequest>,
|
||||
/// All peers that are waiting or actively uploading.
|
||||
///
|
||||
/// We will not accept multiple requests from the same peer, otherwise our DoS protection of
|
||||
/// moving on to the next peer after `MAX_UNSHARED_UPLOAD_TIME` would be pointless.
|
||||
pub waiting_peers: HashSet<(PeerId, CandidateHash)>,
|
||||
}
|
||||
|
||||
/// Backwards-compatible wrapper for incoming collations requests.
|
||||
pub enum VersionedCollationRequest {
|
||||
V2(IncomingRequest<protocol_v2::CollationFetchingRequest>),
|
||||
}
|
||||
|
||||
impl From<IncomingRequest<protocol_v2::CollationFetchingRequest>> for VersionedCollationRequest {
|
||||
fn from(req: IncomingRequest<protocol_v2::CollationFetchingRequest>) -> Self {
|
||||
Self::V2(req)
|
||||
}
|
||||
}
|
||||
|
||||
impl VersionedCollationRequest {
|
||||
/// Returns teyrchain id from the request payload.
|
||||
pub fn para_id(&self) -> ParaId {
|
||||
match self {
|
||||
VersionedCollationRequest::V2(req) => req.payload.para_id,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns candidate hash from the request payload.
|
||||
pub fn candidate_hash(&self) -> CandidateHash {
|
||||
match self {
|
||||
VersionedCollationRequest::V2(req) => req.payload.candidate_hash,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns relay parent from the request payload.
|
||||
pub fn relay_parent(&self) -> Hash {
|
||||
match self {
|
||||
VersionedCollationRequest::V2(req) => req.payload.relay_parent,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns id of the peer the request was received from.
|
||||
pub fn peer_id(&self) -> PeerId {
|
||||
match self {
|
||||
VersionedCollationRequest::V2(req) => req.peer,
|
||||
}
|
||||
}
|
||||
|
||||
/// Sends the response back to requester.
|
||||
pub fn send_outgoing_response(
|
||||
self,
|
||||
response: OutgoingResponse<protocol_v2::CollationFetchingResponse>,
|
||||
) -> Result<(), ()> {
|
||||
match self {
|
||||
VersionedCollationRequest::V2(req) => req.send_outgoing_response(response),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Result of the finished background send-collation task.
|
||||
///
|
||||
/// Note that if the timeout was hit the request doesn't get
|
||||
/// aborted, it only indicates that we should start processing
|
||||
/// the next one from the queue.
|
||||
pub struct CollationSendResult {
|
||||
/// Candidate's relay parent.
|
||||
pub relay_parent: Hash,
|
||||
/// Candidate hash.
|
||||
pub candidate_hash: CandidateHash,
|
||||
/// Peer id.
|
||||
pub peer_id: PeerId,
|
||||
/// Whether the max unshared timeout was hit.
|
||||
pub timed_out: bool,
|
||||
}
|
||||
|
||||
/// Unordered set of in-flight background send-collation tasks, each of which resolves
/// to a [`CollationSendResult`].
pub type ActiveCollationFetches = FuturesUnordered<BoxFuture<'static, CollationSendResult>>;
|
||||
@@ -0,0 +1,66 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use pezkuwi_node_network_protocol::request_response::incoming;
|
||||
use pezkuwi_node_primitives::UncheckedSignedFullStatement;
|
||||
use pezkuwi_node_subsystem::{errors::SubsystemError, RuntimeApiError};
|
||||
use pezkuwi_node_subsystem_util::{backing_implicit_view, runtime};
|
||||
|
||||
use crate::LOG_TARGET;
|
||||
|
||||
/// General result.
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
|
||||
use fatality::Nested;
|
||||
|
||||
#[allow(missing_docs)]
|
||||
#[fatality::fatality(splitable)]
|
||||
pub enum Error {
|
||||
#[fatal]
|
||||
#[error("Receiving message from overseer failed")]
|
||||
SubsystemReceive(#[from] SubsystemError),
|
||||
|
||||
#[fatal(forward)]
|
||||
#[error("Retrieving next incoming request failed")]
|
||||
IncomingRequest(#[from] incoming::Error),
|
||||
|
||||
#[fatal(forward)]
|
||||
#[error("Error while accessing runtime information")]
|
||||
Runtime(#[from] runtime::Error),
|
||||
|
||||
#[error("Error while accessing Runtime API")]
|
||||
RuntimeApi(#[from] RuntimeApiError),
|
||||
|
||||
#[error(transparent)]
|
||||
ImplicitViewFetchError(backing_implicit_view::FetchError),
|
||||
|
||||
#[error("CollationSeconded contained statement with invalid signature")]
|
||||
InvalidStatementSignature(UncheckedSignedFullStatement),
|
||||
}
|
||||
|
||||
/// Utility for eating top level errors and log them.
|
||||
///
|
||||
/// We basically always want to try and continue on error. This utility function is meant to
|
||||
/// consume top-level errors by simply logging them.
|
||||
pub fn log_error(result: Result<()>, ctx: &'static str) -> std::result::Result<(), FatalError> {
|
||||
match result.into_nested()? {
|
||||
Ok(()) => Ok(()),
|
||||
Err(jfyi) => {
|
||||
gum::warn!(target: LOG_TARGET, error = ?jfyi, ctx);
|
||||
Ok(())
|
||||
},
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,608 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use std::{
|
||||
collections::HashMap,
|
||||
time::{Duration, Instant},
|
||||
};
|
||||
|
||||
use pezkuwi_node_subsystem::prometheus::prometheus::HistogramTimer;
|
||||
use pezkuwi_node_subsystem_util::metrics::{self, prometheus};
|
||||
use pezkuwi_primitives::{BlockNumber, CandidateReceiptV2 as CandidateReceipt, Hash};
|
||||
use sp_core::H256;
|
||||
|
||||
use super::collation::CollationStatus;
|
||||
|
||||
#[derive(Clone, Default)]
|
||||
pub struct Metrics(Option<MetricsInner>);
|
||||
|
||||
impl Metrics {
|
||||
/// Record the time a collation took to be backed.
|
||||
pub fn on_collation_backed(&self, latency: f64) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.collation_backing_latency.observe(latency);
|
||||
}
|
||||
}
|
||||
|
||||
/// Record the time a collation took to be included.
|
||||
pub fn on_collation_included(&self, latency: f64) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.collation_inclusion_latency.observe(latency);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn on_advertisement_made(&self) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.advertisements_made.inc();
|
||||
}
|
||||
}
|
||||
|
||||
pub fn on_collation_sent_requested(&self) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.collations_send_requested.inc();
|
||||
}
|
||||
}
|
||||
|
||||
pub fn on_collation_sent(&self) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.collations_sent.inc();
|
||||
}
|
||||
}
|
||||
|
||||
/// Provide a timer for `process_msg` which observes on drop.
|
||||
pub fn time_process_msg(&self) -> Option<prometheus::prometheus::HistogramTimer> {
|
||||
self.0.as_ref().map(|metrics| metrics.process_msg.start_timer())
|
||||
}
|
||||
|
||||
/// Provide a timer for `distribute_collation` which observes on drop.
|
||||
pub fn time_collation_distribution(
|
||||
&self,
|
||||
label: &'static str,
|
||||
) -> Option<prometheus::prometheus::HistogramTimer> {
|
||||
self.0.as_ref().map(|metrics| {
|
||||
metrics.collation_distribution_time.with_label_values(&[label]).start_timer()
|
||||
})
|
||||
}
|
||||
|
||||
/// Create a timer to measure how much time collations spend before being fetched.
|
||||
pub fn time_collation_fetch_latency(&self) -> Option<prometheus::prometheus::HistogramTimer> {
|
||||
self.0.as_ref().map(|metrics| metrics.collation_fetch_latency.start_timer())
|
||||
}
|
||||
|
||||
/// Create a timer to measure how much time it takes for fetched collations to be backed.
|
||||
pub fn time_collation_backing_latency(&self) -> Option<prometheus::prometheus::HistogramTimer> {
|
||||
self.0
|
||||
.as_ref()
|
||||
.map(|metrics| metrics.collation_backing_latency_time.start_timer())
|
||||
}
|
||||
|
||||
/// Record the time a collation took before expiring.
|
||||
/// Collations can expire in the following states: "advertised, fetched or backed"
|
||||
pub fn on_collation_expired(&self, latency: f64, state: &'static str) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.collation_expired_total.with_label_values(&[state]).observe(latency);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
struct MetricsInner {
|
||||
advertisements_made: prometheus::Counter<prometheus::U64>,
|
||||
collations_sent: prometheus::Counter<prometheus::U64>,
|
||||
collations_send_requested: prometheus::Counter<prometheus::U64>,
|
||||
process_msg: prometheus::Histogram,
|
||||
collation_distribution_time: prometheus::HistogramVec,
|
||||
collation_fetch_latency: prometheus::Histogram,
|
||||
collation_backing_latency_time: prometheus::Histogram,
|
||||
collation_backing_latency: prometheus::Histogram,
|
||||
collation_inclusion_latency: prometheus::Histogram,
|
||||
collation_expired_total: prometheus::HistogramVec,
|
||||
}
|
||||
|
||||
impl metrics::Metrics for Metrics {
|
||||
fn try_register(
|
||||
registry: &prometheus::Registry,
|
||||
) -> std::result::Result<Self, prometheus::PrometheusError> {
|
||||
let metrics = MetricsInner {
|
||||
advertisements_made: prometheus::register(
|
||||
prometheus::Counter::new(
|
||||
"pezkuwi_teyrchain_collation_advertisements_made_total",
|
||||
"A number of collation advertisements sent to validators.",
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
collations_send_requested: prometheus::register(
|
||||
prometheus::Counter::new(
|
||||
"pezkuwi_teyrchain_collations_sent_requested_total",
|
||||
"A number of collations requested to be sent to validators.",
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
collations_sent: prometheus::register(
|
||||
prometheus::Counter::new(
|
||||
"pezkuwi_teyrchain_collations_sent_total",
|
||||
"A number of collations sent to validators.",
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
process_msg: prometheus::register(
|
||||
prometheus::Histogram::with_opts(
|
||||
prometheus::HistogramOpts::new(
|
||||
"pezkuwi_teyrchain_collator_protocol_collator_process_msg",
|
||||
"Time spent within `collator_protocol_collator::process_msg`",
|
||||
)
|
||||
.buckets(vec![
|
||||
0.001, 0.002, 0.005, 0.01, 0.025, 0.05, 0.1, 0.15, 0.25, 0.35, 0.5, 0.75,
|
||||
1.0,
|
||||
]),
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
collation_distribution_time: prometheus::register(
|
||||
prometheus::HistogramVec::new(
|
||||
prometheus::HistogramOpts::new(
|
||||
"pezkuwi_teyrchain_collator_protocol_collator_distribution_time",
|
||||
"Time spent within `collator_protocol_collator::distribute_collation`",
|
||||
)
|
||||
.buckets(vec![
|
||||
0.001, 0.002, 0.005, 0.01, 0.025, 0.05, 0.1, 0.15, 0.25, 0.35, 0.5, 0.75,
|
||||
1.0,
|
||||
]),
|
||||
&["state"],
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
collation_fetch_latency: prometheus::register(
|
||||
prometheus::Histogram::with_opts(
|
||||
prometheus::HistogramOpts::new(
|
||||
"pezkuwi_teyrchain_collation_fetch_latency",
|
||||
"How much time collations spend waiting to be fetched",
|
||||
)
|
||||
.buckets(vec![
|
||||
0.001, 0.01, 0.025, 0.05, 0.1, 0.15, 0.25, 0.35, 0.5, 0.75, 1.0, 2.0, 5.0,
|
||||
]),
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
collation_backing_latency_time: prometheus::register(
|
||||
prometheus::Histogram::with_opts(
|
||||
prometheus::HistogramOpts::new(
|
||||
"pezkuwi_teyrchain_collation_backing_latency_time",
|
||||
"How much time it takes for a fetched collation to be backed",
|
||||
)
|
||||
.buckets(vec![
|
||||
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 8.0, 10.0, 12.0, 15.0, 18.0, 24.0, 30.0,
|
||||
]),
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
collation_backing_latency: prometheus::register(
|
||||
prometheus::Histogram::with_opts(
|
||||
prometheus::HistogramOpts::new(
|
||||
"pezkuwi_teyrchain_collation_backing_latency",
|
||||
"How many blocks away from the relay parent are collations backed",
|
||||
)
|
||||
.buckets(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]),
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
collation_inclusion_latency: prometheus::register(
|
||||
prometheus::Histogram::with_opts(
|
||||
prometheus::HistogramOpts::new(
|
||||
"pezkuwi_teyrchain_collation_inclusion_latency",
|
||||
"How many blocks it takes for a backed collation to be included",
|
||||
)
|
||||
.buckets(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]),
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
collation_expired_total: prometheus::register(
|
||||
prometheus::HistogramVec::new(
|
||||
prometheus::HistogramOpts::new(
|
||||
"pezkuwi_teyrchain_collation_expired",
|
||||
"How many collations expired (not backed or not included)",
|
||||
)
|
||||
.buckets(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]),
|
||||
&["state"],
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
};
|
||||
|
||||
Ok(Metrics(Some(metrics)))
|
||||
}
|
||||
}
|
||||
|
||||
// Equal to claim queue length.
|
||||
pub(crate) const MAX_BACKING_DELAY: BlockNumber = 3;
|
||||
// Paras availability period. In practice, candidates time out in exceptional situations.
|
||||
pub(crate) const MAX_AVAILABILITY_DELAY: BlockNumber = 10;
|
||||
|
||||
/// Collations are kept in the tracker, until they are included or expired
|
||||
#[derive(Default)]
|
||||
pub(crate) struct CollationTracker {
|
||||
/// All un-expired collation entries
|
||||
entries: HashMap<Hash, CollationStats>,
|
||||
}
|
||||
|
||||
impl CollationTracker {
|
||||
/// Mark a tracked collation as backed.
|
||||
///
|
||||
/// Block built on top of N is earliest backed at N + 1.
|
||||
pub fn collation_backed(
|
||||
&mut self,
|
||||
block_number: BlockNumber,
|
||||
leaf: H256,
|
||||
receipt: CandidateReceipt,
|
||||
) {
|
||||
let head = receipt.descriptor.para_head();
|
||||
let Some(entry) = self.entries.get_mut(&head) else {
|
||||
gum::debug!(
|
||||
target: crate::LOG_TARGET_STATS,
|
||||
?head,
|
||||
"Backed collation not found in tracker",
|
||||
);
|
||||
return;
|
||||
};
|
||||
|
||||
if entry.backed().is_some() {
|
||||
gum::debug!(
|
||||
target: crate::LOG_TARGET_STATS,
|
||||
?head,
|
||||
"Collation already backed in a fork, skipping",
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
entry.set_backed_at(block_number);
|
||||
if let Some(latency) = entry.backed() {
|
||||
// Observe the backing latency since the collation was fetched.
|
||||
let maybe_latency =
|
||||
entry.backed_latency_metric.take().map(|metric| metric.stop_and_record());
|
||||
gum::debug!(
|
||||
target: crate::LOG_TARGET_STATS,
|
||||
latency_blocks = ?latency,
|
||||
latency_time = ?maybe_latency,
|
||||
relay_block = ?leaf,
|
||||
relay_parent = ?entry.relay_parent,
|
||||
para_id = ?receipt.descriptor.para_id(),
|
||||
?head,
|
||||
"A fetched collation was backed on relay chain",
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// Mark a previously backed collation as included.
|
||||
///
|
||||
/// Block built on top of N is earliest included at N + 2.
|
||||
pub fn collation_included(
|
||||
&mut self,
|
||||
block_number: BlockNumber,
|
||||
leaf: H256,
|
||||
receipt: CandidateReceipt,
|
||||
) {
|
||||
let head = receipt.descriptor.para_head();
|
||||
let para_id = receipt.descriptor.para_id();
|
||||
let Some(entry) = self.entries.get_mut(&head) else {
|
||||
gum::debug!(
|
||||
target: crate::LOG_TARGET_STATS,
|
||||
?para_id,
|
||||
?head,
|
||||
"Included collation not found in tracker",
|
||||
);
|
||||
return;
|
||||
};
|
||||
|
||||
let pov_hash = entry.pov_hash();
|
||||
let candidate_hash = entry.candidate_hash();
|
||||
|
||||
if entry.included().is_some() {
|
||||
gum::debug!(
|
||||
target: crate::LOG_TARGET_STATS,
|
||||
?para_id,
|
||||
?head,
|
||||
?candidate_hash,
|
||||
?pov_hash,
|
||||
"Collation already included in a fork, skipping",
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
entry.set_included_at(block_number);
|
||||
if let Some(latency) = entry.included() {
|
||||
gum::debug!(
|
||||
target: crate::LOG_TARGET_STATS,
|
||||
?latency,
|
||||
relay_block = ?leaf,
|
||||
relay_parent = ?entry.relay_parent,
|
||||
?para_id,
|
||||
?head,
|
||||
?candidate_hash,
|
||||
?pov_hash,
|
||||
"Collation included on relay chain",
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns all the collations that have expired at `block_number`.
|
||||
pub fn drain_expired(&mut self, block_number: BlockNumber) -> Vec<CollationStats> {
|
||||
let expired = self
|
||||
.entries
|
||||
.iter()
|
||||
.filter_map(|(head, entry)| entry.is_tracking_expired(block_number).then_some(*head))
|
||||
.collect::<Vec<_>>();
|
||||
expired
|
||||
.iter()
|
||||
.filter_map(|head| self.entries.remove(head))
|
||||
.map(|mut entry| {
|
||||
entry.set_expired_at(block_number);
|
||||
entry
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
|
||||
/// Drain and return all collations that are possibly finalized at `block_number`.
|
||||
///
|
||||
/// We only track the inclusion block number, not the inclusion block hash.
|
||||
/// There is a small chance that a collation was included in a fork that is not finalized.
|
||||
pub fn drain_finalized(&mut self, block_number: BlockNumber) -> Vec<CollationStats> {
|
||||
let finalized = self
|
||||
.entries
|
||||
.iter()
|
||||
.filter_map(|(head, entry)| entry.is_possibly_finalized(block_number).then_some(*head))
|
||||
.collect::<Vec<_>>();
|
||||
finalized
|
||||
.iter()
|
||||
.filter_map(|head| self.entries.remove(head))
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
|
||||
/// Track a collation for a given period of time (TTL). TTL depends
|
||||
/// on the collation state.
|
||||
/// Collation is evicted after it expires.
|
||||
pub fn track(&mut self, mut stats: CollationStats) {
|
||||
// Disable the fetch timer, to prevent bogus observe on drop.
|
||||
if let Some(fetch_latency_metric) = stats.fetch_latency_metric.take() {
|
||||
fetch_latency_metric.stop_and_discard();
|
||||
}
|
||||
|
||||
if let Some(entry) = self
|
||||
.entries
|
||||
.values()
|
||||
.find(|entry| entry.relay_parent_number == stats.relay_parent_number)
|
||||
{
|
||||
gum::debug!(
|
||||
target: crate::LOG_TARGET_STATS,
|
||||
?stats.relay_parent_number,
|
||||
?stats.relay_parent,
|
||||
entry_relay_parent = ?entry.relay_parent,
|
||||
"Collation built on a fork",
|
||||
);
|
||||
}
|
||||
|
||||
self.entries.insert(stats.head, stats);
|
||||
}
|
||||
}
|
||||
|
||||
/// Information about how collations live their lives.
|
||||
pub(crate) struct CollationStats {
|
||||
/// The pre-backing collation status information
|
||||
pre_backing_status: CollationStatus,
|
||||
/// The block header hash.
|
||||
head: Hash,
|
||||
/// The relay parent on top of which collation was built
|
||||
relay_parent_number: BlockNumber,
|
||||
/// The relay parent hash.
|
||||
relay_parent: Hash,
|
||||
/// The expiration block number if expired.
|
||||
expired_at: Option<BlockNumber>,
|
||||
/// The backed block number.
|
||||
backed_at: Option<BlockNumber>,
|
||||
/// The included block number if backed.
|
||||
included_at: Option<BlockNumber>,
|
||||
/// The collation fetch time.
|
||||
fetched_at: Option<Instant>,
|
||||
/// Advertisement time
|
||||
advertised_at: Instant,
|
||||
/// The collation fetch latency (seconds).
|
||||
fetch_latency_metric: Option<HistogramTimer>,
|
||||
/// The collation backing latency (seconds). Duration since collation fetched
|
||||
/// until the import of a relay chain block where collation is backed.
|
||||
backed_latency_metric: Option<HistogramTimer>,
|
||||
/// The Collation candidate hash
|
||||
candidate_hash: Hash,
|
||||
/// The Collation PoV hash
|
||||
pov_hash: Hash,
|
||||
}
|
||||
|
||||
impl CollationStats {
|
||||
/// Create new empty instance.
|
||||
pub fn new(
|
||||
head: Hash,
|
||||
relay_parent_number: BlockNumber,
|
||||
relay_parent: Hash,
|
||||
metrics: &Metrics,
|
||||
candidate_hash: Hash,
|
||||
pov_hash: Hash,
|
||||
) -> Self {
|
||||
Self {
|
||||
pre_backing_status: CollationStatus::Created,
|
||||
head,
|
||||
relay_parent_number,
|
||||
relay_parent,
|
||||
advertised_at: std::time::Instant::now(),
|
||||
backed_at: None,
|
||||
expired_at: None,
|
||||
fetched_at: None,
|
||||
included_at: None,
|
||||
fetch_latency_metric: metrics.time_collation_fetch_latency(),
|
||||
backed_latency_metric: None,
|
||||
candidate_hash,
|
||||
pov_hash,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the hash and number of the relay parent.
|
||||
pub fn relay_parent(&self) -> (Hash, BlockNumber) {
|
||||
(self.relay_parent, self.relay_parent_number)
|
||||
}
|
||||
|
||||
/// Returns the age at which the collation expired.
|
||||
pub fn expired(&self) -> Option<BlockNumber> {
|
||||
let expired_at = self.expired_at?;
|
||||
Some(expired_at.saturating_sub(self.relay_parent_number))
|
||||
}
|
||||
|
||||
/// Returns the age of the collation at the moment of backing.
|
||||
pub fn backed(&self) -> Option<BlockNumber> {
|
||||
let backed_at = self.backed_at?;
|
||||
Some(backed_at.saturating_sub(self.relay_parent_number))
|
||||
}
|
||||
|
||||
/// Returns the age of the collation at the moment of inclusion.
|
||||
pub fn included(&self) -> Option<BlockNumber> {
|
||||
let included_at = self.included_at?;
|
||||
let backed_at = self.backed_at?;
|
||||
Some(included_at.saturating_sub(backed_at))
|
||||
}
|
||||
|
||||
/// Returns time the collation waited to be fetched.
|
||||
pub fn fetch_latency(&self) -> Option<Duration> {
|
||||
let fetched_at = self.fetched_at?;
|
||||
Some(fetched_at - self.advertised_at)
|
||||
}
|
||||
|
||||
/// Get teyrchain block header hash.
|
||||
pub fn head(&self) -> H256 {
|
||||
self.head
|
||||
}
|
||||
|
||||
/// Get candidate hash.
|
||||
pub fn candidate_hash(&self) -> H256 {
|
||||
self.candidate_hash
|
||||
}
|
||||
|
||||
/// Get candidate PoV hash.
|
||||
pub fn pov_hash(&self) -> H256 {
|
||||
self.pov_hash
|
||||
}
|
||||
|
||||
/// Set the timestamp at which collation is fetched.
|
||||
pub fn set_fetched_at(&mut self, fetched_at: Instant) {
|
||||
self.fetched_at = Some(fetched_at);
|
||||
}
|
||||
|
||||
/// Set the timestamp at which collation is backed.
|
||||
pub fn set_backed_at(&mut self, backed_at: BlockNumber) {
|
||||
self.backed_at = Some(backed_at);
|
||||
}
|
||||
|
||||
/// Set the timestamp at which collation is included.
|
||||
pub fn set_included_at(&mut self, included_at: BlockNumber) {
|
||||
self.included_at = Some(included_at);
|
||||
}
|
||||
|
||||
/// Set the timestamp at which collation is expired.
|
||||
pub fn set_expired_at(&mut self, expired_at: BlockNumber) {
|
||||
self.expired_at = Some(expired_at);
|
||||
}
|
||||
|
||||
/// Sets the pre-backing status of the collation.
|
||||
pub fn set_pre_backing_status(&mut self, status: CollationStatus) {
|
||||
self.pre_backing_status = status;
|
||||
}
|
||||
|
||||
/// Returns the pre-backing status of the collation.
|
||||
pub fn pre_backing_status(&self) -> &CollationStatus {
|
||||
&self.pre_backing_status
|
||||
}
|
||||
|
||||
/// Take the fetch latency metric timer.
|
||||
pub fn take_fetch_latency_metric(&mut self) -> Option<HistogramTimer> {
|
||||
self.fetch_latency_metric.take()
|
||||
}
|
||||
|
||||
/// Set the backing latency metric timer.
|
||||
pub fn set_backed_latency_metric(&mut self, timer: Option<HistogramTimer>) {
|
||||
self.backed_latency_metric = timer;
|
||||
}
|
||||
|
||||
/// Returns the time to live for the collation.
|
||||
pub fn tracking_ttl(&self) -> BlockNumber {
|
||||
if self.fetch_latency().is_none() {
|
||||
0 // Collation was never fetched, expires ASAP
|
||||
} else if self.backed().is_none() {
|
||||
MAX_BACKING_DELAY
|
||||
} else if self.included().is_none() {
|
||||
self.backed().expect("backed, checked above") + MAX_AVAILABILITY_DELAY
|
||||
} else {
|
||||
0 // If block included no reason to track it.
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the state of the collation at the moment of expiry.
|
||||
pub fn expiry_state(&self) -> &'static str {
|
||||
if self.fetch_latency().is_none() {
|
||||
// If collation was not fetched, we rely on the status provided
|
||||
// by the collator protocol.
|
||||
self.pre_backing_status().label()
|
||||
} else if self.backed().is_none() {
|
||||
"fetched"
|
||||
} else if self.included().is_none() {
|
||||
"backed"
|
||||
} else {
|
||||
"none"
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if the collation is expired.
|
||||
pub fn is_tracking_expired(&self, current_block: BlockNumber) -> bool {
|
||||
// Don't expire included collations
|
||||
if self.included().is_some() {
|
||||
return false;
|
||||
}
|
||||
let expiry_block = self.relay_parent_number + self.tracking_ttl();
|
||||
expiry_block <= current_block
|
||||
}
|
||||
|
||||
/// Check if this collation is possibly finalized based on block number.
|
||||
///
|
||||
/// Returns `true` if the collation was included at or before `last_finalized`.
|
||||
///
|
||||
/// We only track the inclusion block number, not the inclusion block hash.
|
||||
/// There is a small chance that a collation was included in a fork that is not finalized.
|
||||
pub fn is_possibly_finalized(&self, last_finalized: BlockNumber) -> bool {
|
||||
self.included_at
|
||||
.map(|included_at| included_at <= last_finalized)
|
||||
.unwrap_or_default()
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for CollationStats {
|
||||
fn drop(&mut self) {
|
||||
if let Some(fetch_latency_metric) = self.fetch_latency_metric.take() {
|
||||
// This metric is only observed when collation was sent fully to the validator.
|
||||
//
|
||||
// If `fetch_latency_metric` is Some it means that the metrics was observed.
|
||||
// We don't want to observe it again and report a higher value at a later point in time.
|
||||
fetch_latency_metric.stop_and_discard();
|
||||
}
|
||||
// If timer still exists, drop it. It is measured in `collation_backed`.
|
||||
if let Some(backed_latency_metric) = self.backed_latency_metric.take() {
|
||||
backed_latency_metric.stop_and_discard();
|
||||
}
|
||||
}
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
+883
@@ -0,0 +1,883 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! Tests for the collator side with enabled prospective teyrchains.
|
||||
|
||||
use super::*;
|
||||
|
||||
use pezkuwi_node_subsystem::messages::ChainApiMessage;
|
||||
use pezkuwi_primitives::Header;
|
||||
use rstest::rstest;
|
||||
|
||||
fn get_parent_hash(hash: Hash) -> Hash {
|
||||
Hash::from_low_u64_be(hash.to_low_u64_be() + 1)
|
||||
}
|
||||
|
||||
/// Handle a view update.
|
||||
pub(super) async fn update_view(
|
||||
expected_connected: Option<Vec<AuthorityDiscoveryId>>,
|
||||
test_state: &TestState,
|
||||
virtual_overseer: &mut VirtualOverseer,
|
||||
new_view: Vec<(Hash, u32)>, // Hash and block number.
|
||||
activated: u8, // How many new heads does this update contain?
|
||||
) {
|
||||
let new_view: HashMap<Hash, u32> = HashMap::from_iter(new_view);
|
||||
|
||||
let our_view = OurView::new(new_view.keys().map(|hash| *hash), 0);
|
||||
|
||||
overseer_send(
|
||||
virtual_overseer,
|
||||
CollatorProtocolMessage::NetworkBridgeUpdate(NetworkBridgeEvent::OurViewChange(our_view)),
|
||||
)
|
||||
.await;
|
||||
|
||||
for _ in 0..activated {
|
||||
assert_matches!(
|
||||
overseer_recv(virtual_overseer).await,
|
||||
AllMessages::RuntimeApi(RuntimeApiMessage::Request(
|
||||
_,
|
||||
RuntimeApiRequest::SessionIndexForChild(tx),
|
||||
)) => {
|
||||
tx.send(Ok(test_state.current_session_index())).unwrap();
|
||||
}
|
||||
);
|
||||
|
||||
// obtain the claim queue schedule.
|
||||
let (leaf_hash, leaf_number) = assert_matches!(
|
||||
overseer_recv(virtual_overseer).await,
|
||||
AllMessages::RuntimeApi(RuntimeApiMessage::Request(
|
||||
parent,
|
||||
RuntimeApiRequest::ClaimQueue(tx),
|
||||
)) => {
|
||||
tx.send(Ok(test_state.claim_queue.clone())).unwrap();
|
||||
(parent, new_view.get(&parent).copied().expect("Unknown parent requested"))
|
||||
}
|
||||
);
|
||||
|
||||
let min_number = leaf_number.saturating_sub(SCHEDULING_LOOKAHEAD as u32 - 1);
|
||||
|
||||
let ancestry_len = leaf_number + 1 - min_number;
|
||||
let ancestry_hashes = std::iter::successors(Some(leaf_hash), |h| Some(get_parent_hash(*h)))
|
||||
.take(ancestry_len as usize);
|
||||
let ancestry_numbers = (min_number..=leaf_number).rev();
|
||||
let mut ancestry_iter = ancestry_hashes.clone().zip(ancestry_numbers).peekable();
|
||||
if let Some((hash, number)) = ancestry_iter.next() {
|
||||
assert_matches!(
|
||||
overseer_recv_with_timeout(virtual_overseer, Duration::from_millis(50)).await.unwrap(),
|
||||
AllMessages::ChainApi(ChainApiMessage::BlockHeader(.., tx)) => {
|
||||
let header = Header {
|
||||
parent_hash: get_parent_hash(hash),
|
||||
number,
|
||||
state_root: Hash::zero(),
|
||||
extrinsics_root: Hash::zero(),
|
||||
digest: Default::default(),
|
||||
};
|
||||
|
||||
tx.send(Ok(Some(header))).unwrap();
|
||||
}
|
||||
);
|
||||
|
||||
assert_matches!(
|
||||
overseer_recv_with_timeout(virtual_overseer, Duration::from_millis(50)).await.unwrap(),
|
||||
AllMessages::RuntimeApi(
|
||||
RuntimeApiMessage::Request(
|
||||
..,
|
||||
RuntimeApiRequest::SessionIndexForChild(
|
||||
tx
|
||||
)
|
||||
)
|
||||
) => {
|
||||
tx.send(Ok(1)).unwrap();
|
||||
}
|
||||
);
|
||||
|
||||
assert_matches!(
|
||||
overseer_recv_with_timeout(virtual_overseer, Duration::from_millis(50)).await.unwrap(),
|
||||
AllMessages::RuntimeApi(
|
||||
RuntimeApiMessage::Request(
|
||||
..,
|
||||
RuntimeApiRequest::SchedulingLookahead(
|
||||
session_index,
|
||||
tx
|
||||
)
|
||||
)
|
||||
) => {
|
||||
assert_eq!(session_index, 1);
|
||||
tx.send(Ok(SCHEDULING_LOOKAHEAD as u32)).unwrap();
|
||||
}
|
||||
);
|
||||
|
||||
assert_matches!(
|
||||
overseer_recv_with_timeout(virtual_overseer, Duration::from_millis(50)).await.unwrap(),
|
||||
AllMessages::ChainApi(
|
||||
ChainApiMessage::Ancestors {
|
||||
k,
|
||||
response_channel: tx,
|
||||
..
|
||||
}
|
||||
) => {
|
||||
assert_eq!(k, SCHEDULING_LOOKAHEAD - 1);
|
||||
let hashes: Vec<_> = ancestry_hashes.clone().skip(1).into_iter().collect();
|
||||
assert_eq!(k, hashes.len());
|
||||
tx.send(Ok(hashes)).unwrap();
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
for _ in ancestry_iter.clone() {
|
||||
assert_matches!(
|
||||
overseer_recv_with_timeout(virtual_overseer, Duration::from_millis(50)).await.unwrap(),
|
||||
AllMessages::RuntimeApi(
|
||||
RuntimeApiMessage::Request(
|
||||
..,
|
||||
RuntimeApiRequest::SessionIndexForChild(
|
||||
tx
|
||||
)
|
||||
)
|
||||
) => {
|
||||
tx.send(Ok(1)).unwrap();
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
let mut iter_clone = ancestry_iter.clone();
|
||||
while let Some((hash, number)) = iter_clone.next() {
|
||||
// May be `None` for the last element.
|
||||
let parent_hash =
|
||||
iter_clone.peek().map(|(h, _)| *h).unwrap_or_else(|| get_parent_hash(hash));
|
||||
|
||||
let Some(msg) =
|
||||
overseer_peek_with_timeout(virtual_overseer, Duration::from_millis(50)).await
|
||||
else {
|
||||
return;
|
||||
};
|
||||
|
||||
if !matches!(
|
||||
&msg,
|
||||
AllMessages::ChainApi(ChainApiMessage::BlockHeader(_hash, ..))
|
||||
if *_hash == hash
|
||||
) {
|
||||
// Ancestry has already been cached for this leaf.
|
||||
break;
|
||||
}
|
||||
|
||||
assert_matches!(
|
||||
overseer_recv_with_timeout(virtual_overseer, Duration::from_millis(50)).await.unwrap(),
|
||||
AllMessages::ChainApi(ChainApiMessage::BlockHeader(.., tx)) => {
|
||||
let header = Header {
|
||||
parent_hash,
|
||||
number,
|
||||
state_root: Hash::zero(),
|
||||
extrinsics_root: Hash::zero(),
|
||||
digest: Default::default(),
|
||||
};
|
||||
|
||||
tx.send(Ok(Some(header))).unwrap();
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
for (_core, _paras) in test_state
|
||||
.claim_queue
|
||||
.iter()
|
||||
.filter(|(_, paras)| paras.contains(&test_state.para_id))
|
||||
{
|
||||
expect_determine_validator_group(virtual_overseer, &test_state).await;
|
||||
}
|
||||
|
||||
for _ in ancestry_iter {
|
||||
while let Some(msg) =
|
||||
overseer_peek_with_timeout(virtual_overseer, Duration::from_millis(50)).await
|
||||
{
|
||||
if !matches!(
|
||||
&msg,
|
||||
AllMessages::RuntimeApi(RuntimeApiMessage::Request(
|
||||
_,
|
||||
RuntimeApiRequest::ClaimQueue(_),
|
||||
))
|
||||
) && !matches!(
|
||||
&msg,
|
||||
AllMessages::RuntimeApi(RuntimeApiMessage::Request(
|
||||
_,
|
||||
RuntimeApiRequest::CandidateEvents(_),
|
||||
))
|
||||
) && !matches!(
|
||||
&msg,
|
||||
AllMessages::RuntimeApi(RuntimeApiMessage::Request(
|
||||
_,
|
||||
RuntimeApiRequest::SessionIndexForChild(_),
|
||||
))
|
||||
) {
|
||||
break;
|
||||
}
|
||||
|
||||
if matches!(
|
||||
&msg,
|
||||
AllMessages::RuntimeApi(RuntimeApiMessage::Request(
|
||||
_,
|
||||
RuntimeApiRequest::SessionIndexForChild(_),
|
||||
))
|
||||
) {
|
||||
for (_core, _paras) in test_state
|
||||
.claim_queue
|
||||
.iter()
|
||||
.filter(|(_, paras)| paras.contains(&test_state.para_id))
|
||||
{
|
||||
expect_determine_validator_group(virtual_overseer, &test_state).await;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
match overseer_recv_with_timeout(virtual_overseer, Duration::from_millis(50))
|
||||
.await
|
||||
.unwrap()
|
||||
{
|
||||
AllMessages::RuntimeApi(RuntimeApiMessage::Request(
|
||||
_,
|
||||
RuntimeApiRequest::ClaimQueue(tx),
|
||||
)) => {
|
||||
tx.send(Ok(test_state.claim_queue.clone())).unwrap();
|
||||
},
|
||||
AllMessages::RuntimeApi(RuntimeApiMessage::Request(
|
||||
..,
|
||||
RuntimeApiRequest::CandidateEvents(tx),
|
||||
)) => {
|
||||
tx.send(Ok(vec![])).unwrap();
|
||||
},
|
||||
_ => {
|
||||
unimplemented!()
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if let Some(expected_connected) = expected_connected {
|
||||
check_connected_to_validators(virtual_overseer, expected_connected).await;
|
||||
}
|
||||
}
|
||||
|
||||
/// Check that the next received message is a `Declare` message.
|
||||
pub(super) async fn expect_declare_msg(
|
||||
virtual_overseer: &mut VirtualOverseer,
|
||||
test_state: &TestState,
|
||||
peer: &PeerId,
|
||||
) {
|
||||
assert_matches!(
|
||||
overseer_recv(virtual_overseer).await,
|
||||
AllMessages::NetworkBridgeTx(NetworkBridgeTxMessage::SendCollationMessage(
|
||||
to,
|
||||
CollationProtocols::V2(protocol_v2::CollationProtocol::CollatorProtocol(
|
||||
wire_message,
|
||||
)),
|
||||
)) => {
|
||||
assert_eq!(to[0], *peer);
|
||||
assert_matches!(
|
||||
wire_message,
|
||||
protocol_v2::CollatorProtocolMessage::Declare(
|
||||
collator_id,
|
||||
para_id,
|
||||
signature,
|
||||
) => {
|
||||
assert!(signature.verify(
|
||||
&*protocol_v2::declare_signature_payload(&test_state.local_peer_id),
|
||||
&collator_id),
|
||||
);
|
||||
assert_eq!(collator_id, test_state.collator_pair.public());
|
||||
assert_eq!(para_id, test_state.para_id);
|
||||
}
|
||||
);
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
/// Test that a collator distributes a collation from the allowed ancestry
|
||||
/// to correct validators group.
|
||||
/// Run once with validators sending their view first and then the collator setting their own
|
||||
/// view first.
|
||||
#[rstest]
|
||||
#[case(true)]
|
||||
#[case(false)]
|
||||
fn distribute_collation_from_implicit_view(#[case] validator_sends_view_first: bool) {
|
||||
let head_a = Hash::from_low_u64_be(126);
|
||||
let head_a_num: u32 = 66;
|
||||
|
||||
// Grandparent of head `a`.
|
||||
let head_b = Hash::from_low_u64_be(128);
|
||||
let head_b_num: u32 = 64;
|
||||
|
||||
// Grandparent of head `b`.
|
||||
let head_c = Hash::from_low_u64_be(130);
|
||||
let head_c_num = 62;
|
||||
|
||||
let group_rotation_info = GroupRotationInfo {
|
||||
session_start_block: head_c_num - 2,
|
||||
group_rotation_frequency: 3,
|
||||
now: head_c_num,
|
||||
};
|
||||
|
||||
let mut test_state = TestState::default();
|
||||
test_state.group_rotation_info = group_rotation_info;
|
||||
|
||||
let local_peer_id = test_state.local_peer_id;
|
||||
let collator_pair = test_state.collator_pair.clone();
|
||||
|
||||
test_harness(
|
||||
local_peer_id,
|
||||
collator_pair,
|
||||
ReputationAggregator::new(|_| true),
|
||||
|mut test_harness| async move {
|
||||
let virtual_overseer = &mut test_harness.virtual_overseer;
|
||||
|
||||
overseer_send(virtual_overseer, CollatorProtocolMessage::ConnectToBackingGroups).await;
|
||||
|
||||
// Set collating para id.
|
||||
overseer_send(virtual_overseer, CollatorProtocolMessage::CollateOn(test_state.para_id))
|
||||
.await;
|
||||
|
||||
if validator_sends_view_first {
|
||||
// Activate leaf `c` to accept at least the collation.
|
||||
update_view(
|
||||
Some(test_state.current_group_validator_authority_ids()),
|
||||
&test_state,
|
||||
virtual_overseer,
|
||||
vec![(head_c, head_c_num)],
|
||||
1,
|
||||
)
|
||||
.await;
|
||||
} else {
|
||||
// Activated leaf is `b`, but the collation will be based on `c`.
|
||||
update_view(
|
||||
Some(test_state.current_group_validator_authority_ids()),
|
||||
&test_state,
|
||||
virtual_overseer,
|
||||
vec![(head_b, head_b_num)],
|
||||
1,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
let validator_peer_ids = test_state.current_group_validator_peer_ids();
|
||||
for (val, peer) in test_state
|
||||
.current_group_validator_authority_ids()
|
||||
.into_iter()
|
||||
.zip(validator_peer_ids.clone())
|
||||
{
|
||||
connect_peer(virtual_overseer, peer, CollationVersion::V2, Some(val.clone())).await;
|
||||
}
|
||||
|
||||
// Collator declared itself to each peer.
|
||||
for peer_id in &validator_peer_ids {
|
||||
expect_declare_msg(virtual_overseer, &test_state, peer_id).await;
|
||||
}
|
||||
|
||||
let pov = PoV { block_data: BlockData(vec![1, 2, 3]) };
|
||||
let parent_head_data_hash = Hash::repeat_byte(0xAA);
|
||||
let candidate = TestCandidateBuilder {
|
||||
para_id: test_state.para_id,
|
||||
relay_parent: head_c,
|
||||
pov_hash: pov.hash(),
|
||||
..Default::default()
|
||||
}
|
||||
.build();
|
||||
|
||||
let DistributeCollation { candidate, pov_block: _ } =
|
||||
distribute_collation_with_receipt(
|
||||
virtual_overseer,
|
||||
test_state.current_group_validator_authority_ids(),
|
||||
candidate,
|
||||
pov,
|
||||
parent_head_data_hash,
|
||||
)
|
||||
.await;
|
||||
|
||||
let candidate_hash = candidate.hash();
|
||||
|
||||
// Update peer views.
|
||||
for peer_id in &validator_peer_ids {
|
||||
send_peer_view_change(virtual_overseer, peer_id, vec![head_b]).await;
|
||||
|
||||
if !validator_sends_view_first {
|
||||
expect_advertise_collation_msg(
|
||||
virtual_overseer,
|
||||
&[*peer_id],
|
||||
head_c,
|
||||
vec![candidate_hash],
|
||||
)
|
||||
.await;
|
||||
}
|
||||
}
|
||||
|
||||
if validator_sends_view_first {
|
||||
// Activated leaf is `b`, but the collation will be based on `c`.
|
||||
update_view(None, &test_state, virtual_overseer, vec![(head_b, head_b_num)], 1)
|
||||
.await;
|
||||
|
||||
for _ in &validator_peer_ids {
|
||||
expect_advertise_collation_msg(
|
||||
virtual_overseer,
|
||||
&validator_peer_ids,
|
||||
head_c,
|
||||
vec![candidate_hash],
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
check_connected_to_validators(
|
||||
virtual_overseer,
|
||||
test_state.current_group_validator_authority_ids(),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
// Head `c` goes out of view.
|
||||
// Build a different candidate for this relay parent and attempt to distribute it.
|
||||
update_view(
|
||||
Some(test_state.current_group_validator_authority_ids()),
|
||||
&test_state,
|
||||
virtual_overseer,
|
||||
vec![(head_a, head_a_num)],
|
||||
1,
|
||||
)
|
||||
.await;
|
||||
|
||||
let pov = PoV { block_data: BlockData(vec![4, 5, 6]) };
|
||||
let parent_head_data_hash = Hash::repeat_byte(0xBB);
|
||||
let candidate = TestCandidateBuilder {
|
||||
para_id: test_state.para_id,
|
||||
relay_parent: head_c,
|
||||
pov_hash: pov.hash(),
|
||||
..Default::default()
|
||||
}
|
||||
.build();
|
||||
overseer_send(
|
||||
virtual_overseer,
|
||||
CollatorProtocolMessage::DistributeCollation {
|
||||
candidate_receipt: candidate.clone(),
|
||||
parent_head_data_hash,
|
||||
pov: pov.clone(),
|
||||
parent_head_data: HeadData(vec![1, 2, 3]),
|
||||
result_sender: None,
|
||||
core_index: CoreIndex(0),
|
||||
},
|
||||
)
|
||||
.await;
|
||||
|
||||
check_connected_to_validators(
|
||||
virtual_overseer,
|
||||
test_state.current_group_validator_authority_ids(),
|
||||
)
|
||||
.await;
|
||||
|
||||
// Parent out of view, nothing happens.
|
||||
assert!(overseer_recv_with_timeout(virtual_overseer, Duration::from_millis(100))
|
||||
.await
|
||||
.is_none());
|
||||
|
||||
test_harness
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
/// Tests that collator respects the per relay parent limit of collations, which is equal to the
/// number of assignments they have in the claim queue for that core.
#[test]
fn distribute_collation_up_to_limit() {
    let mut test_state = TestState::default();
    // Claim queue has 4 assignments for our paraid on core 0, 1 assignment for another paraid on
    // core 1. Let's replace one of our assignments on core 0.
    let core_0_claims = test_state.claim_queue.get_mut(&CoreIndex(0)).unwrap();
    *core_0_claims.get_mut(1).unwrap() = ParaId::from(3);
    let expected_assignments = SCHEDULING_LOOKAHEAD - 1;

    let local_peer_id = test_state.local_peer_id;
    let collator_pair = test_state.collator_pair.clone();

    test_harness(
        local_peer_id,
        collator_pair,
        ReputationAggregator::new(|_| true),
        |mut harness| async move {
            let virtual_overseer = &mut harness.virtual_overseer;

            let (head_a, head_a_num): (Hash, u32) = (Hash::from_low_u64_be(128), 64);
            // Grandparent of head `a`.
            let head_b = Hash::from_low_u64_be(130);

            overseer_send(virtual_overseer, CollatorProtocolMessage::ConnectToBackingGroups).await;

            // Set collating para id.
            overseer_send(virtual_overseer, CollatorProtocolMessage::CollateOn(test_state.para_id))
                .await;
            // Activated leaf is `a`, but the collation will be based on `b`.
            update_view(
                Some(test_state.current_group_validator_authority_ids()),
                &test_state,
                virtual_overseer,
                vec![(head_a, head_a_num)],
                1,
            )
            .await;

            // Fill up every assignment we have on core 0.
            for i in 0..expected_assignments {
                let pov = PoV { block_data: BlockData(vec![i as u8]) };
                let candidate = TestCandidateBuilder {
                    para_id: test_state.para_id,
                    relay_parent: head_b,
                    pov_hash: pov.hash(),
                    core_index: CoreIndex(0),
                    ..Default::default()
                }
                .build();
                distribute_collation_with_receipt(
                    virtual_overseer,
                    test_state.current_group_validator_authority_ids(),
                    candidate,
                    pov,
                    Hash::repeat_byte(0xAA),
                )
                .await;
            }

            let extra_pov = PoV { block_data: BlockData(vec![10, 12, 6]) };
            let extra_candidate = TestCandidateBuilder {
                para_id: test_state.para_id,
                relay_parent: head_b,
                pov_hash: extra_pov.hash(),
                core_index: CoreIndex(0),
                ..Default::default()
            }
            .build();
            let extra_request = CollatorProtocolMessage::DistributeCollation {
                candidate_receipt: extra_candidate.clone(),
                parent_head_data_hash: Hash::repeat_byte(0xBB),
                pov: extra_pov.clone(),
                parent_head_data: HeadData(vec![1, 2, 3]),
                result_sender: None,
                core_index: CoreIndex(0),
            };
            overseer_send(virtual_overseer, extra_request).await;

            check_connected_to_validators(
                virtual_overseer,
                test_state.current_group_validator_authority_ids(),
            )
            .await;
            // Limit has been reached.
            let next_message =
                overseer_recv_with_timeout(virtual_overseer, Duration::from_millis(100)).await;
            assert!(next_message.is_none());

            // Let's also try on core 1, where we don't have any assignments.

            let unassigned_pov = PoV { block_data: BlockData(vec![10, 12, 6]) };
            let unassigned_candidate = TestCandidateBuilder {
                para_id: test_state.para_id,
                relay_parent: head_b,
                pov_hash: unassigned_pov.hash(),
                core_index: CoreIndex(1),
                ..Default::default()
            }
            .build();
            let unassigned_request = CollatorProtocolMessage::DistributeCollation {
                candidate_receipt: unassigned_candidate.clone(),
                parent_head_data_hash: Hash::repeat_byte(0xBB),
                pov: unassigned_pov.clone(),
                parent_head_data: HeadData(vec![1, 2, 3]),
                result_sender: None,
                core_index: CoreIndex(1),
            };
            overseer_send(virtual_overseer, unassigned_request).await;

            check_connected_to_validators(
                virtual_overseer,
                test_state.current_group_validator_authority_ids(),
            )
            .await;

            let next_message =
                overseer_recv_with_timeout(virtual_overseer, Duration::from_millis(100)).await;
            assert!(next_message.is_none());

            harness
        },
    )
}
|
||||
|
||||
/// Tests that the collator sends the parent head data along with the collation
/// in case the para is assigned to multiple cores (elastic scaling).
#[test]
fn send_parent_head_data_for_elastic_scaling() {
    let test_state = TestState::with_elastic_scaling();

    // Taken out up front because `test_state` is moved into the async block below.
    let own_peer_id = test_state.local_peer_id;
    let own_collator_pair = test_state.collator_pair.clone();

    test_harness(
        own_peer_id,
        own_collator_pair,
        ReputationAggregator::new(|_| true),
        |harness| async move {
            let TestHarness { mut virtual_overseer, mut req_v2_cfg } = harness;

            let head_b = Hash::from_low_u64_be(129);
            let head_b_num: u32 = 63;

            overseer_send(&mut virtual_overseer, CollatorProtocolMessage::ConnectToBackingGroups)
                .await;

            // Set collating para id.
            overseer_send(
                &mut virtual_overseer,
                CollatorProtocolMessage::CollateOn(test_state.para_id),
            )
            .await;

            // Deduplicated authority ids of the validators behind cores 0, 2 and 3.
            let expected_connected: Vec<_> = [CoreIndex(0), CoreIndex(2), CoreIndex(3)]
                .into_iter()
                .flat_map(|core| test_state.validator_authority_ids_for_core(core))
                .collect::<HashSet<_>>()
                .into_iter()
                .collect();

            update_view(
                Some(expected_connected.clone()),
                &test_state,
                &mut virtual_overseer,
                vec![(head_b, head_b_num)],
                1,
            )
            .await;

            let expected_pov = PoV { block_data: BlockData(vec![1u8]) };
            let candidate = TestCandidateBuilder {
                para_id: test_state.para_id,
                relay_parent: head_b,
                pov_hash: expected_pov.hash(),
                ..Default::default()
            }
            .build();

            let parent_head = HeadData(vec![1, 2, 3]);
            let parent_head_hash = parent_head.hash();

            distribute_collation_with_receipt(
                &mut virtual_overseer,
                expected_connected,
                candidate.clone(),
                expected_pov.clone(),
                parent_head_hash,
            )
            .await;

            let validator_peer = test_state.validator_peer_id[0];
            connect_peer(
                &mut virtual_overseer,
                validator_peer,
                CollationVersion::V2,
                Some(test_state.current_group_validator_authority_ids()[0].clone()),
            )
            .await;
            expect_declare_msg(&mut virtual_overseer, &test_state, &validator_peer).await;

            send_peer_view_change(&mut virtual_overseer, &validator_peer, vec![head_b]).await;
            expect_advertise_collation_msg(
                &mut virtual_overseer,
                &[validator_peer],
                head_b,
                vec![candidate.hash()],
            )
            .await;

            // Request the collation by hash over the v2 request/response protocol.
            let (pending_response, rx) = oneshot::channel();
            let request = CollationFetchingRequest {
                relay_parent: head_b,
                para_id: test_state.para_id,
                candidate_hash: candidate.hash(),
            };
            req_v2_cfg
                .inbound_queue
                .as_mut()
                .unwrap()
                .send(RawIncomingRequest {
                    peer: validator_peer,
                    payload: request.encode(),
                    pending_response,
                })
                .await
                .unwrap();

            assert_matches!(
                rx.await,
                Ok(full_response) => {
                    let raw = full_response.result.expect("We should have a proper answer");
                    let mut bytes: &[u8] = raw.as_ref();
                    let response = CollationFetchingResponse::decode(&mut bytes)
                        .expect("Decoding should work");
                    // The response must be the variant that carries the parent head data.
                    assert_matches!(
                        response,
                        CollationFetchingResponse::CollationWithParentHeadData {
                            receipt, pov, parent_head_data
                        } => {
                            assert_eq!(receipt, candidate);
                            assert_eq!(pov, expected_pov);
                            assert_eq!(parent_head_data, parent_head);
                        }
                    );
                }
            );

            TestHarness { virtual_overseer, req_v2_cfg }
        },
    )
}
|
||||
|
||||
/// Tests that the collator correctly handles V2 requests from a peer: two collations are
/// advertised for the same relay parent and each one is then fetched by its candidate hash.
#[test]
fn advertise_and_send_collation_by_hash() {
    let test_state = TestState::default();

    // Taken out up front because `test_state` is moved into the async block below.
    let own_peer_id = test_state.local_peer_id;
    let own_collator_pair = test_state.collator_pair.clone();

    test_harness(
        own_peer_id,
        own_collator_pair,
        ReputationAggregator::new(|_| true),
        |harness| async move {
            let TestHarness { mut virtual_overseer, mut req_v2_cfg } = harness;

            let head_a = Hash::from_low_u64_be(128);
            let head_a_num: u32 = 64;

            // Parent of head `a`.
            let head_b = Hash::from_low_u64_be(129);
            let head_b_num: u32 = 63;

            overseer_send(&mut virtual_overseer, CollatorProtocolMessage::ConnectToBackingGroups)
                .await;

            // Set collating para id.
            overseer_send(
                &mut virtual_overseer,
                CollatorProtocolMessage::CollateOn(test_state.para_id),
            )
            .await;

            // Activate `b` first and then its child `a`, one view update per leaf.
            for leaf in [(head_b, head_b_num), (head_a, head_a_num)] {
                update_view(
                    Some(test_state.current_group_validator_authority_ids()),
                    &test_state,
                    &mut virtual_overseer,
                    vec![leaf],
                    1,
                )
                .await;
            }

            // Two candidates on top of `b`, differing only in their PoV.
            let candidates: Vec<_> = (0u8..2)
                .map(|seed| {
                    let pov = PoV { block_data: BlockData(vec![seed]) };
                    let receipt = TestCandidateBuilder {
                        para_id: test_state.para_id,
                        relay_parent: head_b,
                        pov_hash: pov.hash(),
                        ..Default::default()
                    }
                    .build();
                    (receipt, pov)
                })
                .collect();

            for (receipt, pov) in &candidates {
                distribute_collation_with_receipt(
                    &mut virtual_overseer,
                    test_state.current_group_validator_authority_ids(),
                    receipt.clone(),
                    pov.clone(),
                    Hash::zero(),
                )
                .await;
            }

            let validator_peer = test_state.validator_peer_id[0];
            connect_peer(
                &mut virtual_overseer,
                validator_peer,
                CollationVersion::V2,
                Some(test_state.current_group_validator_authority_ids()[0].clone()),
            )
            .await;
            expect_declare_msg(&mut virtual_overseer, &test_state, &validator_peer).await;

            // Head `b` is not a leaf, but both advertisements are still relevant.
            send_peer_view_change(&mut virtual_overseer, &validator_peer, vec![head_b]).await;
            let advertised_hashes: Vec<_> =
                candidates.iter().map(|(receipt, _)| receipt.hash()).collect();
            expect_advertise_collation_msg(
                &mut virtual_overseer,
                &[validator_peer],
                head_b,
                advertised_hashes,
            )
            .await;

            for (candidate, pov_block) in candidates {
                let (pending_response, rx) = oneshot::channel();
                let request = CollationFetchingRequest {
                    relay_parent: head_b,
                    para_id: test_state.para_id,
                    candidate_hash: candidate.hash(),
                };
                req_v2_cfg
                    .inbound_queue
                    .as_mut()
                    .unwrap()
                    .send(RawIncomingRequest {
                        peer: validator_peer,
                        payload: request.encode(),
                        pending_response,
                    })
                    .await
                    .unwrap();

                assert_matches!(
                    rx.await,
                    Ok(full_response) => {
                        // Response is the same for v2.
                        let raw = full_response.result.expect("We should have a proper answer");
                        let (receipt, pov) = decode_collation_response(raw.as_ref());
                        assert_eq!(receipt, candidate);
                        assert_eq!(pov, pov_block);
                    }
                );
            }

            TestHarness { virtual_overseer, req_v2_cfg }
        },
    )
}
|
||||
@@ -0,0 +1,204 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! The Collator Protocol allows collators and validators to talk to each other.
|
||||
//! This subsystem implements both sides of the collator protocol.
|
||||
|
||||
#![deny(missing_docs)]
|
||||
#![deny(unused_crate_dependencies)]
|
||||
#![recursion_limit = "256"]
|
||||
|
||||
use std::{
|
||||
collections::HashSet,
|
||||
time::{Duration, Instant},
|
||||
};
|
||||
|
||||
use futures::{
|
||||
stream::{FusedStream, StreamExt},
|
||||
FutureExt, TryFutureExt,
|
||||
};
|
||||
|
||||
use pezkuwi_node_subsystem_util::reputation::ReputationAggregator;
|
||||
use sp_keystore::KeystorePtr;
|
||||
|
||||
use pezkuwi_node_network_protocol::{
|
||||
request_response::{v2 as protocol_v2, IncomingRequestReceiver},
|
||||
PeerId, UnifiedReputationChange as Rep,
|
||||
};
|
||||
use pezkuwi_primitives::CollatorPair;
|
||||
|
||||
use pezkuwi_node_subsystem::{errors::SubsystemError, overseer, DummySubsystem, SpawnedSubsystem};
|
||||
|
||||
mod collator_side;
|
||||
mod validator_side;
|
||||
#[cfg(feature = "experimental-collator-protocol")]
|
||||
mod validator_side_experimental;
|
||||
|
||||
/// Log target used by the collator protocol subsystem.
// `'static` is implied for references in `const` items, so it is not spelled out.
const LOG_TARGET: &str = "teyrchain::collator-protocol";
/// Log target for the statistics output of the collator protocol subsystem.
const LOG_TARGET_STATS: &str = "teyrchain::collator-protocol::stats";
|
||||
|
||||
/// Eviction policy for collators: how long a peer may stay inactive or undeclared before it
/// is evicted.
#[derive(Clone, Copy, Debug)]
pub struct CollatorEvictionPolicy {
    /// How fast to evict collators who are inactive.
    pub inactive_collator: Duration,
    /// How fast to evict peers which don't declare their para.
    pub undeclared: Duration,
}

impl Default for CollatorEvictionPolicy {
    /// Inactive collators are evicted after 24 seconds, undeclared peers after 1 second.
    fn default() -> Self {
        let inactive_collator = Duration::from_secs(24);
        let undeclared = Duration::from_secs(1);

        Self { inactive_collator, undeclared }
    }
}
|
||||
|
||||
/// What side of the collator protocol is being engaged
|
||||
pub enum ProtocolSide {
|
||||
/// Validators operate on the relay chain.
|
||||
Validator {
|
||||
/// The keystore holding validator keys.
|
||||
keystore: KeystorePtr,
|
||||
/// An eviction policy for inactive peers or validators.
|
||||
eviction_policy: CollatorEvictionPolicy,
|
||||
/// Prometheus metrics for validators.
|
||||
metrics: validator_side::Metrics,
|
||||
/// List of invulnerable collators which is handled with a priority.
|
||||
invulnerables: HashSet<PeerId>,
|
||||
/// Override for `HOLD_OFF_DURATION` constant .
|
||||
collator_protocol_hold_off: Option<Duration>,
|
||||
},
|
||||
/// Experimental variant of the validator side. Do not use in production.
|
||||
#[cfg(feature = "experimental-collator-protocol")]
|
||||
ValidatorExperimental {
|
||||
/// The keystore holding validator keys.
|
||||
keystore: KeystorePtr,
|
||||
/// Prometheus metrics for validators.
|
||||
metrics: validator_side_experimental::Metrics,
|
||||
},
|
||||
/// Collators operate on a teyrchain.
|
||||
Collator {
|
||||
/// Local peer id.
|
||||
peer_id: PeerId,
|
||||
/// Teyrchain collator pair.
|
||||
collator_pair: CollatorPair,
|
||||
/// Receiver for v2 collation fetching requests.
|
||||
request_receiver_v2: IncomingRequestReceiver<protocol_v2::CollationFetchingRequest>,
|
||||
/// Metrics.
|
||||
metrics: collator_side::Metrics,
|
||||
},
|
||||
/// No protocol side, just disable it.
|
||||
None,
|
||||
}
|
||||
|
||||
/// The collator protocol subsystem.
|
||||
pub struct CollatorProtocolSubsystem {
|
||||
protocol_side: ProtocolSide,
|
||||
}
|
||||
|
||||
#[overseer::contextbounds(CollatorProtocol, prefix = self::overseer)]
|
||||
impl CollatorProtocolSubsystem {
|
||||
/// Start the collator protocol.
|
||||
/// If `id` is `Some` this is a collator side of the protocol.
|
||||
/// If `id` is `None` this is a validator side of the protocol.
|
||||
/// Caller must provide a registry for prometheus metrics.
|
||||
pub fn new(protocol_side: ProtocolSide) -> Self {
|
||||
Self { protocol_side }
|
||||
}
|
||||
}
|
||||
|
||||
#[overseer::subsystem(CollatorProtocol, error=SubsystemError, prefix=self::overseer)]
|
||||
impl<Context> CollatorProtocolSubsystem {
|
||||
fn start(self, ctx: Context) -> SpawnedSubsystem {
|
||||
let future = match self.protocol_side {
|
||||
ProtocolSide::Validator {
|
||||
keystore,
|
||||
eviction_policy,
|
||||
metrics,
|
||||
invulnerables,
|
||||
collator_protocol_hold_off,
|
||||
} => {
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
?invulnerables,
|
||||
?collator_protocol_hold_off,
|
||||
"AH collator protocol params",
|
||||
);
|
||||
validator_side::run(
|
||||
ctx,
|
||||
keystore,
|
||||
eviction_policy,
|
||||
metrics,
|
||||
invulnerables,
|
||||
collator_protocol_hold_off,
|
||||
)
|
||||
.map_err(|e| SubsystemError::with_origin("collator-protocol", e))
|
||||
.boxed()
|
||||
},
|
||||
#[cfg(feature = "experimental-collator-protocol")]
|
||||
ProtocolSide::ValidatorExperimental { keystore, metrics } =>
|
||||
validator_side_experimental::run(ctx, keystore, metrics)
|
||||
.map_err(|e| SubsystemError::with_origin("collator-protocol", e))
|
||||
.boxed(),
|
||||
ProtocolSide::Collator { peer_id, collator_pair, request_receiver_v2, metrics } =>
|
||||
collator_side::run(ctx, peer_id, collator_pair, request_receiver_v2, metrics)
|
||||
.map_err(|e| SubsystemError::with_origin("collator-protocol", e))
|
||||
.boxed(),
|
||||
ProtocolSide::None => return DummySubsystem.start(ctx),
|
||||
};
|
||||
|
||||
SpawnedSubsystem { name: "collator-protocol-subsystem", future }
|
||||
}
|
||||
}
|
||||
|
||||
/// Modify the reputation of a peer based on its behavior.
|
||||
async fn modify_reputation(
|
||||
reputation: &mut ReputationAggregator,
|
||||
sender: &mut impl overseer::CollatorProtocolSenderTrait,
|
||||
peer: PeerId,
|
||||
rep: Rep,
|
||||
) {
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
rep = ?rep,
|
||||
peer_id = %peer,
|
||||
"reputation change for peer",
|
||||
);
|
||||
|
||||
reputation.modify(sender, peer, rep).await;
|
||||
}
|
||||
|
||||
/// Wait until tick and return the timestamp for the following one.
|
||||
async fn wait_until_next_tick(last_poll: Instant, period: Duration) -> Instant {
|
||||
let now = Instant::now();
|
||||
let next_poll = last_poll + period;
|
||||
|
||||
if next_poll > now {
|
||||
futures_timer::Delay::new(next_poll - now).await
|
||||
}
|
||||
|
||||
Instant::now()
|
||||
}
|
||||
|
||||
/// Returns an infinite stream that yields with an interval of `period`.
|
||||
fn tick_stream(period: Duration) -> impl FusedStream<Item = ()> {
|
||||
futures::stream::unfold(Instant::now(), move |next_check| async move {
|
||||
Some(((), wait_until_next_tick(next_check, period).await))
|
||||
})
|
||||
.fuse()
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,392 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! Primitives for tracking collations-related data.
|
||||
//!
|
||||
//! Usually a path of collations is as follows:
|
||||
//! 1. First, collation must be advertised by collator.
|
||||
//! 2. The validator inspects the claim queue and decides if the collation should be fetched
|
||||
//! based on the entries there. A teyrchain can't have more fetched collations than the
|
||||
//! entries in the claim queue at a specific relay parent. When calculating this limit the
|
||||
//! validator counts all advertisements within its view not just at the relay parent.
|
||||
//! 3. If the advertisement was accepted, it's queued for fetch (per relay parent).
|
||||
//! 4. Once it's requested, the collation is said to be pending fetch
|
||||
//! (`CollationStatus::Fetching`).
|
||||
//! 5. Pending fetch collation becomes pending validation
|
||||
//! (`CollationStatus::WaitingOnValidation`) once received, we send it to backing for
|
||||
//! validation.
|
||||
//! 6. If it turns out to be invalid or async backing allows seconding another candidate, carry on
|
||||
//! with the next advertisement, otherwise we're done with this relay parent.
|
||||
//!
|
||||
//! ┌───────────────────────────────────┐
|
||||
//! └─▶Waiting ─▶ Fetching ─▶ WaitingOnValidation
|
||||
|
||||
use std::{
|
||||
collections::{BTreeMap, VecDeque},
|
||||
future::Future,
|
||||
pin::Pin,
|
||||
task::Poll,
|
||||
};
|
||||
|
||||
use futures::{future::BoxFuture, FutureExt};
|
||||
use pezkuwi_node_network_protocol::{
|
||||
peer_set::CollationVersion,
|
||||
request_response::{outgoing::RequestError, v1 as request_v1, OutgoingResult},
|
||||
PeerId,
|
||||
};
|
||||
use pezkuwi_node_primitives::PoV;
|
||||
use pezkuwi_node_subsystem_util::metrics::prometheus::prometheus::HistogramTimer;
|
||||
use pezkuwi_primitives::{
|
||||
CandidateHash, CandidateReceiptV2 as CandidateReceipt, CollatorId, Hash, HeadData,
|
||||
Id as ParaId, PersistedValidationData,
|
||||
};
|
||||
use tokio_util::sync::CancellationToken;
|
||||
|
||||
use super::error::SecondingError;
|
||||
use crate::LOG_TARGET;
|
||||
|
||||
/// Candidate supplied with a para head it's built on top of.
|
||||
#[derive(Debug, Copy, Clone, Hash, Eq, PartialEq)]
|
||||
pub struct ProspectiveCandidate {
|
||||
/// Candidate hash.
|
||||
pub candidate_hash: CandidateHash,
|
||||
/// Parent head-data hash as supplied in advertisement.
|
||||
pub parent_head_data_hash: Hash,
|
||||
}
|
||||
|
||||
impl ProspectiveCandidate {
|
||||
pub fn candidate_hash(&self) -> CandidateHash {
|
||||
self.candidate_hash
|
||||
}
|
||||
}
|
||||
|
||||
/// Identifier of a fetched collation.
|
||||
#[derive(Debug, Clone, Hash, Eq, PartialEq)]
|
||||
pub struct FetchedCollation {
|
||||
/// Candidate's relay parent.
|
||||
pub relay_parent: Hash,
|
||||
/// Teyrchain id.
|
||||
pub para_id: ParaId,
|
||||
/// Candidate hash.
|
||||
pub candidate_hash: CandidateHash,
|
||||
}
|
||||
|
||||
impl From<&CandidateReceipt<Hash>> for FetchedCollation {
|
||||
fn from(receipt: &CandidateReceipt<Hash>) -> Self {
|
||||
let descriptor = receipt.descriptor();
|
||||
Self {
|
||||
relay_parent: descriptor.relay_parent(),
|
||||
para_id: descriptor.para_id(),
|
||||
candidate_hash: receipt.hash(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Identifier of a collation being requested.
|
||||
#[derive(Debug, Copy, Clone, Hash, Eq, PartialEq)]
|
||||
pub struct PendingCollation {
|
||||
/// Candidate's relay parent.
|
||||
pub relay_parent: Hash,
|
||||
/// Teyrchain id.
|
||||
pub para_id: ParaId,
|
||||
/// Peer that advertised this collation.
|
||||
pub peer_id: PeerId,
|
||||
/// Optional candidate hash and parent head-data hash if were
|
||||
/// supplied in advertisement.
|
||||
pub prospective_candidate: Option<ProspectiveCandidate>,
|
||||
/// Hash of the candidate's commitments.
|
||||
pub commitments_hash: Option<Hash>,
|
||||
}
|
||||
|
||||
impl PendingCollation {
|
||||
pub fn new(
|
||||
relay_parent: Hash,
|
||||
para_id: ParaId,
|
||||
peer_id: &PeerId,
|
||||
prospective_candidate: Option<ProspectiveCandidate>,
|
||||
) -> Self {
|
||||
Self {
|
||||
relay_parent,
|
||||
para_id,
|
||||
peer_id: *peer_id,
|
||||
prospective_candidate,
|
||||
commitments_hash: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// An identifier for a fetched collation that was blocked from being seconded because we don't have
|
||||
/// access to the parent's HeadData. Can be retried once the candidate outputting this head data is
|
||||
/// seconded.
|
||||
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
|
||||
pub struct BlockedCollationId {
|
||||
/// Para id.
|
||||
pub para_id: ParaId,
|
||||
/// Hash of the parent head data.
|
||||
pub parent_head_data_hash: Hash,
|
||||
}
|
||||
|
||||
/// Performs a sanity check between advertised and fetched collations.
|
||||
pub fn fetched_collation_sanity_check(
|
||||
advertised: &PendingCollation,
|
||||
fetched: &CandidateReceipt,
|
||||
persisted_validation_data: &PersistedValidationData,
|
||||
maybe_parent_head_and_hash: Option<(HeadData, Hash)>,
|
||||
) -> Result<(), SecondingError> {
|
||||
if persisted_validation_data.hash() != fetched.descriptor().persisted_validation_data_hash() {
|
||||
return Err(SecondingError::PersistedValidationDataMismatch);
|
||||
}
|
||||
|
||||
if advertised
|
||||
.prospective_candidate
|
||||
.map_or(false, |pc| pc.candidate_hash() != fetched.hash())
|
||||
{
|
||||
return Err(SecondingError::CandidateHashMismatch);
|
||||
}
|
||||
|
||||
if advertised.relay_parent != fetched.descriptor.relay_parent() {
|
||||
return Err(SecondingError::RelayParentMismatch);
|
||||
}
|
||||
|
||||
if maybe_parent_head_and_hash.map_or(false, |(head, hash)| head.hash() != hash) {
|
||||
return Err(SecondingError::ParentHeadDataMismatch);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Identifier for a requested collation and the respective collator that advertised it.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct CollationEvent {
|
||||
/// Collator id.
|
||||
pub collator_id: CollatorId,
|
||||
/// The network protocol version the collator is using.
|
||||
pub collator_protocol_version: CollationVersion,
|
||||
/// The requested collation data.
|
||||
pub pending_collation: PendingCollation,
|
||||
}
|
||||
|
||||
/// Fetched collation data.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct PendingCollationFetch {
|
||||
/// Collation identifier.
|
||||
pub collation_event: CollationEvent,
|
||||
/// Candidate receipt.
|
||||
pub candidate_receipt: CandidateReceipt,
|
||||
/// Proof of validity.
|
||||
pub pov: PoV,
|
||||
/// Optional teyrchain parent head data.
|
||||
/// Only needed for elastic scaling.
|
||||
pub maybe_parent_head_data: Option<HeadData>,
|
||||
}
|
||||
|
||||
/// The status of the collations in [`CollationsPerRelayParent`].
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub enum CollationStatus {
|
||||
/// We are waiting for a collation to be advertised to us.
|
||||
Waiting,
|
||||
/// We are currently fetching a collation for the specified `ParaId`.
|
||||
Fetching(ParaId),
|
||||
/// We are waiting that a collation is being validated.
|
||||
WaitingOnValidation,
|
||||
}
|
||||
|
||||
impl Default for CollationStatus {
|
||||
fn default() -> Self {
|
||||
Self::Waiting
|
||||
}
|
||||
}
|
||||
|
||||
impl CollationStatus {
|
||||
/// Downgrades to `Waiting`
|
||||
pub fn back_to_waiting(&mut self) {
|
||||
*self = Self::Waiting
|
||||
}
|
||||
}
|
||||
|
||||
/// Per-`ParaId` counters: the number of claims in the claim queue and the number of
/// candidates seconded so far. Both start at zero.
#[derive(Debug, Default)]
struct CandidatesStatePerPara {
    /// How many collations have been seconded.
    pub seconded_per_para: usize,
    /// Claims in the claim queue for the `ParaId`.
    pub claims_per_para: usize,
}
|
||||
|
||||
/// Information about collations per relay parent.
|
||||
pub struct Collations {
|
||||
/// What is the current status in regards to a collation for this relay parent?
|
||||
pub status: CollationStatus,
|
||||
/// Collator we're fetching from, optionally which candidate was requested.
|
||||
///
|
||||
/// This is the currently last started fetch, which did not exceed `MAX_UNSHARED_DOWNLOAD_TIME`
|
||||
/// yet.
|
||||
pub fetching_from: Option<(CollatorId, Option<CandidateHash>)>,
|
||||
/// Collation that were advertised to us, but we did not yet request or fetch. Grouped by
|
||||
/// `ParaId`.
|
||||
waiting_queue: BTreeMap<ParaId, VecDeque<(PendingCollation, CollatorId)>>,
|
||||
/// Number of seconded candidates and claims in the claim queue per `ParaId`.
|
||||
candidates_state: BTreeMap<ParaId, CandidatesStatePerPara>,
|
||||
}
|
||||
|
||||
impl Collations {
|
||||
pub(super) fn new(group_assignments: &Vec<ParaId>) -> Self {
|
||||
let mut candidates_state = BTreeMap::<ParaId, CandidatesStatePerPara>::new();
|
||||
|
||||
for para_id in group_assignments {
|
||||
candidates_state.entry(*para_id).or_default().claims_per_para += 1;
|
||||
}
|
||||
|
||||
Self {
|
||||
status: Default::default(),
|
||||
fetching_from: None,
|
||||
waiting_queue: Default::default(),
|
||||
candidates_state,
|
||||
}
|
||||
}
|
||||
|
||||
/// Note a seconded collation for a given para.
|
||||
pub(super) fn note_seconded(&mut self, para_id: ParaId) {
|
||||
self.candidates_state.entry(para_id).or_default().seconded_per_para += 1;
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
?para_id,
|
||||
new_count=self.candidates_state.entry(para_id).or_default().seconded_per_para,
|
||||
"Note seconded."
|
||||
);
|
||||
self.status.back_to_waiting();
|
||||
}
|
||||
|
||||
/// Adds a new collation to the waiting queue for the relay parent. This function doesn't
|
||||
/// perform any limits check. The caller should assure that the collation limit is respected.
|
||||
pub(super) fn add_to_waiting_queue(&mut self, collation: (PendingCollation, CollatorId)) {
|
||||
self.waiting_queue.entry(collation.0.para_id).or_default().push_back(collation);
|
||||
}
|
||||
|
||||
/// Picks a collation to fetch from the waiting queue.
|
||||
/// When fetching collations we need to ensure that each teyrchain has got a fair core time
|
||||
/// share depending on its assignments in the claim queue. This means that the number of
|
||||
/// collations seconded per teyrchain should ideally be equal to the number of claims for the
|
||||
/// particular teyrchain in the claim queue.
|
||||
///
|
||||
/// To achieve this each seconded collation is mapped to an entry from the claim queue. The next
|
||||
/// fetch is the first unfulfilled entry from the claim queue for which there is an
|
||||
/// advertisement.
|
||||
///
|
||||
/// `unfulfilled_claim_queue_entries` represents all claim queue entries which are still not
|
||||
/// fulfilled.
|
||||
pub(super) fn pick_a_collation_to_fetch(
|
||||
&mut self,
|
||||
unfulfilled_claim_queue_entries: Vec<ParaId>,
|
||||
) -> Option<(PendingCollation, CollatorId)> {
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
waiting_queue=?self.waiting_queue,
|
||||
candidates_state=?self.candidates_state,
|
||||
?unfulfilled_claim_queue_entries,
|
||||
"Pick a collation to fetch."
|
||||
);
|
||||
|
||||
for assignment in unfulfilled_claim_queue_entries {
|
||||
// if there is an unfulfilled assignment - return it
|
||||
if let Some(collation) = self
|
||||
.waiting_queue
|
||||
.get_mut(&assignment)
|
||||
.and_then(|collations| collations.pop_front())
|
||||
{
|
||||
return Some(collation);
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
pub(super) fn seconded_for_para(&self, para_id: &ParaId) -> usize {
|
||||
self.candidates_state
|
||||
.get(¶_id)
|
||||
.map(|state| state.seconded_per_para)
|
||||
.unwrap_or_default()
|
||||
}
|
||||
|
||||
pub(super) fn queued_for_para(&self, para_id: &ParaId) -> usize {
|
||||
self.waiting_queue.get(para_id).map(|queue| queue.len()).unwrap_or_default()
|
||||
}
|
||||
}
|
||||
|
||||
// Any error that can occur when awaiting a collation fetch response.
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub(super) enum CollationFetchError {
|
||||
#[error("Future was cancelled.")]
|
||||
Cancelled,
|
||||
#[error("{0}")]
|
||||
Request(#[from] RequestError),
|
||||
}
|
||||
|
||||
/// Future that concludes when the collator has responded to our collation fetch request
|
||||
/// or the request was cancelled by the validator.
|
||||
pub(super) struct CollationFetchRequest {
|
||||
/// Info about the requested collation.
|
||||
pub pending_collation: PendingCollation,
|
||||
/// Collator id.
|
||||
pub collator_id: CollatorId,
|
||||
/// The network protocol version the collator is using.
|
||||
pub collator_protocol_version: CollationVersion,
|
||||
/// Responses from collator.
|
||||
pub from_collator: BoxFuture<'static, OutgoingResult<request_v1::CollationFetchingResponse>>,
|
||||
/// Handle used for checking if this request was cancelled.
|
||||
pub cancellation_token: CancellationToken,
|
||||
/// A metric histogram for the lifetime of the request
|
||||
pub _lifetime_timer: Option<HistogramTimer>,
|
||||
}
|
||||
|
||||
impl Future for CollationFetchRequest {
|
||||
type Output = (
|
||||
CollationEvent,
|
||||
std::result::Result<request_v1::CollationFetchingResponse, CollationFetchError>,
|
||||
);
|
||||
|
||||
fn poll(mut self: Pin<&mut Self>, cx: &mut std::task::Context<'_>) -> Poll<Self::Output> {
|
||||
// First check if this fetch request was cancelled.
|
||||
let cancelled = match std::pin::pin!(self.cancellation_token.cancelled()).poll(cx) {
|
||||
Poll::Ready(()) => true,
|
||||
Poll::Pending => false,
|
||||
};
|
||||
|
||||
if cancelled {
|
||||
return Poll::Ready((
|
||||
CollationEvent {
|
||||
collator_protocol_version: self.collator_protocol_version,
|
||||
collator_id: self.collator_id.clone(),
|
||||
pending_collation: self.pending_collation,
|
||||
},
|
||||
Err(CollationFetchError::Cancelled),
|
||||
));
|
||||
}
|
||||
|
||||
let res = self.from_collator.poll_unpin(cx).map(|res| {
|
||||
(
|
||||
CollationEvent {
|
||||
collator_protocol_version: self.collator_protocol_version,
|
||||
collator_id: self.collator_id.clone(),
|
||||
pending_collation: self.pending_collation,
|
||||
},
|
||||
res.map_err(CollationFetchError::Request),
|
||||
)
|
||||
});
|
||||
|
||||
res
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,140 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use fatality::thiserror::Error;
|
||||
use futures::channel::oneshot;
|
||||
|
||||
use pezkuwi_node_subsystem::RuntimeApiError;
|
||||
use pezkuwi_node_subsystem_util::backing_implicit_view;
|
||||
use pezkuwi_primitives::CandidateDescriptorVersion;
|
||||
|
||||
/// General result.
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
|
||||
/// General subsystem error.
|
||||
#[derive(Error, Debug)]
|
||||
pub enum Error {
|
||||
#[error(transparent)]
|
||||
ImplicitViewFetchError(backing_implicit_view::FetchError),
|
||||
|
||||
#[error("Response receiver for active validators request cancelled")]
|
||||
CancelledActiveValidators(oneshot::Canceled),
|
||||
|
||||
#[error("Response receiver for validator groups request cancelled")]
|
||||
CancelledValidatorGroups(oneshot::Canceled),
|
||||
|
||||
#[error("Response receiver for session index request cancelled")]
|
||||
CancelledSessionIndex(oneshot::Canceled),
|
||||
|
||||
#[error("Response receiver for claim queue request cancelled")]
|
||||
CancelledClaimQueue(oneshot::Canceled),
|
||||
|
||||
#[error("Response receiver for node features request cancelled")]
|
||||
CancelledNodeFeatures(oneshot::Canceled),
|
||||
|
||||
#[error("No state for the relay parent")]
|
||||
RelayParentStateNotFound,
|
||||
|
||||
#[error("Error while accessing Runtime API")]
|
||||
RuntimeApi(#[from] RuntimeApiError),
|
||||
}
|
||||
|
||||
/// An error occurred when attempting to start seconding a candidate.
|
||||
#[derive(Debug, Error)]
|
||||
pub enum SecondingError {
|
||||
#[error("Error while accessing Runtime API")]
|
||||
RuntimeApi(#[from] RuntimeApiError),
|
||||
|
||||
#[error("Response receiver for persisted validation data request cancelled")]
|
||||
CancelledRuntimePersistedValidationData(oneshot::Canceled),
|
||||
|
||||
#[error("Response receiver for prospective validation data request cancelled")]
|
||||
CancelledProspectiveValidationData(oneshot::Canceled),
|
||||
|
||||
#[error("Persisted validation data is not available")]
|
||||
PersistedValidationDataNotFound,
|
||||
|
||||
#[error("Persisted validation data hash doesn't match one in the candidate receipt.")]
|
||||
PersistedValidationDataMismatch,
|
||||
|
||||
#[error("Candidate hash doesn't match the advertisement")]
|
||||
CandidateHashMismatch,
|
||||
|
||||
#[error("Relay parent hash doesn't match the advertisement")]
|
||||
RelayParentMismatch,
|
||||
|
||||
#[error("Received duplicate collation from the peer")]
|
||||
Duplicate,
|
||||
|
||||
#[error("The provided parent head data does not match the hash")]
|
||||
ParentHeadDataMismatch,
|
||||
|
||||
#[error("Core index {0} present in descriptor is different than the assigned core {1}")]
|
||||
InvalidCoreIndex(u32, u32),
|
||||
|
||||
#[error("Session index {0} present in descriptor is different than the expected one {1}")]
|
||||
InvalidSessionIndex(u32, u32),
|
||||
|
||||
#[error("Invalid candidate receipt version {0:?}")]
|
||||
InvalidReceiptVersion(CandidateDescriptorVersion),
|
||||
}
|
||||
|
||||
impl SecondingError {
|
||||
/// Returns true if an error indicates that a peer is malicious.
|
||||
pub fn is_malicious(&self) -> bool {
|
||||
use SecondingError::*;
|
||||
matches!(
|
||||
self,
|
||||
PersistedValidationDataMismatch |
|
||||
CandidateHashMismatch |
|
||||
RelayParentMismatch |
|
||||
ParentHeadDataMismatch |
|
||||
InvalidCoreIndex(_, _) |
|
||||
InvalidSessionIndex(_, _) |
|
||||
InvalidReceiptVersion(_)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// Failed to request a collation due to an error.
|
||||
#[derive(Debug, Error)]
|
||||
pub enum FetchError {
|
||||
#[error("Collation was not previously advertised")]
|
||||
NotAdvertised,
|
||||
|
||||
#[error("Peer is unknown")]
|
||||
UnknownPeer,
|
||||
|
||||
#[error("Collation was already requested")]
|
||||
AlreadyRequested,
|
||||
|
||||
#[error("Relay parent went out of view")]
|
||||
RelayParentOutOfView,
|
||||
|
||||
#[error("Peer's protocol doesn't match the advertisement")]
|
||||
ProtocolMismatch,
|
||||
}
|
||||
|
||||
/// Represents a `RelayParentHoldOffState` error
|
||||
#[derive(Debug, Error)]
|
||||
pub enum HoldOffError {
|
||||
#[error("`on_hold_off_complete` called in `NotStarted`")]
|
||||
InvalidStateNotStarted,
|
||||
#[error("`on_hold_off_complete` called in `Done`")]
|
||||
InvalidStateDone,
|
||||
#[error("`on_hold_off_complete` called in the right state but there are no advertisements in the queue")]
|
||||
QueueEmpty,
|
||||
}
|
||||
@@ -0,0 +1,142 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use pezkuwi_node_subsystem_util::metrics::{self, prometheus};
|
||||
|
||||
#[derive(Clone, Default)]
|
||||
pub struct Metrics(Option<MetricsInner>);
|
||||
|
||||
impl Metrics {
|
||||
pub fn on_request(&self, succeeded: std::result::Result<(), ()>) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
match succeeded {
|
||||
Ok(()) => metrics.collation_requests.with_label_values(&["succeeded"]).inc(),
|
||||
Err(()) => metrics.collation_requests.with_label_values(&["failed"]).inc(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Provide a timer for `process_msg` which observes on drop.
|
||||
pub fn time_process_msg(&self) -> Option<metrics::prometheus::prometheus::HistogramTimer> {
|
||||
self.0.as_ref().map(|metrics| metrics.process_msg.start_timer())
|
||||
}
|
||||
|
||||
/// Provide a timer for `handle_collation_request_result` which observes on drop.
|
||||
pub fn time_handle_collation_request_result(
|
||||
&self,
|
||||
) -> Option<metrics::prometheus::prometheus::HistogramTimer> {
|
||||
self.0
|
||||
.as_ref()
|
||||
.map(|metrics| metrics.handle_collation_request_result.start_timer())
|
||||
}
|
||||
|
||||
/// Note the current number of collator peers.
|
||||
pub fn note_collator_peer_count(&self, collator_peers: usize) {
|
||||
self.0
|
||||
.as_ref()
|
||||
.map(|metrics| metrics.collator_peer_count.set(collator_peers as u64));
|
||||
}
|
||||
|
||||
/// Provide a timer for `CollationFetchRequest` structure which observes on drop.
|
||||
pub fn time_collation_request_duration(
|
||||
&self,
|
||||
) -> Option<metrics::prometheus::prometheus::HistogramTimer> {
|
||||
self.0.as_ref().map(|metrics| metrics.collation_request_duration.start_timer())
|
||||
}
|
||||
|
||||
/// Provide a timer for `request_unblocked_collations` which observes on drop.
|
||||
pub fn time_request_unblocked_collations(
|
||||
&self,
|
||||
) -> Option<metrics::prometheus::prometheus::HistogramTimer> {
|
||||
self.0
|
||||
.as_ref()
|
||||
.map(|metrics| metrics.request_unblocked_collations.start_timer())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
struct MetricsInner {
|
||||
collation_requests: prometheus::CounterVec<prometheus::U64>,
|
||||
process_msg: prometheus::Histogram,
|
||||
handle_collation_request_result: prometheus::Histogram,
|
||||
collator_peer_count: prometheus::Gauge<prometheus::U64>,
|
||||
collation_request_duration: prometheus::Histogram,
|
||||
request_unblocked_collations: prometheus::Histogram,
|
||||
}
|
||||
|
||||
impl metrics::Metrics for Metrics {
|
||||
fn try_register(
|
||||
registry: &prometheus::Registry,
|
||||
) -> std::result::Result<Self, prometheus::PrometheusError> {
|
||||
let metrics = MetricsInner {
|
||||
collation_requests: prometheus::register(
|
||||
prometheus::CounterVec::new(
|
||||
prometheus::Opts::new(
|
||||
"pezkuwi_teyrchain_collation_requests_total",
|
||||
"Number of collations requested from Collators.",
|
||||
),
|
||||
&["success"],
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
process_msg: prometheus::register(
|
||||
prometheus::Histogram::with_opts(
|
||||
prometheus::HistogramOpts::new(
|
||||
"pezkuwi_teyrchain_collator_protocol_validator_process_msg",
|
||||
"Time spent within `collator_protocol_validator::process_msg`",
|
||||
)
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
handle_collation_request_result: prometheus::register(
|
||||
prometheus::Histogram::with_opts(
|
||||
prometheus::HistogramOpts::new(
|
||||
"pezkuwi_teyrchain_collator_protocol_validator_handle_collation_request_result",
|
||||
"Time spent within `collator_protocol_validator::handle_collation_request_result`",
|
||||
)
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
collator_peer_count: prometheus::register(
|
||||
prometheus::Gauge::new(
|
||||
"pezkuwi_teyrchain_collator_peer_count",
|
||||
"Amount of collator peers connected",
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
collation_request_duration: prometheus::register(
|
||||
prometheus::Histogram::with_opts(
|
||||
prometheus::HistogramOpts::new(
|
||||
"pezkuwi_teyrchain_collator_protocol_validator_collation_request_duration",
|
||||
"Lifetime of the `CollationFetchRequest` structure",
|
||||
).buckets(vec![0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.75, 0.9, 1.0, 1.2, 1.5, 1.75]),
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
request_unblocked_collations: prometheus::register(
|
||||
prometheus::Histogram::with_opts(
|
||||
prometheus::HistogramOpts::new(
|
||||
"pezkuwi_teyrchain_collator_protocol_validator_request_unblocked_collations",
|
||||
"Time spent within `collator_protocol_validator::request_unblocked_collations`",
|
||||
)
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
};
|
||||
|
||||
Ok(Metrics(Some(metrics)))
|
||||
}
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
+3080
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,159 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use std::num::NonZeroU16;
|
||||
|
||||
use pezkuwi_node_network_protocol::peer_set::CollationVersion;
|
||||
use pezkuwi_primitives::Id as ParaId;
|
||||
|
||||
/// Maximum reputation score.
pub const MAX_SCORE: u16 = 5000;

/// Limit for the total number of connected peers.
pub const CONNECTED_PEERS_LIMIT: NonZeroU16 = NonZeroU16::new(300).expect("300 is greater than 0");

/// Limit for the total number of connected peers for a paraid.
/// Must not exceed `CONNECTED_PEERS_LIMIT`; this is checked at compile time.
pub const CONNECTED_PEERS_PARA_LIMIT: NonZeroU16 = const {
    assert!(CONNECTED_PEERS_LIMIT.get() >= 100);
    NonZeroU16::new(100).expect("100 is greater than 0")
};

/// Maximum number of relay parents to process for reputation bumps on startup and between finality
/// notifications.
pub const MAX_STARTUP_ANCESTRY_LOOKBACK: u32 = 20;

/// Reputation bump for getting a valid candidate included.
pub const VALID_INCLUDED_CANDIDATE_BUMP: u16 = 50;

/// Reputation slash for peer inactivity (for each included candidate of the para that was not
/// authored by the peer)
pub const INACTIVITY_DECAY: u16 = 1;

/// Maximum number of stored peer scores for a paraid. Should be greater than
/// `CONNECTED_PEERS_PARA_LIMIT`.
pub const MAX_STORED_SCORES_PER_PARA: u8 = 150;

/// Reputation score type.
///
/// Wraps a `u16` that is kept within `0..=MAX_SCORE` by the constructor and the arithmetic
/// helpers.
#[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Copy, Default)]
pub struct Score(u16);

impl Score {
    /// Create a new instance. Returns `None` if `val` is over the `MAX_SCORE`.
    pub const fn new(val: u16) -> Option<Self> {
        if val > MAX_SCORE {
            None
        } else {
            Some(Self(val))
        }
    }

    /// Add `val` to the inner value, saturating at `MAX_SCORE`.
    ///
    /// Never overflows, whatever the value of `val`.
    pub fn saturating_add(&mut self, val: u16) {
        // Saturate at the `u16` level first: a plain `self.0 + val` overflows (panicking in debug
        // builds, wrapping in release builds) as soon as the sum exceeds `u16::MAX`.
        self.0 = self.0.saturating_add(val).min(MAX_SCORE);
    }

    /// Subtract `val` from the inner value, saturating at 0.
    pub fn saturating_sub(&mut self, val: u16) {
        self.0 = self.0.saturating_sub(val);
    }
}

impl From<Score> for u16 {
    /// Unwrap the raw score value.
    fn from(value: Score) -> Self {
        value.0
    }
}
|
||||
|
||||
/// Information about a connected peer.
|
||||
#[derive(PartialEq, Debug, Clone)]
|
||||
pub struct PeerInfo {
|
||||
/// Protocol version.
|
||||
pub version: CollationVersion,
|
||||
/// State of the peer.
|
||||
pub state: PeerState,
|
||||
}
|
||||
|
||||
/// State of a connected peer
|
||||
#[derive(PartialEq, Debug, Clone)]
|
||||
pub enum PeerState {
|
||||
/// Connected.
|
||||
Connected,
|
||||
/// Peer has declared.
|
||||
Collating(ParaId),
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Exercises the `Score` constructor and both saturating arithmetic helpers.
    #[test]
    fn score_functions() {
        assert!(MAX_SCORE > 50);

        // In-range values round-trip through the constructor, the maximum included.
        for raw in (0..MAX_SCORE).step_by(10) {
            assert_eq!(u16::from(Score::new(raw).unwrap()), raw);
        }
        assert_eq!(u16::from(Score::new(MAX_SCORE).unwrap()), MAX_SCORE);

        // Anything above the maximum is rejected.
        for raw in ((MAX_SCORE + 1)..(MAX_SCORE + 50)).step_by(5) {
            assert_eq!(Score::new(raw), None);
        }

        // All arithmetic checks start from the same base score of 50.
        let base = Score::new(50).unwrap();
        let after_add = |amount: u16| {
            let mut copy = base;
            copy.saturating_add(amount);
            u16::from(copy)
        };
        let after_sub = |amount: u16| {
            let mut copy = base;
            copy.saturating_sub(amount);
            u16::from(copy)
        };

        // Additions that stay below the limit are exact.
        for amount in (0..(MAX_SCORE - 50)).step_by(10) {
            assert_eq!(u16::from(base) + amount, after_add(amount));
        }

        // Additions that reach or exceed the limit end up at `MAX_SCORE`.
        for amount in ((MAX_SCORE - 50)..MAX_SCORE).step_by(10) {
            assert_eq!(MAX_SCORE, after_add(amount));
        }

        // Subtractions that stay above zero are exact.
        for amount in (0..50).step_by(10) {
            assert_eq!(u16::from(base) - amount, after_sub(amount));
        }

        // Subtractions that reach or go below zero end up at 0.
        for amount in (50..100).step_by(10) {
            assert_eq!(0, after_sub(amount));
        }
    }
}
|
||||
@@ -0,0 +1,68 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use crate::LOG_TARGET;
|
||||
use fatality::Nested;
|
||||
use pezkuwi_node_subsystem::{ChainApiError, SubsystemError};
|
||||
use pezkuwi_node_subsystem_util::runtime;
|
||||
use pezkuwi_primitives::Hash;
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
pub type FatalResult<T> = std::result::Result<T, FatalError>;
|
||||
|
||||
#[fatality::fatality(splitable)]
|
||||
pub enum Error {
|
||||
#[fatal]
|
||||
#[error("Oneshot for receiving ancestors from chain API got cancelled")]
|
||||
CanceledAncestors,
|
||||
#[fatal]
|
||||
#[error("Oneshot for receiving finalized block number from chain API got cancelled")]
|
||||
CanceledFinalizedBlockNumber,
|
||||
#[fatal]
|
||||
#[error("Oneshot for receiving finalized block hash from chain API got cancelled")]
|
||||
CanceledFinalizedBlockHash,
|
||||
#[error("Finalized block hash for {0} not found")]
|
||||
FinalizedBlockNotFound(u32),
|
||||
#[error(transparent)]
|
||||
ChainApi(#[from] ChainApiError),
|
||||
#[fatal(forward)]
|
||||
#[error("Error while accessing runtime information {0}")]
|
||||
Runtime(#[from] runtime::Error),
|
||||
#[fatal]
|
||||
#[error("Receiving message from overseer failed: {0}")]
|
||||
SubsystemReceive(#[source] SubsystemError),
|
||||
}
|
||||
|
||||
/// Utility for eating top level errors and log them.
|
||||
///
|
||||
/// We basically always want to try and continue on error. This utility function is meant to
|
||||
/// consume top-level errors by simply logging them
|
||||
pub fn log_error(result: Result<()>) -> FatalResult<()> {
|
||||
match result.into_nested()? {
|
||||
Ok(()) => Ok(()),
|
||||
Err(jfyi) => {
|
||||
jfyi.log();
|
||||
Ok(())
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
impl JfyiError {
|
||||
/// Log a `JfyiError`.
|
||||
pub fn log(self) {
|
||||
gum::warn!(target: LOG_TARGET, error = ?self);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,28 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use pezkuwi_node_subsystem_util::metrics::{self, prometheus};
|
||||
|
||||
#[derive(Clone, Default)]
|
||||
pub struct Metrics;
|
||||
|
||||
impl metrics::Metrics for Metrics {
|
||||
fn try_register(
|
||||
_registry: &prometheus::Registry,
|
||||
) -> std::result::Result<Self, prometheus::PrometheusError> {
|
||||
Ok(Metrics)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,144 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
#![allow(unused)]
|
||||
|
||||
// See reasoning in Cargo.toml why this temporary useless import is needed.
|
||||
use tokio as _;
|
||||
|
||||
mod common;
|
||||
mod error;
|
||||
mod metrics;
|
||||
mod peer_manager;
|
||||
mod state;
|
||||
|
||||
use std::collections::VecDeque;
|
||||
|
||||
use common::MAX_STORED_SCORES_PER_PARA;
|
||||
use error::{log_error, FatalError, FatalResult, Result};
|
||||
use fatality::Split;
|
||||
use peer_manager::{Db, PeerManager};
|
||||
use pezkuwi_node_subsystem::{
|
||||
overseer, ActivatedLeaf, CollatorProtocolSenderTrait, FromOrchestra, OverseerSignal,
|
||||
};
|
||||
use pezkuwi_node_subsystem_util::{
|
||||
find_validator_group, request_claim_queue, request_validator_groups, request_validators,
|
||||
runtime::recv_runtime, signing_key_and_index,
|
||||
};
|
||||
use pezkuwi_primitives::{Hash, Id as ParaId};
|
||||
use sp_keystore::KeystorePtr;
|
||||
use state::State;
|
||||
|
||||
pub use metrics::Metrics;
|
||||
|
||||
use crate::LOG_TARGET;
|
||||
|
||||
/// The main run loop.
|
||||
#[overseer::contextbounds(CollatorProtocol, prefix = self::overseer)]
|
||||
pub(crate) async fn run<Context>(
|
||||
mut ctx: Context,
|
||||
keystore: KeystorePtr,
|
||||
metrics: Metrics,
|
||||
) -> FatalResult<()> {
|
||||
if let Some(_state) = initialize(&mut ctx, keystore, metrics).await? {
|
||||
// run_inner(state);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[overseer::contextbounds(CollatorProtocol, prefix = self::overseer)]
|
||||
async fn initialize<Context>(
|
||||
ctx: &mut Context,
|
||||
keystore: KeystorePtr,
|
||||
metrics: Metrics,
|
||||
) -> FatalResult<Option<State<Db>>> {
|
||||
loop {
|
||||
let first_leaf = match wait_for_first_leaf(ctx).await? {
|
||||
Some(activated_leaf) => activated_leaf,
|
||||
None => return Ok(None),
|
||||
};
|
||||
|
||||
let scheduled_paras = match scheduled_paras(ctx.sender(), first_leaf.hash, &keystore).await
|
||||
{
|
||||
Ok(paras) => paras,
|
||||
Err(err) => {
|
||||
log_error(Err(err))?;
|
||||
continue;
|
||||
},
|
||||
};
|
||||
|
||||
let backend = Db::new(MAX_STORED_SCORES_PER_PARA).await;
|
||||
|
||||
match PeerManager::startup(backend, ctx.sender(), scheduled_paras.into_iter().collect())
|
||||
.await
|
||||
{
|
||||
Ok(peer_manager) => return Ok(Some(State::new(peer_manager, keystore, metrics))),
|
||||
Err(err) => {
|
||||
log_error(Err(err))?;
|
||||
continue;
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Wait for `ActiveLeavesUpdate`, returns `None` if `Conclude` signal came first.
|
||||
#[overseer::contextbounds(CollatorProtocol, prefix = self::overseer)]
|
||||
async fn wait_for_first_leaf<Context>(ctx: &mut Context) -> FatalResult<Option<ActivatedLeaf>> {
|
||||
loop {
|
||||
match ctx.recv().await.map_err(FatalError::SubsystemReceive)? {
|
||||
FromOrchestra::Signal(OverseerSignal::Conclude) => return Ok(None),
|
||||
FromOrchestra::Signal(OverseerSignal::ActiveLeaves(update)) => {
|
||||
if let Some(activated) = update.activated {
|
||||
return Ok(Some(activated));
|
||||
}
|
||||
},
|
||||
FromOrchestra::Signal(OverseerSignal::BlockFinalized(_, _)) => {},
|
||||
FromOrchestra::Communication { msg } => {
|
||||
// TODO: we should actually disconnect peers connected on collation protocol while
|
||||
// we're still bootstrapping. OR buffer these messages until we've bootstrapped.
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
?msg,
|
||||
"Received msg before first active leaves update. This is not expected - message will be dropped."
|
||||
)
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn scheduled_paras<Sender: CollatorProtocolSenderTrait>(
|
||||
sender: &mut Sender,
|
||||
hash: Hash,
|
||||
keystore: &KeystorePtr,
|
||||
) -> Result<VecDeque<ParaId>> {
|
||||
let validators = recv_runtime(request_validators(hash, sender).await).await?;
|
||||
|
||||
let (groups, rotation_info) =
|
||||
recv_runtime(request_validator_groups(hash, sender).await).await?;
|
||||
|
||||
let core_now = if let Some(group) = signing_key_and_index(&validators, keystore)
|
||||
.and_then(|(_, index)| find_validator_group(&groups, index))
|
||||
{
|
||||
rotation_info.core_for_group(group, groups.len())
|
||||
} else {
|
||||
gum::trace!(target: LOG_TARGET, ?hash, "Not a validator");
|
||||
return Ok(VecDeque::new());
|
||||
};
|
||||
|
||||
let mut claim_queue = recv_runtime(request_claim_queue(hash, sender).await).await?;
|
||||
Ok(claim_queue.remove(&core_now).unwrap_or_else(|| VecDeque::new()))
|
||||
}
|
||||
+46
@@ -0,0 +1,46 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use crate::validator_side_experimental::{common::Score, peer_manager::ReputationUpdate};
|
||||
use async_trait::async_trait;
|
||||
use pezkuwi_node_network_protocol::PeerId;
|
||||
use pezkuwi_primitives::{BlockNumber, Id as ParaId};
|
||||
use std::collections::{BTreeMap, BTreeSet, HashMap};
|
||||
|
||||
/// Trait describing the interface of the reputation database.
|
||||
#[async_trait]
|
||||
pub trait Backend {
|
||||
/// Return the latest finalized block for which the backend processed bumps.
|
||||
async fn processed_finalized_block_number(&self) -> Option<BlockNumber>;
|
||||
/// Get the peer's stored reputation for this paraid, if any.
|
||||
async fn query(&self, peer_id: &PeerId, para_id: &ParaId) -> Option<Score>;
|
||||
/// Slash the peer's reputation for this paraid, with the given value.
|
||||
async fn slash(&mut self, peer_id: &PeerId, para_id: &ParaId, value: Score);
|
||||
/// Prune all data for paraids that are no longer in this registered set.
|
||||
async fn prune_paras(&mut self, registered_paras: BTreeSet<ParaId>);
|
||||
/// Process the reputation bumps, returning all the reputation changes that were done in
|
||||
/// consequence. This is needed because a reputation bump for a para also means a reputation
|
||||
/// decay for the other collators of that para (if the `decay_value` param is present) and
|
||||
/// because if the number of stored reputations go over the `stored_limit_per_para`, we'll 100%
|
||||
/// slash the least recently bumped peers. `leaf_number` needs to be at least equal to the
|
||||
/// `processed_finalized_block_number`
|
||||
async fn process_bumps(
|
||||
&mut self,
|
||||
leaf_number: BlockNumber,
|
||||
bumps: BTreeMap<ParaId, HashMap<PeerId, Score>>,
|
||||
decay_value: Option<Score>,
|
||||
) -> Vec<ReputationUpdate>;
|
||||
}
|
||||
+1280
File diff suppressed because it is too large
Load Diff
+765
@@ -0,0 +1,765 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use crate::validator_side_experimental::{
|
||||
common::Score,
|
||||
peer_manager::{backend::Backend, ReputationUpdate, ReputationUpdateKind},
|
||||
};
|
||||
use async_trait::async_trait;
|
||||
use pezkuwi_node_network_protocol::PeerId;
|
||||
use pezkuwi_primitives::{BlockNumber, Hash, Id as ParaId};
|
||||
use std::{
|
||||
collections::{btree_map, hash_map, BTreeMap, BTreeSet, HashMap},
|
||||
time::{SystemTime, UNIX_EPOCH},
|
||||
};
|
||||
|
||||
/// This is an in-memory temporary implementation for the DB, to be used only for prototyping and
|
||||
/// testing purposes.
|
||||
pub struct Db {
|
||||
db: BTreeMap<ParaId, HashMap<PeerId, ScoreEntry>>,
|
||||
last_finalized: Option<BlockNumber>,
|
||||
stored_limit_per_para: u8,
|
||||
}
|
||||
|
||||
impl Db {
|
||||
/// Create a new instance of the in-memory DB.
|
||||
///
|
||||
/// `stored_limit_per_para` is the maximum number of reputations that can be stored per para.
|
||||
pub async fn new(stored_limit_per_para: u8) -> Self {
|
||||
Self { db: BTreeMap::new(), last_finalized: None, stored_limit_per_para }
|
||||
}
|
||||
}
|
||||
|
||||
type Timestamp = u128;
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct ScoreEntry {
|
||||
score: Score,
|
||||
last_bumped: Timestamp,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Backend for Db {
|
||||
async fn processed_finalized_block_number(&self) -> Option<BlockNumber> {
|
||||
self.last_finalized
|
||||
}
|
||||
|
||||
async fn query(&self, peer_id: &PeerId, para_id: &ParaId) -> Option<Score> {
|
||||
self.db.get(para_id).and_then(|per_para| per_para.get(peer_id).map(|e| e.score))
|
||||
}
|
||||
|
||||
async fn slash(&mut self, peer_id: &PeerId, para_id: &ParaId, value: Score) {
|
||||
if let btree_map::Entry::Occupied(mut per_para_entry) = self.db.entry(*para_id) {
|
||||
if let hash_map::Entry::Occupied(mut e) = per_para_entry.get_mut().entry(*peer_id) {
|
||||
let score = e.get_mut().score;
|
||||
// Remove the entry if it goes to zero.
|
||||
if score <= value {
|
||||
e.remove();
|
||||
} else {
|
||||
e.get_mut().score.saturating_sub(value.into());
|
||||
}
|
||||
}
|
||||
|
||||
// If the per_para length went to 0, remove it completely
|
||||
if per_para_entry.get().is_empty() {
|
||||
per_para_entry.remove();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn prune_paras(&mut self, registered_paras: BTreeSet<ParaId>) {
|
||||
self.db.retain(|para, _| registered_paras.contains(¶));
|
||||
}
|
||||
|
||||
async fn process_bumps(
|
||||
&mut self,
|
||||
leaf_number: BlockNumber,
|
||||
bumps: BTreeMap<ParaId, HashMap<PeerId, Score>>,
|
||||
decay_value: Option<Score>,
|
||||
) -> Vec<ReputationUpdate> {
|
||||
if self.last_finalized.unwrap_or(0) >= leaf_number {
|
||||
return vec![];
|
||||
}
|
||||
|
||||
self.last_finalized = Some(leaf_number);
|
||||
self.bump_reputations(bumps, decay_value)
|
||||
}
|
||||
}
|
||||
|
||||
impl Db {
|
||||
fn bump_reputations(
|
||||
&mut self,
|
||||
bumps: BTreeMap<ParaId, HashMap<PeerId, Score>>,
|
||||
maybe_decay_value: Option<Score>,
|
||||
) -> Vec<ReputationUpdate> {
|
||||
let mut reported_updates = vec![];
|
||||
let now = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_millis();
|
||||
|
||||
for (para, bumps_per_para) in bumps {
|
||||
reported_updates.reserve(bumps_per_para.len());
|
||||
|
||||
for (peer_id, bump) in bumps_per_para.iter() {
|
||||
if u16::from(*bump) == 0 {
|
||||
continue;
|
||||
}
|
||||
|
||||
self.db
|
||||
.entry(para)
|
||||
.or_default()
|
||||
.entry(*peer_id)
|
||||
.and_modify(|e| {
|
||||
e.score.saturating_add(u16::from(*bump));
|
||||
e.last_bumped = now;
|
||||
})
|
||||
.or_insert(ScoreEntry { score: *bump, last_bumped: now });
|
||||
|
||||
reported_updates.push(ReputationUpdate {
|
||||
peer_id: *peer_id,
|
||||
para_id: para,
|
||||
value: *bump,
|
||||
kind: ReputationUpdateKind::Bump,
|
||||
});
|
||||
}
|
||||
|
||||
if let btree_map::Entry::Occupied(mut per_para_entry) = self.db.entry(para) {
|
||||
if let Some(decay_value) = maybe_decay_value {
|
||||
let peers_to_slash = per_para_entry
|
||||
.get()
|
||||
.keys()
|
||||
.filter(|peer_id| !bumps_per_para.contains_key(peer_id))
|
||||
.copied()
|
||||
.collect::<Vec<PeerId>>();
|
||||
|
||||
for peer_id in peers_to_slash {
|
||||
if let hash_map::Entry::Occupied(mut e) =
|
||||
per_para_entry.get_mut().entry(peer_id)
|
||||
{
|
||||
// Remove the entry if it goes to zero.
|
||||
if e.get_mut().score <= decay_value {
|
||||
let score = e.remove().score;
|
||||
reported_updates.push(ReputationUpdate {
|
||||
peer_id,
|
||||
para_id: para,
|
||||
value: score,
|
||||
kind: ReputationUpdateKind::Slash,
|
||||
});
|
||||
} else {
|
||||
e.get_mut().score.saturating_sub(decay_value.into());
|
||||
reported_updates.push(ReputationUpdate {
|
||||
peer_id,
|
||||
para_id: para,
|
||||
value: decay_value,
|
||||
kind: ReputationUpdateKind::Slash,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let per_para_limit = self.stored_limit_per_para as usize;
|
||||
if per_para_entry.get().is_empty() {
|
||||
// If the per_para length went to 0, remove it completely
|
||||
per_para_entry.remove();
|
||||
} else if per_para_entry.get().len() > per_para_limit {
|
||||
// We have exceeded the maximum capacity, in which case we need to prune
|
||||
// the least recently bumped values
|
||||
let diff = per_para_entry.get().len() - per_para_limit;
|
||||
Self::prune_for_para(¶, &mut per_para_entry, diff, &mut reported_updates);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
reported_updates
|
||||
}
|
||||
|
||||
fn prune_for_para(
|
||||
para_id: &ParaId,
|
||||
per_para: &mut btree_map::OccupiedEntry<ParaId, HashMap<PeerId, ScoreEntry>>,
|
||||
diff: usize,
|
||||
reported_updates: &mut Vec<ReputationUpdate>,
|
||||
) {
|
||||
for _ in 0..diff {
|
||||
let (peer_id_to_remove, score) = per_para
|
||||
.get()
|
||||
.iter()
|
||||
.min_by_key(|(_peer, entry)| entry.last_bumped)
|
||||
.map(|(peer, entry)| (*peer, entry.score))
|
||||
.expect("We know there are enough reps over the limit");
|
||||
|
||||
per_para.get_mut().remove(&peer_id_to_remove);
|
||||
|
||||
reported_updates.push(ReputationUpdate {
|
||||
peer_id: peer_id_to_remove,
|
||||
para_id: *para_id,
|
||||
value: score,
|
||||
kind: ReputationUpdateKind::Slash,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
fn len(&self) -> usize {
|
||||
self.db.len()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
	use std::time::Duration;

	use super::*;

	/// Shorthand for building a valid `Score`.
	fn score(value: u16) -> Score {
		Score::new(value).unwrap()
	}

	/// Build the input of `process_bumps` from `(para, [(peer, bump)])` pairs.
	fn bumps(
		per_para: Vec<(ParaId, Vec<(PeerId, u16)>)>,
	) -> BTreeMap<ParaId, HashMap<PeerId, Score>> {
		per_para
			.into_iter()
			.map(|(para_id, peers)| {
				let peers =
					peers.into_iter().map(|(peer_id, value)| (peer_id, score(value))).collect();
				(para_id, peers)
			})
			.collect()
	}

	/// Expected `Bump` update.
	fn bumped(peer_id: PeerId, para_id: ParaId, value: u16) -> ReputationUpdate {
		ReputationUpdate { peer_id, para_id, kind: ReputationUpdateKind::Bump, value: score(value) }
	}

	/// Expected `Slash` update.
	fn slashed(peer_id: PeerId, para_id: ParaId, value: u16) -> ReputationUpdate {
		ReputationUpdate {
			peer_id,
			para_id,
			kind: ReputationUpdateKind::Slash,
			value: score(value),
		}
	}

	#[tokio::test]
	// Test different types of reputation updates and their effects.
	async fn test_reputation_updates() {
		let mut db = Db::new(10).await;
		assert_eq!(db.processed_finalized_block_number().await, None);
		assert_eq!(db.len(), 0);

		// Test empty update with no decay.
		assert!(db.process_bumps(10, Default::default(), None).await.is_empty());
		assert_eq!(db.processed_finalized_block_number().await, Some(10));
		assert_eq!(db.len(), 0);

		// Test a query on a non-existent entry.
		assert_eq!(db.query(&PeerId::random(), &ParaId::from(1000)).await, None);

		// Test empty update with decay.
		assert!(db.process_bumps(11, Default::default(), Some(score(1))).await.is_empty());
		assert_eq!(db.processed_finalized_block_number().await, Some(11));
		assert_eq!(db.len(), 0);

		// Test empty update with a leaf number smaller than the latest one.
		assert!(db.process_bumps(5, Default::default(), Some(score(1))).await.is_empty());
		assert_eq!(db.processed_finalized_block_number().await, Some(11));
		assert_eq!(db.len(), 0);

		// Test an update with zeroed score.
		assert!(db
			.process_bumps(
				12,
				bumps(vec![(ParaId::from(100), vec![(PeerId::random(), 0)])]),
				Some(score(1))
			)
			.await
			.is_empty());
		assert_eq!(db.processed_finalized_block_number().await, Some(12));
		assert_eq!(db.len(), 0);

		// Reuse the same 12 block height, it should not be taken into consideration.
		let first_peer_id = PeerId::random();
		let first_para_id = ParaId::from(100);
		assert!(db
			.process_bumps(
				12,
				bumps(vec![(first_para_id, vec![(first_peer_id, 10)])]),
				Some(score(1))
			)
			.await
			.is_empty());
		assert_eq!(db.processed_finalized_block_number().await, Some(12));
		assert_eq!(db.len(), 0);
		assert_eq!(db.query(&first_peer_id, &first_para_id).await, None);

		// Test a non-zero update on an empty DB.
		assert_eq!(
			db.process_bumps(
				13,
				bumps(vec![(first_para_id, vec![(first_peer_id, 10)])]),
				Some(score(1))
			)
			.await,
			vec![bumped(first_peer_id, first_para_id, 10)]
		);
		assert_eq!(db.processed_finalized_block_number().await, Some(13));
		assert_eq!(db.len(), 1);
		assert_eq!(db.query(&first_peer_id, &first_para_id).await, Some(score(10)));
		// Query a non-existent peer_id for this para.
		assert_eq!(db.query(&PeerId::random(), &first_para_id).await, None);
		// Query this peer's rep for a different para.
		assert_eq!(db.query(&first_peer_id, &ParaId::from(200)).await, None);

		// Test a subsequent update with a lower block height. Will be ignored.
		assert!(db
			.process_bumps(
				10,
				bumps(vec![(first_para_id, vec![(first_peer_id, 10)])]),
				Some(score(1))
			)
			.await
			.is_empty());
		assert_eq!(db.processed_finalized_block_number().await, Some(13));
		assert_eq!(db.len(), 1);
		assert_eq!(db.query(&first_peer_id, &first_para_id).await, Some(score(10)));

		let second_para_id = ParaId::from(200);
		let second_peer_id = PeerId::random();
		// Test a subsequent update with no decay.
		assert_eq!(
			db.process_bumps(
				14,
				bumps(vec![
					(first_para_id, vec![(second_peer_id, 10)]),
					(second_para_id, vec![(first_peer_id, 5)]),
				]),
				None
			)
			.await,
			vec![
				bumped(second_peer_id, first_para_id, 10),
				bumped(first_peer_id, second_para_id, 5),
			]
		);
		assert_eq!(db.len(), 2);
		assert_eq!(db.processed_finalized_block_number().await, Some(14));
		assert_eq!(db.query(&first_peer_id, &first_para_id).await, Some(score(10)));
		assert_eq!(db.query(&second_peer_id, &first_para_id).await, Some(score(10)));
		assert_eq!(db.query(&first_peer_id, &second_para_id).await, Some(score(5)));

		// Empty update with decay has no effect.
		assert!(db.process_bumps(15, Default::default(), Some(score(1))).await.is_empty());
		assert_eq!(db.processed_finalized_block_number().await, Some(15));
		assert_eq!(db.len(), 2);
		assert_eq!(db.query(&first_peer_id, &first_para_id).await, Some(score(10)));
		assert_eq!(db.query(&second_peer_id, &first_para_id).await, Some(score(10)));
		assert_eq!(db.query(&first_peer_id, &second_para_id).await, Some(score(5)));

		// Test a subsequent update with decay.
		assert_eq!(
			db.process_bumps(
				16,
				bumps(vec![
					(first_para_id, vec![(first_peer_id, 10)]),
					(second_para_id, vec![(second_peer_id, 10)]),
				]),
				Some(score(1))
			)
			.await,
			vec![
				bumped(first_peer_id, first_para_id, 10),
				slashed(second_peer_id, first_para_id, 1),
				bumped(second_peer_id, second_para_id, 10),
				slashed(first_peer_id, second_para_id, 1),
			]
		);
		assert_eq!(db.processed_finalized_block_number().await, Some(16));
		assert_eq!(db.len(), 2);
		assert_eq!(db.query(&first_peer_id, &first_para_id).await, Some(score(20)));
		assert_eq!(db.query(&second_peer_id, &first_para_id).await, Some(score(9)));
		assert_eq!(db.query(&first_peer_id, &second_para_id).await, Some(score(4)));
		assert_eq!(db.query(&second_peer_id, &second_para_id).await, Some(score(10)));

		// Test a decay that makes the reputation go to 0 (The peer's entry will be removed)
		assert_eq!(
			db.process_bumps(
				17,
				bumps(vec![(second_para_id, vec![(second_peer_id, 10)])]),
				Some(score(5))
			)
			.await,
			vec![
				bumped(second_peer_id, second_para_id, 10),
				slashed(first_peer_id, second_para_id, 4),
			]
		);
		assert_eq!(db.processed_finalized_block_number().await, Some(17));
		assert_eq!(db.len(), 2);
		assert_eq!(db.query(&first_peer_id, &first_para_id).await, Some(score(20)));
		assert_eq!(db.query(&second_peer_id, &first_para_id).await, Some(score(9)));
		assert_eq!(db.query(&first_peer_id, &second_para_id).await, None);
		assert_eq!(db.query(&second_peer_id, &second_para_id).await, Some(score(20)));

		// Test an update which ends up pruning least recently used entries. The per-para limit is
		// 10.
		let mut db = Db::new(10).await;
		let peer_ids = (0..10).map(|_| PeerId::random()).collect::<Vec<_>>();

		// Add an equal reputation for all peers.
		let all_peers = peer_ids.iter().map(|peer_id| (*peer_id, 10)).collect::<Vec<_>>();
		assert_eq!(
			db.process_bumps(1, bumps(vec![(first_para_id, all_peers)]), None).await.len(),
			10
		);
		assert_eq!(db.len(), 1);

		for peer_id in peer_ids.iter() {
			assert_eq!(db.query(peer_id, &first_para_id).await, Some(score(10)));
		}

		// Now sleep briefly (100 ms) and then bump the reputations of all peers except for the
		// one with 4th index. We need to sleep so that the update time of the 4th peer is older
		// than the rest.
		tokio::time::sleep(Duration::from_millis(100)).await;
		let all_but_fourth = peer_ids
			.iter()
			.enumerate()
			.filter(|(i, _)| *i != 4)
			.map(|(_, peer_id)| (*peer_id, 10))
			.collect::<Vec<_>>();
		assert_eq!(
			db.process_bumps(2, bumps(vec![(first_para_id, all_but_fourth)]), Some(score(5)))
				.await
				.len(),
			10
		);

		for (i, peer_id) in peer_ids.iter().enumerate() {
			let expected = if i == 4 { score(5) } else { score(20) };
			assert_eq!(db.query(peer_id, &first_para_id).await, Some(expected));
		}

		// Now add a 11th peer. It should evict the 4th peer.
		let new_peer = PeerId::random();
		tokio::time::sleep(Duration::from_millis(100)).await;
		assert_eq!(
			db.process_bumps(
				3,
				bumps(vec![(first_para_id, vec![(new_peer, 10)])]),
				Some(score(5)),
			)
			.await
			.len(),
			11
		);
		for (i, peer_id) in peer_ids.iter().enumerate() {
			let expected = if i == 4 { None } else { Some(score(15)) };
			assert_eq!(db.query(peer_id, &first_para_id).await, expected);
		}
		assert_eq!(db.query(&new_peer, &first_para_id).await, Some(score(10)));

		// Now try adding yet another peer. The decay would naturally evict the new peer so no need
		// to evict the least recently bumped.
		let yet_another_peer = PeerId::random();
		assert_eq!(
			db.process_bumps(
				4,
				bumps(vec![(first_para_id, vec![(yet_another_peer, 10)])]),
				Some(score(10)),
			)
			.await
			.len(),
			11
		);
		for (i, peer_id) in peer_ids.iter().enumerate() {
			let expected = if i == 4 { None } else { Some(score(5)) };
			assert_eq!(db.query(peer_id, &first_para_id).await, expected);
		}
		assert_eq!(db.query(&new_peer, &first_para_id).await, None);
		assert_eq!(db.query(&yet_another_peer, &first_para_id).await, Some(score(10)));
	}

	#[tokio::test]
	// Test reputation slashes.
	async fn test_slash() {
		let mut db = Db::new(10).await;
		let (para_100, para_200, para_300) =
			(ParaId::from(100), ParaId::from(200), ParaId::from(300));

		// Test slash on empty DB
		let peer_id = PeerId::random();
		db.slash(&peer_id, &para_100, score(50)).await;
		assert_eq!(db.query(&peer_id, &para_100).await, None);

		// Test slash of a peer that has no entry under the given para
		let another_peer_id = PeerId::random();
		assert_eq!(
			db.process_bumps(
				1,
				bumps(vec![
					(para_100, vec![(peer_id, 10)]),
					(para_200, vec![(another_peer_id, 12)]),
					(para_300, vec![(peer_id, 15)]),
				]),
				Some(score(10)),
			)
			.await
			.len(),
			3
		);
		assert_eq!(db.query(&peer_id, &para_100).await, Some(score(10)));
		assert_eq!(db.query(&another_peer_id, &para_200).await, Some(score(12)));
		assert_eq!(db.query(&peer_id, &para_300).await, Some(score(15)));

		db.slash(&peer_id, &para_200, score(4)).await;
		assert_eq!(db.query(&peer_id, &para_100).await, Some(score(10)));
		assert_eq!(db.query(&another_peer_id, &para_200).await, Some(score(12)));
		assert_eq!(db.query(&peer_id, &para_300).await, Some(score(15)));

		// Test regular slash
		db.slash(&peer_id, &para_100, score(4)).await;
		assert_eq!(db.query(&peer_id, &para_100).await, Some(score(6)));

		// Test slash which removes the entry altogether
		db.slash(&peer_id, &para_100, score(8)).await;
		assert_eq!(db.query(&peer_id, &para_100).await, None);
		assert_eq!(db.len(), 2);
	}

	#[tokio::test]
	// Test para pruning.
	async fn test_prune_paras() {
		let mut db = Db::new(10).await;
		let (para_100, para_200, para_300) =
			(ParaId::from(100), ParaId::from(200), ParaId::from(300));

		db.prune_paras(BTreeSet::new()).await;
		assert_eq!(db.len(), 0);

		db.prune_paras([para_100, para_200].into_iter().collect()).await;
		assert_eq!(db.len(), 0);

		let peer_id = PeerId::random();
		let another_peer_id = PeerId::random();

		assert_eq!(
			db.process_bumps(
				1,
				bumps(vec![
					(para_100, vec![(peer_id, 10)]),
					(para_200, vec![(another_peer_id, 12)]),
					(para_300, vec![(peer_id, 15)]),
				]),
				Some(score(10)),
			)
			.await
			.len(),
			3
		);
		assert_eq!(db.len(), 3);

		// Registered paras include the existing ones. Does nothing
		db.prune_paras([para_100, para_200, para_300, ParaId::from(400)].into_iter().collect())
			.await;
		assert_eq!(db.len(), 3);

		assert_eq!(db.query(&peer_id, &para_100).await, Some(score(10)));
		assert_eq!(db.query(&another_peer_id, &para_200).await, Some(score(12)));
		assert_eq!(db.query(&peer_id, &para_300).await, Some(score(15)));

		// Prunes multiple paras.
		db.prune_paras([para_300].into_iter().collect()).await;
		assert_eq!(db.len(), 1);
		assert_eq!(db.query(&peer_id, &para_100).await, None);
		assert_eq!(db.query(&another_peer_id, &para_200).await, None);
		assert_eq!(db.query(&peer_id, &para_300).await, Some(score(15)));

		// Prunes all paras.
		db.prune_paras(BTreeSet::new()).await;
		assert_eq!(db.len(), 0);
		assert_eq!(db.query(&peer_id, &para_300).await, None);
	}
}
|
||||
+518
@@ -0,0 +1,518 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
mod backend;
|
||||
mod connected;
|
||||
mod db;
|
||||
|
||||
use futures::channel::oneshot;
|
||||
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
|
||||
|
||||
use crate::{
|
||||
validator_side_experimental::{
|
||||
common::{
|
||||
PeerInfo, PeerState, Score, CONNECTED_PEERS_LIMIT, CONNECTED_PEERS_PARA_LIMIT,
|
||||
INACTIVITY_DECAY, MAX_STARTUP_ANCESTRY_LOOKBACK, MAX_STORED_SCORES_PER_PARA,
|
||||
VALID_INCLUDED_CANDIDATE_BUMP,
|
||||
},
|
||||
error::{Error, Result},
|
||||
},
|
||||
LOG_TARGET,
|
||||
};
|
||||
pub use backend::Backend;
|
||||
use connected::ConnectedPeers;
|
||||
pub use db::Db;
|
||||
use pezkuwi_node_network_protocol::{
|
||||
peer_set::{CollationVersion, PeerSet},
|
||||
PeerId,
|
||||
};
|
||||
use pezkuwi_node_subsystem::{
|
||||
messages::{ChainApiMessage, NetworkBridgeTxMessage},
|
||||
ActivatedLeaf, CollatorProtocolSenderTrait,
|
||||
};
|
||||
use pezkuwi_node_subsystem_util::{
|
||||
request_candidate_events, request_candidates_pending_availability, runtime::recv_runtime,
|
||||
};
|
||||
use pezkuwi_primitives::{
|
||||
BlockNumber, CandidateDescriptorVersion, CandidateEvent, CandidateHash, Hash, Id as ParaId,
|
||||
};
|
||||
|
||||
#[derive(Debug, PartialEq, Clone)]
|
||||
pub struct ReputationUpdate {
|
||||
pub peer_id: PeerId,
|
||||
pub para_id: ParaId,
|
||||
pub value: Score,
|
||||
pub kind: ReputationUpdateKind,
|
||||
}
|
||||
|
||||
/// Direction of a [`ReputationUpdate`].
#[derive(Clone, Debug, PartialEq)]
pub enum ReputationUpdateKind {
	/// The reputation was increased.
	Bump,
	/// The reputation was decreased.
	Slash,
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
enum TryAcceptOutcome {
|
||||
Added,
|
||||
// This can hold more than one `PeerId` because before receiving the `Declare` message,
|
||||
// one peer can hold connection slots for multiple paraids.
|
||||
// The set can also be empty if this peer replaced some other peer's slot but that other peer
|
||||
// maintained a connection slot for another para (therefore not disconnected).
|
||||
// The number of peers in the set is bound to the number of scheduled paras.
|
||||
Replaced(HashSet<PeerId>),
|
||||
Rejected,
|
||||
}
|
||||
|
||||
impl TryAcceptOutcome {
|
||||
fn combine(self, other: Self) -> Self {
|
||||
use TryAcceptOutcome::*;
|
||||
match (self, other) {
|
||||
(Added, Added) => Added,
|
||||
(Rejected, Rejected) => Rejected,
|
||||
(Added, Rejected) | (Rejected, Added) => Added,
|
||||
(Replaced(mut replaced_a), Replaced(replaced_b)) => {
|
||||
replaced_a.extend(replaced_b);
|
||||
Replaced(replaced_a)
|
||||
},
|
||||
(_, Replaced(replaced)) | (Replaced(replaced), _) => Replaced(replaced),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
enum DeclarationOutcome {
|
||||
Rejected,
|
||||
Switched(ParaId),
|
||||
Accepted,
|
||||
}
|
||||
|
||||
pub struct PeerManager<B> {
|
||||
db: B,
|
||||
connected: ConnectedPeers,
|
||||
}
|
||||
|
||||
impl<B: Backend> PeerManager<B> {
|
||||
/// Initialize the peer manager (called on subsystem startup, after the node finished syncing to
|
||||
/// the tip of the chain).
|
||||
pub async fn startup<Sender: CollatorProtocolSenderTrait>(
|
||||
backend: B,
|
||||
sender: &mut Sender,
|
||||
scheduled_paras: BTreeSet<ParaId>,
|
||||
) -> Result<Self> {
|
||||
let mut instance = Self {
|
||||
db: backend,
|
||||
connected: ConnectedPeers::new(
|
||||
scheduled_paras,
|
||||
CONNECTED_PEERS_LIMIT,
|
||||
CONNECTED_PEERS_PARA_LIMIT,
|
||||
),
|
||||
};
|
||||
|
||||
let (latest_finalized_block_number, latest_finalized_block_hash) =
|
||||
get_latest_finalized_block(sender).await?;
|
||||
|
||||
let processed_finalized_block_number =
|
||||
instance.db.processed_finalized_block_number().await.unwrap_or_default();
|
||||
|
||||
let bumps = extract_reputation_bumps_on_new_finalized_block(
|
||||
sender,
|
||||
processed_finalized_block_number,
|
||||
(latest_finalized_block_number, latest_finalized_block_hash),
|
||||
)
|
||||
.await?;
|
||||
|
||||
instance.db.process_bumps(latest_finalized_block_number, bumps, None).await;
|
||||
|
||||
Ok(instance)
|
||||
}
|
||||
|
||||
/// Handle a new block finality notification, by updating peer reputations.
|
||||
pub async fn update_reputations_on_new_finalized_block<Sender: CollatorProtocolSenderTrait>(
|
||||
&mut self,
|
||||
sender: &mut Sender,
|
||||
(finalized_block_hash, finalized_block_number): (Hash, BlockNumber),
|
||||
) -> Result<()> {
|
||||
let processed_finalized_block_number =
|
||||
self.db.processed_finalized_block_number().await.unwrap_or_default();
|
||||
|
||||
let bumps = extract_reputation_bumps_on_new_finalized_block(
|
||||
sender,
|
||||
processed_finalized_block_number,
|
||||
(finalized_block_number, finalized_block_hash),
|
||||
)
|
||||
.await?;
|
||||
|
||||
let updates = self
|
||||
.db
|
||||
.process_bumps(
|
||||
finalized_block_number,
|
||||
bumps,
|
||||
Some(Score::new(INACTIVITY_DECAY).expect("INACTIVITY_DECAY is a valid score")),
|
||||
)
|
||||
.await;
|
||||
for update in updates {
|
||||
self.connected.update_reputation(update);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Process the registered paras and cleanup all data pertaining to any unregistered paras, if
|
||||
/// any. Should be called every N finalized block notifications, since it's expected that para
|
||||
/// deregistrations are rare.
|
||||
pub async fn registered_paras_update(&mut self, registered_paras: BTreeSet<ParaId>) {
|
||||
// Tell the DB to cleanup paras that are no longer registered. No need to clean up the
|
||||
// connected peers state, since it will get automatically cleaned up as the claim queue
|
||||
// gets rid of these stale assignments.
|
||||
self.db.prune_paras(registered_paras).await;
|
||||
}
|
||||
|
||||
/// Process a potential change of the scheduled paras.
|
||||
pub async fn scheduled_paras_update<Sender: CollatorProtocolSenderTrait>(
|
||||
&mut self,
|
||||
sender: &mut Sender,
|
||||
scheduled_paras: BTreeSet<ParaId>,
|
||||
) {
|
||||
let mut prev_scheduled_paras: BTreeSet<_> =
|
||||
self.connected.scheduled_paras().copied().collect();
|
||||
|
||||
if prev_scheduled_paras == scheduled_paras {
|
||||
// Nothing to do if the scheduled paras didn't change.
|
||||
return;
|
||||
}
|
||||
|
||||
// Recreate the connected peers based on the new schedule and try populating it again based
|
||||
// on their reputations. Disconnect any peers that couldn't be kept
|
||||
let mut new_instance =
|
||||
ConnectedPeers::new(scheduled_paras, CONNECTED_PEERS_LIMIT, CONNECTED_PEERS_PARA_LIMIT);
|
||||
|
||||
std::mem::swap(&mut new_instance, &mut self.connected);
|
||||
let prev_instance = new_instance;
|
||||
let (prev_peers, cached_scores) = prev_instance.consume();
|
||||
|
||||
// Build a closure that can be used to first query the in-memory past reputations of the
|
||||
// peers before reaching for the DB.
|
||||
|
||||
// Borrow these for use in the closure.
|
||||
let cached_scores = &cached_scores;
|
||||
let db = &self.db;
|
||||
let reputation_query_fn = |peer_id: PeerId, para_id: ParaId| async move {
|
||||
if let Some(cached_score) =
|
||||
cached_scores.get(¶_id).and_then(|per_para| per_para.get_score(&peer_id))
|
||||
{
|
||||
cached_score
|
||||
} else {
|
||||
db.query(&peer_id, ¶_id).await.unwrap_or_default()
|
||||
}
|
||||
};
|
||||
|
||||
// See which of the old peers we should keep.
|
||||
let mut peers_to_disconnect = HashSet::new();
|
||||
for (peer_id, peer_info) in prev_peers {
|
||||
let outcome = self.connected.try_accept(reputation_query_fn, peer_id, peer_info).await;
|
||||
|
||||
match outcome {
|
||||
TryAcceptOutcome::Rejected => {
|
||||
peers_to_disconnect.insert(peer_id);
|
||||
},
|
||||
TryAcceptOutcome::Replaced(replaced_peer_ids) => {
|
||||
peers_to_disconnect.extend(replaced_peer_ids);
|
||||
},
|
||||
TryAcceptOutcome::Added => {},
|
||||
}
|
||||
}
|
||||
|
||||
// Disconnect peers that couldn't be kept.
|
||||
self.disconnect_peers(sender, peers_to_disconnect).await;
|
||||
}
|
||||
|
||||
/// Process a declaration message of a peer.
|
||||
pub async fn declared<Sender: CollatorProtocolSenderTrait>(
|
||||
&mut self,
|
||||
sender: &mut Sender,
|
||||
peer_id: PeerId,
|
||||
para_id: ParaId,
|
||||
) {
|
||||
let Some(peer_info) = self.connected.peer_info(&peer_id).cloned() else { return };
|
||||
let outcome = self.connected.declared(peer_id, para_id);
|
||||
|
||||
match outcome {
|
||||
DeclarationOutcome::Accepted => {
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
?para_id,
|
||||
?peer_id,
|
||||
"Peer declared",
|
||||
);
|
||||
},
|
||||
DeclarationOutcome::Switched(old_para_id) => {
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
?para_id,
|
||||
?old_para_id,
|
||||
?peer_id,
|
||||
"Peer switched collating paraid. Trying to accept it on the new one.",
|
||||
);
|
||||
|
||||
self.try_accept_connection(sender, peer_id, peer_info).await;
|
||||
},
|
||||
DeclarationOutcome::Rejected => {
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
?para_id,
|
||||
?peer_id,
|
||||
"Peer declared but rejected. Going to disconnect.",
|
||||
);
|
||||
|
||||
self.disconnect_peers(sender, [peer_id].into_iter().collect()).await;
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Slash a peer's reputation for this paraid.
|
||||
pub async fn slash_reputation(&mut self, peer_id: &PeerId, para_id: &ParaId, value: Score) {
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
?peer_id,
|
||||
?para_id,
|
||||
?value,
|
||||
"Slashing peer's reputation",
|
||||
);
|
||||
|
||||
self.db.slash(peer_id, para_id, value).await;
|
||||
self.connected.update_reputation(ReputationUpdate {
|
||||
peer_id: *peer_id,
|
||||
para_id: *para_id,
|
||||
value,
|
||||
kind: ReputationUpdateKind::Slash,
|
||||
});
|
||||
}
|
||||
|
||||
/// Process a peer disconnected event coming from the network.
|
||||
pub fn disconnected(&mut self, peer_id: &PeerId) {
|
||||
self.connected.remove(peer_id);
|
||||
}
|
||||
|
||||
/// A connection was made, triage it. Return whether or not is was kept.
|
||||
pub async fn try_accept_connection<Sender: CollatorProtocolSenderTrait>(
|
||||
&mut self,
|
||||
sender: &mut Sender,
|
||||
peer_id: PeerId,
|
||||
peer_info: PeerInfo,
|
||||
) -> bool {
|
||||
let db = &self.db;
|
||||
let reputation_query_fn = |peer_id: PeerId, para_id: ParaId| async move {
|
||||
// Go straight to the DB. We only store in-memory the reputations of connected peers.
|
||||
db.query(&peer_id, ¶_id).await.unwrap_or_default()
|
||||
};
|
||||
|
||||
let outcome = self.connected.try_accept(reputation_query_fn, peer_id, peer_info).await;
|
||||
|
||||
match outcome {
|
||||
TryAcceptOutcome::Added => true,
|
||||
TryAcceptOutcome::Replaced(other_peers) => {
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
"Peer {:?} replaced the connection slots of other peers: {:?}",
|
||||
peer_id,
|
||||
&other_peers
|
||||
);
|
||||
self.disconnect_peers(sender, other_peers).await;
|
||||
true
|
||||
},
|
||||
TryAcceptOutcome::Rejected => {
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
?peer_id,
|
||||
"Peer connection was rejected",
|
||||
);
|
||||
self.disconnect_peers(sender, [peer_id].into_iter().collect()).await;
|
||||
false
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Retrieve the score of the connected peer. We assume the peer is declared for this paraid.
|
||||
pub fn connected_peer_score(&self, peer_id: &PeerId, para_id: &ParaId) -> Option<Score> {
|
||||
self.connected.peer_score(peer_id, para_id)
|
||||
}
|
||||
|
||||
async fn disconnect_peers<Sender: CollatorProtocolSenderTrait>(
|
||||
&self,
|
||||
sender: &mut Sender,
|
||||
peers: HashSet<PeerId>,
|
||||
) {
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
?peers,
|
||||
"Disconnecting peers",
|
||||
);
|
||||
|
||||
sender
|
||||
.send_message(NetworkBridgeTxMessage::DisconnectPeers(
|
||||
peers.into_iter().collect(),
|
||||
PeerSet::Collation,
|
||||
))
|
||||
.await;
|
||||
}
|
||||
}
|
||||
|
||||
async fn get_ancestors<Sender: CollatorProtocolSenderTrait>(
|
||||
sender: &mut Sender,
|
||||
k: usize,
|
||||
hash: Hash,
|
||||
) -> Result<Vec<Hash>> {
|
||||
let (tx, rx) = oneshot::channel();
|
||||
sender
|
||||
.send_message(ChainApiMessage::Ancestors { hash, k, response_channel: tx })
|
||||
.await;
|
||||
|
||||
Ok(rx.await.map_err(|_| Error::CanceledAncestors)??)
|
||||
}
|
||||
|
||||
async fn get_latest_finalized_block<Sender: CollatorProtocolSenderTrait>(
|
||||
sender: &mut Sender,
|
||||
) -> Result<(BlockNumber, Hash)> {
|
||||
let (tx, rx) = oneshot::channel();
|
||||
sender.send_message(ChainApiMessage::FinalizedBlockNumber(tx)).await;
|
||||
|
||||
let block_number = rx.await.map_err(|_| Error::CanceledFinalizedBlockNumber)??;
|
||||
|
||||
let (tx, rx) = oneshot::channel();
|
||||
sender.send_message(ChainApiMessage::FinalizedBlockHash(block_number, tx)).await;
|
||||
|
||||
let block_hash = rx
|
||||
.await
|
||||
.map_err(|_| Error::CanceledFinalizedBlockHash)??
|
||||
.ok_or_else(|| Error::FinalizedBlockNotFound(block_number))?;
|
||||
|
||||
Ok((block_number, block_hash))
|
||||
}
|
||||
|
||||
async fn extract_reputation_bumps_on_new_finalized_block<Sender: CollatorProtocolSenderTrait>(
|
||||
sender: &mut Sender,
|
||||
processed_finalized_block_number: BlockNumber,
|
||||
(latest_finalized_block_number, latest_finalized_block_hash): (BlockNumber, Hash),
|
||||
) -> Result<BTreeMap<ParaId, HashMap<PeerId, Score>>> {
|
||||
if latest_finalized_block_number < processed_finalized_block_number {
|
||||
// Shouldn't be possible, but in this case there is no other initialisation needed.
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
latest_finalized_block_number,
|
||||
?latest_finalized_block_hash,
|
||||
"Peer manager stored finalized block number {} is higher than the latest finalized block.",
|
||||
processed_finalized_block_number,
|
||||
);
|
||||
return Ok(BTreeMap::new());
|
||||
}
|
||||
|
||||
let ancestry_len = std::cmp::min(
|
||||
latest_finalized_block_number.saturating_sub(processed_finalized_block_number),
|
||||
MAX_STARTUP_ANCESTRY_LOOKBACK,
|
||||
);
|
||||
|
||||
if ancestry_len == 0 {
|
||||
return Ok(BTreeMap::new());
|
||||
}
|
||||
|
||||
let mut ancestors =
|
||||
get_ancestors(sender, ancestry_len as usize, latest_finalized_block_hash).await?;
|
||||
ancestors.push(latest_finalized_block_hash);
|
||||
ancestors.reverse();
|
||||
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
?latest_finalized_block_hash,
|
||||
processed_finalized_block_number,
|
||||
"Processing reputation bumps for finalized relay parent {} and its {} ancestors",
|
||||
latest_finalized_block_number,
|
||||
ancestry_len
|
||||
);
|
||||
|
||||
let mut v2_candidates_per_rp: HashMap<Hash, BTreeMap<ParaId, HashSet<CandidateHash>>> =
|
||||
HashMap::with_capacity(ancestors.len());
|
||||
|
||||
for i in 1..ancestors.len() {
|
||||
let rp = ancestors[i];
|
||||
let parent_rp = ancestors[i - 1];
|
||||
let candidate_events = recv_runtime(request_candidate_events(rp, sender).await).await?;
|
||||
|
||||
for event in candidate_events {
|
||||
if let CandidateEvent::CandidateIncluded(receipt, _, _, _) = event {
|
||||
// Only v2 receipts can contain UMP signals.
|
||||
if receipt.descriptor.version() == CandidateDescriptorVersion::V2 {
|
||||
v2_candidates_per_rp
|
||||
.entry(parent_rp)
|
||||
.or_default()
|
||||
.entry(receipt.descriptor.para_id())
|
||||
.or_default()
|
||||
.insert(receipt.hash());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// This could be removed if we implemented https://github.com/pezkuwichain/pezkuwi-sdk/issues/152.
|
||||
let mut updates: BTreeMap<ParaId, HashMap<PeerId, Score>> = BTreeMap::new();
|
||||
for (rp, per_para) in v2_candidates_per_rp {
|
||||
for (para_id, included_candidates) in per_para {
|
||||
let candidates_pending_availability =
|
||||
recv_runtime(request_candidates_pending_availability(rp, para_id, sender).await)
|
||||
.await?;
|
||||
|
||||
for candidate in candidates_pending_availability {
|
||||
let candidate_hash = candidate.hash();
|
||||
if included_candidates.contains(&candidate_hash) {
|
||||
match candidate.commitments.ump_signals() {
|
||||
Ok(ump_signals) => {
|
||||
if let Some(approved_peer) = ump_signals.approved_peer() {
|
||||
match PeerId::from_bytes(approved_peer) {
|
||||
Ok(peer_id) => updates
|
||||
.entry(para_id)
|
||||
.or_default()
|
||||
.entry(peer_id)
|
||||
.or_default()
|
||||
.saturating_add(VALID_INCLUDED_CANDIDATE_BUMP),
|
||||
Err(err) => {
|
||||
// Collator sent an invalid peerid. It's only harming
|
||||
// itself.
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
?candidate_hash,
|
||||
"UMP signal contains invalid ApprovedPeer id: {}",
|
||||
err
|
||||
);
|
||||
},
|
||||
}
|
||||
}
|
||||
},
|
||||
Err(err) => {
|
||||
// This should never happen, as the ump signals are checked during
|
||||
// on-chain backing.
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
?candidate_hash,
|
||||
"Failed to parse UMP signals for included candidate: {}",
|
||||
err
|
||||
);
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(updates)
|
||||
}
|
||||
@@ -0,0 +1,32 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use crate::validator_side_experimental::{peer_manager::Backend, Metrics, PeerManager};
|
||||
use sp_keystore::KeystorePtr;
|
||||
|
||||
/// All state relevant for the validator side of the protocol lives here.
|
||||
pub struct State<B> {
|
||||
peer_manager: PeerManager<B>,
|
||||
keystore: KeystorePtr,
|
||||
metrics: Metrics,
|
||||
}
|
||||
|
||||
impl<B: Backend> State<B> {
|
||||
/// Instantiate a new subsystem `State`.
|
||||
pub fn new(peer_manager: PeerManager<B>, keystore: KeystorePtr, metrics: Metrics) -> Self {
|
||||
Self { peer_manager, keystore, metrics }
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,53 @@
|
||||
[package]
|
||||
name = "pezkuwi-dispute-distribution"
|
||||
version = "7.0.0"
|
||||
description = "Pezkuwi Dispute Distribution subsystem, which ensures all concerned validators are aware of a dispute and have the relevant votes."
|
||||
authors.workspace = true
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
homepage.workspace = true
|
||||
repository.workspace = true
|
||||
|
||||
[lints]
|
||||
workspace = true
|
||||
|
||||
[dependencies]
|
||||
codec = { features = ["std"], workspace = true, default-features = true }
|
||||
fatality = { workspace = true }
|
||||
futures = { workspace = true }
|
||||
futures-timer = { workspace = true }
|
||||
gum = { workspace = true, default-features = true }
|
||||
indexmap = { workspace = true }
|
||||
pezkuwi-node-network-protocol = { workspace = true, default-features = true }
|
||||
pezkuwi-node-primitives = { workspace = true, default-features = true }
|
||||
pezkuwi-node-subsystem = { workspace = true, default-features = true }
|
||||
pezkuwi-node-subsystem-util = { workspace = true, default-features = true }
|
||||
pezkuwi-primitives = { workspace = true, default-features = true }
|
||||
sc-network = { workspace = true, default-features = true }
|
||||
sp-application-crypto = { workspace = true, default-features = true }
|
||||
sp-keystore = { workspace = true, default-features = true }
|
||||
thiserror = { workspace = true }
|
||||
|
||||
[dev-dependencies]
|
||||
assert_matches = { workspace = true }
|
||||
async-channel = { workspace = true }
|
||||
async-trait = { workspace = true }
|
||||
pezkuwi-node-subsystem-test-helpers = { workspace = true }
|
||||
pezkuwi-primitives-test-helpers = { workspace = true }
|
||||
sc-keystore = { workspace = true, default-features = true }
|
||||
sp-keyring = { workspace = true, default-features = true }
|
||||
sp-tracing = { workspace = true, default-features = true }
|
||||
|
||||
[features]
|
||||
runtime-benchmarks = [
|
||||
"gum/runtime-benchmarks",
|
||||
"pezkuwi-node-network-protocol/runtime-benchmarks",
|
||||
"pezkuwi-node-primitives/runtime-benchmarks",
|
||||
"pezkuwi-node-subsystem-test-helpers/runtime-benchmarks",
|
||||
"pezkuwi-node-subsystem-util/runtime-benchmarks",
|
||||
"pezkuwi-node-subsystem/runtime-benchmarks",
|
||||
"pezkuwi-primitives-test-helpers/runtime-benchmarks",
|
||||
"pezkuwi-primitives/runtime-benchmarks",
|
||||
"sc-network/runtime-benchmarks",
|
||||
"sp-keyring/runtime-benchmarks",
|
||||
]
|
||||
@@ -0,0 +1,72 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
//
|
||||
|
||||
//! Error handling related code and Error/Result definitions.
|
||||
|
||||
use pezkuwi_node_subsystem::SubsystemError;
|
||||
use pezkuwi_node_subsystem_util::runtime;
|
||||
|
||||
use crate::{sender, LOG_TARGET};
|
||||
|
||||
use fatality::Nested;
|
||||
|
||||
#[allow(missing_docs)]
|
||||
#[fatality::fatality(splitable)]
|
||||
pub enum Error {
|
||||
/// Receiving subsystem message from overseer failed.
|
||||
#[fatal]
|
||||
#[error("Receiving message from overseer failed")]
|
||||
SubsystemReceive(#[source] SubsystemError),
|
||||
|
||||
/// Spawning a running task failed.
|
||||
#[fatal]
|
||||
#[error("Spawning subsystem task failed")]
|
||||
SpawnTask(#[source] SubsystemError),
|
||||
|
||||
/// `DisputeSender` mpsc receiver exhausted.
|
||||
#[fatal]
|
||||
#[error("Erasure chunk requester stream exhausted")]
|
||||
SenderExhausted,
|
||||
|
||||
/// Errors coming from `runtime::Runtime`.
|
||||
#[fatal(forward)]
|
||||
#[error("Error while accessing runtime information")]
|
||||
Runtime(#[from] runtime::Error),
|
||||
|
||||
/// Errors coming from `DisputeSender`
|
||||
#[fatal(forward)]
|
||||
#[error("Error while accessing runtime information")]
|
||||
Sender(#[from] sender::Error),
|
||||
}
|
||||
|
||||
/// Result type of this subsystem, failing with the subsystem's [`Error`].
pub type Result<T> = std::result::Result<T, Error>;
|
||||
|
||||
/// Result type that can only fail with a `FatalError` (presumably generated from [`Error`] by
/// the `fatality` attribute — confirm).
pub type FatalResult<T> = std::result::Result<T, FatalError>;
|
||||
|
||||
/// Utility for eating top level errors and log them.
|
||||
///
|
||||
/// We basically always want to try and continue on error. This utility function is meant to
|
||||
/// consume top-level errors by simply logging them
|
||||
pub fn log_error(result: Result<()>, ctx: &'static str) -> std::result::Result<(), FatalError> {
|
||||
match result.into_nested()? {
|
||||
Err(jfyi) => {
|
||||
gum::warn!(target: LOG_TARGET, error = ?jfyi, ctx);
|
||||
Ok(())
|
||||
},
|
||||
Ok(()) => Ok(()),
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,297 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! # Sending and receiving of `DisputeRequest`s.
|
||||
//!
|
||||
//! This subsystem essentially consists of two parts:
|
||||
//!
|
||||
//! - a sender
|
||||
//! - and a receiver
|
||||
//!
|
||||
//! The sender is responsible for getting our vote out, see `sender`. The receiver handles
|
||||
//! incoming [`DisputeRequest`](v1::DisputeRequest)s and offers spam protection, see `receiver`.
|
||||
|
||||
use std::time::Duration;
|
||||
|
||||
use futures::{channel::mpsc, FutureExt, StreamExt, TryFutureExt};
|
||||
|
||||
use pezkuwi_node_network_protocol::authority_discovery::AuthorityDiscovery;
|
||||
use pezkuwi_node_subsystem_util::nesting_sender::NestingSender;
|
||||
use sp_keystore::KeystorePtr;
|
||||
|
||||
use pezkuwi_node_network_protocol::request_response::{incoming::IncomingRequestReceiver, v1};
|
||||
use pezkuwi_node_primitives::DISPUTE_WINDOW;
|
||||
use pezkuwi_node_subsystem::{
|
||||
messages::DisputeDistributionMessage, overseer, FromOrchestra, OverseerSignal,
|
||||
SpawnedSubsystem, SubsystemError,
|
||||
};
|
||||
use pezkuwi_node_subsystem_util::{runtime, runtime::RuntimeInfo};
|
||||
|
||||
/// ## The sender [`DisputeSender`]
|
||||
///
|
||||
/// The sender (`DisputeSender`) keeps track of live disputes and makes sure our vote gets out for
|
||||
/// each one of those. The sender is responsible for sending our vote to each validator
|
||||
/// participating in the dispute and to each authority currently authoring blocks. The sending can
|
||||
/// be initiated by sending `DisputeDistributionMessage::SendDispute` message to this subsystem.
|
||||
///
|
||||
/// In addition the `DisputeSender` will query the coordinator for active disputes on each
|
||||
/// [`DisputeSender::update_leaves`] call and will initiate sending (start a `SendTask`) for every,
|
||||
/// to this subsystem, unknown dispute. This is to make sure, we get our vote out, even on
|
||||
/// restarts.
|
||||
///
|
||||
/// The actual work of sending and keeping track of transmission attempts to each validator for a
|
||||
/// particular dispute are done by [`SendTask`]. The purpose of the `DisputeSender` is to keep
|
||||
/// track of all ongoing disputes and start and clean up `SendTask`s accordingly.
|
||||
mod sender;
|
||||
use self::sender::{DisputeSender, DisputeSenderMessage};
|
||||
|
||||
/// ## The receiver [`DisputesReceiver`]
|
||||
///
|
||||
/// The receiving side is implemented as `DisputesReceiver` and is run as a separate long running
|
||||
/// task within this subsystem ([`DisputesReceiver::run`]).
|
||||
///
|
||||
/// Conceptually all the receiver has to do, is waiting for incoming requests which are passed in
|
||||
/// via a dedicated channel and forwarding them to the dispute coordinator via
|
||||
/// `DisputeCoordinatorMessage::ImportStatements`. Being the interface to the network and untrusted
|
||||
/// nodes, the reality is not that simple of course. Before importing statements the receiver will
|
||||
/// batch up imports as well as possible for efficient imports while maintaining timely dispute
|
||||
/// resolution and handling of spamming validators:
|
||||
///
|
||||
/// - Drop all messages from non validator nodes, for this it requires the [`AuthorityDiscovery`]
|
||||
/// service.
|
||||
/// - Drop messages from a node, if it sends at a too high rate.
|
||||
/// - Filter out duplicate messages (over some period of time).
|
||||
/// - Drop any obviously invalid votes (invalid signatures for example).
|
||||
/// - Ban peers whose votes were deemed invalid.
|
||||
///
|
||||
/// In general dispute-distribution works on limiting the work the dispute-coordinator will have to
|
||||
/// do, while at the same time making it aware of new disputes as fast as possible.
|
||||
///
|
||||
/// For successfully imported votes, we will confirm the receipt of the message back to the sender.
|
||||
/// This way a received confirmation guarantees, that the vote has been stored to disk by the
|
||||
/// receiver.
|
||||
mod receiver;
|
||||
use self::receiver::DisputesReceiver;
|
||||
|
||||
/// Error and [`Result`] type for this subsystem.
|
||||
mod error;
|
||||
use error::{log_error, Error, FatalError, FatalResult, Result};
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
||||
mod metrics;
|
||||
/// Prometheus `Metrics` for dispute distribution.
|
||||
pub use metrics::Metrics;
|
||||
|
||||
const LOG_TARGET: &'static str = "teyrchain::dispute-distribution";
|
||||
|
||||
/// Rate limit on the `receiver` side.
|
||||
///
|
||||
/// If messages from one peer come in at a higher rate than every `RECEIVE_RATE_LIMIT` on average,
|
||||
/// we start dropping messages from that peer to enforce that limit.
|
||||
pub const RECEIVE_RATE_LIMIT: Duration = Duration::from_millis(100);
|
||||
|
||||
/// Rate limit on the `sender` side.
|
||||
///
|
||||
/// In order to not hit the `RECEIVE_RATE_LIMIT` on the receiving side, we limit our sending rate as
|
||||
/// well.
|
||||
///
|
||||
/// We add 50ms extra, just to have some safe margin to the `RECEIVE_RATE_LIMIT`.
|
||||
pub const SEND_RATE_LIMIT: Duration = RECEIVE_RATE_LIMIT.saturating_add(Duration::from_millis(50));
|
||||
|
||||
/// The dispute distribution subsystem.
|
||||
pub struct DisputeDistributionSubsystem<AD> {
|
||||
/// Easy and efficient runtime access for this subsystem.
|
||||
runtime: RuntimeInfo,
|
||||
|
||||
/// Sender for our dispute requests.
|
||||
disputes_sender: DisputeSender<DisputeSenderMessage>,
|
||||
|
||||
/// Receive messages from `DisputeSender` background tasks.
|
||||
sender_rx: mpsc::Receiver<DisputeSenderMessage>,
|
||||
|
||||
/// Receiver for incoming requests.
|
||||
req_receiver: Option<IncomingRequestReceiver<v1::DisputeRequest>>,
|
||||
|
||||
/// Authority discovery service.
|
||||
authority_discovery: AD,
|
||||
|
||||
/// Metrics for this subsystem.
|
||||
metrics: Metrics,
|
||||
}
|
||||
|
||||
#[overseer::subsystem(DisputeDistribution, error = SubsystemError, prefix = self::overseer)]
|
||||
impl<Context, AD> DisputeDistributionSubsystem<AD>
|
||||
where
|
||||
<Context as overseer::DisputeDistributionContextTrait>::Sender:
|
||||
overseer::DisputeDistributionSenderTrait + Sync + Send,
|
||||
AD: AuthorityDiscovery + Clone,
|
||||
{
|
||||
fn start(self, ctx: Context) -> SpawnedSubsystem {
|
||||
let future = self
|
||||
.run(ctx)
|
||||
.map_err(|e| SubsystemError::with_origin("dispute-distribution", e))
|
||||
.boxed();
|
||||
|
||||
SpawnedSubsystem { name: "dispute-distribution-subsystem", future }
|
||||
}
|
||||
}
|
||||
|
||||
#[overseer::contextbounds(DisputeDistribution, prefix = self::overseer)]
|
||||
impl<AD> DisputeDistributionSubsystem<AD>
|
||||
where
|
||||
AD: AuthorityDiscovery + Clone,
|
||||
{
|
||||
/// Create a new instance of the dispute distribution.
|
||||
pub fn new(
|
||||
keystore: KeystorePtr,
|
||||
req_receiver: IncomingRequestReceiver<v1::DisputeRequest>,
|
||||
authority_discovery: AD,
|
||||
metrics: Metrics,
|
||||
) -> Self {
|
||||
let runtime = RuntimeInfo::new_with_config(runtime::Config {
|
||||
keystore: Some(keystore),
|
||||
session_cache_lru_size: DISPUTE_WINDOW.get(),
|
||||
});
|
||||
let (tx, sender_rx) = NestingSender::new_root(1);
|
||||
let disputes_sender = DisputeSender::new(tx, metrics.clone());
|
||||
Self {
|
||||
runtime,
|
||||
disputes_sender,
|
||||
sender_rx,
|
||||
req_receiver: Some(req_receiver),
|
||||
authority_discovery,
|
||||
metrics,
|
||||
}
|
||||
}
|
||||
|
||||
/// Start processing work as passed on from the Overseer.
|
||||
async fn run<Context>(mut self, mut ctx: Context) -> std::result::Result<(), FatalError> {
|
||||
let receiver = DisputesReceiver::new(
|
||||
ctx.sender().clone(),
|
||||
self.req_receiver
|
||||
.take()
|
||||
.expect("Must be provided on `new` and we take ownership here. qed."),
|
||||
self.authority_discovery.clone(),
|
||||
self.metrics.clone(),
|
||||
);
|
||||
ctx.spawn("disputes-receiver", receiver.run().boxed())
|
||||
.map_err(FatalError::SpawnTask)?;
|
||||
|
||||
// Process messages for sending side.
|
||||
//
|
||||
// Note: We want the sender to be rate limited and we are currently taking advantage of the
|
||||
// fact that the root task of this subsystem is only concerned with sending: Functions of
|
||||
// `DisputeSender` might back pressure if the rate limit is hit, which will slow down this
|
||||
// loop. If this fact ever changes, we will likely need another task.
|
||||
loop {
|
||||
let message = MuxedMessage::receive(&mut ctx, &mut self.sender_rx).await;
|
||||
match message {
|
||||
MuxedMessage::Subsystem(result) => {
|
||||
let result = match result? {
|
||||
FromOrchestra::Signal(signal) => {
|
||||
match self.handle_signals(&mut ctx, signal).await {
|
||||
Ok(SignalResult::Conclude) => return Ok(()),
|
||||
Ok(SignalResult::Continue) => Ok(()),
|
||||
Err(f) => Err(f),
|
||||
}
|
||||
},
|
||||
FromOrchestra::Communication { msg } =>
|
||||
self.handle_subsystem_message(&mut ctx, msg).await,
|
||||
};
|
||||
log_error(result, "on FromOrchestra")?;
|
||||
},
|
||||
MuxedMessage::Sender(result) => {
|
||||
let result = self
|
||||
.disputes_sender
|
||||
.on_message(
|
||||
&mut ctx,
|
||||
&mut self.runtime,
|
||||
result.ok_or(FatalError::SenderExhausted)?,
|
||||
)
|
||||
.await
|
||||
.map_err(Error::Sender);
|
||||
log_error(result, "on_message")?;
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Handle overseer signals.
|
||||
async fn handle_signals<Context>(
|
||||
&mut self,
|
||||
ctx: &mut Context,
|
||||
signal: OverseerSignal,
|
||||
) -> Result<SignalResult> {
|
||||
match signal {
|
||||
OverseerSignal::Conclude => return Ok(SignalResult::Conclude),
|
||||
OverseerSignal::ActiveLeaves(update) => {
|
||||
self.disputes_sender.update_leaves(ctx, &mut self.runtime, update).await?;
|
||||
},
|
||||
OverseerSignal::BlockFinalized(_, _) => {},
|
||||
};
|
||||
Ok(SignalResult::Continue)
|
||||
}
|
||||
|
||||
/// Handle `DisputeDistributionMessage`s.
|
||||
async fn handle_subsystem_message<Context>(
|
||||
&mut self,
|
||||
ctx: &mut Context,
|
||||
msg: DisputeDistributionMessage,
|
||||
) -> Result<()> {
|
||||
match msg {
|
||||
DisputeDistributionMessage::SendDispute(dispute_msg) =>
|
||||
self.disputes_sender.start_sender(ctx, &mut self.runtime, dispute_msg).await?,
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Messages to be handled in this subsystem.
|
||||
#[derive(Debug)]
|
||||
enum MuxedMessage {
|
||||
/// Messages from other subsystems.
|
||||
Subsystem(FatalResult<FromOrchestra<DisputeDistributionMessage>>),
|
||||
/// Messages from spawned sender background tasks.
|
||||
Sender(Option<DisputeSenderMessage>),
|
||||
}
|
||||
|
||||
#[overseer::contextbounds(DisputeDistribution, prefix = self::overseer)]
|
||||
impl MuxedMessage {
|
||||
async fn receive<Context>(
|
||||
ctx: &mut Context,
|
||||
from_sender: &mut mpsc::Receiver<DisputeSenderMessage>,
|
||||
) -> Self {
|
||||
// We are only fusing here to make `select` happy, in reality we will quit if the stream
|
||||
// ends.
|
||||
let from_overseer = ctx.recv().fuse();
|
||||
futures::pin_mut!(from_overseer, from_sender);
|
||||
// We select biased to make sure we finish up loose ends, before starting new work.
|
||||
futures::select_biased!(
|
||||
msg = from_sender.next() => MuxedMessage::Sender(msg),
|
||||
msg = from_overseer => MuxedMessage::Subsystem(msg.map_err(FatalError::SubsystemReceive)),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// Outcome of handling a signal from the overseer.
enum SignalResult {
    /// Overseer asked us to conclude, the main loop should stop.
    Conclude,
    /// We can continue processing events.
    Continue,
}
|
||||
@@ -0,0 +1,130 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use pezkuwi_node_subsystem_util::{
|
||||
metrics,
|
||||
metrics::{
|
||||
prometheus,
|
||||
prometheus::{Counter, CounterVec, Opts, PrometheusError, Registry, U64},
|
||||
},
|
||||
};
|
||||
|
||||
/// Label for success counters.
|
||||
pub const SUCCEEDED: &'static str = "succeeded";
|
||||
|
||||
/// Label for fail counters.
|
||||
pub const FAILED: &'static str = "failed";
|
||||
|
||||
/// Dispute Distribution metrics.
|
||||
#[derive(Clone, Default)]
|
||||
pub struct Metrics(Option<MetricsInner>);
|
||||
|
||||
#[derive(Clone)]
|
||||
struct MetricsInner {
|
||||
/// Number of sent dispute requests (succeeded and failed).
|
||||
sent_requests: CounterVec<U64>,
|
||||
|
||||
/// Number of requests received.
|
||||
///
|
||||
/// This is all requests coming in, regardless of whether they are processed or dropped.
|
||||
received_requests: Counter<U64>,
|
||||
|
||||
/// Number of requests for which `ImportStatements` returned.
|
||||
///
|
||||
/// We both have successful imports and failed imports here.
|
||||
imported_requests: CounterVec<U64>,
|
||||
|
||||
/// The duration of issued dispute request to response.
|
||||
time_dispute_request: prometheus::Histogram,
|
||||
}
|
||||
|
||||
impl Metrics {
|
||||
/// Create new dummy metrics, not reporting anything.
|
||||
pub fn new_dummy() -> Self {
|
||||
Metrics(None)
|
||||
}
|
||||
|
||||
/// Increment counter on finished request sending.
|
||||
pub fn on_sent_request(&self, label: &'static str) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.sent_requests.with_label_values(&[label]).inc()
|
||||
}
|
||||
}
|
||||
|
||||
/// Increment counter on served disputes.
|
||||
pub fn on_received_request(&self) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.received_requests.inc()
|
||||
}
|
||||
}
|
||||
|
||||
/// Statements have been imported.
|
||||
pub fn on_imported(&self, label: &'static str, num_requests: usize) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics
|
||||
.imported_requests
|
||||
.with_label_values(&[label])
|
||||
.inc_by(num_requests as u64)
|
||||
}
|
||||
}
|
||||
|
||||
/// Get a timer to time request/response duration.
|
||||
pub fn time_dispute_request(&self) -> Option<metrics::prometheus::prometheus::HistogramTimer> {
|
||||
self.0.as_ref().map(|metrics| metrics.time_dispute_request.start_timer())
|
||||
}
|
||||
}
|
||||
|
||||
impl metrics::Metrics for Metrics {
|
||||
fn try_register(registry: &Registry) -> Result<Self, PrometheusError> {
|
||||
let metrics = MetricsInner {
|
||||
sent_requests: prometheus::register(
|
||||
CounterVec::new(
|
||||
Opts::new(
|
||||
"pezkuwi_teyrchain_dispute_distribution_sent_requests",
|
||||
"Total number of sent requests.",
|
||||
),
|
||||
&["success"],
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
received_requests: prometheus::register(
|
||||
Counter::new(
|
||||
"pezkuwi_teyrchain_dispute_distribution_received_requests",
|
||||
"Total number of received dispute requests.",
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
imported_requests: prometheus::register(
|
||||
CounterVec::new(
|
||||
Opts::new(
|
||||
"pezkuwi_teyrchain_dispute_distribution_imported_requests",
|
||||
"Total number of imported requests.",
|
||||
),
|
||||
&["success"],
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
time_dispute_request: prometheus::register(
|
||||
prometheus::Histogram::with_opts(prometheus::HistogramOpts::new(
|
||||
"pezkuwi_teyrchain_dispute_distribution_time_dispute_request",
|
||||
"Time needed for dispute votes to get confirmed/fail getting transmitted.",
|
||||
))?,
|
||||
registry,
|
||||
)?,
|
||||
};
|
||||
Ok(Metrics(Some(metrics)))
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,209 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use std::{collections::HashMap, time::Instant};
|
||||
|
||||
use gum::CandidateHash;
|
||||
use pezkuwi_node_network_protocol::{
|
||||
request_response::{incoming::OutgoingResponseSender, v1::DisputeRequest},
|
||||
PeerId,
|
||||
};
|
||||
use pezkuwi_node_primitives::SignedDisputeStatement;
|
||||
use pezkuwi_primitives::{CandidateReceiptV2 as CandidateReceipt, ValidatorIndex};
|
||||
|
||||
use crate::receiver::{BATCH_COLLECTING_INTERVAL, MIN_KEEP_BATCH_ALIVE_VOTES};
|
||||
|
||||
use super::MAX_BATCH_LIFETIME;
|
||||
|
||||
/// A batch of votes to be imported into the `dispute-coordinator`.
|
||||
///
|
||||
/// Vote imports are way more efficient when performed in batches, hence we batch together incoming
|
||||
/// votes until the rate of incoming votes falls below a threshold, then we import into the dispute
|
||||
/// coordinator.
|
||||
///
|
||||
/// A `Batch` keeps track of the votes to be imported and the current incoming rate, on rate update
|
||||
/// it will "flush" in case the incoming rate dropped too low, preparing the import.
|
||||
pub struct Batch {
|
||||
/// The actual candidate this batch is concerned with.
|
||||
candidate_receipt: CandidateReceipt,
|
||||
|
||||
/// Cache of `CandidateHash` (candidate_receipt.hash()).
|
||||
candidate_hash: CandidateHash,
|
||||
|
||||
/// All valid votes received in this batch so far.
|
||||
///
|
||||
/// We differentiate between valid and invalid votes, so we can detect (and drop) duplicates,
|
||||
/// while still allowing validators to equivocate.
|
||||
///
|
||||
/// Detecting and rejecting duplicates is crucial in order to effectively enforce
|
||||
/// `MIN_KEEP_BATCH_ALIVE_VOTES` per `BATCH_COLLECTING_INTERVAL`. If we would count duplicates
|
||||
/// here, the mechanism would be broken.
|
||||
valid_votes: HashMap<ValidatorIndex, SignedDisputeStatement>,
|
||||
|
||||
/// All invalid votes received in this batch so far.
|
||||
invalid_votes: HashMap<ValidatorIndex, SignedDisputeStatement>,
|
||||
|
||||
/// How many votes have been batched since the last tick/creation.
|
||||
votes_batched_since_last_tick: u32,
|
||||
|
||||
/// Expiry time for the batch.
|
||||
///
|
||||
/// By this time the latest this batch will get flushed.
|
||||
best_before: Instant,
|
||||
|
||||
/// Requesters waiting for a response.
|
||||
requesters: Vec<(PeerId, OutgoingResponseSender<DisputeRequest>)>,
|
||||
}
|
||||
|
||||
/// Result of checking a batch every `BATCH_COLLECTING_INTERVAL`.
|
||||
pub(super) enum TickResult {
|
||||
/// Batch is still alive, please call `tick` again at the given `Instant`.
|
||||
Alive(Batch, Instant),
|
||||
/// Batch is done, ready for import!
|
||||
Done(PreparedImport),
|
||||
}
|
||||
|
||||
/// Ready for import.
|
||||
pub struct PreparedImport {
|
||||
pub candidate_receipt: CandidateReceipt,
|
||||
pub statements: Vec<(SignedDisputeStatement, ValidatorIndex)>,
|
||||
/// Information about original requesters.
|
||||
pub requesters: Vec<(PeerId, OutgoingResponseSender<DisputeRequest>)>,
|
||||
}
|
||||
|
||||
impl From<Batch> for PreparedImport {
|
||||
fn from(batch: Batch) -> Self {
|
||||
let Batch {
|
||||
candidate_receipt,
|
||||
valid_votes,
|
||||
invalid_votes,
|
||||
requesters: pending_responses,
|
||||
..
|
||||
} = batch;
|
||||
|
||||
let statements = valid_votes
|
||||
.into_iter()
|
||||
.chain(invalid_votes.into_iter())
|
||||
.map(|(index, statement)| (statement, index))
|
||||
.collect();
|
||||
|
||||
Self { candidate_receipt, statements, requesters: pending_responses }
|
||||
}
|
||||
}
|
||||
|
||||
impl Batch {
|
||||
/// Create a new empty batch based on the given `CandidateReceipt`.
|
||||
///
|
||||
/// To create a `Batch` use Batches::find_batch`.
|
||||
///
|
||||
/// Arguments:
|
||||
///
|
||||
/// * `candidate_receipt` - The candidate this batch is meant to track votes for.
|
||||
/// * `now` - current time stamp for calculating the first tick.
|
||||
///
|
||||
/// Returns: A batch and the first `Instant` you are supposed to call `tick`.
|
||||
pub(super) fn new(candidate_receipt: CandidateReceipt, now: Instant) -> (Self, Instant) {
|
||||
let s = Self {
|
||||
candidate_hash: candidate_receipt.hash(),
|
||||
candidate_receipt,
|
||||
valid_votes: HashMap::new(),
|
||||
invalid_votes: HashMap::new(),
|
||||
votes_batched_since_last_tick: 0,
|
||||
best_before: Instant::now() + MAX_BATCH_LIFETIME,
|
||||
requesters: Vec::new(),
|
||||
};
|
||||
let next_tick = s.calculate_next_tick(now);
|
||||
(s, next_tick)
|
||||
}
|
||||
|
||||
/// Receipt of the candidate this batch is batching votes for.
|
||||
pub fn candidate_receipt(&self) -> &CandidateReceipt {
|
||||
&self.candidate_receipt
|
||||
}
|
||||
|
||||
/// Add votes from a validator into the batch.
|
||||
///
|
||||
/// The statements are supposed to be the valid and invalid statements received in a
|
||||
/// `DisputeRequest`.
|
||||
///
|
||||
/// The given `pending_response` is the corresponding response sender for responding to `peer`.
|
||||
/// If at least one of the votes is new as far as this batch is concerned we record the
|
||||
/// pending_response, for later use. In case both votes are known already, we return the
|
||||
/// response sender as an `Err` value.
|
||||
pub fn add_votes(
|
||||
&mut self,
|
||||
valid_vote: (SignedDisputeStatement, ValidatorIndex),
|
||||
invalid_vote: (SignedDisputeStatement, ValidatorIndex),
|
||||
peer: PeerId,
|
||||
pending_response: OutgoingResponseSender<DisputeRequest>,
|
||||
) -> Result<(), OutgoingResponseSender<DisputeRequest>> {
|
||||
debug_assert!(valid_vote.0.candidate_hash() == invalid_vote.0.candidate_hash());
|
||||
debug_assert!(valid_vote.0.candidate_hash() == &self.candidate_hash);
|
||||
|
||||
let mut duplicate = true;
|
||||
|
||||
if self.valid_votes.insert(valid_vote.1, valid_vote.0).is_none() {
|
||||
self.votes_batched_since_last_tick += 1;
|
||||
duplicate = false;
|
||||
}
|
||||
if self.invalid_votes.insert(invalid_vote.1, invalid_vote.0).is_none() {
|
||||
self.votes_batched_since_last_tick += 1;
|
||||
duplicate = false;
|
||||
}
|
||||
|
||||
if duplicate {
|
||||
Err(pending_response)
|
||||
} else {
|
||||
self.requesters.push((peer, pending_response));
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Check batch for liveness.
|
||||
///
|
||||
/// This function is supposed to be called at instants given at construction and as returned as
|
||||
/// part of `TickResult`.
|
||||
pub(super) fn tick(mut self, now: Instant) -> TickResult {
|
||||
if self.votes_batched_since_last_tick >= MIN_KEEP_BATCH_ALIVE_VOTES &&
|
||||
now < self.best_before
|
||||
{
|
||||
// Still good:
|
||||
let next_tick = self.calculate_next_tick(now);
|
||||
// Reset counter:
|
||||
self.votes_batched_since_last_tick = 0;
|
||||
TickResult::Alive(self, next_tick)
|
||||
} else {
|
||||
TickResult::Done(PreparedImport::from(self))
|
||||
}
|
||||
}
|
||||
|
||||
/// Calculate when the next tick should happen.
|
||||
///
|
||||
/// This will usually return `now + BATCH_COLLECTING_INTERVAL`, except if the lifetime of this
|
||||
/// batch would exceed `MAX_BATCH_LIFETIME`.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `now` - The current time.
|
||||
fn calculate_next_tick(&self, now: Instant) -> Instant {
|
||||
let next_tick = now + BATCH_COLLECTING_INTERVAL;
|
||||
if next_tick < self.best_before {
|
||||
next_tick
|
||||
} else {
|
||||
self.best_before
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,170 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use std::{
|
||||
collections::{hash_map, HashMap},
|
||||
time::{Duration, Instant},
|
||||
};
|
||||
|
||||
use futures::future::pending;
|
||||
|
||||
use pezkuwi_node_network_protocol::request_response::DISPUTE_REQUEST_TIMEOUT;
|
||||
use pezkuwi_primitives::{CandidateHash, CandidateReceiptV2 as CandidateReceipt};
|
||||
|
||||
use crate::{
|
||||
receiver::batches::{batch::TickResult, waiting_queue::PendingWake},
|
||||
LOG_TARGET,
|
||||
};
|
||||
|
||||
pub use self::batch::{Batch, PreparedImport};
|
||||
use self::waiting_queue::WaitingQueue;
|
||||
|
||||
use super::{
|
||||
error::{JfyiError, JfyiResult},
|
||||
BATCH_COLLECTING_INTERVAL,
|
||||
};
|
||||
|
||||
/// A single batch (per candidate) as managed by `Batches`.
|
||||
mod batch;
|
||||
|
||||
/// Queue events in time and wait for them to become ready.
|
||||
mod waiting_queue;
|
||||
|
||||
/// Safe-guard in case votes trickle in real slow.
|
||||
///
|
||||
/// If the batch life time exceeded the time the sender is willing to wait for a confirmation, we
|
||||
/// would trigger pointless re-sends.
|
||||
const MAX_BATCH_LIFETIME: Duration = DISPUTE_REQUEST_TIMEOUT.saturating_sub(Duration::from_secs(2));
|
||||
|
||||
/// Limit the number of batches that can be alive at any given time.
|
||||
///
|
||||
/// Reasoning for this number, see guide.
|
||||
pub const MAX_BATCHES: usize = 1000;
|
||||
|
||||
/// Manage batches.
|
||||
///
|
||||
/// - Batches can be found via `find_batch()` in order to add votes to them/check they exist.
|
||||
/// - Batches can be checked for being ready for flushing in order to import contained votes.
|
||||
pub struct Batches {
|
||||
/// The batches we manage.
|
||||
///
|
||||
/// Kept invariants:
|
||||
/// For each entry in `batches`, there exists an entry in `waiting_queue` as well - we wait on
|
||||
/// all batches!
|
||||
batches: HashMap<CandidateHash, Batch>,
|
||||
/// Waiting queue for waiting for batches to become ready for `tick`.
|
||||
///
|
||||
/// Kept invariants by `Batches`:
|
||||
/// For each entry in the `waiting_queue` there exists a corresponding entry in `batches`.
|
||||
waiting_queue: WaitingQueue<CandidateHash>,
|
||||
}
|
||||
|
||||
/// A found batch is either really found or got created so it can be found.
|
||||
pub enum FoundBatch<'a> {
|
||||
/// Batch just got created.
|
||||
Created(&'a mut Batch),
|
||||
/// Batch already existed.
|
||||
Found(&'a mut Batch),
|
||||
}
|
||||
|
||||
impl Batches {
|
||||
/// Create new empty `Batches`.
|
||||
pub fn new() -> Self {
|
||||
debug_assert!(
|
||||
MAX_BATCH_LIFETIME > BATCH_COLLECTING_INTERVAL,
|
||||
"Unexpectedly low `MAX_BATCH_LIFETIME`, please check parameters."
|
||||
);
|
||||
Self { batches: HashMap::new(), waiting_queue: WaitingQueue::new() }
|
||||
}
|
||||
|
||||
/// Find a particular batch.
|
||||
///
|
||||
/// That is either find it, or we create it as reflected by the result `FoundBatch`.
|
||||
pub fn find_batch(
|
||||
&mut self,
|
||||
candidate_hash: CandidateHash,
|
||||
candidate_receipt: CandidateReceipt,
|
||||
) -> JfyiResult<FoundBatch<'_>> {
|
||||
if self.batches.len() >= MAX_BATCHES {
|
||||
return Err(JfyiError::MaxBatchLimitReached);
|
||||
}
|
||||
debug_assert!(candidate_hash == candidate_receipt.hash());
|
||||
let result = match self.batches.entry(candidate_hash) {
|
||||
hash_map::Entry::Vacant(vacant) => {
|
||||
let now = Instant::now();
|
||||
let (created, ready_at) = Batch::new(candidate_receipt, now);
|
||||
let pending_wake = PendingWake { payload: candidate_hash, ready_at };
|
||||
self.waiting_queue.push(pending_wake);
|
||||
FoundBatch::Created(vacant.insert(created))
|
||||
},
|
||||
hash_map::Entry::Occupied(occupied) => FoundBatch::Found(occupied.into_mut()),
|
||||
};
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
/// Wait for the next `tick` to check for ready batches.
|
||||
///
|
||||
/// This function blocks (returns `Poll::Pending`) until at least one batch can be
|
||||
/// checked for readiness meaning that `BATCH_COLLECTING_INTERVAL` has passed since the last
|
||||
/// check for that batch or it reached end of life.
|
||||
///
|
||||
/// If this `Batches` instance is empty (does not actually contain any batches), then this
|
||||
/// function will always return `Poll::Pending`.
|
||||
///
|
||||
/// Returns: A `Vec` of all `PreparedImport`s from batches that became ready.
|
||||
pub async fn check_batches(&mut self) -> Vec<PreparedImport> {
|
||||
let now = Instant::now();
|
||||
|
||||
let mut imports = Vec::new();
|
||||
|
||||
// Wait for at least one batch to become ready:
|
||||
self.waiting_queue.wait_ready(now).await;
|
||||
|
||||
// Process all ready entries:
|
||||
while let Some(wake) = self.waiting_queue.pop_ready(now) {
|
||||
let batch = self.batches.remove(&wake.payload);
|
||||
debug_assert!(
|
||||
batch.is_some(),
|
||||
"Entries referenced in `waiting_queue` are supposed to exist!"
|
||||
);
|
||||
let batch = match batch {
|
||||
None => return pending().await,
|
||||
Some(batch) => batch,
|
||||
};
|
||||
match batch.tick(now) {
|
||||
TickResult::Done(import) => {
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
candidate_hash = ?wake.payload,
|
||||
"Batch became ready."
|
||||
);
|
||||
imports.push(import);
|
||||
},
|
||||
TickResult::Alive(old_batch, next_tick) => {
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
candidate_hash = ?wake.payload,
|
||||
"Batch found to be still alive on check."
|
||||
);
|
||||
let pending_wake = PendingWake { payload: wake.payload, ready_at: next_tick };
|
||||
self.waiting_queue.push(pending_wake);
|
||||
self.batches.insert(wake.payload, old_batch);
|
||||
},
|
||||
}
|
||||
}
|
||||
imports
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,204 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use std::{cmp::Ordering, collections::BinaryHeap, time::Instant};
|
||||
|
||||
use futures::future::pending;
|
||||
use futures_timer::Delay;
|
||||
|
||||
/// Wait asynchronously for given `Instant`s one after the other.
|
||||
///
|
||||
/// `PendingWake`s can be inserted and `WaitingQueue` makes `wait_ready()` to always wait for the
|
||||
/// next `Instant` in the queue.
|
||||
pub struct WaitingQueue<Payload> {
|
||||
/// All pending wakes we are supposed to wait on in order.
|
||||
pending_wakes: BinaryHeap<PendingWake<Payload>>,
|
||||
/// Wait for next `PendingWake`.
|
||||
timer: Option<Delay>,
|
||||
}
|
||||
|
||||
/// An event scheduled for processing at `ready_at`.
///
/// This is the element type of `WaitingQueue`. Its `Ord` instance sorts descending with regard
/// to the `Instant`, which turns the queue's heap into a `min-heap` with the earliest `Instant`
/// at the top.
#[derive(Eq, PartialEq)]
pub struct PendingWake<Payload> {
    /// Data identifying what should be processed.
    pub payload: Payload,
    /// Time at which the event becomes ready.
    pub ready_at: Instant,
}
|
||||
|
||||
impl<Payload: Eq + Ord> WaitingQueue<Payload> {
|
||||
/// Get a new empty `WaitingQueue`.
|
||||
///
|
||||
/// If you call `pop` on this queue immediately, it will always return `Poll::Pending`.
|
||||
pub fn new() -> Self {
|
||||
Self { pending_wakes: BinaryHeap::new(), timer: None }
|
||||
}
|
||||
|
||||
/// Push a `PendingWake`.
|
||||
///
|
||||
/// The next call to `wait_ready` will make sure to wake soon enough to process that new event
|
||||
/// in a timely manner.
|
||||
pub fn push(&mut self, wake: PendingWake<Payload>) {
|
||||
self.pending_wakes.push(wake);
|
||||
// Reset timer as it is potentially obsolete now:
|
||||
self.timer = None;
|
||||
}
|
||||
|
||||
/// Pop the next ready item.
|
||||
///
|
||||
/// This function does not wait, if nothing is ready right now as determined by the passed
|
||||
/// `now` time stamp, this function simply returns `None`.
|
||||
pub fn pop_ready(&mut self, now: Instant) -> Option<PendingWake<Payload>> {
|
||||
let is_ready = self.pending_wakes.peek().map_or(false, |p| p.ready_at <= now);
|
||||
if is_ready {
|
||||
Some(self.pending_wakes.pop().expect("We just peeked. qed."))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Don't pop, just wait until something is ready.
|
||||
///
|
||||
/// Once this function returns `Poll::Ready(())` `pop_ready()` will return `Some`, if passed
|
||||
/// the same `Instant`.
|
||||
///
|
||||
/// Whether ready or not is determined based on the passed time stamp `now` which should be the
|
||||
/// current time as returned by `Instant::now()`
|
||||
///
|
||||
/// This function waits asynchronously for an item to become ready. If there is no more item,
|
||||
/// this call will wait forever (return Poll::Pending without scheduling a wake).
|
||||
pub async fn wait_ready(&mut self, now: Instant) {
|
||||
if let Some(timer) = &mut self.timer {
|
||||
// Previous timer was not done yet.
|
||||
timer.await
|
||||
}
|
||||
|
||||
let next_waiting = self.pending_wakes.peek();
|
||||
let is_ready = next_waiting.map_or(false, |p| p.ready_at <= now);
|
||||
if is_ready {
|
||||
return;
|
||||
}
|
||||
|
||||
self.timer = next_waiting.map(|p| Delay::new(p.ready_at.duration_since(now)));
|
||||
match &mut self.timer {
|
||||
None => return pending().await,
|
||||
Some(timer) => timer.await,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<Payload: Eq + Ord> PartialOrd<PendingWake<Payload>> for PendingWake<Payload> {
|
||||
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
||||
Some(self.cmp(other))
|
||||
}
|
||||
}
|
||||
|
||||
impl<Payload: Ord> Ord for PendingWake<Payload> {
|
||||
fn cmp(&self, other: &Self) -> Ordering {
|
||||
// Reverse order for min-heap:
|
||||
match other.ready_at.cmp(&self.ready_at) {
|
||||
Ordering::Equal => other.payload.cmp(&self.payload),
|
||||
o => o,
|
||||
}
|
||||
}
|
||||
}
|
||||
#[cfg(test)]
mod tests {
    use std::{
        task::Poll,
        time::{Duration, Instant},
    };

    use assert_matches::assert_matches;
    use futures::{future::poll_fn, pin_mut, Future};

    use crate::LOG_TARGET;

    use super::{PendingWake, WaitingQueue};

    /// Poll `wait_ready` exactly once and assert that it did not resolve.
    async fn assert_wait_pending(queue: &mut WaitingQueue<u32>, now: Instant) {
        poll_fn(|cx| {
            let fut = queue.wait_ready(now);
            pin_mut!(fut);
            assert_matches!(fut.poll(cx), Poll::Pending);
            Poll::Ready(())
        })
        .await;
    }

    /// Events must become ready in order of their `ready_at`, regardless of insertion order,
    /// and an empty queue must never become ready.
    #[test]
    fn wait_ready_waits_for_earliest_event_always() {
        sp_tracing::try_init_simple();
        let mut queue = WaitingQueue::new();
        let start = Instant::now();
        let now = start;
        // Time stamp `ms` milliseconds after the start of the test.
        let at = move |ms: u64| start + Duration::from_millis(ms);

        queue.push(PendingWake { payload: 1u32, ready_at: at(3) });
        // Push another one in order:
        queue.push(PendingWake { payload: 2u32, ready_at: at(5) });
        // Push one out of order:
        queue.push(PendingWake { payload: 0u32, ready_at: at(1) });
        // Push another one at same timestamp (should become ready at the same time)
        queue.push(PendingWake { payload: 10u32, ready_at: at(1) });

        futures::executor::block_on(async move {
            // No time passed yet - nothing should be ready.
            assert!(queue.pop_ready(now).is_none(), "No time has passed, nothing should be ready");

            // Receive them in order at expected times:
            queue.wait_ready(now).await;
            gum::trace!(target: LOG_TARGET, "After first wait.");

            let now = at(1);
            assert!(Instant::now() - start >= Duration::from_millis(1));
            assert_eq!(queue.pop_ready(now).map(|p| p.payload), Some(0u32));
            // One more should be ready:
            assert_eq!(queue.pop_ready(now).map(|p| p.payload), Some(10u32));
            assert!(queue.pop_ready(now).is_none(), "No more entry expected to be ready.");

            queue.wait_ready(now).await;
            gum::trace!(target: LOG_TARGET, "After second wait.");
            let now = at(3);
            assert!(Instant::now() - start >= Duration::from_millis(3));
            assert_eq!(queue.pop_ready(now).map(|p| p.payload), Some(1u32));
            assert!(queue.pop_ready(now).is_none(), "No more entry expected to be ready.");

            // Push in between wait:
            assert_wait_pending(&mut queue, now).await;
            queue.push(PendingWake { payload: 3u32, ready_at: at(4) });

            queue.wait_ready(now).await;
            // Newly pushed element should have become ready:
            gum::trace!(target: LOG_TARGET, "After third wait.");
            let now = at(4);
            assert!(Instant::now() - start >= Duration::from_millis(4));
            assert_eq!(queue.pop_ready(now).map(|p| p.payload), Some(3u32));
            assert!(queue.pop_ready(now).is_none(), "No more entry expected to be ready.");

            queue.wait_ready(now).await;
            gum::trace!(target: LOG_TARGET, "After fourth wait.");
            let now = at(5);
            assert!(Instant::now() - start >= Duration::from_millis(5));
            assert_eq!(queue.pop_ready(now).map(|p| p.payload), Some(2u32));
            assert!(queue.pop_ready(now).is_none(), "No more entry expected to be ready.");

            // queue empty - should wait forever now:
            assert_wait_pending(&mut queue, now).await;
        });
    }
}
|
||||
@@ -0,0 +1,97 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
//
|
||||
|
||||
//! Error handling related code and Error/Result definitions.
|
||||
|
||||
use fatality::Nested;
|
||||
|
||||
use gum::CandidateHash;
|
||||
use pezkuwi_node_network_protocol::{request_response::incoming, PeerId};
|
||||
use pezkuwi_node_subsystem_util::runtime;
|
||||
use pezkuwi_primitives::AuthorityDiscoveryId;
|
||||
|
||||
use crate::LOG_TARGET;
|
||||
|
||||
#[allow(missing_docs)]
|
||||
#[fatality::fatality(splitable)]
|
||||
pub enum Error {
|
||||
#[fatal(forward)]
|
||||
#[error("Error while accessing runtime information")]
|
||||
Runtime(#[from] runtime::Error),
|
||||
|
||||
#[fatal(forward)]
|
||||
#[error("Retrieving next incoming request failed.")]
|
||||
IncomingRequest(#[from] incoming::Error),
|
||||
|
||||
#[error("Sending back response to peers {0:#?} failed.")]
|
||||
SendResponses(Vec<PeerId>),
|
||||
|
||||
#[error("Changing peer's ({0}) reputation failed.")]
|
||||
SetPeerReputation(PeerId),
|
||||
|
||||
#[error("Dispute request with invalid signatures, from peer {0}.")]
|
||||
InvalidSignature(PeerId),
|
||||
|
||||
#[error("Received votes from peer {0} have been completely redundant.")]
|
||||
RedundantMessage(PeerId),
|
||||
|
||||
#[error("Import of dispute got canceled for candidate {0} - import failed for some reason.")]
|
||||
ImportCanceled(CandidateHash),
|
||||
|
||||
#[error("Peer {0} attempted to participate in dispute and is not a validator.")]
|
||||
NotAValidator(PeerId),
|
||||
|
||||
#[error("Force flush for batch that could not be found attempted, candidate hash: {0}")]
|
||||
ForceFlushBatchDoesNotExist(CandidateHash),
|
||||
|
||||
// Should never happen in practice:
|
||||
#[error("We needed to drop messages, because we reached limit on concurrent batches.")]
|
||||
MaxBatchLimitReached,
|
||||
|
||||
#[error("Authority {0} sent messages at a too high rate.")]
|
||||
AuthorityFlooding(AuthorityDiscoveryId),
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
|
||||
pub type JfyiResult<T> = std::result::Result<T, JfyiError>;
|
||||
|
||||
/// Utility for eating top level errors and log them.
|
||||
///
|
||||
/// We basically always want to try and continue on error. This utility function is meant to
|
||||
/// consume top-level errors by simply logging them.
|
||||
pub fn log_error(result: Result<()>) -> std::result::Result<(), FatalError> {
|
||||
match result.into_nested()? {
|
||||
Err(error @ JfyiError::ImportCanceled(_)) => {
|
||||
gum::debug!(target: LOG_TARGET, error = ?error);
|
||||
Ok(())
|
||||
},
|
||||
Err(JfyiError::NotAValidator(peer)) => {
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
?peer,
|
||||
"Dropping message from peer (unknown authority id)"
|
||||
);
|
||||
Ok(())
|
||||
},
|
||||
Err(error) => {
|
||||
gum::warn!(target: LOG_TARGET, error = ?error);
|
||||
Ok(())
|
||||
},
|
||||
Ok(()) => Ok(()),
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,522 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use std::{
|
||||
pin::Pin,
|
||||
task::{Context, Poll},
|
||||
time::Duration,
|
||||
};
|
||||
|
||||
use futures::{
|
||||
channel::oneshot,
|
||||
future::poll_fn,
|
||||
pin_mut,
|
||||
stream::{FuturesUnordered, StreamExt},
|
||||
Future,
|
||||
};
|
||||
|
||||
use gum::CandidateHash;
|
||||
use pezkuwi_node_network_protocol::{
|
||||
authority_discovery::AuthorityDiscovery,
|
||||
request_response::{
|
||||
incoming::{self, OutgoingResponse, OutgoingResponseSender},
|
||||
v1::{DisputeRequest, DisputeResponse},
|
||||
IncomingRequest, IncomingRequestReceiver,
|
||||
},
|
||||
PeerId, UnifiedReputationChange as Rep,
|
||||
};
|
||||
use pezkuwi_node_primitives::DISPUTE_WINDOW;
|
||||
use pezkuwi_node_subsystem::{
|
||||
messages::{DisputeCoordinatorMessage, ImportStatementsResult},
|
||||
overseer,
|
||||
};
|
||||
use pezkuwi_node_subsystem_util::{runtime, runtime::RuntimeInfo};
|
||||
|
||||
use crate::{
|
||||
metrics::{FAILED, SUCCEEDED},
|
||||
Metrics, LOG_TARGET,
|
||||
};
|
||||
|
||||
mod error;
|
||||
|
||||
/// Rate limiting queues for incoming requests by peers.
|
||||
mod peer_queues;
|
||||
|
||||
/// Batch imports together.
|
||||
mod batches;
|
||||
|
||||
use self::{
|
||||
batches::{Batches, FoundBatch, PreparedImport},
|
||||
error::{log_error, JfyiError, JfyiResult, Result},
|
||||
peer_queues::PeerQueues,
|
||||
};
|
||||
|
||||
const COST_INVALID_REQUEST: Rep = Rep::CostMajor("Received message could not be decoded.");
|
||||
const COST_INVALID_SIGNATURE: Rep = Rep::Malicious("Signatures were invalid.");
|
||||
const COST_NOT_A_VALIDATOR: Rep = Rep::CostMajor("Reporting peer was not a validator.");
|
||||
|
||||
/// Invalid imports can be caused by flooding, e.g. by a disabled validator.
|
||||
const COST_INVALID_IMPORT: Rep =
|
||||
Rep::CostMinor("Import was deemed invalid by dispute-coordinator.");
|
||||
|
||||
/// How many votes must have arrived in the last `BATCH_COLLECTING_INTERVAL`
|
||||
///
|
||||
/// in order for a batch to stay alive and not get flushed/imported to the dispute-coordinator.
|
||||
///
|
||||
/// This ensures a timely import of batches.
|
||||
#[cfg(not(test))]
|
||||
pub const MIN_KEEP_BATCH_ALIVE_VOTES: u32 = 10;
|
||||
#[cfg(test)]
|
||||
pub const MIN_KEEP_BATCH_ALIVE_VOTES: u32 = 2;
|
||||
|
||||
/// Time we allow to pass for new votes to trickle in.
|
||||
///
|
||||
/// See `MIN_KEEP_BATCH_ALIVE_VOTES` above.
|
||||
/// Should be greater or equal to `RECEIVE_RATE_LIMIT` (there is no point in checking any faster).
|
||||
pub const BATCH_COLLECTING_INTERVAL: Duration = Duration::from_millis(500);
|
||||
|
||||
/// State for handling incoming `DisputeRequest` messages.
|
||||
pub struct DisputesReceiver<Sender, AD> {
|
||||
/// Access to session information.
|
||||
runtime: RuntimeInfo,
|
||||
|
||||
/// Subsystem sender for communication with other subsystems.
|
||||
sender: Sender,
|
||||
|
||||
/// Channel to retrieve incoming requests from.
|
||||
receiver: IncomingRequestReceiver<DisputeRequest>,
|
||||
|
||||
/// Rate limiting queue for each peer (only authorities).
|
||||
peer_queues: PeerQueues,
|
||||
|
||||
/// Currently active batches of imports per candidate.
|
||||
batches: Batches,
|
||||
|
||||
/// Authority discovery service:
|
||||
authority_discovery: AD,
|
||||
|
||||
/// Imports currently being processed by the `dispute-coordinator`.
|
||||
pending_imports: FuturesUnordered<PendingImport>,
|
||||
|
||||
/// Log received requests.
|
||||
metrics: Metrics,
|
||||
}
|
||||
|
||||
/// Messages as handled by this receiver internally.
|
||||
enum MuxedMessage {
|
||||
/// An import got confirmed by the coordinator.
|
||||
///
|
||||
/// We need to handle those for two reasons:
|
||||
///
|
||||
/// - We need to make sure responses are actually sent (therefore we need to await futures
|
||||
/// promptly).
|
||||
/// - We need to punish peers whose import got rejected.
|
||||
ConfirmedImport(ImportResult),
|
||||
|
||||
/// A new request has arrived and should be handled.
|
||||
NewRequest(IncomingRequest<DisputeRequest>),
|
||||
|
||||
/// Rate limit timer hit - is time to process one row of messages.
|
||||
///
|
||||
/// This is the result of calling `self.peer_queues.pop_reqs()`.
|
||||
WakePeerQueuesPopReqs(Vec<IncomingRequest<DisputeRequest>>),
|
||||
|
||||
/// It is time to check batches.
|
||||
///
|
||||
/// Every `BATCH_COLLECTING_INTERVAL` we check whether less than `MIN_KEEP_BATCH_ALIVE_VOTES`
|
||||
/// new votes arrived, if so the batch is ready for import.
|
||||
///
|
||||
/// This is the result of calling `self.batches.check_batches()`.
|
||||
WakeCheckBatches(Vec<PreparedImport>),
|
||||
}
|
||||
|
||||
impl<Sender, AD> DisputesReceiver<Sender, AD>
|
||||
where
|
||||
AD: AuthorityDiscovery,
|
||||
Sender: overseer::DisputeDistributionSenderTrait,
|
||||
{
|
||||
/// Create a new receiver which can be `run`.
|
||||
pub fn new(
|
||||
sender: Sender,
|
||||
receiver: IncomingRequestReceiver<DisputeRequest>,
|
||||
authority_discovery: AD,
|
||||
metrics: Metrics,
|
||||
) -> Self {
|
||||
let runtime = RuntimeInfo::new_with_config(runtime::Config {
|
||||
keystore: None,
|
||||
session_cache_lru_size: DISPUTE_WINDOW.get(),
|
||||
});
|
||||
Self {
|
||||
runtime,
|
||||
sender,
|
||||
receiver,
|
||||
peer_queues: PeerQueues::new(),
|
||||
batches: Batches::new(),
|
||||
authority_discovery,
|
||||
pending_imports: FuturesUnordered::new(),
|
||||
metrics,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get that receiver started.
|
||||
///
|
||||
/// This is an endless loop and should be spawned into its own task.
|
||||
pub async fn run(mut self) {
|
||||
loop {
|
||||
match log_error(self.run_inner().await) {
|
||||
Ok(()) => {},
|
||||
Err(fatal) => {
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
error = ?fatal,
|
||||
"Shutting down"
|
||||
);
|
||||
return;
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Actual work happening here in three phases:
|
||||
///
|
||||
/// 1. Receive and queue incoming messages until the rate limit timer hits.
|
||||
/// 2. Do import/batching for the head of all queues.
|
||||
/// 3. Check and flush any ready batches.
|
||||
async fn run_inner(&mut self) -> Result<()> {
|
||||
let msg = self.receive_message().await?;
|
||||
|
||||
match msg {
|
||||
MuxedMessage::NewRequest(req) => {
|
||||
// Phase 1:
|
||||
self.metrics.on_received_request();
|
||||
self.dispatch_to_queues(req).await?;
|
||||
},
|
||||
MuxedMessage::WakePeerQueuesPopReqs(reqs) => {
|
||||
// Phase 2:
|
||||
for req in reqs {
|
||||
// No early return - we cannot cancel imports of one peer, because the import of
|
||||
// another failed:
|
||||
match log_error(self.start_import_or_batch(req).await) {
|
||||
Ok(()) => {},
|
||||
Err(fatal) => return Err(fatal.into()),
|
||||
}
|
||||
}
|
||||
},
|
||||
MuxedMessage::WakeCheckBatches(ready_imports) => {
|
||||
// Phase 3:
|
||||
self.import_ready_batches(ready_imports).await;
|
||||
},
|
||||
MuxedMessage::ConfirmedImport(import_result) => {
|
||||
self.update_imported_requests_metrics(&import_result);
|
||||
// Confirm imports to requesters/punish them on invalid imports:
|
||||
send_responses_to_requesters(import_result).await?;
|
||||
},
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Receive one `MuxedMessage`.
|
||||
///
|
||||
///
|
||||
/// Dispatching events to messages as they happen.
|
||||
async fn receive_message(&mut self) -> Result<MuxedMessage> {
|
||||
poll_fn(|ctx| {
|
||||
// In case of Ready(None), we want to wait for pending requests:
|
||||
if let Poll::Ready(Some(v)) = self.pending_imports.poll_next_unpin(ctx) {
|
||||
return Poll::Ready(Ok(MuxedMessage::ConfirmedImport(v?)));
|
||||
}
|
||||
|
||||
let rate_limited = self.peer_queues.pop_reqs();
|
||||
pin_mut!(rate_limited);
|
||||
// We poll rate_limit before batches, so we don't unnecessarily delay importing to
|
||||
// batches.
|
||||
if let Poll::Ready(reqs) = rate_limited.poll(ctx) {
|
||||
return Poll::Ready(Ok(MuxedMessage::WakePeerQueuesPopReqs(reqs)));
|
||||
}
|
||||
|
||||
let ready_batches = self.batches.check_batches();
|
||||
pin_mut!(ready_batches);
|
||||
if let Poll::Ready(ready_batches) = ready_batches.poll(ctx) {
|
||||
return Poll::Ready(Ok(MuxedMessage::WakeCheckBatches(ready_batches)));
|
||||
}
|
||||
|
||||
let next_req = self.receiver.recv(|| vec![COST_INVALID_REQUEST]);
|
||||
pin_mut!(next_req);
|
||||
if let Poll::Ready(r) = next_req.poll(ctx) {
|
||||
return match r {
|
||||
Err(e) => Poll::Ready(Err(incoming::Error::from(e).into())),
|
||||
Ok(v) => Poll::Ready(Ok(MuxedMessage::NewRequest(v))),
|
||||
};
|
||||
}
|
||||
Poll::Pending
|
||||
})
|
||||
.await
|
||||
}
|
||||
|
||||
/// Process incoming requests.
|
||||
///
|
||||
/// - Check sender is authority
|
||||
/// - Dispatch message to corresponding queue in `peer_queues`.
|
||||
/// - If queue is full, drop message and change reputation of sender.
|
||||
async fn dispatch_to_queues(&mut self, req: IncomingRequest<DisputeRequest>) -> JfyiResult<()> {
|
||||
let peer = req.peer;
|
||||
// Only accept messages from validators, in case there are multiple `AuthorityId`s, we
|
||||
// just take the first one. On session boundaries this might allow validators to double
|
||||
// their rate limit for a short period of time, which seems acceptable.
|
||||
let authority_id = match self
|
||||
.authority_discovery
|
||||
.get_authority_ids_by_peer_id(peer)
|
||||
.await
|
||||
.and_then(|s| s.into_iter().next())
|
||||
{
|
||||
None => {
|
||||
req.send_outgoing_response(OutgoingResponse {
|
||||
result: Err(()),
|
||||
reputation_changes: vec![COST_NOT_A_VALIDATOR],
|
||||
sent_feedback: None,
|
||||
})
|
||||
.map_err(|_| JfyiError::SendResponses(vec![peer]))?;
|
||||
return Err(JfyiError::NotAValidator(peer).into());
|
||||
},
|
||||
Some(auth_id) => auth_id,
|
||||
};
|
||||
|
||||
// Queue request:
|
||||
if let Err((authority_id, req)) = self.peer_queues.push_req(authority_id, req) {
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
?authority_id,
|
||||
?peer,
|
||||
"Peer hit the rate limit - dropping message."
|
||||
);
|
||||
req.send_outgoing_response(OutgoingResponse {
|
||||
result: Err(()),
|
||||
reputation_changes: vec![],
|
||||
sent_feedback: None,
|
||||
})
|
||||
.map_err(|_| JfyiError::SendResponses(vec![peer]))?;
|
||||
return Err(JfyiError::AuthorityFlooding(authority_id));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Start importing votes for the given request or batch.
|
||||
///
|
||||
/// Signature check and in case we already have an existing batch we import to that batch,
|
||||
/// otherwise import to `dispute-coordinator` directly and open a batch.
|
||||
async fn start_import_or_batch(
|
||||
&mut self,
|
||||
incoming: IncomingRequest<DisputeRequest>,
|
||||
) -> Result<()> {
|
||||
let IncomingRequest { peer, payload, pending_response } = incoming;
|
||||
|
||||
let info = self
|
||||
.runtime
|
||||
.get_session_info_by_index(
|
||||
&mut self.sender,
|
||||
payload.0.candidate_receipt.descriptor.relay_parent(),
|
||||
payload.0.session_index,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let votes_result = payload.0.try_into_signed_votes(&info.session_info);
|
||||
|
||||
let (candidate_receipt, valid_vote, invalid_vote) = match votes_result {
|
||||
Err(()) => {
|
||||
// Signature invalid:
|
||||
pending_response
|
||||
.send_outgoing_response(OutgoingResponse {
|
||||
result: Err(()),
|
||||
reputation_changes: vec![COST_INVALID_SIGNATURE],
|
||||
sent_feedback: None,
|
||||
})
|
||||
.map_err(|_| JfyiError::SetPeerReputation(peer))?;
|
||||
|
||||
return Err(From::from(JfyiError::InvalidSignature(peer)));
|
||||
},
|
||||
Ok(votes) => votes,
|
||||
};
|
||||
|
||||
let candidate_hash = *valid_vote.0.candidate_hash();
|
||||
|
||||
match self.batches.find_batch(candidate_hash, candidate_receipt)? {
|
||||
FoundBatch::Created(batch) => {
|
||||
// There was no entry yet - start import immediately:
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
?candidate_hash,
|
||||
?peer,
|
||||
"No batch yet - triggering immediate import"
|
||||
);
|
||||
let import = PreparedImport {
|
||||
candidate_receipt: batch.candidate_receipt().clone(),
|
||||
statements: vec![valid_vote, invalid_vote],
|
||||
requesters: vec![(peer, pending_response)],
|
||||
};
|
||||
self.start_import(import).await;
|
||||
},
|
||||
FoundBatch::Found(batch) => {
|
||||
gum::trace!(target: LOG_TARGET, ?candidate_hash, "Batch exists - batching request");
|
||||
let batch_result =
|
||||
batch.add_votes(valid_vote, invalid_vote, peer, pending_response);
|
||||
|
||||
if let Err(pending_response) = batch_result {
|
||||
// We don't expect honest peers to send redundant votes within a single batch,
|
||||
// as the timeout for retry is much higher. Still we don't want to punish the
|
||||
// node as it might not be the node's fault. Some other (malicious) node could
|
||||
// have been faster sending the same votes in order to harm the reputation of
|
||||
// that honest node. Given that we already have a rate limit, if a validator
|
||||
// chooses to waste available rate with redundant votes - so be it. The actual
|
||||
// dispute resolution is unaffected.
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
?peer,
|
||||
"Peer sent completely redundant votes within a single batch - that looks fishy!",
|
||||
);
|
||||
pending_response
|
||||
.send_outgoing_response(OutgoingResponse {
|
||||
// While we have seen duplicate votes, we cannot confirm as we don't
|
||||
// know yet whether the batch is going to be confirmed, so we assume
|
||||
// the worst. We don't want to push the pending response to the batch
|
||||
// either as that would be unbounded, only limited by the rate limit.
|
||||
result: Err(()),
|
||||
reputation_changes: Vec::new(),
|
||||
sent_feedback: None,
|
||||
})
|
||||
.map_err(|_| JfyiError::SendResponses(vec![peer]))?;
|
||||
return Err(From::from(JfyiError::RedundantMessage(peer)));
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Trigger import into the dispute-coordinator of ready batches (`PreparedImport`s).
|
||||
async fn import_ready_batches(&mut self, ready_imports: Vec<PreparedImport>) {
|
||||
for import in ready_imports {
|
||||
self.start_import(import).await;
|
||||
}
|
||||
}
|
||||
|
||||
/// Start import and add response receiver to `pending_imports`.
|
||||
async fn start_import(&mut self, import: PreparedImport) {
|
||||
let PreparedImport { candidate_receipt, statements, requesters } = import;
|
||||
let (session_index, candidate_hash) = match statements.iter().next() {
|
||||
None => {
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
candidate_hash = ?candidate_receipt.hash(),
|
||||
"Not importing empty batch"
|
||||
);
|
||||
return;
|
||||
},
|
||||
Some(vote) => (vote.0.session_index(), *vote.0.candidate_hash()),
|
||||
};
|
||||
|
||||
let (pending_confirmation, confirmation_rx) = oneshot::channel();
|
||||
self.sender
|
||||
.send_message(DisputeCoordinatorMessage::ImportStatements {
|
||||
candidate_receipt,
|
||||
session: session_index,
|
||||
statements,
|
||||
pending_confirmation: Some(pending_confirmation),
|
||||
})
|
||||
.await;
|
||||
|
||||
let pending =
|
||||
PendingImport { candidate_hash, requesters, pending_response: confirmation_rx };
|
||||
|
||||
self.pending_imports.push(pending);
|
||||
}
|
||||
|
||||
fn update_imported_requests_metrics(&self, result: &ImportResult) {
|
||||
let label = match result.result {
|
||||
ImportStatementsResult::ValidImport => SUCCEEDED,
|
||||
ImportStatementsResult::InvalidImport => FAILED,
|
||||
};
|
||||
self.metrics.on_imported(label, result.requesters.len());
|
||||
}
|
||||
}
|
||||
|
||||
async fn send_responses_to_requesters(import_result: ImportResult) -> JfyiResult<()> {
|
||||
let ImportResult { requesters, result } = import_result;
|
||||
|
||||
let mk_response = match result {
|
||||
ImportStatementsResult::ValidImport => || OutgoingResponse {
|
||||
result: Ok(DisputeResponse::Confirmed),
|
||||
reputation_changes: Vec::new(),
|
||||
sent_feedback: None,
|
||||
},
|
||||
ImportStatementsResult::InvalidImport => || OutgoingResponse {
|
||||
result: Err(()),
|
||||
reputation_changes: vec![COST_INVALID_IMPORT],
|
||||
sent_feedback: None,
|
||||
},
|
||||
};
|
||||
|
||||
let mut sending_failed_for = Vec::new();
|
||||
for (peer, pending_response) in requesters {
|
||||
if let Err(()) = pending_response.send_outgoing_response(mk_response()) {
|
||||
sending_failed_for.push(peer);
|
||||
}
|
||||
}
|
||||
|
||||
if !sending_failed_for.is_empty() {
|
||||
Err(JfyiError::SendResponses(sending_failed_for))
|
||||
} else {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// A future that resolves into an `ImportResult` when ready.
|
||||
///
|
||||
/// This future is used on `dispute-coordinator` import messages for the oneshot response receiver
|
||||
/// to:
|
||||
/// - Keep track of concerned `CandidateHash` for reporting errors.
|
||||
/// - Keep track of requesting peers so we can confirm the import/punish them on invalid imports.
|
||||
struct PendingImport {
|
||||
candidate_hash: CandidateHash,
|
||||
requesters: Vec<(PeerId, OutgoingResponseSender<DisputeRequest>)>,
|
||||
pending_response: oneshot::Receiver<ImportStatementsResult>,
|
||||
}
|
||||
|
||||
/// A `PendingImport` becomes an `ImportResult` once done.
|
||||
struct ImportResult {
|
||||
/// Requesters of that import.
|
||||
requesters: Vec<(PeerId, OutgoingResponseSender<DisputeRequest>)>,
|
||||
/// Actual result of the import.
|
||||
result: ImportStatementsResult,
|
||||
}
|
||||
|
||||
impl PendingImport {
|
||||
async fn wait_for_result(&mut self) -> JfyiResult<ImportResult> {
|
||||
let result = (&mut self.pending_response)
|
||||
.await
|
||||
.map_err(|_| JfyiError::ImportCanceled(self.candidate_hash))?;
|
||||
Ok(ImportResult { requesters: std::mem::take(&mut self.requesters), result })
|
||||
}
|
||||
}
|
||||
|
||||
impl Future for PendingImport {
|
||||
type Output = JfyiResult<ImportResult>;
|
||||
fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
|
||||
let fut = self.wait_for_result();
|
||||
pin_mut!(fut);
|
||||
fut.poll(cx)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,141 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use std::collections::{hash_map::Entry, HashMap, VecDeque};
|
||||
|
||||
use futures::future::pending;
|
||||
use futures_timer::Delay;
|
||||
use pezkuwi_node_network_protocol::request_response::{v1::DisputeRequest, IncomingRequest};
|
||||
use pezkuwi_primitives::AuthorityDiscoveryId;
|
||||
|
||||
use crate::RECEIVE_RATE_LIMIT;
|
||||
|
||||
/// Upper bound on the number of messages queued per peer (validator).
///
/// A larger value tolerates larger bursts before messages get dropped. On the flip side a
/// queue exists per validator, so for a size of 10 this will result in
/// `10_000 * size_of(IncomingRequest)` in the worst case.
///
/// `PEER_QUEUE_CAPACITY` must not be 0 for obvious reasons.
#[cfg(not(test))]
pub const PEER_QUEUE_CAPACITY: usize = 10;
/// Reduced queue capacity used in test builds.
#[cfg(test)]
pub const PEER_QUEUE_CAPACITY: usize = 2;
|
||||
|
||||
/// Queues for messages from authority peers for rate limiting.
|
||||
///
|
||||
/// Invariants ensured:
|
||||
///
|
||||
/// 1. No queue will ever have more than `PEER_QUEUE_CAPACITY` elements.
|
||||
/// 2. There are no empty queues. Whenever a queue gets empty, it is removed. This way checking
|
||||
/// whether there are any messages queued is cheap.
|
||||
/// 3. As long as not empty, `pop_reqs` will, if called in sequence, not return `Ready` more often
|
||||
/// than once for every `RECEIVE_RATE_LIMIT`, but it will always return Ready eventually.
|
||||
/// 4. If empty `pop_reqs` will never return `Ready`, but will always be `Pending`.
|
||||
pub struct PeerQueues {
|
||||
/// Actual queues.
|
||||
queues: HashMap<AuthorityDiscoveryId, VecDeque<IncomingRequest<DisputeRequest>>>,
|
||||
|
||||
/// Delay timer for establishing the rate limit.
|
||||
rate_limit_timer: Option<Delay>,
|
||||
}
|
||||
|
||||
impl PeerQueues {
|
||||
/// New empty `PeerQueues`.
|
||||
pub fn new() -> Self {
|
||||
Self { queues: HashMap::new(), rate_limit_timer: None }
|
||||
}
|
||||
|
||||
/// Push an incoming request for a given authority.
|
||||
///
|
||||
/// Returns: `Ok(())` if succeeded, `Err((args))` if capacity is reached.
|
||||
pub fn push_req(
|
||||
&mut self,
|
||||
peer: AuthorityDiscoveryId,
|
||||
req: IncomingRequest<DisputeRequest>,
|
||||
) -> Result<(), (AuthorityDiscoveryId, IncomingRequest<DisputeRequest>)> {
|
||||
let queue = match self.queues.entry(peer) {
|
||||
Entry::Vacant(vacant) => vacant.insert(VecDeque::new()),
|
||||
Entry::Occupied(occupied) => {
|
||||
if occupied.get().len() >= PEER_QUEUE_CAPACITY {
|
||||
return Err((occupied.key().clone(), req));
|
||||
}
|
||||
occupied.into_mut()
|
||||
},
|
||||
};
|
||||
queue.push_back(req);
|
||||
|
||||
// We have at least one element to process - rate limit `timer` needs to exist now:
|
||||
self.ensure_timer();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Pop all heads and return them for processing.
|
||||
///
|
||||
/// This gets one message from each peer that has sent at least one.
|
||||
///
|
||||
/// This function is rate limited, if called in sequence it will not return more often than
|
||||
/// every `RECEIVE_RATE_LIMIT`.
|
||||
///
|
||||
/// NOTE: If empty this function will not return `Ready` at all, but will always be `Pending`.
|
||||
pub async fn pop_reqs(&mut self) -> Vec<IncomingRequest<DisputeRequest>> {
|
||||
self.wait_for_timer().await;
|
||||
|
||||
let mut heads = Vec::with_capacity(self.queues.len());
|
||||
let old_queues = std::mem::replace(&mut self.queues, HashMap::new());
|
||||
for (k, mut queue) in old_queues.into_iter() {
|
||||
let front = queue.pop_front();
|
||||
debug_assert!(front.is_some(), "Invariant that queues are never empty is broken.");
|
||||
|
||||
if let Some(front) = front {
|
||||
heads.push(front);
|
||||
}
|
||||
if !queue.is_empty() {
|
||||
self.queues.insert(k, queue);
|
||||
}
|
||||
}
|
||||
|
||||
if !self.is_empty() {
|
||||
// Still not empty - we should get woken at some point.
|
||||
self.ensure_timer();
|
||||
}
|
||||
|
||||
heads
|
||||
}
|
||||
|
||||
/// Whether or not all queues are empty.
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.queues.is_empty()
|
||||
}
|
||||
|
||||
/// Ensure there is an active `timer`.
|
||||
///
|
||||
/// Checks whether one exists and if not creates one.
|
||||
fn ensure_timer(&mut self) -> &mut Delay {
|
||||
self.rate_limit_timer.get_or_insert(Delay::new(RECEIVE_RATE_LIMIT))
|
||||
}
|
||||
|
||||
/// Wait for `timer` if it exists, or be `Pending` forever.
|
||||
///
|
||||
/// Afterwards it gets set back to `None`.
|
||||
async fn wait_for_timer(&mut self) {
|
||||
match self.rate_limit_timer.as_mut() {
|
||||
None => pending().await,
|
||||
Some(timer) => timer.await,
|
||||
}
|
||||
self.rate_limit_timer = None;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,73 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
//
|
||||
|
||||
//! Error handling related code and Error/Result definitions.
|
||||
|
||||
use pezkuwi_node_primitives::disputes::DisputeMessageCheckError;
|
||||
use pezkuwi_node_subsystem::SubsystemError;
|
||||
use pezkuwi_node_subsystem_util::runtime;
|
||||
|
||||
#[allow(missing_docs)]
|
||||
#[fatality::fatality(splitable)]
|
||||
pub enum Error {
|
||||
#[fatal]
|
||||
#[error("Spawning subsystem task failed")]
|
||||
SpawnTask(#[source] SubsystemError),
|
||||
|
||||
#[fatal(forward)]
|
||||
#[error("Error while accessing runtime information")]
|
||||
Runtime(#[from] runtime::Error),
|
||||
|
||||
/// We need available active heads for finding relevant authorities.
|
||||
#[error("No active heads available - needed for finding relevant authorities.")]
|
||||
NoActiveHeads,
|
||||
|
||||
/// This error likely indicates a bug in the coordinator.
|
||||
#[error("Oneshot for asking dispute coordinator for active disputes got canceled.")]
|
||||
AskActiveDisputesCanceled,
|
||||
|
||||
/// This error likely indicates a bug in the coordinator.
|
||||
#[error("Oneshot for asking dispute coordinator for candidate votes got canceled.")]
|
||||
AskCandidateVotesCanceled,
|
||||
|
||||
/// This error does indicate a bug in the coordinator.
|
||||
///
|
||||
/// We were not able to successfully construct a `DisputeMessage` from disputes votes.
|
||||
#[error("Invalid dispute encountered")]
|
||||
InvalidDisputeFromCoordinator(#[source] DisputeMessageCheckError),
|
||||
|
||||
/// This error does indicate a bug in the coordinator.
|
||||
///
|
||||
/// We did not receive votes on both sides for `CandidateVotes` received from the coordinator.
|
||||
#[error("Missing votes for valid dispute")]
|
||||
MissingVotesFromCoordinator,
|
||||
|
||||
/// This error does indicate a bug in the coordinator.
|
||||
///
|
||||
/// `SignedDisputeStatement` could not be reconstructed from recorded statements.
|
||||
#[error("Invalid statements from coordinator")]
|
||||
InvalidStatementFromCoordinator,
|
||||
|
||||
/// This error does indicate a bug in the coordinator.
|
||||
///
|
||||
/// A statement's `ValidatorIndex` could not be looked up.
|
||||
#[error("ValidatorIndex of statement could not be found")]
|
||||
InvalidValidatorIndexFromCoordinator,
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
pub type JfyiErrorResult<T> = std::result::Result<T, JfyiError>;
|
||||
@@ -0,0 +1,392 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use std::{
|
||||
collections::{BTreeMap, HashMap, HashSet},
|
||||
pin::Pin,
|
||||
task::Poll,
|
||||
time::Duration,
|
||||
};
|
||||
|
||||
use futures::{channel::oneshot, future::poll_fn, Future};
|
||||
|
||||
use futures_timer::Delay;
|
||||
use indexmap::{map::Entry, IndexMap};
|
||||
use pezkuwi_node_network_protocol::request_response::v1::DisputeRequest;
|
||||
use pezkuwi_node_primitives::{DisputeMessage, DisputeStatus};
|
||||
use pezkuwi_node_subsystem::{
|
||||
messages::DisputeCoordinatorMessage, overseer, ActiveLeavesUpdate, SubsystemSender,
|
||||
};
|
||||
use pezkuwi_node_subsystem_util::{nesting_sender::NestingSender, runtime::RuntimeInfo};
|
||||
use pezkuwi_primitives::{CandidateHash, Hash, SessionIndex};
|
||||
|
||||
/// For each ongoing dispute we have a `SendTask` which takes care of it.
|
||||
///
|
||||
/// It is going to spawn real tasks as it sees fit for getting the votes of the particular dispute
|
||||
/// out.
|
||||
///
|
||||
/// As we assume disputes have a priority, we start sending for disputes in the order
|
||||
/// `start_sender` got called.
|
||||
mod send_task;
|
||||
use send_task::SendTask;
|
||||
pub use send_task::TaskFinish;
|
||||
|
||||
/// Error and [`Result`] type for sender.
|
||||
mod error;
|
||||
pub use error::{Error, FatalError, JfyiError, Result};
|
||||
|
||||
use self::error::JfyiErrorResult;
|
||||
use crate::{Metrics, LOG_TARGET, SEND_RATE_LIMIT};
|
||||
|
||||
/// Messages as sent by background tasks.
|
||||
#[derive(Debug)]
|
||||
pub enum DisputeSenderMessage {
|
||||
/// A task finished.
|
||||
TaskFinish(TaskFinish),
|
||||
/// A request for active disputes to the dispute-coordinator finished.
|
||||
ActiveDisputesReady(JfyiErrorResult<BTreeMap<(SessionIndex, CandidateHash), DisputeStatus>>),
|
||||
}
|
||||
|
||||
/// The `DisputeSender` keeps track of all ongoing disputes we need to send statements out.
|
||||
///
|
||||
/// For each dispute a `SendTask` is responsible for sending to the concerned validators for that
|
||||
/// particular dispute. The `DisputeSender` keeps track of those tasks, informs them about new
|
||||
/// sessions/validator sets and cleans them up when they become obsolete.
|
||||
///
|
||||
/// The unit of work for the `DisputeSender` is a dispute, represented by `SendTask`s.
|
||||
pub struct DisputeSender<M> {
|
||||
/// All heads we currently consider active.
|
||||
active_heads: Vec<Hash>,
|
||||
|
||||
/// List of currently active sessions.
|
||||
///
|
||||
/// Value is the hash that was used for the query.
|
||||
active_sessions: HashMap<SessionIndex, Hash>,
|
||||
|
||||
/// All ongoing dispute sending this subsystem is aware of.
|
||||
///
|
||||
/// Using an `IndexMap` so items can be iterated in the order of insertion.
|
||||
disputes: IndexMap<CandidateHash, SendTask<M>>,
|
||||
|
||||
/// Sender to be cloned for `SendTask`s.
|
||||
tx: NestingSender<M, DisputeSenderMessage>,
|
||||
|
||||
/// `Some` if we are waiting for a response `DisputeCoordinatorMessage::ActiveDisputes`.
|
||||
waiting_for_active_disputes: Option<WaitForActiveDisputesState>,
|
||||
|
||||
/// Future for delaying too frequent creation of dispute sending tasks.
|
||||
rate_limit: RateLimit,
|
||||
|
||||
/// Metrics for reporting stats about sent requests.
|
||||
metrics: Metrics,
|
||||
}
|
||||
|
||||
/// What we remember while a request for active disputes is in flight.
///
/// Kept from sending `DisputeCoordinatorMessage::ActiveDisputes` until the response arrives.
struct WaitForActiveDisputesState {
    /// Whether any new sessions showed up since the last refresh.
    have_new_sessions: bool,
}
|
||||
|
||||
#[overseer::contextbounds(DisputeDistribution, prefix = self::overseer)]
|
||||
impl<M: 'static + Send + Sync> DisputeSender<M> {
|
||||
/// Create a new `DisputeSender` which can be used to start dispute sending.
|
||||
pub fn new(tx: NestingSender<M, DisputeSenderMessage>, metrics: Metrics) -> Self {
|
||||
Self {
|
||||
active_heads: Vec::new(),
|
||||
active_sessions: HashMap::new(),
|
||||
disputes: IndexMap::new(),
|
||||
tx,
|
||||
waiting_for_active_disputes: None,
|
||||
rate_limit: RateLimit::new(),
|
||||
metrics,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a `SendTask` for a particular new dispute.
|
||||
///
|
||||
/// This function is rate-limited by `SEND_RATE_LIMIT`. It will block if called too frequently
|
||||
/// in order to maintain the limit.
|
||||
pub async fn start_sender<Context>(
|
||||
&mut self,
|
||||
ctx: &mut Context,
|
||||
runtime: &mut RuntimeInfo,
|
||||
msg: DisputeMessage,
|
||||
) -> Result<()> {
|
||||
let req: DisputeRequest = msg.into();
|
||||
let candidate_hash = req.0.candidate_receipt.hash();
|
||||
match self.disputes.entry(candidate_hash) {
|
||||
Entry::Occupied(_) => {
|
||||
gum::trace!(target: LOG_TARGET, ?candidate_hash, "Dispute sending already active.");
|
||||
return Ok(());
|
||||
},
|
||||
Entry::Vacant(vacant) => {
|
||||
self.rate_limit.limit("in start_sender", candidate_hash).await;
|
||||
|
||||
let send_task = SendTask::new(
|
||||
ctx,
|
||||
runtime,
|
||||
&self.active_sessions,
|
||||
NestingSender::new(self.tx.clone(), DisputeSenderMessage::TaskFinish),
|
||||
req,
|
||||
&self.metrics,
|
||||
)
|
||||
.await?;
|
||||
vacant.insert(send_task);
|
||||
},
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Receive message from a background task.
|
||||
pub async fn on_message<Context>(
|
||||
&mut self,
|
||||
ctx: &mut Context,
|
||||
runtime: &mut RuntimeInfo,
|
||||
msg: DisputeSenderMessage,
|
||||
) -> Result<()> {
|
||||
match msg {
|
||||
DisputeSenderMessage::TaskFinish(msg) => {
|
||||
let TaskFinish { candidate_hash, receiver, result } = msg;
|
||||
|
||||
self.metrics.on_sent_request(result.as_metrics_label());
|
||||
|
||||
let task = match self.disputes.get_mut(&candidate_hash) {
|
||||
None => {
|
||||
// Can happen when a dispute ends, with messages still in queue:
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
?result,
|
||||
"Received `FromSendingTask::Finished` for non existing dispute."
|
||||
);
|
||||
return Ok(());
|
||||
},
|
||||
Some(task) => task,
|
||||
};
|
||||
task.on_finished_send(&receiver, result);
|
||||
},
|
||||
DisputeSenderMessage::ActiveDisputesReady(result) => {
|
||||
let state = self.waiting_for_active_disputes.take();
|
||||
let have_new_sessions = state.map(|s| s.have_new_sessions).unwrap_or(false);
|
||||
let active_disputes = result?;
|
||||
self.handle_new_active_disputes(ctx, runtime, active_disputes, have_new_sessions)
|
||||
.await?;
|
||||
},
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Take care of a change in active leaves.
|
||||
///
|
||||
/// Update our knowledge on sessions and initiate fetching for new active disputes.
|
||||
pub async fn update_leaves<Context>(
|
||||
&mut self,
|
||||
ctx: &mut Context,
|
||||
runtime: &mut RuntimeInfo,
|
||||
update: ActiveLeavesUpdate,
|
||||
) -> Result<()> {
|
||||
let ActiveLeavesUpdate { activated, deactivated } = update;
|
||||
let deactivated: HashSet<_> = deactivated.into_iter().collect();
|
||||
self.active_heads.retain(|h| !deactivated.contains(h));
|
||||
self.active_heads.extend(activated.into_iter().map(|l| l.hash));
|
||||
|
||||
let have_new_sessions = self.refresh_sessions(ctx, runtime).await?;
|
||||
|
||||
// Not yet waiting for data, request an update:
|
||||
match self.waiting_for_active_disputes.take() {
|
||||
None => {
|
||||
self.waiting_for_active_disputes =
|
||||
Some(WaitForActiveDisputesState { have_new_sessions });
|
||||
let mut sender = ctx.sender().clone();
|
||||
let mut tx = self.tx.clone();
|
||||
|
||||
let get_active_disputes_task = async move {
|
||||
let result = get_active_disputes(&mut sender).await;
|
||||
let result =
|
||||
tx.send_message(DisputeSenderMessage::ActiveDisputesReady(result)).await;
|
||||
if let Err(err) = result {
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
?err,
|
||||
"Sending `DisputeSenderMessage` from background task failed."
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
ctx.spawn("get_active_disputes", Box::pin(get_active_disputes_task))
|
||||
.map_err(FatalError::SpawnTask)?;
|
||||
},
|
||||
Some(state) => {
|
||||
let have_new_sessions = state.have_new_sessions || have_new_sessions;
|
||||
let new_state = WaitForActiveDisputesState { have_new_sessions };
|
||||
self.waiting_for_active_disputes = Some(new_state);
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
"Dispute coordinator slow? We are still waiting for data on next active leaves update."
|
||||
);
|
||||
},
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Handle new active disputes response.
|
||||
///
|
||||
/// - Initiate a retry of failed sends which are still active.
|
||||
/// - Get new authorities to send messages to.
|
||||
/// - Get rid of obsolete tasks and disputes.
|
||||
///
|
||||
/// This function ensures the `SEND_RATE_LIMIT`, therefore it might block.
|
||||
async fn handle_new_active_disputes<Context>(
|
||||
&mut self,
|
||||
ctx: &mut Context,
|
||||
runtime: &mut RuntimeInfo,
|
||||
active_disputes: BTreeMap<(SessionIndex, CandidateHash), DisputeStatus>,
|
||||
have_new_sessions: bool,
|
||||
) -> Result<()> {
|
||||
let active_disputes: HashSet<_> =
|
||||
active_disputes.into_iter().map(|((_, c), _)| c).collect();
|
||||
|
||||
// Cleanup obsolete senders (retain keeps order of remaining elements):
|
||||
self.disputes
|
||||
.retain(|candidate_hash, _| active_disputes.contains(candidate_hash));
|
||||
|
||||
// Iterates in order of insertion:
|
||||
let mut should_rate_limit = true;
|
||||
for (candidate_hash, dispute) in self.disputes.iter_mut() {
|
||||
if have_new_sessions || dispute.has_failed_sends() {
|
||||
if should_rate_limit {
|
||||
self.rate_limit
|
||||
.limit("while going through new sessions/failed sends", *candidate_hash)
|
||||
.await;
|
||||
}
|
||||
let sends_happened = dispute
|
||||
.refresh_sends(ctx, runtime, &self.active_sessions, &self.metrics)
|
||||
.await?;
|
||||
// Only rate limit if we actually sent something out _and_ it was not just because
|
||||
// of errors on previous sends.
|
||||
//
|
||||
// Reasoning: It would not be acceptable to slow down the whole subsystem, just
|
||||
// because of a few bad peers having problems. It is actually better to risk
|
||||
// running into their rate limit in that case and accept a minor reputation change.
|
||||
should_rate_limit = sends_happened && have_new_sessions;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Make active sessions correspond to currently active heads.
|
||||
///
|
||||
/// Returns: true if sessions changed.
|
||||
async fn refresh_sessions<Context>(
|
||||
&mut self,
|
||||
ctx: &mut Context,
|
||||
runtime: &mut RuntimeInfo,
|
||||
) -> Result<bool> {
|
||||
let new_sessions = get_active_session_indices(ctx, runtime, &self.active_heads).await?;
|
||||
let new_sessions_raw: HashSet<_> = new_sessions.keys().collect();
|
||||
let old_sessions_raw: HashSet<_> = self.active_sessions.keys().collect();
|
||||
let updated = new_sessions_raw != old_sessions_raw;
|
||||
// Update in any case, so we use current heads for queries:
|
||||
self.active_sessions = new_sessions;
|
||||
Ok(updated)
|
||||
}
|
||||
}
|
||||
|
||||
/// Rate limiting logic.
|
||||
///
|
||||
/// Suitable for the sending side.
|
||||
struct RateLimit {
|
||||
limit: Delay,
|
||||
}
|
||||
|
||||
impl RateLimit {
|
||||
/// Create new `RateLimit` that is immediately ready.
|
||||
fn new() -> Self {
|
||||
// Start with an empty duration, as there has not been any previous call.
|
||||
Self { limit: Delay::new(Duration::new(0, 0)) }
|
||||
}
|
||||
|
||||
/// Initialized with actual `SEND_RATE_LIMIT` duration.
|
||||
fn new_limit() -> Self {
|
||||
Self { limit: Delay::new(SEND_RATE_LIMIT) }
|
||||
}
|
||||
|
||||
/// Wait until ready and prepare for next call.
|
||||
///
|
||||
/// String given as occasion and candidate hash are logged in case the rate limit hit.
|
||||
async fn limit(&mut self, occasion: &'static str, candidate_hash: CandidateHash) {
|
||||
// Wait for rate limit and add some logging:
|
||||
let mut num_wakes: u32 = 0;
|
||||
poll_fn(|cx| {
|
||||
let old_limit = Pin::new(&mut self.limit);
|
||||
match old_limit.poll(cx) {
|
||||
Poll::Pending => {
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
?occasion,
|
||||
?candidate_hash,
|
||||
?num_wakes,
|
||||
"Sending rate limit hit, slowing down requests"
|
||||
);
|
||||
num_wakes += 1;
|
||||
Poll::Pending
|
||||
},
|
||||
Poll::Ready(()) => Poll::Ready(()),
|
||||
}
|
||||
})
|
||||
.await;
|
||||
*self = Self::new_limit();
|
||||
}
|
||||
}
|
||||
|
||||
/// Retrieve the currently active sessions.
|
||||
///
|
||||
/// List is all indices of all active sessions together with the head that was used for the query.
|
||||
#[overseer::contextbounds(DisputeDistribution, prefix = self::overseer)]
|
||||
async fn get_active_session_indices<Context>(
|
||||
ctx: &mut Context,
|
||||
runtime: &mut RuntimeInfo,
|
||||
active_heads: &Vec<Hash>,
|
||||
) -> Result<HashMap<SessionIndex, Hash>> {
|
||||
let mut indices = HashMap::new();
|
||||
// Iterate all heads we track as active and fetch the child' session indices.
|
||||
for head in active_heads {
|
||||
let session_index = runtime.get_session_index_for_child(ctx.sender(), *head).await?;
|
||||
// Cache session info
|
||||
if let Err(err) =
|
||||
runtime.get_session_info_by_index(ctx.sender(), *head, session_index).await
|
||||
{
|
||||
gum::debug!(target: LOG_TARGET, ?err, ?session_index, "Can't cache SessionInfo");
|
||||
}
|
||||
indices.insert(session_index, *head);
|
||||
}
|
||||
Ok(indices)
|
||||
}
|
||||
|
||||
/// Retrieve Set of active disputes from the dispute coordinator.
|
||||
async fn get_active_disputes<Sender>(
|
||||
sender: &mut Sender,
|
||||
) -> JfyiErrorResult<BTreeMap<(SessionIndex, CandidateHash), DisputeStatus>>
|
||||
where
|
||||
Sender: SubsystemSender<DisputeCoordinatorMessage>,
|
||||
{
|
||||
let (tx, rx) = oneshot::channel();
|
||||
|
||||
sender.send_message(DisputeCoordinatorMessage::ActiveDisputes(tx)).await;
|
||||
rx.await.map_err(|_| JfyiError::AskActiveDisputesCanceled)
|
||||
}
|
||||
@@ -0,0 +1,328 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
||||
use futures::{Future, FutureExt};
|
||||
|
||||
use pezkuwi_node_network_protocol::{
|
||||
request_response::{
|
||||
outgoing::RequestError,
|
||||
v1::{DisputeRequest, DisputeResponse},
|
||||
OutgoingRequest, OutgoingResult, Recipient, Requests,
|
||||
},
|
||||
IfDisconnected,
|
||||
};
|
||||
use pezkuwi_node_subsystem::{messages::NetworkBridgeTxMessage, overseer};
|
||||
use pezkuwi_node_subsystem_util::{metrics, nesting_sender::NestingSender, runtime::RuntimeInfo};
|
||||
use pezkuwi_primitives::{AuthorityDiscoveryId, CandidateHash, Hash, SessionIndex, ValidatorIndex};
|
||||
|
||||
use super::error::{FatalError, Result};
|
||||
|
||||
use crate::{
|
||||
metrics::{FAILED, SUCCEEDED},
|
||||
Metrics, LOG_TARGET,
|
||||
};
|
||||
|
||||
/// Delivery status for a particular dispute.
|
||||
///
|
||||
/// Keeps track of all the validators that have to be reached for a dispute.
|
||||
///
|
||||
/// The unit of work for a `SendTask` is an authority/validator.
|
||||
pub struct SendTask<M> {
|
||||
/// The request we are supposed to get out to all `teyrchain` validators of the dispute's
|
||||
/// session and to all current authorities.
|
||||
request: DisputeRequest,
|
||||
|
||||
/// The set of authorities we need to send our messages to. This set will change at session
|
||||
/// boundaries. It will always be at least the `teyrchain` validators of the session where the
|
||||
/// dispute happened and the authorities of the current sessions as determined by active heads.
|
||||
deliveries: HashMap<AuthorityDiscoveryId, DeliveryStatus>,
|
||||
|
||||
/// Whether we have any tasks failed since the last refresh.
|
||||
has_failed_sends: bool,
|
||||
|
||||
/// Sender to be cloned for tasks.
|
||||
tx: NestingSender<M, TaskFinish>,
|
||||
}
|
||||
|
||||
/// Delivery state of a vote/statement for one particular validator.
enum DeliveryStatus {
    /// The request has been sent, but no result came back yet.
    Pending,
    /// The peer confirmed the request - nothing more to send to it.
    Succeeded,
}
|
||||
|
||||
/// A sending task finishes with this result:
|
||||
#[derive(Debug)]
|
||||
pub struct TaskFinish {
|
||||
/// The candidate this task was running for.
|
||||
pub candidate_hash: CandidateHash,
|
||||
/// The authority the request was sent to.
|
||||
pub receiver: AuthorityDiscoveryId,
|
||||
/// The result of the delivery attempt.
|
||||
pub result: TaskResult,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum TaskResult {
|
||||
/// Task succeeded in getting the request to its peer.
|
||||
Succeeded,
|
||||
/// Task was not able to get the request out to its peer.
|
||||
///
|
||||
/// It should be retried in that case.
|
||||
Failed(RequestError),
|
||||
}
|
||||
|
||||
impl TaskResult {
|
||||
pub fn as_metrics_label(&self) -> &'static str {
|
||||
match self {
|
||||
Self::Succeeded => SUCCEEDED,
|
||||
Self::Failed(_) => FAILED,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[overseer::contextbounds(DisputeDistribution, prefix = self::overseer)]
|
||||
impl<M: 'static + Send + Sync> SendTask<M> {
|
||||
/// Initiates sending a dispute message to peers.
|
||||
///
|
||||
/// Creation of new `SendTask`s is subject to rate limiting. As each `SendTask` will trigger
|
||||
/// sending a message to each validator, hence for employing a per-peer rate limit, we need to
|
||||
/// limit the construction of new `SendTask`s.
|
||||
pub async fn new<Context>(
|
||||
ctx: &mut Context,
|
||||
runtime: &mut RuntimeInfo,
|
||||
active_sessions: &HashMap<SessionIndex, Hash>,
|
||||
tx: NestingSender<M, TaskFinish>,
|
||||
request: DisputeRequest,
|
||||
metrics: &Metrics,
|
||||
) -> Result<Self> {
|
||||
let mut send_task =
|
||||
Self { request, deliveries: HashMap::new(), has_failed_sends: false, tx };
|
||||
send_task.refresh_sends(ctx, runtime, active_sessions, metrics).await?;
|
||||
Ok(send_task)
|
||||
}
|
||||
|
||||
/// Make sure we are sending to all relevant authorities.
|
||||
///
|
||||
/// This function is called at construction and should also be called whenever a session change
|
||||
/// happens and on a regular basis to ensure we are retrying failed attempts.
|
||||
///
|
||||
/// This might resend to validators and is thus subject to any rate limiting we might want.
|
||||
/// Calls to this function for different instances should be rate limited according to
|
||||
/// `SEND_RATE_LIMIT`.
|
||||
///
|
||||
/// Returns: `True` if this call resulted in new requests.
|
||||
pub async fn refresh_sends<Context>(
|
||||
&mut self,
|
||||
ctx: &mut Context,
|
||||
runtime: &mut RuntimeInfo,
|
||||
active_sessions: &HashMap<SessionIndex, Hash>,
|
||||
metrics: &Metrics,
|
||||
) -> Result<bool> {
|
||||
let new_authorities = self.get_relevant_validators(ctx, runtime, active_sessions).await?;
|
||||
|
||||
// Note this will also contain all authorities for which sending failed previously:
|
||||
let add_authorities: Vec<_> = new_authorities
|
||||
.iter()
|
||||
.filter(|a| !self.deliveries.contains_key(a))
|
||||
.map(Clone::clone)
|
||||
.collect();
|
||||
|
||||
// Get rid of dead/irrelevant tasks/statuses:
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
already_running_deliveries = ?self.deliveries.len(),
|
||||
"Cleaning up deliveries"
|
||||
);
|
||||
self.deliveries.retain(|k, _| new_authorities.contains(k));
|
||||
|
||||
// Start any new tasks that are needed:
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
new_and_failed_authorities = ?add_authorities.len(),
|
||||
overall_authority_set_size = ?new_authorities.len(),
|
||||
already_running_deliveries = ?self.deliveries.len(),
|
||||
"Starting new send requests for authorities."
|
||||
);
|
||||
let new_statuses =
|
||||
send_requests(ctx, self.tx.clone(), add_authorities, self.request.clone(), metrics)
|
||||
.await?;
|
||||
|
||||
let was_empty = new_statuses.is_empty();
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
sent_requests = ?new_statuses.len(),
|
||||
"Requests dispatched."
|
||||
);
|
||||
|
||||
self.has_failed_sends = false;
|
||||
self.deliveries.extend(new_statuses.into_iter());
|
||||
Ok(!was_empty)
|
||||
}
|
||||
|
||||
/// Whether any sends have failed since the last refresh.
|
||||
pub fn has_failed_sends(&self) -> bool {
|
||||
self.has_failed_sends
|
||||
}
|
||||
|
||||
/// Handle a finished response waiting task.
|
||||
///
|
||||
/// Called by `DisputeSender` upon reception of the corresponding message from our spawned
|
||||
/// `wait_response_task`.
|
||||
pub fn on_finished_send(&mut self, authority: &AuthorityDiscoveryId, result: TaskResult) {
|
||||
match result {
|
||||
TaskResult::Failed(err) => {
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
?authority,
|
||||
candidate_hash = %self.request.0.candidate_receipt.hash(),
|
||||
%err,
|
||||
"Error sending dispute statements to node."
|
||||
);
|
||||
|
||||
self.has_failed_sends = true;
|
||||
// Remove state, so we know what to try again:
|
||||
self.deliveries.remove(authority);
|
||||
},
|
||||
TaskResult::Succeeded => {
|
||||
let status = match self.deliveries.get_mut(&authority) {
|
||||
None => {
|
||||
// Can happen when a sending became irrelevant while the response was
|
||||
// already queued.
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
candidate = ?self.request.0.candidate_receipt.hash(),
|
||||
?authority,
|
||||
?result,
|
||||
"Received `FromSendingTask::Finished` for non existing task."
|
||||
);
|
||||
return;
|
||||
},
|
||||
Some(status) => status,
|
||||
};
|
||||
// We are done here:
|
||||
*status = DeliveryStatus::Succeeded;
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Determine all validators that should receive the given dispute requests.
|
||||
///
|
||||
/// This is all `teyrchain` validators of the session the candidate occurred and all authorities
|
||||
/// of all currently active sessions, determined by currently active heads.
|
||||
async fn get_relevant_validators<Context>(
|
||||
&self,
|
||||
ctx: &mut Context,
|
||||
runtime: &mut RuntimeInfo,
|
||||
active_sessions: &HashMap<SessionIndex, Hash>,
|
||||
) -> Result<HashSet<AuthorityDiscoveryId>> {
|
||||
let ref_head = self.request.0.candidate_receipt.descriptor.relay_parent();
|
||||
// Retrieve all authorities which participated in the teyrchain consensus of the session
|
||||
// in which the candidate was backed.
|
||||
let info = runtime
|
||||
.get_session_info_by_index(ctx.sender(), ref_head, self.request.0.session_index)
|
||||
.await?;
|
||||
let session_info = &info.session_info;
|
||||
let validator_count = session_info.validators.len();
|
||||
let mut authorities: HashSet<_> = session_info
|
||||
.discovery_keys
|
||||
.iter()
|
||||
.take(validator_count)
|
||||
.enumerate()
|
||||
.filter(|(i, _)| Some(ValidatorIndex(*i as _)) != info.validator_info.our_index)
|
||||
.map(|(_, v)| v.clone())
|
||||
.collect();
|
||||
|
||||
// Retrieve all authorities for the current session as indicated by the active
|
||||
// heads we are tracking.
|
||||
for (session_index, head) in active_sessions.iter() {
|
||||
let info =
|
||||
runtime.get_session_info_by_index(ctx.sender(), *head, *session_index).await?;
|
||||
let session_info = &info.session_info;
|
||||
let new_set = session_info
|
||||
.discovery_keys
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter(|(i, _)| Some(ValidatorIndex(*i as _)) != info.validator_info.our_index)
|
||||
.map(|(_, v)| v.clone());
|
||||
authorities.extend(new_set);
|
||||
}
|
||||
Ok(authorities)
|
||||
}
|
||||
}
|
||||
|
||||
/// Start sending of the given message to all given authorities.
|
||||
///
|
||||
/// And spawn tasks for handling the response.
|
||||
#[overseer::contextbounds(DisputeDistribution, prefix = self::overseer)]
|
||||
async fn send_requests<Context, M: 'static + Send + Sync>(
|
||||
ctx: &mut Context,
|
||||
tx: NestingSender<M, TaskFinish>,
|
||||
receivers: Vec<AuthorityDiscoveryId>,
|
||||
req: DisputeRequest,
|
||||
metrics: &Metrics,
|
||||
) -> Result<HashMap<AuthorityDiscoveryId, DeliveryStatus>> {
|
||||
let mut statuses = HashMap::with_capacity(receivers.len());
|
||||
let mut reqs = Vec::with_capacity(receivers.len());
|
||||
|
||||
for receiver in receivers {
|
||||
let (outgoing, pending_response) =
|
||||
OutgoingRequest::new(Recipient::Authority(receiver.clone()), req.clone());
|
||||
|
||||
reqs.push(Requests::DisputeSendingV1(outgoing));
|
||||
|
||||
let fut = wait_response_task(
|
||||
pending_response,
|
||||
req.0.candidate_receipt.hash(),
|
||||
receiver.clone(),
|
||||
tx.clone(),
|
||||
metrics.time_dispute_request(),
|
||||
);
|
||||
|
||||
ctx.spawn("dispute-sender", fut.boxed()).map_err(FatalError::SpawnTask)?;
|
||||
statuses.insert(receiver, DeliveryStatus::Pending);
|
||||
}
|
||||
|
||||
let msg = NetworkBridgeTxMessage::SendRequests(reqs, IfDisconnected::ImmediateError);
|
||||
ctx.send_message(msg).await;
|
||||
Ok(statuses)
|
||||
}
|
||||
|
||||
/// Future to be spawned in a task for awaiting a response.
|
||||
async fn wait_response_task<M: 'static + Send + Sync>(
|
||||
pending_response: impl Future<Output = OutgoingResult<DisputeResponse>>,
|
||||
candidate_hash: CandidateHash,
|
||||
receiver: AuthorityDiscoveryId,
|
||||
mut tx: NestingSender<M, TaskFinish>,
|
||||
_timer: Option<metrics::prometheus::prometheus::HistogramTimer>,
|
||||
) {
|
||||
let result = pending_response.await;
|
||||
let msg = match result {
|
||||
Err(err) => TaskFinish { candidate_hash, receiver, result: TaskResult::Failed(err) },
|
||||
Ok(DisputeResponse::Confirmed) =>
|
||||
TaskFinish { candidate_hash, receiver, result: TaskResult::Succeeded },
|
||||
};
|
||||
if let Err(err) = tx.send_message(msg).await {
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
%err,
|
||||
"Failed to notify subsystem about dispute sending result."
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,230 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
//
|
||||
|
||||
//! Mock data and utility functions for unit tests in this subsystem.
|
||||
|
||||
use std::{
|
||||
collections::{HashMap, HashSet},
|
||||
sync::{Arc, LazyLock},
|
||||
time::Instant,
|
||||
};
|
||||
|
||||
use async_trait::async_trait;
|
||||
|
||||
use pezkuwi_node_network_protocol::{authority_discovery::AuthorityDiscovery, PeerId};
|
||||
use sc_keystore::LocalKeystore;
|
||||
use sp_application_crypto::AppCrypto;
|
||||
use sp_keyring::Sr25519Keyring;
|
||||
use sp_keystore::{Keystore, KeystorePtr};
|
||||
|
||||
use pezkuwi_node_primitives::{DisputeMessage, SignedDisputeStatement};
|
||||
use pezkuwi_primitives::{
|
||||
AuthorityDiscoveryId, CandidateHash, CandidateReceiptV2 as CandidateReceipt, Hash,
|
||||
SessionIndex, SessionInfo, ValidatorId, ValidatorIndex,
|
||||
};
|
||||
use pezkuwi_primitives_test_helpers::dummy_candidate_descriptor_v2;
|
||||
|
||||
use crate::LOG_TARGET;
|
||||
|
||||
pub const MOCK_SESSION_INDEX: SessionIndex = 1;
|
||||
pub const MOCK_NEXT_SESSION_INDEX: SessionIndex = 2;
|
||||
pub const MOCK_VALIDATORS: [Sr25519Keyring; 6] = [
|
||||
Sr25519Keyring::Ferdie,
|
||||
Sr25519Keyring::Alice,
|
||||
Sr25519Keyring::Bob,
|
||||
Sr25519Keyring::Charlie,
|
||||
Sr25519Keyring::Dave,
|
||||
Sr25519Keyring::Eve,
|
||||
];
|
||||
|
||||
pub const MOCK_AUTHORITIES_NEXT_SESSION: [Sr25519Keyring; 2] =
|
||||
[Sr25519Keyring::One, Sr25519Keyring::Two];
|
||||
|
||||
pub const FERDIE_INDEX: ValidatorIndex = ValidatorIndex(0);
|
||||
pub const ALICE_INDEX: ValidatorIndex = ValidatorIndex(1);
|
||||
pub const BOB_INDEX: ValidatorIndex = ValidatorIndex(2);
|
||||
pub const CHARLIE_INDEX: ValidatorIndex = ValidatorIndex(3);
|
||||
|
||||
/// Mocked `AuthorityDiscovery` service.
|
||||
pub static MOCK_AUTHORITY_DISCOVERY: LazyLock<MockAuthorityDiscovery> =
|
||||
LazyLock::new(|| MockAuthorityDiscovery::new());
|
||||
// Creating an innocent looking `SessionInfo` is really expensive in a debug build. Around
|
||||
// 700ms on my machine, We therefore cache those keys here:
|
||||
pub static MOCK_VALIDATORS_DISCOVERY_KEYS: LazyLock<HashMap<Sr25519Keyring, AuthorityDiscoveryId>> =
|
||||
LazyLock::new(|| {
|
||||
MOCK_VALIDATORS
|
||||
.iter()
|
||||
.chain(MOCK_AUTHORITIES_NEXT_SESSION.iter())
|
||||
.map(|v| (*v, v.public().into()))
|
||||
.collect()
|
||||
});
|
||||
pub static FERDIE_DISCOVERY_KEY: LazyLock<AuthorityDiscoveryId> =
|
||||
LazyLock::new(|| MOCK_VALIDATORS_DISCOVERY_KEYS.get(&Sr25519Keyring::Ferdie).unwrap().clone());
|
||||
|
||||
pub static MOCK_SESSION_INFO: LazyLock<SessionInfo> = LazyLock::new(|| SessionInfo {
|
||||
validators: MOCK_VALIDATORS.iter().take(4).map(|k| k.public().into()).collect(),
|
||||
discovery_keys: MOCK_VALIDATORS
|
||||
.iter()
|
||||
.map(|k| MOCK_VALIDATORS_DISCOVERY_KEYS.get(&k).unwrap().clone())
|
||||
.collect(),
|
||||
assignment_keys: vec![],
|
||||
validator_groups: Default::default(),
|
||||
n_cores: 0,
|
||||
zeroth_delay_tranche_width: 0,
|
||||
relay_vrf_modulo_samples: 0,
|
||||
n_delay_tranches: 0,
|
||||
no_show_slots: 0,
|
||||
needed_approvals: 0,
|
||||
active_validator_indices: vec![],
|
||||
dispute_period: 6,
|
||||
random_seed: [0u8; 32],
|
||||
});
|
||||
|
||||
/// `SessionInfo` for the second session. (No more validators, but two more authorities.
|
||||
pub static MOCK_NEXT_SESSION_INFO: LazyLock<SessionInfo> = LazyLock::new(|| SessionInfo {
|
||||
discovery_keys: MOCK_AUTHORITIES_NEXT_SESSION
|
||||
.iter()
|
||||
.map(|k| MOCK_VALIDATORS_DISCOVERY_KEYS.get(&k).unwrap().clone())
|
||||
.collect(),
|
||||
validators: Default::default(),
|
||||
assignment_keys: vec![],
|
||||
validator_groups: Default::default(),
|
||||
n_cores: 0,
|
||||
zeroth_delay_tranche_width: 0,
|
||||
relay_vrf_modulo_samples: 0,
|
||||
n_delay_tranches: 0,
|
||||
no_show_slots: 0,
|
||||
needed_approvals: 0,
|
||||
active_validator_indices: vec![],
|
||||
dispute_period: 6,
|
||||
random_seed: [0u8; 32],
|
||||
});
|
||||
|
||||
pub fn make_candidate_receipt(relay_parent: Hash) -> CandidateReceipt {
|
||||
CandidateReceipt {
|
||||
descriptor: dummy_candidate_descriptor_v2(relay_parent),
|
||||
commitments_hash: Hash::random(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn make_explicit_signed(
|
||||
validator: Sr25519Keyring,
|
||||
candidate_hash: CandidateHash,
|
||||
valid: bool,
|
||||
) -> SignedDisputeStatement {
|
||||
let keystore: KeystorePtr = Arc::new(LocalKeystore::in_memory());
|
||||
Keystore::sr25519_generate_new(&*keystore, ValidatorId::ID, Some(&validator.to_seed()))
|
||||
.expect("Insert key into keystore");
|
||||
|
||||
SignedDisputeStatement::sign_explicit(
|
||||
&keystore,
|
||||
valid,
|
||||
candidate_hash,
|
||||
MOCK_SESSION_INDEX,
|
||||
validator.public().into(),
|
||||
)
|
||||
.expect("Keystore should be fine.")
|
||||
.expect("Signing should work.")
|
||||
}
|
||||
|
||||
pub fn make_dispute_message(
|
||||
candidate: CandidateReceipt,
|
||||
valid_validator: ValidatorIndex,
|
||||
invalid_validator: ValidatorIndex,
|
||||
) -> DisputeMessage {
|
||||
let candidate_hash = candidate.hash();
|
||||
let before_request = Instant::now();
|
||||
let valid_vote =
|
||||
make_explicit_signed(MOCK_VALIDATORS[valid_validator.0 as usize], candidate_hash, true);
|
||||
gum::trace!(
|
||||
"Passed time for valid vote: {:#?}",
|
||||
Instant::now().saturating_duration_since(before_request)
|
||||
);
|
||||
let before_request = Instant::now();
|
||||
let invalid_vote =
|
||||
make_explicit_signed(MOCK_VALIDATORS[invalid_validator.0 as usize], candidate_hash, false);
|
||||
gum::trace!(
|
||||
"Passed time for invalid vote: {:#?}",
|
||||
Instant::now().saturating_duration_since(before_request)
|
||||
);
|
||||
DisputeMessage::from_signed_statements(
|
||||
valid_vote,
|
||||
valid_validator,
|
||||
invalid_vote,
|
||||
invalid_validator,
|
||||
candidate,
|
||||
&MOCK_SESSION_INFO,
|
||||
)
|
||||
.expect("DisputeMessage construction should work.")
|
||||
}
|
||||
|
||||
/// Dummy `AuthorityDiscovery` service.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct MockAuthorityDiscovery {
|
||||
peer_ids: HashMap<Sr25519Keyring, PeerId>,
|
||||
}
|
||||
|
||||
impl MockAuthorityDiscovery {
|
||||
pub fn new() -> Self {
|
||||
let mut peer_ids = HashMap::new();
|
||||
peer_ids.insert(Sr25519Keyring::Alice, PeerId::random());
|
||||
peer_ids.insert(Sr25519Keyring::Bob, PeerId::random());
|
||||
peer_ids.insert(Sr25519Keyring::Ferdie, PeerId::random());
|
||||
peer_ids.insert(Sr25519Keyring::Charlie, PeerId::random());
|
||||
peer_ids.insert(Sr25519Keyring::Dave, PeerId::random());
|
||||
peer_ids.insert(Sr25519Keyring::Eve, PeerId::random());
|
||||
peer_ids.insert(Sr25519Keyring::One, PeerId::random());
|
||||
peer_ids.insert(Sr25519Keyring::Two, PeerId::random());
|
||||
|
||||
Self { peer_ids }
|
||||
}
|
||||
|
||||
pub fn get_peer_id_by_authority(&self, authority: Sr25519Keyring) -> PeerId {
|
||||
*self.peer_ids.get(&authority).expect("Tester only picks valid authorities")
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl AuthorityDiscovery for MockAuthorityDiscovery {
|
||||
async fn get_addresses_by_authority_id(
|
||||
&mut self,
|
||||
_authority: pezkuwi_primitives::AuthorityDiscoveryId,
|
||||
) -> Option<HashSet<sc_network::Multiaddr>> {
|
||||
panic!("Not implemented");
|
||||
}
|
||||
|
||||
async fn get_authority_ids_by_peer_id(
|
||||
&mut self,
|
||||
peer_id: pezkuwi_node_network_protocol::PeerId,
|
||||
) -> Option<HashSet<pezkuwi_primitives::AuthorityDiscoveryId>> {
|
||||
for (a, p) in self.peer_ids.iter() {
|
||||
if p == &peer_id {
|
||||
let result =
|
||||
HashSet::from([MOCK_VALIDATORS_DISCOVERY_KEYS.get(&a).unwrap().clone()]);
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
%peer_id,
|
||||
?result,
|
||||
"Returning authority ids for peer id"
|
||||
);
|
||||
return Some(result);
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,901 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
//
|
||||
|
||||
//! Subsystem unit tests
|
||||
|
||||
use std::{
|
||||
collections::{BTreeMap, HashSet},
|
||||
task::Poll,
|
||||
time::{Duration, Instant},
|
||||
};
|
||||
|
||||
use assert_matches::assert_matches;
|
||||
use codec::{Decode, Encode};
|
||||
use futures::{
|
||||
channel::oneshot,
|
||||
future::{poll_fn, ready},
|
||||
pin_mut, Future,
|
||||
};
|
||||
use futures_timer::Delay;
|
||||
|
||||
use sc_network::{config::RequestResponseConfig, ProtocolName};
|
||||
|
||||
use pezkuwi_node_network_protocol::{
|
||||
request_response::{v1::DisputeRequest, IncomingRequest, ReqProtocolNames},
|
||||
PeerId,
|
||||
};
|
||||
use sp_keyring::Sr25519Keyring;
|
||||
|
||||
use pezkuwi_node_network_protocol::{
|
||||
request_response::{v1::DisputeResponse, Recipient, Requests},
|
||||
IfDisconnected,
|
||||
};
|
||||
use pezkuwi_node_primitives::DisputeStatus;
|
||||
use pezkuwi_node_subsystem::{
|
||||
messages::{
|
||||
AllMessages, DisputeCoordinatorMessage, DisputeDistributionMessage, ImportStatementsResult,
|
||||
NetworkBridgeTxMessage, RuntimeApiMessage, RuntimeApiRequest,
|
||||
},
|
||||
ActiveLeavesUpdate, FromOrchestra, OverseerSignal,
|
||||
};
|
||||
use pezkuwi_node_subsystem_test_helpers::{
|
||||
mock::{make_ferdie_keystore, new_leaf},
|
||||
subsystem_test_harness, TestSubsystemContextHandle,
|
||||
};
|
||||
use pezkuwi_primitives::{
|
||||
AuthorityDiscoveryId, Block, CandidateHash, CandidateReceiptV2 as CandidateReceipt,
|
||||
ExecutorParams, Hash, NodeFeatures, SessionIndex, SessionInfo,
|
||||
};
|
||||
|
||||
use self::mock::{
|
||||
make_candidate_receipt, make_dispute_message, ALICE_INDEX, FERDIE_DISCOVERY_KEY, FERDIE_INDEX,
|
||||
MOCK_AUTHORITY_DISCOVERY, MOCK_NEXT_SESSION_INDEX, MOCK_NEXT_SESSION_INFO, MOCK_SESSION_INDEX,
|
||||
MOCK_SESSION_INFO,
|
||||
};
|
||||
use crate::{
|
||||
receiver::BATCH_COLLECTING_INTERVAL,
|
||||
tests::mock::{BOB_INDEX, CHARLIE_INDEX},
|
||||
DisputeDistributionSubsystem, Metrics, LOG_TARGET, SEND_RATE_LIMIT,
|
||||
};
|
||||
|
||||
/// Useful mock providers.
|
||||
pub mod mock;
|
||||
|
||||
/// Sending a dispute makes the subsystem send out the expected requests.
#[test]
fn send_dispute_sends_dispute() {
    test_harness(
        |mut handle: TestSubsystemContextHandle<DisputeDistributionMessage>, _req_cfg| async move {
            let _ = handle_subsystem_startup(&mut handle, None).await;

            let candidate = make_candidate_receipt(Hash::random());
            send_dispute(&mut handle, candidate).await;
            conclude(&mut handle).await;
        },
    );
}
|
||||
|
||||
/// Two disputes sent back to back take at least `SEND_RATE_LIMIT` in total.
#[test]
fn send_honors_rate_limit() {
    sp_tracing::try_init_simple();
    test_harness(
        |mut handle: TestSubsystemContextHandle<DisputeDistributionMessage>, _req_cfg| async move {
            let _ = handle_subsystem_startup(&mut handle, None).await;

            let first = make_candidate_receipt(Hash::random());
            let started = Instant::now();
            send_dispute(&mut handle, first).await;
            // First send should not be rate limited:
            gum::trace!("Passed time: {:#?}", started.elapsed());
            // Not asserted, as an upper bound on timing would likely be flaky on CI:
            // assert!(started.elapsed() < SEND_RATE_LIMIT);

            let second = make_candidate_receipt(Hash::random());
            send_dispute(&mut handle, second).await;
            // Second send should be rate limited:
            gum::trace!("Passed time for send_dispute: {:#?}", started.elapsed());
            assert!(started.elapsed() >= SEND_RATE_LIMIT);
            conclude(&mut handle).await;
        },
    );
}
|
||||
|
||||
/// Helper for sending a new dispute to dispute-distribution sender and handling resulting messages.
|
||||
async fn send_dispute(
|
||||
handle: &mut TestSubsystemContextHandle<DisputeDistributionMessage>,
|
||||
candidate: CandidateReceipt,
|
||||
) {
|
||||
let before_request = Instant::now();
|
||||
let message = make_dispute_message(candidate.clone(), ALICE_INDEX, FERDIE_INDEX);
|
||||
gum::trace!(
|
||||
"Passed time for making message: {:#?}",
|
||||
Instant::now().saturating_duration_since(before_request)
|
||||
);
|
||||
let before_request = Instant::now();
|
||||
handle
|
||||
.send(FromOrchestra::Communication {
|
||||
msg: DisputeDistributionMessage::SendDispute(message.clone()),
|
||||
})
|
||||
.await;
|
||||
gum::trace!(
|
||||
"Passed time for sending message: {:#?}",
|
||||
Instant::now().saturating_duration_since(before_request)
|
||||
);
|
||||
|
||||
let expected_receivers = {
|
||||
let info = &MOCK_SESSION_INFO;
|
||||
info.discovery_keys
|
||||
.clone()
|
||||
.into_iter()
|
||||
.filter(|a| a != &Sr25519Keyring::Ferdie.public().into())
|
||||
.collect()
|
||||
// All validators are also authorities in the first session, so we are
|
||||
// done here.
|
||||
};
|
||||
check_sent_requests(handle, expected_receivers, true).await;
|
||||
}
|
||||
|
||||
// Things to test:
|
||||
// x Request triggers import
|
||||
// x Subsequent imports get batched
|
||||
// x Batch gets flushed.
|
||||
// x Batch gets renewed.
|
||||
// x Non authority requests get dropped.
|
||||
// x Sending rate limit is honored.
|
||||
// x Receiving rate limit is honored.
|
||||
// x Duplicate requests on batch are dropped
|
||||
|
||||
/// A request sent from a random peer id must be answered with a response that carries exactly
/// one reputation change.
#[test]
fn received_non_authorities_are_dropped() {
	test_harness(
		|mut handle: TestSubsystemContextHandle<DisputeDistributionMessage>,
		 mut req_cfg: RequestResponseConfig| async move {
			let req_tx = req_cfg.inbound_queue.as_mut().unwrap();
			let _ = handle_subsystem_startup(&mut handle, None).await;

			let candidate = make_candidate_receipt(Hash::random());
			let dispute = make_dispute_message(candidate, ALICE_INDEX, FERDIE_INDEX);

			// Non validator request should get dropped:
			let pending_response =
				send_network_dispute_request(req_tx, PeerId::random(), dispute.into()).await;

			assert_matches!(
				pending_response.await,
				Ok(resp) => {
					let sc_network::config::OutgoingResponse { reputation_changes, .. } = resp;
					// Peer should get punished:
					assert_eq!(reputation_changes.len(), 1);
				}
			);
			conclude(&mut handle).await;
		},
	);
}
|
||||
|
||||
/// A request coming from Alice's peer id must lead to a statement import, which is confirmed as
/// valid here.
#[test]
fn received_request_triggers_import() {
	test_harness(
		|mut handle: TestSubsystemContextHandle<DisputeDistributionMessage>,
		 mut req_cfg: RequestResponseConfig| async move {
			let req_tx = req_cfg.inbound_queue.as_mut().unwrap();
			let _ = handle_subsystem_startup(&mut handle, None).await;

			let candidate = make_candidate_receipt(Hash::random());
			let dispute = make_dispute_message(candidate, ALICE_INDEX, FERDIE_INDEX);
			let alice = MOCK_AUTHORITY_DISCOVERY.get_peer_id_by_authority(Sr25519Keyring::Alice);

			// Nothing extra happens while the import is pending.
			nested_network_dispute_request(
				&mut handle,
				req_tx,
				alice,
				dispute.into(),
				ImportStatementsResult::ValidImport,
				true,
				move |_handle, _req_tx, _message| ready(()),
			)
			.await;

			gum::trace!(target: LOG_TARGET, "Concluding.");
			conclude(&mut handle).await;
		},
	);
}
|
||||
|
||||
/// Checks that votes received after the initial import are collected into a batch.
///
/// Sequence exercised:
/// 1. A first request (Alice) is forwarded for import immediately and confirmed.
/// 2. Two further votes (Bob, Charlie) are sent; their responses stay pending.
/// 3. After one `BATCH_COLLECTING_INTERVAL` the same two votes are sent again. These duplicates
///    are answered with an error and no reputation change.
/// 4. After another `BATCH_COLLECTING_INTERVAL` an import with three statements is expected;
///    confirming it resolves the pending responses from step 2 with
///    `DisputeResponse::Confirmed`.
#[test]
fn batching_works() {
	let test = |mut handle: TestSubsystemContextHandle<DisputeDistributionMessage>,
	            mut req_cfg: RequestResponseConfig| async move {
		let req_tx = req_cfg.inbound_queue.as_mut().unwrap();
		let _ = handle_subsystem_startup(&mut handle, None).await;

		let relay_parent = Hash::random();
		let candidate = make_candidate_receipt(relay_parent);
		let message = make_dispute_message(candidate.clone(), ALICE_INDEX, FERDIE_INDEX);

		// Initial request should get forwarded immediately:
		nested_network_dispute_request(
			&mut handle,
			req_tx,
			MOCK_AUTHORITY_DISCOVERY.get_peer_id_by_authority(Sr25519Keyring::Alice),
			message.into(),
			ImportStatementsResult::ValidImport,
			true,
			move |_handle, _req_tx, _message| ready(()),
		)
		.await;

		let mut rx_responses = Vec::new();

		let message = make_dispute_message(candidate.clone(), BOB_INDEX, FERDIE_INDEX);
		let peer = MOCK_AUTHORITY_DISCOVERY.get_peer_id_by_authority(Sr25519Keyring::Bob);
		rx_responses.push(send_network_dispute_request(req_tx, peer, message.into()).await);

		let message = make_dispute_message(candidate.clone(), CHARLIE_INDEX, FERDIE_INDEX);
		let peer = MOCK_AUTHORITY_DISCOVERY.get_peer_id_by_authority(Sr25519Keyring::Charlie);
		rx_responses.push(send_network_dispute_request(req_tx, peer, message.into()).await);
		gum::trace!("Imported 3 votes into batch");

		// The delay must be awaited: a `Delay` that is merely constructed never runs, and the
		// duplicates below would then arrive within the same interval as the original votes.
		Delay::new(BATCH_COLLECTING_INTERVAL).await;
		gum::trace!("Batch should still be alive");
		// Batch should still be alive (2 new votes):
		// Let's import two more votes, but fully duplicates - should not extend batch live.
		gum::trace!("Importing duplicate votes");
		let mut rx_responses_duplicate = Vec::new();
		let message = make_dispute_message(candidate.clone(), BOB_INDEX, FERDIE_INDEX);
		let peer = MOCK_AUTHORITY_DISCOVERY.get_peer_id_by_authority(Sr25519Keyring::Bob);
		rx_responses_duplicate
			.push(send_network_dispute_request(req_tx, peer, message.into()).await);

		let message = make_dispute_message(candidate.clone(), CHARLIE_INDEX, FERDIE_INDEX);
		let peer = MOCK_AUTHORITY_DISCOVERY.get_peer_id_by_authority(Sr25519Keyring::Charlie);
		rx_responses_duplicate
			.push(send_network_dispute_request(req_tx, peer, message.into()).await);

		for rx_response in rx_responses_duplicate {
			assert_matches!(
				rx_response.await,
				Ok(resp) => {
					let sc_network::config::OutgoingResponse {
						result,
						reputation_changes,
						sent_feedback: _,
					} = resp;
					gum::trace!(
						target: LOG_TARGET,
						?reputation_changes,
						"Received reputation changes."
					);
					// We don't punish on that.
					assert_eq!(reputation_changes.len(), 0);

					assert_matches!(result, Err(()));
				}
			);
		}

		Delay::new(BATCH_COLLECTING_INTERVAL).await;
		gum::trace!("Batch should be ready now (only duplicates have been added)");

		let pending_confirmation = assert_matches!(
			handle.recv().await,
			AllMessages::DisputeCoordinator(
				DisputeCoordinatorMessage::ImportStatements {
					candidate_receipt: _,
					session,
					statements,
					pending_confirmation: Some(pending_confirmation),
				}
			) => {
				assert_eq!(session, MOCK_SESSION_INDEX);
				assert_eq!(statements.len(), 3);
				pending_confirmation
			}
		);
		pending_confirmation.send(ImportStatementsResult::ValidImport).unwrap();

		// The two batched (non-duplicate) requests are answered once the batch got imported.
		for rx_response in rx_responses {
			assert_matches!(
				rx_response.await,
				Ok(resp) => {
					let sc_network::config::OutgoingResponse {
						result,
						reputation_changes: _,
						sent_feedback,
					} = resp;

					let result = result.unwrap();
					let decoded =
						<DisputeResponse as Decode>::decode(&mut result.as_slice()).unwrap();

					assert!(decoded == DisputeResponse::Confirmed);
					if let Some(sent_feedback) = sent_feedback {
						sent_feedback.send(()).unwrap();
					}
					gum::trace!(
						target: LOG_TARGET,
						"Valid import happened."
					);
				}
			);
		}

		gum::trace!(target: LOG_TARGET, "Concluding.");
		conclude(&mut handle).await;
	};
	test_harness(test);
}
|
||||
|
||||
/// Checks that a single peer cannot push an unbounded number of requests.
///
/// After an initial import (sent as Alice), the peer id of Bob sends two further votes and then
/// one more. The last one is expected to be answered with an error, and the import that shows
/// up two batch intervals later must contain three statements only.
#[test]
fn receive_rate_limit_is_enforced() {
	test_harness(
		|mut handle: TestSubsystemContextHandle<DisputeDistributionMessage>,
		 mut req_cfg: RequestResponseConfig| async move {
			let req_tx = req_cfg.inbound_queue.as_mut().unwrap();
			let _ = handle_subsystem_startup(&mut handle, None).await;

			let candidate = make_candidate_receipt(Hash::random());
			let first_vote = make_dispute_message(candidate.clone(), ALICE_INDEX, FERDIE_INDEX);

			// Initial request should get forwarded immediately:
			nested_network_dispute_request(
				&mut handle,
				req_tx,
				MOCK_AUTHORITY_DISCOVERY.get_peer_id_by_authority(Sr25519Keyring::Alice),
				first_vote.into(),
				ImportStatementsResult::ValidImport,
				true,
				move |_handle, _req_tx, _message| ready(()),
			)
			.await;

			// Everything from here on is sent by one and the same peer.
			let peer = MOCK_AUTHORITY_DISCOVERY.get_peer_id_by_authority(Sr25519Keyring::Bob);
			let mut queued_responses = Vec::new();

			let bob_vote = make_dispute_message(candidate.clone(), BOB_INDEX, FERDIE_INDEX);
			queued_responses
				.push(send_network_dispute_request(req_tx, peer, bob_vote.into()).await);

			let charlie_vote =
				make_dispute_message(candidate.clone(), CHARLIE_INDEX, FERDIE_INDEX);
			queued_responses
				.push(send_network_dispute_request(req_tx, peer, charlie_vote.into()).await);

			gum::trace!("Import one too much:");

			let excess_vote = make_dispute_message(candidate.clone(), CHARLIE_INDEX, ALICE_INDEX);
			let rx_response_flood =
				send_network_dispute_request(req_tx, peer, excess_vote.into()).await;

			assert_matches!(
				rx_response_flood.await,
				Ok(resp) => {
					let sc_network::config::OutgoingResponse { result, .. } = resp;
					// Received error because of flood.
					assert!(!result.is_ok());
				}
			);

			gum::trace!("Need to wait 2 patch intervals:");
			for _ in 0..2 {
				Delay::new(BATCH_COLLECTING_INTERVAL).await;
			}

			gum::trace!("Batch should be ready now");

			let pending_confirmation = assert_matches!(
				handle.recv().await,
				AllMessages::DisputeCoordinator(
					DisputeCoordinatorMessage::ImportStatements {
						candidate_receipt: _,
						session,
						statements,
						pending_confirmation: Some(pending_confirmation),
					}
				) => {
					assert_eq!(session, MOCK_SESSION_INDEX);
					// Only 3 as fourth was flood:
					assert_eq!(statements.len(), 3);
					pending_confirmation
				}
			);
			pending_confirmation.send(ImportStatementsResult::ValidImport).unwrap();

			// The two requests that were accepted get confirmed now.
			for queued in queued_responses {
				assert_matches!(
					queued.await,
					Ok(resp) => {
						let sc_network::config::OutgoingResponse {
							result,
							sent_feedback,
							..
						} = resp;

						let payload = result.unwrap();
						let decoded =
							<DisputeResponse as Decode>::decode(&mut payload.as_slice()).unwrap();

						assert!(decoded == DisputeResponse::Confirmed);
						if let Some(feedback) = sent_feedback {
							feedback.send(()).unwrap();
						}
						gum::trace!(
							target: LOG_TARGET,
							"Valid import happened."
						);
					}
				);
			}

			gum::trace!(target: LOG_TARGET, "Concluding.");
			conclude(&mut handle).await;
		},
	);
}
|
||||
|
||||
/// Sends a dispute whose requests are never confirmed, then activates a new leaf that reports
/// no active disputes; afterwards no further messages from the subsystem are expected.
#[test]
fn send_dispute_gets_cleaned_up() {
	test_harness(
		|mut handle: TestSubsystemContextHandle<DisputeDistributionMessage>, _| async move {
			let old_head = handle_subsystem_startup(&mut handle, None).await;

			let candidate = make_candidate_receipt(Hash::random());
			let dispute = make_dispute_message(candidate, ALICE_INDEX, FERDIE_INDEX);
			let outgoing = FromOrchestra::Communication {
				msg: DisputeDistributionMessage::SendDispute(dispute),
			};
			handle.send(outgoing).await;

			// All validators are also authorities in the first session, so the expected
			// receivers are all discovery keys of the session except Ferdie's.
			let ferdie: AuthorityDiscoveryId = Sr25519Keyring::Ferdie.public().into();
			let expected_receivers: HashSet<AuthorityDiscoveryId> = MOCK_SESSION_INFO
				.discovery_keys
				.iter()
				.filter(|key| **key != ferdie)
				.cloned()
				.collect();
			check_sent_requests(&mut handle, expected_receivers, false).await;

			// Give tasks a chance to finish:
			Delay::new(Duration::from_millis(20)).await;

			// No disputes any more:
			let no_disputes = BTreeMap::new();
			activate_leaf(
				&mut handle,
				Hash::random(),
				Some(old_head),
				MOCK_SESSION_INDEX,
				None,
				no_disputes,
			)
			.await;

			// Yield, so subsystem can make progress:
			Delay::new(Duration::from_millis(2)).await;

			conclude(&mut handle).await;
		},
	);
}
|
||||
|
||||
/// Follows an unconfirmed dispute across several leaf activations.
///
/// The requests of the first send are left unanswered. A new leaf in the same session that
/// still lists the dispute as active must trigger the same set of requests again. A further
/// leaf in the next session must trigger requests to the authorities of the new session plus
/// the validators of the old one; those are finally confirmed.
#[test]
fn dispute_retries_and_works_across_session_boundaries() {
	sp_tracing::try_init_simple();
	test_harness(
		|mut handle: TestSubsystemContextHandle<DisputeDistributionMessage>, _| async move {
			let old_head = handle_subsystem_startup(&mut handle, None).await;

			let candidate = make_candidate_receipt(Hash::random());
			let dispute = make_dispute_message(candidate.clone(), ALICE_INDEX, FERDIE_INDEX);
			let outgoing = FromOrchestra::Communication {
				msg: DisputeDistributionMessage::SendDispute(dispute),
			};
			handle.send(outgoing).await;

			// All validators are also authorities in the first session, so the expected
			// receivers are all discovery keys of the session except Ferdie's.
			let ferdie: AuthorityDiscoveryId = Sr25519Keyring::Ferdie.public().into();
			let first_session_receivers: HashSet<AuthorityDiscoveryId> = MOCK_SESSION_INFO
				.discovery_keys
				.iter()
				.filter(|key| **key != ferdie)
				.cloned()
				.collect();

			// Requests don't get confirmed - dispute is carried over to next session.
			check_sent_requests(&mut handle, first_session_receivers.clone(), false).await;

			// Give tasks a chance to finish:
			Delay::new(Duration::from_millis(20)).await;

			// Trigger retry:
			let old_head2 = Hash::random();
			activate_leaf(
				&mut handle,
				old_head2,
				Some(old_head),
				MOCK_SESSION_INDEX,
				None,
				BTreeMap::from([((MOCK_SESSION_INDEX, candidate.hash()), DisputeStatus::Active)]),
			)
			.await;

			check_sent_requests(&mut handle, first_session_receivers.clone(), false).await;
			// Give tasks a chance to finish:
			Delay::new(Duration::from_millis(20)).await;

			// Session change:
			activate_leaf(
				&mut handle,
				Hash::random(),
				Some(old_head2),
				MOCK_NEXT_SESSION_INDEX,
				Some(MOCK_NEXT_SESSION_INFO.clone()),
				BTreeMap::from([((MOCK_SESSION_INDEX, candidate.hash()), DisputeStatus::Active)]),
			)
			.await;

			// Authorities of the new session, followed by the validators of the old one, each
			// without Ferdie.
			let old_validator_count = MOCK_SESSION_INFO.validators.len();
			let old_validators = MOCK_SESSION_INFO
				.discovery_keys
				.clone()
				.into_iter()
				.take(old_validator_count)
				.filter(|a| *a != *FERDIE_DISCOVERY_KEY);
			let both_sessions_receivers = MOCK_NEXT_SESSION_INFO
				.discovery_keys
				.clone()
				.into_iter()
				.filter(|a| *a != *FERDIE_DISCOVERY_KEY)
				.chain(old_validators)
				.collect();
			check_sent_requests(&mut handle, both_sessions_receivers, true).await;

			conclude(&mut handle).await;
		},
	);
}
|
||||
|
||||
async fn send_network_dispute_request(
|
||||
req_tx: &mut async_channel::Sender<sc_network::config::IncomingRequest>,
|
||||
peer: PeerId,
|
||||
message: DisputeRequest,
|
||||
) -> oneshot::Receiver<sc_network::config::OutgoingResponse> {
|
||||
let (pending_response, rx_response) = oneshot::channel();
|
||||
let req =
|
||||
sc_network::config::IncomingRequest { peer, payload: message.encode(), pending_response };
|
||||
req_tx.send(req).await.unwrap();
|
||||
rx_response
|
||||
}
|
||||
|
||||
/// Send request and handle its reactions.
|
||||
///
|
||||
/// Passed in function will be called while votes are still being imported.
|
||||
async fn nested_network_dispute_request<'a, F, O>(
|
||||
handle: &'a mut TestSubsystemContextHandle<DisputeDistributionMessage>,
|
||||
req_tx: &'a mut async_channel::Sender<sc_network::config::IncomingRequest>,
|
||||
peer: PeerId,
|
||||
message: DisputeRequest,
|
||||
import_result: ImportStatementsResult,
|
||||
need_session_info: bool,
|
||||
inner: F,
|
||||
) where
|
||||
F: FnOnce(
|
||||
&'a mut TestSubsystemContextHandle<DisputeDistributionMessage>,
|
||||
&'a mut async_channel::Sender<sc_network::config::IncomingRequest>,
|
||||
DisputeRequest,
|
||||
) -> O
|
||||
+ 'a,
|
||||
O: Future<Output = ()> + 'a,
|
||||
{
|
||||
let rx_response = send_network_dispute_request(req_tx, peer, message.clone().into()).await;
|
||||
|
||||
if need_session_info {
|
||||
// Subsystem might need `SessionInfo` for determining indices:
|
||||
match handle.recv().await {
|
||||
AllMessages::RuntimeApi(RuntimeApiMessage::Request(
|
||||
_,
|
||||
RuntimeApiRequest::SessionInfo(_, tx),
|
||||
)) => {
|
||||
tx.send(Ok(Some(MOCK_SESSION_INFO.clone())))
|
||||
.expect("Receiver should stay alive.");
|
||||
},
|
||||
unexpected => panic!("Unexpected message {:?}", unexpected),
|
||||
}
|
||||
match handle.recv().await {
|
||||
AllMessages::RuntimeApi(RuntimeApiMessage::Request(
|
||||
_,
|
||||
RuntimeApiRequest::SessionExecutorParams(_, tx),
|
||||
)) => {
|
||||
tx.send(Ok(Some(ExecutorParams::default())))
|
||||
.expect("Receiver should stay alive.");
|
||||
},
|
||||
unexpected => panic!("Unexpected message {:?}", unexpected),
|
||||
}
|
||||
|
||||
match handle.recv().await {
|
||||
AllMessages::RuntimeApi(RuntimeApiMessage::Request(
|
||||
_,
|
||||
RuntimeApiRequest::NodeFeatures(_, si_tx),
|
||||
)) => {
|
||||
si_tx.send(Ok(NodeFeatures::EMPTY)).unwrap();
|
||||
},
|
||||
unexpected => panic!("Unexpected message {:?}", unexpected),
|
||||
}
|
||||
}
|
||||
|
||||
// Import should get initiated:
|
||||
let pending_confirmation = assert_matches!(
|
||||
handle.recv().await,
|
||||
AllMessages::DisputeCoordinator(
|
||||
DisputeCoordinatorMessage::ImportStatements {
|
||||
candidate_receipt,
|
||||
session,
|
||||
statements,
|
||||
pending_confirmation: Some(pending_confirmation),
|
||||
}
|
||||
) => {
|
||||
let candidate_hash = candidate_receipt.hash();
|
||||
assert_eq!(session, MOCK_SESSION_INDEX);
|
||||
assert_eq!(candidate_hash, message.0.candidate_receipt.hash());
|
||||
assert_eq!(statements.len(), 2);
|
||||
pending_confirmation
|
||||
}
|
||||
);
|
||||
|
||||
// Do the inner thing:
|
||||
inner(handle, req_tx, message).await;
|
||||
|
||||
// Confirm import
|
||||
pending_confirmation.send(import_result).unwrap();
|
||||
|
||||
assert_matches!(
|
||||
rx_response.await,
|
||||
Ok(resp) => {
|
||||
let sc_network::config::OutgoingResponse {
|
||||
result,
|
||||
reputation_changes,
|
||||
sent_feedback,
|
||||
} = resp;
|
||||
|
||||
match import_result {
|
||||
ImportStatementsResult::ValidImport => {
|
||||
let result = result.unwrap();
|
||||
let decoded =
|
||||
<DisputeResponse as Decode>::decode(&mut result.as_slice()).unwrap();
|
||||
|
||||
assert!(decoded == DisputeResponse::Confirmed);
|
||||
if let Some(sent_feedback) = sent_feedback {
|
||||
sent_feedback.send(()).unwrap();
|
||||
}
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
"Valid import happened."
|
||||
);
|
||||
|
||||
}
|
||||
ImportStatementsResult::InvalidImport => {
|
||||
// Peer should get punished:
|
||||
assert_eq!(reputation_changes.len(), 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
async fn conclude(handle: &mut TestSubsystemContextHandle<DisputeDistributionMessage>) {
|
||||
// No more messages should be in the queue:
|
||||
poll_fn(|ctx| {
|
||||
let fut = handle.recv();
|
||||
pin_mut!(fut);
|
||||
// No requests should be initiated, as there is no longer any dispute active:
|
||||
assert_matches!(fut.poll(ctx), Poll::Pending, "No requests expected");
|
||||
Poll::Ready(())
|
||||
})
|
||||
.await;
|
||||
|
||||
handle.send(FromOrchestra::Signal(OverseerSignal::Conclude)).await;
|
||||
}
|
||||
|
||||
/// Pass a `new_session` if you expect the subsystem to retrieve `SessionInfo` when given the
|
||||
/// `session_index`.
|
||||
async fn activate_leaf(
|
||||
handle: &mut TestSubsystemContextHandle<DisputeDistributionMessage>,
|
||||
activate: Hash,
|
||||
deactivate: Option<Hash>,
|
||||
session_index: SessionIndex,
|
||||
// New session if we expect the subsystem to request it.
|
||||
new_session: Option<SessionInfo>,
|
||||
// Currently active disputes to send to the subsystem.
|
||||
active_disputes: BTreeMap<(SessionIndex, CandidateHash), DisputeStatus>,
|
||||
) {
|
||||
handle
|
||||
.send(FromOrchestra::Signal(OverseerSignal::ActiveLeaves(ActiveLeavesUpdate {
|
||||
activated: Some(new_leaf(activate, 10)),
|
||||
deactivated: deactivate.into_iter().collect(),
|
||||
})))
|
||||
.await;
|
||||
assert_matches!(
|
||||
handle.recv().await,
|
||||
AllMessages::RuntimeApi(RuntimeApiMessage::Request(
|
||||
h,
|
||||
RuntimeApiRequest::SessionIndexForChild(tx)
|
||||
)) => {
|
||||
assert_eq!(h, activate);
|
||||
tx.send(Ok(session_index)).expect("Receiver should stay alive.");
|
||||
}
|
||||
);
|
||||
|
||||
if let Some(session_info) = new_session {
|
||||
assert_matches!(
|
||||
handle.recv().await,
|
||||
AllMessages::RuntimeApi(RuntimeApiMessage::Request(
|
||||
h,
|
||||
RuntimeApiRequest::SessionInfo(session_idx, tx)
|
||||
)) => {
|
||||
assert_eq!(h, activate);
|
||||
assert_eq!(session_index, session_idx);
|
||||
tx.send(Ok(Some(session_info))).expect("Receiver should stay alive.");
|
||||
}
|
||||
);
|
||||
assert_matches!(
|
||||
handle.recv().await,
|
||||
AllMessages::RuntimeApi(RuntimeApiMessage::Request(
|
||||
h,
|
||||
RuntimeApiRequest::SessionExecutorParams(session_idx, tx)
|
||||
)) => {
|
||||
assert_eq!(h, activate);
|
||||
assert_eq!(session_index, session_idx);
|
||||
tx.send(Ok(Some(ExecutorParams::default()))).expect("Receiver should stay alive.");
|
||||
}
|
||||
);
|
||||
assert_matches!(
|
||||
handle.recv().await,
|
||||
AllMessages::RuntimeApi(
|
||||
RuntimeApiMessage::Request(_, RuntimeApiRequest::NodeFeatures(_, si_tx), )
|
||||
) => {
|
||||
si_tx.send(Ok(NodeFeatures::EMPTY)).unwrap();
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
assert_matches!(
|
||||
handle.recv().await,
|
||||
AllMessages::DisputeCoordinator(DisputeCoordinatorMessage::ActiveDisputes(tx)) => {
|
||||
tx.send(active_disputes).expect("Receiver should stay alive.");
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
/// Check whether sent network bridge requests match the expectation.
|
||||
async fn check_sent_requests(
|
||||
handle: &mut TestSubsystemContextHandle<DisputeDistributionMessage>,
|
||||
expected_receivers: HashSet<AuthorityDiscoveryId>,
|
||||
confirm_receive: bool,
|
||||
) {
|
||||
let expected_receivers: HashSet<_> =
|
||||
expected_receivers.into_iter().map(Recipient::Authority).collect();
|
||||
|
||||
// Sends to concerned validators:
|
||||
assert_matches!(
|
||||
handle.recv().await,
|
||||
AllMessages::NetworkBridgeTx(
|
||||
NetworkBridgeTxMessage::SendRequests(reqs, IfDisconnected::ImmediateError)
|
||||
) => {
|
||||
let reqs: Vec<_> = reqs.into_iter().map(|r|
|
||||
assert_matches!(
|
||||
r,
|
||||
Requests::DisputeSendingV1(req) => {req}
|
||||
)
|
||||
)
|
||||
.collect();
|
||||
|
||||
let receivers_raw: Vec<_> = reqs.iter().map(|r| r.peer.clone()).collect();
|
||||
let receivers: HashSet<_> = receivers_raw.clone().clone().into_iter().collect();
|
||||
assert_eq!(receivers_raw.len(), receivers.len(), "No duplicates are expected.");
|
||||
assert_eq!(receivers.len(), expected_receivers.len());
|
||||
assert_eq!(receivers, expected_receivers);
|
||||
if confirm_receive {
|
||||
for req in reqs {
|
||||
req.pending_response.send(
|
||||
Ok((DisputeResponse::Confirmed.encode(), ProtocolName::from("")))
|
||||
)
|
||||
.expect("Subsystem should be listening for a response.");
|
||||
}
|
||||
}
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
/// Initialize subsystem and return request sender needed for sending incoming requests to the
|
||||
/// subsystem.
|
||||
async fn handle_subsystem_startup(
|
||||
handle: &mut TestSubsystemContextHandle<DisputeDistributionMessage>,
|
||||
ongoing_dispute: Option<CandidateHash>,
|
||||
) -> Hash {
|
||||
let relay_parent = Hash::random();
|
||||
activate_leaf(
|
||||
handle,
|
||||
relay_parent,
|
||||
None,
|
||||
MOCK_SESSION_INDEX,
|
||||
Some(MOCK_SESSION_INFO.clone()),
|
||||
ongoing_dispute
|
||||
.into_iter()
|
||||
.map(|c| ((MOCK_SESSION_INDEX, c), DisputeStatus::Active))
|
||||
.collect(),
|
||||
)
|
||||
.await;
|
||||
relay_parent
|
||||
}
|
||||
|
||||
/// Launch subsystem and provided test function
|
||||
///
|
||||
/// which simulates the overseer.
|
||||
fn test_harness<TestFn, Fut>(test: TestFn)
|
||||
where
|
||||
TestFn: FnOnce(
|
||||
TestSubsystemContextHandle<DisputeDistributionMessage>,
|
||||
RequestResponseConfig,
|
||||
) -> Fut,
|
||||
Fut: Future<Output = ()>,
|
||||
{
|
||||
sp_tracing::try_init_simple();
|
||||
let keystore = make_ferdie_keystore();
|
||||
|
||||
let genesis_hash = Hash::repeat_byte(0xff);
|
||||
let req_protocol_names = ReqProtocolNames::new(&genesis_hash, None);
|
||||
let (req_receiver, req_cfg) = IncomingRequest::get_config_receiver::<
|
||||
Block,
|
||||
sc_network::NetworkWorker<Block, Hash>,
|
||||
>(&req_protocol_names);
|
||||
let subsystem = DisputeDistributionSubsystem::new(
|
||||
keystore,
|
||||
req_receiver,
|
||||
MOCK_AUTHORITY_DISCOVERY.clone(),
|
||||
Metrics::new_dummy(),
|
||||
);
|
||||
|
||||
let subsystem = |ctx| async {
|
||||
match subsystem.run(ctx).await {
|
||||
Ok(()) => {},
|
||||
Err(fatal) => {
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
?fatal,
|
||||
"Dispute distribution exited with fatal error."
|
||||
);
|
||||
},
|
||||
}
|
||||
};
|
||||
subsystem_test_harness(|handle| test(handle, req_cfg), subsystem);
|
||||
}
|
||||
@@ -0,0 +1,57 @@
|
||||
[package]
|
||||
name = "pezkuwi-gossip-support"
|
||||
version = "7.0.0"
|
||||
description = "Pezkuwi Gossip Support subsystem. Responsible for keeping track of session changes and issuing a connection request to the relevant validators on every new session."
|
||||
authors.workspace = true
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
homepage.workspace = true
|
||||
repository.workspace = true
|
||||
|
||||
[lints]
|
||||
workspace = true
|
||||
|
||||
[dependencies]
|
||||
sc-network = { workspace = true, default-features = true }
|
||||
sp-application-crypto = { workspace = true, default-features = true }
|
||||
sp-core = { workspace = true, default-features = true }
|
||||
sp-crypto-hashing = { workspace = true, default-features = true }
|
||||
sp-keystore = { workspace = true, default-features = true }
|
||||
|
||||
pezkuwi-node-network-protocol = { workspace = true, default-features = true }
|
||||
pezkuwi-node-subsystem = { workspace = true, default-features = true }
|
||||
pezkuwi-node-subsystem-util = { workspace = true, default-features = true }
|
||||
pezkuwi-primitives = { workspace = true, default-features = true }
|
||||
|
||||
futures = { workspace = true }
|
||||
futures-timer = { workspace = true }
|
||||
gum = { workspace = true, default-features = true }
|
||||
rand = { workspace = true }
|
||||
rand_chacha = { workspace = true }
|
||||
|
||||
[dev-dependencies]
|
||||
sp-authority-discovery = { workspace = true, default-features = true }
|
||||
sp-consensus-babe = { workspace = true, default-features = true }
|
||||
sp-keyring = { workspace = true, default-features = true }
|
||||
sp-tracing = { workspace = true, default-features = true }
|
||||
|
||||
pezkuwi-node-subsystem-test-helpers = { workspace = true }
|
||||
|
||||
assert_matches = { workspace = true }
|
||||
async-trait = { workspace = true }
|
||||
parking_lot = { workspace = true, default-features = true }
|
||||
quickcheck = { workspace = true, default-features = true }
|
||||
|
||||
[features]
|
||||
runtime-benchmarks = [
|
||||
"gum/runtime-benchmarks",
|
||||
"pezkuwi-node-network-protocol/runtime-benchmarks",
|
||||
"pezkuwi-node-subsystem-test-helpers/runtime-benchmarks",
|
||||
"pezkuwi-node-subsystem-util/runtime-benchmarks",
|
||||
"pezkuwi-node-subsystem/runtime-benchmarks",
|
||||
"pezkuwi-primitives/runtime-benchmarks",
|
||||
"sc-network/runtime-benchmarks",
|
||||
"sp-authority-discovery/runtime-benchmarks",
|
||||
"sp-consensus-babe/runtime-benchmarks",
|
||||
"sp-keyring/runtime-benchmarks",
|
||||
]
|
||||
@@ -0,0 +1,891 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! This subsystem is responsible for keeping track of session changes
|
||||
//! and issuing a connection request to the relevant validators
|
||||
//! on every new session.
|
||||
//!
|
||||
//! In addition to that, it creates a gossip overlay topology
|
||||
//! which limits the amount of messages sent and received
|
||||
//! to be an order of sqrt of the validators. Our neighbors
|
||||
//! in this graph will be forwarded to the network bridge with
|
||||
//! the `NetworkBridgeRxMessage::NewGossipTopology` message.
|
||||
|
||||
use std::{
|
||||
collections::{HashMap, HashSet},
|
||||
fmt,
|
||||
time::{Duration, Instant},
|
||||
u32,
|
||||
};
|
||||
|
||||
use futures::{channel::oneshot, select, FutureExt as _};
|
||||
use futures_timer::Delay;
|
||||
use rand::{Rng, SeedableRng};
|
||||
use rand_chacha::ChaCha20Rng;
|
||||
|
||||
use sc_network::{config::parse_addr, Multiaddr};
|
||||
use sp_application_crypto::{AppCrypto, ByteArray};
|
||||
use sp_keystore::{Keystore, KeystorePtr};
|
||||
|
||||
use pezkuwi_node_network_protocol::{
|
||||
authority_discovery::AuthorityDiscovery, peer_set::PeerSet, GossipSupportNetworkMessage,
|
||||
PeerId, ValidationProtocols,
|
||||
};
|
||||
use pezkuwi_node_subsystem::{
|
||||
messages::{
|
||||
ChainApiMessage, GossipSupportMessage, NetworkBridgeEvent, NetworkBridgeRxMessage,
|
||||
NetworkBridgeTxMessage, RuntimeApiMessage, RuntimeApiRequest,
|
||||
},
|
||||
overseer, ActiveLeavesUpdate, FromOrchestra, OverseerSignal, SpawnedSubsystem, SubsystemError,
|
||||
};
|
||||
use pezkuwi_node_subsystem_util as util;
|
||||
use pezkuwi_primitives::{AuthorityDiscoveryId, Hash, SessionIndex, SessionInfo, ValidatorIndex};
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
||||
mod metrics;
|
||||
|
||||
use metrics::Metrics;
|
||||
|
||||
const LOG_TARGET: &str = "teyrchain::gossip-support";
|
||||
// How much time should we wait to reissue a connection request
|
||||
// since the last authority discovery resolution failure.
|
||||
#[cfg(not(test))]
|
||||
const BACKOFF_DURATION: Duration = Duration::from_secs(5);
|
||||
|
||||
#[cfg(test)]
|
||||
const BACKOFF_DURATION: Duration = Duration::from_millis(500);
|
||||
|
||||
// The authorithy_discovery queries runs every ten minutes,
|
||||
// so it make sense to run a bit more often than that to
|
||||
// detect changes as often as we can, but not too often since
|
||||
// it won't help.
|
||||
#[cfg(not(test))]
|
||||
const TRY_RERESOLVE_AUTHORITIES: Duration = Duration::from_secs(5 * 60);
|
||||
|
||||
#[cfg(test)]
|
||||
const TRY_RERESOLVE_AUTHORITIES: Duration = Duration::from_secs(2);
|
||||
|
||||
/// Duration after which we consider low connectivity a problem.
|
||||
///
|
||||
/// Especially at startup low connectivity is expected (authority discovery cache needs to be
|
||||
/// populated). Authority discovery on Kusama takes around 8 minutes, so warning after 10 minutes
|
||||
/// should be fine:
|
||||
///
|
||||
/// https://github.com/paritytech/substrate/blob/fc49802f263529160635471c8a17888846035f5d/client/authority-discovery/src/lib.rs#L88
|
||||
const LOW_CONNECTIVITY_WARN_DELAY: Duration = Duration::from_secs(600);
|
||||
|
||||
/// If connectivity, in percent, is at or below this threshold, report it in the logs.
|
||||
const LOW_CONNECTIVITY_WARN_THRESHOLD: usize = 85;
|
||||
|
||||
/// The Gossip Support subsystem.
|
||||
pub struct GossipSupport<AD> {
|
||||
keystore: KeystorePtr,
|
||||
|
||||
last_session_index: Option<SessionIndex>,
|
||||
/// Whether we are currently an authority or not.
|
||||
is_authority_now: bool,
|
||||
/// The minimum known session we build the topology for.
|
||||
min_known_session: SessionIndex,
|
||||
// Some(timestamp) if we failed to resolve
|
||||
// at least a third of authorities the last time.
|
||||
// `None` otherwise.
|
||||
last_failure: Option<Instant>,
|
||||
|
||||
// Validators can restart during a session, so if they change
|
||||
// their PeerID, we will connect to them in the best case after
|
||||
// a session, so we need to try more often to resolved peers and
|
||||
// reconnect to them. The authorithy_discovery queries runs every ten
|
||||
// minutes, so we can't detect changes in the address more often
|
||||
// that that.
|
||||
last_connection_request: Option<Instant>,
|
||||
|
||||
/// First time we did not reach our connectivity threshold.
|
||||
///
|
||||
/// This is the time of the first failed attempt to connect to >2/3 of all validators in a
|
||||
/// potential sequence of failed attempts. It will be cleared once we reached >2/3
|
||||
/// connectivity.
|
||||
failure_start: Option<Instant>,
|
||||
|
||||
/// Successfully resolved connections
|
||||
///
|
||||
/// waiting for actual connection.
|
||||
resolved_authorities: HashMap<AuthorityDiscoveryId, HashSet<Multiaddr>>,
|
||||
|
||||
/// Actually connected authorities.
|
||||
connected_authorities: HashMap<AuthorityDiscoveryId, PeerId>,
|
||||
/// By `PeerId`.
|
||||
///
|
||||
/// Needed for efficient handling of disconnect events.
|
||||
connected_peers: HashMap<PeerId, HashSet<AuthorityDiscoveryId>>,
|
||||
/// Authority discovery service.
|
||||
authority_discovery: AD,
|
||||
|
||||
/// The oldest session we need to build a topology for because
|
||||
/// the finalized blocks are from a session we haven't built a topology for.
|
||||
finalized_needed_session: Option<u32>,
|
||||
/// Subsystem metrics.
|
||||
metrics: Metrics,
|
||||
}
|
||||
|
||||
#[overseer::contextbounds(GossipSupport, prefix = self::overseer)]
|
||||
impl<AD> GossipSupport<AD>
|
||||
where
|
||||
AD: AuthorityDiscovery,
|
||||
{
|
||||
/// Create a new instance of the [`GossipSupport`] subsystem.
|
||||
pub fn new(keystore: KeystorePtr, authority_discovery: AD, metrics: Metrics) -> Self {
|
||||
// Initialize metrics to `0`.
|
||||
metrics.on_is_not_authority();
|
||||
metrics.on_is_not_teyrchain_validator();
|
||||
|
||||
Self {
|
||||
keystore,
|
||||
last_session_index: None,
|
||||
last_failure: None,
|
||||
last_connection_request: None,
|
||||
failure_start: None,
|
||||
resolved_authorities: HashMap::new(),
|
||||
connected_authorities: HashMap::new(),
|
||||
connected_peers: HashMap::new(),
|
||||
min_known_session: u32::MAX,
|
||||
authority_discovery,
|
||||
finalized_needed_session: None,
|
||||
is_authority_now: false,
|
||||
metrics,
|
||||
}
|
||||
}
|
||||
|
||||
async fn run<Context>(mut self, mut ctx: Context) -> Self {
|
||||
fn get_connectivity_check_delay() -> Delay {
|
||||
Delay::new(LOW_CONNECTIVITY_WARN_DELAY)
|
||||
}
|
||||
let mut next_connectivity_check = get_connectivity_check_delay().fuse();
|
||||
loop {
|
||||
let message = select!(
|
||||
_ = next_connectivity_check => {
|
||||
self.check_connectivity();
|
||||
next_connectivity_check = get_connectivity_check_delay().fuse();
|
||||
continue
|
||||
}
|
||||
result = ctx.recv().fuse() =>
|
||||
match result {
|
||||
Ok(message) => message,
|
||||
Err(e) => {
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
err = ?e,
|
||||
"Failed to receive a message from Overseer, exiting",
|
||||
);
|
||||
return self
|
||||
},
|
||||
}
|
||||
);
|
||||
match message {
|
||||
FromOrchestra::Communication {
|
||||
msg: GossipSupportMessage::NetworkBridgeUpdate(ev),
|
||||
} => self.handle_connect_disconnect(ev),
|
||||
FromOrchestra::Signal(OverseerSignal::ActiveLeaves(ActiveLeavesUpdate {
|
||||
activated,
|
||||
..
|
||||
})) => {
|
||||
gum::trace!(target: LOG_TARGET, "active leaves signal");
|
||||
|
||||
let leaves = activated.into_iter().map(|a| a.hash);
|
||||
if let Err(e) = self.handle_active_leaves(ctx.sender(), leaves).await {
|
||||
gum::debug!(target: LOG_TARGET, error = ?e);
|
||||
}
|
||||
},
|
||||
FromOrchestra::Signal(OverseerSignal::BlockFinalized(_hash, _number)) =>
|
||||
if let Some(session_index) = self.last_session_index {
|
||||
if let Err(e) = self
|
||||
.build_topology_for_last_finalized_if_needed(
|
||||
ctx.sender(),
|
||||
session_index,
|
||||
)
|
||||
.await
|
||||
{
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
"Failed to build topology for last finalized session: {:?}",
|
||||
e
|
||||
);
|
||||
}
|
||||
},
|
||||
FromOrchestra::Signal(OverseerSignal::Conclude) => return self,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// 1. Determine if the current session index has changed.
|
||||
/// 2. If it has, determine relevant validators and issue a connection request.
|
||||
async fn handle_active_leaves(
|
||||
&mut self,
|
||||
sender: &mut impl overseer::GossipSupportSenderTrait,
|
||||
leaves: impl Iterator<Item = Hash>,
|
||||
) -> Result<(), util::Error> {
|
||||
for leaf in leaves {
|
||||
let current_index = util::request_session_index_for_child(leaf, sender).await.await??;
|
||||
let since_failure = self.last_failure.map(|i| i.elapsed()).unwrap_or_default();
|
||||
let since_last_reconnect =
|
||||
self.last_connection_request.map(|i| i.elapsed()).unwrap_or_default();
|
||||
|
||||
let force_request = since_failure >= BACKOFF_DURATION;
|
||||
let re_resolve_authorities = since_last_reconnect >= TRY_RERESOLVE_AUTHORITIES;
|
||||
let leaf_session = Some((current_index, leaf));
|
||||
let maybe_new_session = match self.last_session_index {
|
||||
Some(i) if current_index <= i => None,
|
||||
_ => leaf_session,
|
||||
};
|
||||
|
||||
let maybe_issue_connection = if force_request || re_resolve_authorities {
|
||||
leaf_session
|
||||
} else {
|
||||
maybe_new_session
|
||||
};
|
||||
|
||||
if let Some((session_index, relay_parent)) = maybe_issue_connection {
|
||||
let session_info =
|
||||
util::request_session_info(leaf, session_index, sender).await.await??;
|
||||
|
||||
let session_info = match session_info {
|
||||
Some(s) => s,
|
||||
None => {
|
||||
gum::warn!(
|
||||
relay_parent = ?leaf,
|
||||
session_index = self.last_session_index,
|
||||
"Failed to get session info.",
|
||||
);
|
||||
|
||||
continue;
|
||||
},
|
||||
};
|
||||
|
||||
// Note: we only update `last_session_index` once we've
|
||||
// successfully gotten the `SessionInfo`.
|
||||
let is_new_session = maybe_new_session.is_some();
|
||||
if is_new_session {
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
%session_index,
|
||||
"New session detected",
|
||||
);
|
||||
self.last_session_index = Some(session_index);
|
||||
self.is_authority_now =
|
||||
ensure_i_am_an_authority(&self.keystore, &session_info.discovery_keys)
|
||||
.is_ok();
|
||||
}
|
||||
|
||||
// Connect to authorities from the past/present/future.
|
||||
//
|
||||
// This is maybe not the right place for this logic to live,
|
||||
// but at the moment we're limited by the network bridge's ability
|
||||
// to handle connection requests (it only allows one, globally).
|
||||
//
|
||||
// Certain network protocols - mostly req/res, but some gossip,
|
||||
// will require being connected to past/future validators as well
|
||||
// as current. That is, the old authority sets are not made obsolete
|
||||
// by virtue of a new session being entered. Therefore we maintain
|
||||
// connections to a much broader set of validators.
|
||||
{
|
||||
let mut connections = authorities_past_present_future(sender, leaf).await?;
|
||||
self.last_connection_request = Some(Instant::now());
|
||||
// Remove all of our locally controlled validator indices so we don't connect to
|
||||
// ourself.
|
||||
let connections =
|
||||
if remove_all_controlled(&self.keystore, &mut connections) != 0 {
|
||||
connections
|
||||
} else {
|
||||
// If we control none of them, issue an empty connection request
|
||||
// to clean up all connections.
|
||||
Vec::new()
|
||||
};
|
||||
|
||||
if force_request || is_new_session {
|
||||
self.issue_connection_request(sender, connections).await;
|
||||
} else if re_resolve_authorities {
|
||||
self.issue_connection_request_to_changed(sender, connections).await;
|
||||
}
|
||||
}
|
||||
|
||||
if is_new_session {
|
||||
if let Err(err) = self
|
||||
.build_topology_for_last_finalized_if_needed(sender, session_index)
|
||||
.await
|
||||
{
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
"Failed to build topology for last finalized session: {:?}",
|
||||
err
|
||||
);
|
||||
}
|
||||
|
||||
// Gossip topology is only relevant for authorities in the current session.
|
||||
let our_index = self.get_key_index_and_update_metrics(&session_info)?;
|
||||
update_gossip_topology(
|
||||
sender,
|
||||
our_index,
|
||||
session_info.discovery_keys.clone(),
|
||||
relay_parent,
|
||||
session_index,
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
// authority_discovery is just a cache so let's try every time we try to re-connect
|
||||
// if new authorities are present.
|
||||
self.update_authority_ids(sender, session_info.discovery_keys).await;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Build the gossip topology for the session of the last finalized block if we haven't built
|
||||
/// one.
|
||||
///
|
||||
/// This is needed to ensure that if finality is lagging accross session boundary and a restart
|
||||
/// happens after the new session started, we built a topology from the session we haven't
|
||||
/// finalized the blocks yet.
|
||||
/// Once finalized blocks start to be from a session we've built a topology for, we can stop.
|
||||
async fn build_topology_for_last_finalized_if_needed(
|
||||
&mut self,
|
||||
sender: &mut impl overseer::GossipSupportSenderTrait,
|
||||
current_session_index: u32,
|
||||
) -> Result<(), util::Error> {
|
||||
self.min_known_session = self.min_known_session.min(current_session_index);
|
||||
|
||||
if self
|
||||
.finalized_needed_session
|
||||
.map(|oldest_needed_session| oldest_needed_session < self.min_known_session)
|
||||
.unwrap_or(true)
|
||||
{
|
||||
let (tx, rx) = oneshot::channel();
|
||||
sender.send_message(ChainApiMessage::FinalizedBlockNumber(tx)).await;
|
||||
let finalized_block_number = match rx.await? {
|
||||
Ok(block_number) => block_number,
|
||||
_ => return Ok(()),
|
||||
};
|
||||
|
||||
let (tx, rx) = oneshot::channel();
|
||||
sender
|
||||
.send_message(ChainApiMessage::FinalizedBlockHash(finalized_block_number, tx))
|
||||
.await;
|
||||
|
||||
let finalized_block_hash = match rx.await? {
|
||||
Ok(Some(block_hash)) => block_hash,
|
||||
_ => return Ok(()),
|
||||
};
|
||||
|
||||
let finalized_session_index =
|
||||
util::request_session_index_for_child(finalized_block_hash, sender)
|
||||
.await
|
||||
.await??;
|
||||
|
||||
if finalized_session_index < self.min_known_session &&
|
||||
Some(finalized_session_index) != self.finalized_needed_session
|
||||
{
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
?finalized_block_hash,
|
||||
?finalized_block_number,
|
||||
?finalized_session_index,
|
||||
"Building topology for finalized block session",
|
||||
);
|
||||
|
||||
let finalized_session_info = match util::request_session_info(
|
||||
finalized_block_hash,
|
||||
finalized_session_index,
|
||||
sender,
|
||||
)
|
||||
.await
|
||||
.await??
|
||||
{
|
||||
Some(session_info) => session_info,
|
||||
_ => return Ok(()),
|
||||
};
|
||||
|
||||
let our_index = self.get_key_index_and_update_metrics(&finalized_session_info)?;
|
||||
update_gossip_topology(
|
||||
sender,
|
||||
our_index,
|
||||
finalized_session_info.discovery_keys.clone(),
|
||||
finalized_block_hash,
|
||||
finalized_session_index,
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
self.finalized_needed_session = Some(finalized_session_index);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Checks if the node is an authority and also updates `pezkuwi_node_is_authority` and
|
||||
// `pezkuwi_node_is_teyrchain_validator` metrics accordingly.
|
||||
// On success, returns the index of our keys in `session_info.discovery_keys`.
|
||||
fn get_key_index_and_update_metrics(
|
||||
&mut self,
|
||||
session_info: &SessionInfo,
|
||||
) -> Result<usize, util::Error> {
|
||||
let authority_check_result =
|
||||
ensure_i_am_an_authority(&self.keystore, &session_info.discovery_keys);
|
||||
|
||||
match authority_check_result.as_ref() {
|
||||
Ok(index) => {
|
||||
gum::trace!(target: LOG_TARGET, "We are now an authority",);
|
||||
self.metrics.on_is_authority();
|
||||
|
||||
// The subset of authorities participating in teyrchain consensus.
|
||||
let teyrchain_validators_this_session = session_info.validators.len();
|
||||
|
||||
// First `maxValidators` entries are the teyrchain validators. We'll check
|
||||
// if our index is in this set to avoid searching for the keys.
|
||||
// https://github.com/paritytech/polkadot/blob/a52dca2be7840b23c19c153cf7e110b1e3e475f8/runtime/parachains/src/configuration.rs#L148
|
||||
if *index < teyrchain_validators_this_session {
|
||||
gum::trace!(target: LOG_TARGET, "We are now a teyrchain validator",);
|
||||
self.metrics.on_is_teyrchain_validator();
|
||||
} else {
|
||||
gum::trace!(target: LOG_TARGET, "We are no longer a teyrchain validator",);
|
||||
self.metrics.on_is_not_teyrchain_validator();
|
||||
}
|
||||
},
|
||||
Err(util::Error::NotAValidator) => {
|
||||
gum::trace!(target: LOG_TARGET, "We are no longer an authority",);
|
||||
self.metrics.on_is_not_authority();
|
||||
self.metrics.on_is_not_teyrchain_validator();
|
||||
},
|
||||
// Don't update on runtime errors.
|
||||
Err(_) => {},
|
||||
};
|
||||
|
||||
authority_check_result
|
||||
}
|
||||
|
||||
async fn resolve_authorities(
|
||||
&mut self,
|
||||
authorities: Vec<AuthorityDiscoveryId>,
|
||||
) -> (Vec<HashSet<Multiaddr>>, HashMap<AuthorityDiscoveryId, HashSet<Multiaddr>>, usize) {
|
||||
let mut validator_addrs = Vec::with_capacity(authorities.len());
|
||||
let mut resolved = HashMap::with_capacity(authorities.len());
|
||||
let mut failures = 0;
|
||||
|
||||
for authority in authorities {
|
||||
if let Some(addrs) =
|
||||
self.authority_discovery.get_addresses_by_authority_id(authority.clone()).await
|
||||
{
|
||||
validator_addrs.push(addrs.clone());
|
||||
resolved.insert(authority, addrs);
|
||||
} else {
|
||||
failures += 1;
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
"Couldn't resolve addresses of authority: {:?}",
|
||||
authority
|
||||
);
|
||||
}
|
||||
}
|
||||
(validator_addrs, resolved, failures)
|
||||
}
|
||||
|
||||
async fn issue_connection_request_to_changed<Sender>(
|
||||
&mut self,
|
||||
sender: &mut Sender,
|
||||
authorities: Vec<AuthorityDiscoveryId>,
|
||||
) where
|
||||
Sender: overseer::GossipSupportSenderTrait,
|
||||
{
|
||||
let (_, resolved, _) = self.resolve_authorities(authorities).await;
|
||||
|
||||
let mut changed = Vec::new();
|
||||
|
||||
for (authority, new_addresses) in &resolved {
|
||||
let new_peer_ids = new_addresses
|
||||
.iter()
|
||||
.flat_map(|addr| parse_addr(addr.clone()).ok().map(|(p, _)| p))
|
||||
.collect::<HashSet<_>>();
|
||||
match self.resolved_authorities.get(authority) {
|
||||
Some(old_addresses) => {
|
||||
let old_peer_ids = old_addresses
|
||||
.iter()
|
||||
.flat_map(|addr| parse_addr(addr.clone()).ok().map(|(p, _)| p))
|
||||
.collect::<HashSet<_>>();
|
||||
if !old_peer_ids.is_superset(&new_peer_ids) {
|
||||
changed.push(new_addresses.clone());
|
||||
}
|
||||
},
|
||||
None => changed.push(new_addresses.clone()),
|
||||
}
|
||||
}
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
num_changed = ?changed.len(),
|
||||
?changed,
|
||||
"Issuing a connection request to changed validators"
|
||||
);
|
||||
if !changed.is_empty() {
|
||||
self.resolved_authorities = resolved;
|
||||
|
||||
sender
|
||||
.send_message(NetworkBridgeTxMessage::AddToResolvedValidators {
|
||||
validator_addrs: changed,
|
||||
peer_set: PeerSet::Validation,
|
||||
})
|
||||
.await;
|
||||
}
|
||||
}
|
||||
|
||||
async fn issue_connection_request<Sender>(
|
||||
&mut self,
|
||||
sender: &mut Sender,
|
||||
authorities: Vec<AuthorityDiscoveryId>,
|
||||
) where
|
||||
Sender: overseer::GossipSupportSenderTrait,
|
||||
{
|
||||
let num = authorities.len();
|
||||
|
||||
let (validator_addrs, resolved, failures) = self.resolve_authorities(authorities).await;
|
||||
|
||||
self.resolved_authorities = resolved;
|
||||
gum::debug!(target: LOG_TARGET, %num, "Issuing a connection request");
|
||||
|
||||
sender
|
||||
.send_message(NetworkBridgeTxMessage::ConnectToResolvedValidators {
|
||||
validator_addrs,
|
||||
peer_set: PeerSet::Validation,
|
||||
})
|
||||
.await;
|
||||
|
||||
// issue another request for the same session
|
||||
// if at least a third of the authorities were not resolved.
|
||||
if num != 0 && 3 * failures >= num {
|
||||
let timestamp = Instant::now();
|
||||
match self.failure_start {
|
||||
None => self.failure_start = Some(timestamp),
|
||||
Some(first) if first.elapsed() >= LOW_CONNECTIVITY_WARN_DELAY => {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
connected = ?(num - failures),
|
||||
target = ?num,
|
||||
"Low connectivity - authority lookup failed for too many validators."
|
||||
);
|
||||
},
|
||||
Some(_) => {
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
connected = ?(num - failures),
|
||||
target = ?num,
|
||||
"Low connectivity (due to authority lookup failures) - expected on startup."
|
||||
);
|
||||
},
|
||||
}
|
||||
self.last_failure = Some(timestamp);
|
||||
} else {
|
||||
self.last_failure = None;
|
||||
self.failure_start = None;
|
||||
};
|
||||
}
|
||||
|
||||
async fn update_authority_ids<Sender>(
|
||||
&mut self,
|
||||
sender: &mut Sender,
|
||||
authorities: Vec<AuthorityDiscoveryId>,
|
||||
) where
|
||||
Sender: overseer::GossipSupportSenderTrait,
|
||||
{
|
||||
let mut authority_ids: HashMap<PeerId, HashSet<AuthorityDiscoveryId>> = HashMap::new();
|
||||
for authority in authorities {
|
||||
let peer_ids = self
|
||||
.authority_discovery
|
||||
.get_addresses_by_authority_id(authority.clone())
|
||||
.await
|
||||
.into_iter()
|
||||
.flat_map(|list| list.into_iter())
|
||||
.flat_map(|addr| parse_addr(addr).ok().map(|(p, _)| p))
|
||||
.collect::<HashSet<_>>();
|
||||
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
?peer_ids,
|
||||
?authority,
|
||||
"Resolved to peer ids"
|
||||
);
|
||||
|
||||
for p in peer_ids {
|
||||
authority_ids.entry(p).or_default().insert(authority.clone());
|
||||
}
|
||||
}
|
||||
|
||||
// peer was authority and now isn't
|
||||
for (peer_id, current) in self.connected_peers.iter_mut() {
|
||||
// empty -> nonempty is handled in the next loop
|
||||
if !current.is_empty() && !authority_ids.contains_key(peer_id) {
|
||||
sender
|
||||
.send_message(NetworkBridgeRxMessage::UpdatedAuthorityIds {
|
||||
peer_id: *peer_id,
|
||||
authority_ids: HashSet::new(),
|
||||
})
|
||||
.await;
|
||||
|
||||
for a in current.drain() {
|
||||
self.connected_authorities.remove(&a);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// peer has new authority set.
|
||||
for (peer_id, new) in authority_ids {
|
||||
// If the peer is connected _and_ the authority IDs have changed.
|
||||
if let Some(prev) = self.connected_peers.get(&peer_id).filter(|x| x != &&new) {
|
||||
sender
|
||||
.send_message(NetworkBridgeRxMessage::UpdatedAuthorityIds {
|
||||
peer_id,
|
||||
authority_ids: new.clone(),
|
||||
})
|
||||
.await;
|
||||
|
||||
prev.iter().for_each(|a| {
|
||||
self.connected_authorities.remove(a);
|
||||
});
|
||||
new.iter().for_each(|a| {
|
||||
self.connected_authorities.insert(a.clone(), peer_id);
|
||||
});
|
||||
|
||||
self.connected_peers.insert(peer_id, new);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn handle_connect_disconnect(&mut self, ev: NetworkBridgeEvent<GossipSupportNetworkMessage>) {
|
||||
match ev {
|
||||
NetworkBridgeEvent::PeerConnected(peer_id, _, _, o_authority) => {
|
||||
if let Some(authority_ids) = o_authority {
|
||||
authority_ids.iter().for_each(|a| {
|
||||
self.connected_authorities.insert(a.clone(), peer_id);
|
||||
});
|
||||
self.connected_peers.insert(peer_id, authority_ids);
|
||||
} else {
|
||||
self.connected_peers.insert(peer_id, HashSet::new());
|
||||
}
|
||||
},
|
||||
NetworkBridgeEvent::PeerDisconnected(peer_id) => {
|
||||
if let Some(authority_ids) = self.connected_peers.remove(&peer_id) {
|
||||
authority_ids.into_iter().for_each(|a| {
|
||||
self.connected_authorities.remove(&a);
|
||||
});
|
||||
}
|
||||
},
|
||||
NetworkBridgeEvent::UpdatedAuthorityIds(_, _) => {
|
||||
// The `gossip-support` subsystem itself issues these messages.
|
||||
},
|
||||
NetworkBridgeEvent::OurViewChange(_) => {},
|
||||
NetworkBridgeEvent::PeerViewChange(_, _) => {},
|
||||
NetworkBridgeEvent::NewGossipTopology { .. } => {},
|
||||
NetworkBridgeEvent::PeerMessage(_, message) => {
|
||||
// match void -> LLVM unreachable
|
||||
match message {
|
||||
ValidationProtocols::V3(m) => match m {},
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Check connectivity and report on it in logs.
|
||||
fn check_connectivity(&mut self) {
|
||||
let absolute_connected = self.connected_authorities.len();
|
||||
let absolute_resolved = self.resolved_authorities.len();
|
||||
let connected_ratio =
|
||||
(100 * absolute_connected).checked_div(absolute_resolved).unwrap_or(100);
|
||||
let unconnected_authorities = self
|
||||
.resolved_authorities
|
||||
.iter()
|
||||
.filter(|(a, _)| !self.connected_authorities.contains_key(a));
|
||||
if connected_ratio <= LOW_CONNECTIVITY_WARN_THRESHOLD && self.is_authority_now {
|
||||
gum::error!(
|
||||
target: LOG_TARGET,
|
||||
session_index = self.last_session_index.as_ref().map(|s| *s).unwrap_or_default(),
|
||||
"Connectivity seems low, we are only connected to {connected_ratio}% of available validators (see debug logs for details), if this persists more than a session action needs to be taken"
|
||||
);
|
||||
}
|
||||
let pretty = PrettyAuthorities(unconnected_authorities);
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
?connected_ratio,
|
||||
?absolute_connected,
|
||||
?absolute_resolved,
|
||||
unconnected_authorities = %pretty,
|
||||
"Connectivity Report"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Get the authorities of the past, present, and future.
|
||||
async fn authorities_past_present_future(
|
||||
sender: &mut impl overseer::GossipSupportSenderTrait,
|
||||
relay_parent: Hash,
|
||||
) -> Result<Vec<AuthorityDiscoveryId>, util::Error> {
|
||||
let authorities = util::request_authorities(relay_parent, sender).await.await??;
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
authority_count = ?authorities.len(),
|
||||
"Determined past/present/future authorities",
|
||||
);
|
||||
Ok(authorities)
|
||||
}
|
||||
|
||||
/// Return an error if we're not a validator in the given set (do not have keys).
|
||||
/// Otherwise, returns the index of our keys in `authorities`.
|
||||
fn ensure_i_am_an_authority(
|
||||
keystore: &KeystorePtr,
|
||||
authorities: &[AuthorityDiscoveryId],
|
||||
) -> Result<usize, util::Error> {
|
||||
for (i, v) in authorities.iter().enumerate() {
|
||||
if Keystore::has_keys(&**keystore, &[(v.to_raw_vec(), AuthorityDiscoveryId::ID)]) {
|
||||
return Ok(i);
|
||||
}
|
||||
}
|
||||
Err(util::Error::NotAValidator)
|
||||
}
|
||||
|
||||
/// Filter out all controlled keys in the given set. Returns the number of keys removed.
|
||||
fn remove_all_controlled(
|
||||
keystore: &KeystorePtr,
|
||||
authorities: &mut Vec<AuthorityDiscoveryId>,
|
||||
) -> usize {
|
||||
let mut to_remove = Vec::new();
|
||||
for (i, v) in authorities.iter().enumerate() {
|
||||
if Keystore::has_keys(&**keystore, &[(v.to_raw_vec(), AuthorityDiscoveryId::ID)]) {
|
||||
to_remove.push(i);
|
||||
}
|
||||
}
|
||||
|
||||
for i in to_remove.iter().rev().copied() {
|
||||
authorities.remove(i);
|
||||
}
|
||||
|
||||
to_remove.len()
|
||||
}
|
||||
|
||||
/// We partition the list of all sorted `authorities` into `sqrt(len)` groups of `sqrt(len)` size
|
||||
/// and form a matrix where each validator is connected to all validators in its row and column.
|
||||
/// This is similar to `[web3]` research proposed topology, except for the groups are not teyrchain
|
||||
/// groups (because not all validators are teyrchain validators and the group size is small),
|
||||
/// but formed randomly via BABE randomness from two epochs ago.
|
||||
/// This limits the amount of gossip peers to 2 * `sqrt(len)` and ensures the diameter of 2.
|
||||
///
|
||||
/// [web3]: https://research.web3.foundation/en/latest/polkadot/networking/3-avail-valid.html#topology
|
||||
async fn update_gossip_topology(
|
||||
sender: &mut impl overseer::GossipSupportSenderTrait,
|
||||
our_index: usize,
|
||||
authorities: Vec<AuthorityDiscoveryId>,
|
||||
relay_parent: Hash,
|
||||
session_index: SessionIndex,
|
||||
) -> Result<(), util::Error> {
|
||||
// retrieve BABE randomness
|
||||
let random_seed = {
|
||||
let (tx, rx) = oneshot::channel();
|
||||
|
||||
// TODO https://github.com/paritytech/polkadot/issues/5316:
|
||||
// get the random seed from the `SessionInfo` instead.
|
||||
sender
|
||||
.send_message(RuntimeApiMessage::Request(
|
||||
relay_parent,
|
||||
RuntimeApiRequest::CurrentBabeEpoch(tx),
|
||||
))
|
||||
.await;
|
||||
|
||||
let randomness = rx.await??.randomness;
|
||||
let mut subject = [0u8; 40];
|
||||
subject[..8].copy_from_slice(b"gossipsu");
|
||||
subject[8..].copy_from_slice(&randomness);
|
||||
sp_crypto_hashing::blake2_256(&subject)
|
||||
};
|
||||
|
||||
// shuffle the validators and create the index mapping
|
||||
let (shuffled_indices, canonical_shuffling) = {
|
||||
let mut rng: ChaCha20Rng = SeedableRng::from_seed(random_seed);
|
||||
let len = authorities.len();
|
||||
let mut shuffled_indices = vec![0; len];
|
||||
let mut canonical_shuffling: Vec<_> = authorities
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(i, a)| (a.clone(), ValidatorIndex(i as _)))
|
||||
.collect();
|
||||
|
||||
fisher_yates_shuffle(&mut rng, &mut canonical_shuffling[..]);
|
||||
for (i, (_, validator_index)) in canonical_shuffling.iter().enumerate() {
|
||||
shuffled_indices[validator_index.0 as usize] = i;
|
||||
}
|
||||
|
||||
(shuffled_indices, canonical_shuffling)
|
||||
};
|
||||
|
||||
sender
|
||||
.send_message(NetworkBridgeRxMessage::NewGossipTopology {
|
||||
session: session_index,
|
||||
local_index: Some(ValidatorIndex(our_index as _)),
|
||||
canonical_shuffling,
|
||||
shuffled_indices,
|
||||
})
|
||||
.await;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Durstenfeld algorithm for the Fisher-Yates shuffle
|
||||
// https://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle#The_modern_algorithm
|
||||
fn fisher_yates_shuffle<T, R: Rng + ?Sized>(rng: &mut R, items: &mut [T]) {
|
||||
for i in (1..items.len()).rev() {
|
||||
// invariant: elements with index > i have been locked in place.
|
||||
let index = rng.gen_range(0u32..(i as u32 + 1));
|
||||
items.swap(i, index as usize);
|
||||
}
|
||||
}
|
||||
|
||||
#[overseer::subsystem(GossipSupport, error = SubsystemError, prefix = self::overseer)]
|
||||
impl<Context, AD> GossipSupport<AD>
|
||||
where
|
||||
AD: AuthorityDiscovery + Clone,
|
||||
{
|
||||
fn start(self, ctx: Context) -> SpawnedSubsystem {
|
||||
let future = self.run(ctx).map(|_| Ok(())).boxed();
|
||||
|
||||
SpawnedSubsystem { name: "gossip-support-subsystem", future }
|
||||
}
|
||||
}
|
||||
|
||||
/// Helper struct to get a nice rendering of unreachable authorities.
|
||||
struct PrettyAuthorities<I>(I);
|
||||
|
||||
impl<'a, I> fmt::Display for PrettyAuthorities<I>
|
||||
where
|
||||
I: Iterator<Item = (&'a AuthorityDiscoveryId, &'a HashSet<Multiaddr>)> + Clone,
|
||||
{
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
let mut authorities = self.0.clone().peekable();
|
||||
if authorities.peek().is_none() {
|
||||
write!(f, "None")?;
|
||||
} else {
|
||||
write!(f, "\n")?;
|
||||
}
|
||||
for (authority, addrs) in authorities {
|
||||
write!(f, "{}:\n", authority)?;
|
||||
for addr in addrs {
|
||||
write!(f, " {}\n", addr)?;
|
||||
}
|
||||
write!(f, "\n")?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,91 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use pezkuwi_node_subsystem_util::{
|
||||
metrics,
|
||||
metrics::{
|
||||
prometheus,
|
||||
prometheus::{Gauge, PrometheusError, Registry, U64},
|
||||
},
|
||||
};
|
||||
|
||||
/// Dispute Distribution metrics.
|
||||
#[derive(Clone, Default)]
|
||||
pub struct Metrics(Option<MetricsInner>);
|
||||
|
||||
#[derive(Clone)]
|
||||
struct MetricsInner {
|
||||
/// Tracks authority status for producing relay chain blocks.
|
||||
is_authority: Gauge<U64>,
|
||||
/// Tracks authority status for teyrchain approval checking.
|
||||
is_teyrchain_validator: Gauge<U64>,
|
||||
}
|
||||
|
||||
impl Metrics {
|
||||
/// Dummy constructor for testing.
|
||||
#[cfg(test)]
|
||||
pub fn new_dummy() -> Self {
|
||||
Self(None)
|
||||
}
|
||||
|
||||
/// Set the `relaychain validator` metric.
|
||||
pub fn on_is_authority(&self) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.is_authority.set(1);
|
||||
}
|
||||
}
|
||||
|
||||
/// Unset the `relaychain validator` metric.
|
||||
pub fn on_is_not_authority(&self) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.is_authority.set(0);
|
||||
}
|
||||
}
|
||||
|
||||
/// Set the `teyrchain validator` metric.
|
||||
pub fn on_is_teyrchain_validator(&self) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.is_teyrchain_validator.set(1);
|
||||
}
|
||||
}
|
||||
|
||||
/// Unset the `teyrchain validator` metric.
|
||||
pub fn on_is_not_teyrchain_validator(&self) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.is_teyrchain_validator.set(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl metrics::Metrics for Metrics {
|
||||
fn try_register(registry: &Registry) -> Result<Self, PrometheusError> {
|
||||
let metrics = MetricsInner {
|
||||
is_authority: prometheus::register(
|
||||
Gauge::new("pezkuwi_node_is_active_validator", "Tracks if the validator is in the active set. \
|
||||
Updates at session boundary.")?,
|
||||
registry,
|
||||
)?,
|
||||
is_teyrchain_validator: prometheus::register(
|
||||
Gauge::new("pezkuwi_node_is_teyrchain_validator",
|
||||
"Tracks if the validator participates in teyrchain consensus. Teyrchain validators are a \
|
||||
subset of the active set validators that perform approval checking of all teyrchain candidates in a session.\
|
||||
Updates at session boundary.")?,
|
||||
registry,
|
||||
)?,
|
||||
};
|
||||
Ok(Metrics(Some(metrics)))
|
||||
}
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,45 @@
|
||||
[package]
|
||||
name = "pezkuwi-node-network-protocol"
|
||||
version = "7.0.0"
|
||||
authors.workspace = true
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
description = "Primitives types for the Node-side"
|
||||
homepage.workspace = true
|
||||
repository.workspace = true
|
||||
|
||||
[lints]
|
||||
workspace = true
|
||||
|
||||
[dependencies]
|
||||
async-channel = { workspace = true }
|
||||
async-trait = { workspace = true }
|
||||
bitvec = { workspace = true, default-features = true }
|
||||
codec = { features = ["derive"], workspace = true }
|
||||
derive_more = { workspace = true, default-features = true }
|
||||
fatality = { workspace = true }
|
||||
futures = { workspace = true }
|
||||
gum = { workspace = true, default-features = true }
|
||||
hex = { workspace = true, default-features = true }
|
||||
pezkuwi-node-primitives = { workspace = true, default-features = true }
|
||||
pezkuwi-primitives = { workspace = true, default-features = true }
|
||||
rand = { workspace = true, default-features = true }
|
||||
sc-authority-discovery = { workspace = true, default-features = true }
|
||||
sc-network = { workspace = true, default-features = true }
|
||||
sc-network-types = { workspace = true, default-features = true }
|
||||
sp-runtime = { workspace = true, default-features = true }
|
||||
strum = { features = ["derive"], workspace = true, default-features = true }
|
||||
thiserror = { workspace = true }
|
||||
|
||||
[dev-dependencies]
|
||||
rand_chacha = { workspace = true, default-features = true }
|
||||
|
||||
[features]
|
||||
runtime-benchmarks = [
|
||||
"gum/runtime-benchmarks",
|
||||
"pezkuwi-node-primitives/runtime-benchmarks",
|
||||
"pezkuwi-primitives/runtime-benchmarks",
|
||||
"sc-authority-discovery/runtime-benchmarks",
|
||||
"sc-network/runtime-benchmarks",
|
||||
"sp-runtime/runtime-benchmarks",
|
||||
]
|
||||
@@ -0,0 +1,61 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! Authority discovery service interfacing.
|
||||
|
||||
use std::{collections::HashSet, fmt::Debug};
|
||||
|
||||
use async_trait::async_trait;
|
||||
|
||||
use sc_authority_discovery::Service as AuthorityDiscoveryService;
|
||||
|
||||
use pezkuwi_primitives::AuthorityDiscoveryId;
|
||||
use sc_network::Multiaddr;
|
||||
use sc_network_types::PeerId;
|
||||
|
||||
/// An abstraction over the authority discovery service.
///
/// Needed for mocking in tests mostly.
#[async_trait]
pub trait AuthorityDiscovery: Send + Debug + 'static {
    /// Get the addresses for the given [`AuthorityDiscoveryId`] from the local address cache.
    ///
    /// Yields either `None` or a set of [`Multiaddr`]s; what `None` means is up to the
    /// implementor.
    async fn get_addresses_by_authority_id(
        &mut self,
        authority: AuthorityDiscoveryId,
    ) -> Option<HashSet<Multiaddr>>;
    /// Get the [`AuthorityDiscoveryId`]s for the given [`PeerId`] from the local address cache.
    ///
    /// Yields either `None` or a set of authority ids; what `None` means is up to the
    /// implementor.
    async fn get_authority_ids_by_peer_id(
        &mut self,
        peer_id: PeerId,
    ) -> Option<HashSet<AuthorityDiscoveryId>>;
}
|
||||
|
||||
/// Implements the trait for the real service by forwarding each call to the service's function
/// of the same name.
#[async_trait]
impl AuthorityDiscovery for AuthorityDiscoveryService {
    async fn get_addresses_by_authority_id(
        &mut self,
        authority: AuthorityDiscoveryId,
    ) -> Option<HashSet<Multiaddr>> {
        // NOTE(review): the type-qualified path presumably resolves to the service's inherent
        // method rather than recursing into this trait method - confirm before changing it.
        AuthorityDiscoveryService::get_addresses_by_authority_id(self, authority).await
    }

    async fn get_authority_ids_by_peer_id(
        &mut self,
        peer_id: PeerId,
    ) -> Option<HashSet<AuthorityDiscoveryId>> {
        // Same forwarding as above, for the peer-id lookup.
        AuthorityDiscoveryService::get_authority_ids_by_peer_id(self, peer_id).await
    }
}
|
||||
@@ -0,0 +1,738 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! Grid topology support implementation
|
||||
//! The basic operation of the 2D grid topology is that:
|
||||
//! * A validator producing a message sends it to its row-neighbors and its column-neighbors
|
||||
//! * A validator receiving a message originating from one of its row-neighbors sends it to its
|
||||
//! column-neighbors
|
||||
//! * A validator receiving a message originating from one of its column-neighbors sends it to its
|
||||
//! row-neighbors
|
||||
//!
|
||||
//! This grid approach defines 2 unique paths for every validator to reach every other validator in
|
||||
//! at most 2 hops.
|
||||
//!
|
||||
//! However, we also supplement this with some degree of random propagation:
|
||||
//! every validator, upon seeing a message for the first time, propagates it to a small number of
//! random peers (`DEFAULT_RANDOM_CIRCULATION`, currently 4).
|
||||
//! This inserts some redundancy in case the grid topology isn't working or is being attacked -
|
||||
//! an adversary doesn't know which peers a validator will send to.
|
||||
//! This is combined with the property that the adversary doesn't know which validators will elect
|
||||
//! to check a block.
|
||||
|
||||
use crate::PeerId;
|
||||
use pezkuwi_primitives::{AuthorityDiscoveryId, SessionIndex, ValidatorIndex};
|
||||
use rand::{CryptoRng, Rng};
|
||||
use std::{
|
||||
collections::{hash_map, HashMap, HashSet},
|
||||
fmt::Debug,
|
||||
};
|
||||
|
||||
/// Log target used by the `gum` log statements in this module.
const LOG_TARGET: &str = "teyrchain::grid-topology";

/// The sample rate for randomly propagating messages. This
/// reduces the left tail of the binomial distribution but also
/// introduces a bias towards peers who we sample before others
/// (i.e. those who get a block before others).
///
/// Used as the default `sample_rate` of [`RandomRouting`].
pub const DEFAULT_RANDOM_SAMPLE_RATE: usize = crate::MIN_GOSSIP_PEERS;

/// The number of peers to randomly propagate messages to.
///
/// Used as the default `target` of [`RandomRouting`].
pub const DEFAULT_RANDOM_CIRCULATION: usize = 4;
|
||||
|
||||
/// Information about a peer in the gossip topology for a session.
///
/// One entry describes one validator; a validator may be known under several peer ids.
#[derive(Debug, Clone, PartialEq)]
pub struct TopologyPeerInfo {
    /// The validator's known peer IDs.
    pub peer_ids: Vec<PeerId>,
    /// The index of the validator in the discovery keys of the corresponding
    /// `SessionInfo`. This can extend _beyond_ the set of active teyrchain validators.
    pub validator_index: ValidatorIndex,
    /// The authority discovery public key of the validator in the corresponding
    /// `SessionInfo`.
    pub discovery_id: AuthorityDiscoveryId,
}
|
||||
|
||||
/// Topology representation for a session.
///
/// Neighbor computation requires `shuffled_indices` and `canonical_shuffling` to have the same
/// length; otherwise `compute_grid_neighbors_for` returns `None`.
#[derive(Default, Clone, Debug, PartialEq)]
pub struct SessionGridTopology {
    /// An array mapping validator indices to their indices in the
    /// shuffling itself. This has the same size as the number of validators
    /// in the session.
    shuffled_indices: Vec<usize>,
    /// The canonical shuffling of validators for the session.
    canonical_shuffling: Vec<TopologyPeerInfo>,
    /// The list of peer-ids in an efficient way to search.
    /// Holds every peer id listed in `canonical_shuffling`.
    peer_ids: HashSet<PeerId>,
}
|
||||
|
||||
impl SessionGridTopology {
|
||||
/// Create a new session grid topology.
|
||||
pub fn new(shuffled_indices: Vec<usize>, canonical_shuffling: Vec<TopologyPeerInfo>) -> Self {
|
||||
let mut peer_ids = HashSet::new();
|
||||
for peer_info in canonical_shuffling.iter() {
|
||||
for peer_id in peer_info.peer_ids.iter() {
|
||||
peer_ids.insert(*peer_id);
|
||||
}
|
||||
}
|
||||
SessionGridTopology { shuffled_indices, canonical_shuffling, peer_ids }
|
||||
}
|
||||
|
||||
/// Updates the known peer ids for the passed authorities ids.
|
||||
pub fn update_authority_ids(
|
||||
&mut self,
|
||||
peer_id: PeerId,
|
||||
ids: &HashSet<AuthorityDiscoveryId>,
|
||||
) -> bool {
|
||||
let mut updated = false;
|
||||
if !self.peer_ids.contains(&peer_id) {
|
||||
for peer in self
|
||||
.canonical_shuffling
|
||||
.iter_mut()
|
||||
.filter(|peer| ids.contains(&peer.discovery_id))
|
||||
{
|
||||
peer.peer_ids.push(peer_id);
|
||||
self.peer_ids.insert(peer_id);
|
||||
updated = true;
|
||||
}
|
||||
}
|
||||
updated
|
||||
}
|
||||
/// Produces the outgoing routing logic for a particular peer.
|
||||
///
|
||||
/// Returns `None` if the validator index is out of bounds.
|
||||
pub fn compute_grid_neighbors_for(&self, v: ValidatorIndex) -> Option<GridNeighbors> {
|
||||
if self.shuffled_indices.len() != self.canonical_shuffling.len() {
|
||||
return None;
|
||||
}
|
||||
let shuffled_val_index = *self.shuffled_indices.get(v.0 as usize)?;
|
||||
|
||||
let neighbors = matrix_neighbors(shuffled_val_index, self.shuffled_indices.len())?;
|
||||
|
||||
let mut grid_subset = GridNeighbors::empty();
|
||||
for r_n in neighbors.row_neighbors {
|
||||
let n = &self.canonical_shuffling[r_n];
|
||||
grid_subset.validator_indices_x.insert(n.validator_index);
|
||||
for p in &n.peer_ids {
|
||||
grid_subset.peers_x.insert(*p);
|
||||
}
|
||||
}
|
||||
|
||||
for c_n in neighbors.column_neighbors {
|
||||
let n = &self.canonical_shuffling[c_n];
|
||||
grid_subset.validator_indices_y.insert(n.validator_index);
|
||||
for p in &n.peer_ids {
|
||||
grid_subset.peers_y.insert(*p);
|
||||
}
|
||||
}
|
||||
|
||||
Some(grid_subset)
|
||||
}
|
||||
|
||||
/// Tells if a given peer id is validator in a session
|
||||
pub fn is_validator(&self, peer: &PeerId) -> bool {
|
||||
self.peer_ids.contains(peer)
|
||||
}
|
||||
}
|
||||
|
||||
/// Row and column neighbor iterators produced by [`matrix_neighbors`].
struct MatrixNeighbors<R, C> {
    /// Indices sharing a row with the queried index (excluding the index itself).
    row_neighbors: R,
    /// Indices sharing a column with the queried index (excluding the index itself).
    column_neighbors: C,
}

/// Compute the row and column neighbors of `val_index` in a matrix
///
/// The `len` indices are laid out row by row in a grid whose width is `floor(sqrt(len))`.
/// Returns `None` when `val_index` is not below `len`.
fn matrix_neighbors(
    val_index: usize,
    len: usize,
) -> Option<MatrixNeighbors<impl Iterator<Item = usize>, impl Iterator<Item = usize>>> {
    if val_index >= len {
        return None;
    }

    // e.g. for size 11 the matrix would be
    //
    // 0  1  2
    // 3  4  5
    // 6  7  8
    // 9 10
    //
    // and for index 10, the neighbors would be 1, 4, 7, 9

    // `len >= 1` here, so the width is at least 1 and the divisions below are safe.
    let width = (len as f64).sqrt() as usize;
    let row_start = (val_index / width) * width;
    let row_end = len.min(row_start + width);
    let column_start = val_index % width;

    // Everything in our row / column except ourselves.
    let not_self = move |candidate: &usize| *candidate != val_index;

    Some(MatrixNeighbors {
        row_neighbors: (row_start..row_end).filter(not_self),
        column_neighbors: (column_start..len).step_by(width).filter(not_self),
    })
}
|
||||
|
||||
/// Information about the grid neighbors for a particular node in the topology.
///
/// `compute_grid_neighbors_for` fills the X axis from row neighbors and the Y axis from column
/// neighbors.
#[derive(Debug, Clone, PartialEq)]
pub struct GridNeighbors {
    /// Represent peers in the X axis
    pub peers_x: HashSet<PeerId>,
    /// Represent validators in the X axis
    pub validator_indices_x: HashSet<ValidatorIndex>,
    /// Represent peers in the Y axis
    pub peers_y: HashSet<PeerId>,
    /// Represent validators in the Y axis
    pub validator_indices_y: HashSet<ValidatorIndex>,
}
|
||||
|
||||
impl GridNeighbors {
|
||||
/// Utility function for creating an empty set of grid neighbors.
|
||||
/// Useful for testing.
|
||||
pub fn empty() -> Self {
|
||||
GridNeighbors {
|
||||
peers_x: HashSet::new(),
|
||||
validator_indices_x: HashSet::new(),
|
||||
peers_y: HashSet::new(),
|
||||
validator_indices_y: HashSet::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Given the originator of a message as a validator index, indicates the part of the topology
|
||||
/// we're meant to send the message to.
|
||||
pub fn required_routing_by_index(
|
||||
&self,
|
||||
originator: ValidatorIndex,
|
||||
local: bool,
|
||||
) -> RequiredRouting {
|
||||
if local {
|
||||
return RequiredRouting::GridXY;
|
||||
}
|
||||
|
||||
let grid_x = self.validator_indices_x.contains(&originator);
|
||||
let grid_y = self.validator_indices_y.contains(&originator);
|
||||
|
||||
match (grid_x, grid_y) {
|
||||
(false, false) => RequiredRouting::None,
|
||||
(true, false) => RequiredRouting::GridY, // messages from X go to Y
|
||||
(false, true) => RequiredRouting::GridX, // messages from Y go to X
|
||||
(true, true) => RequiredRouting::GridXY, /* if the grid works as expected, this
|
||||
* shouldn't happen. */
|
||||
}
|
||||
}
|
||||
|
||||
/// Given the originator of a message as a peer index, indicates the part of the topology
|
||||
/// we're meant to send the message to.
|
||||
pub fn required_routing_by_peer_id(&self, originator: PeerId, local: bool) -> RequiredRouting {
|
||||
if local {
|
||||
return RequiredRouting::GridXY;
|
||||
}
|
||||
|
||||
let grid_x = self.peers_x.contains(&originator);
|
||||
let grid_y = self.peers_y.contains(&originator);
|
||||
|
||||
match (grid_x, grid_y) {
|
||||
(false, false) => RequiredRouting::None,
|
||||
(true, false) => RequiredRouting::GridY, // messages from X go to Y
|
||||
(false, true) => RequiredRouting::GridX, // messages from Y go to X
|
||||
(true, true) => {
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
?originator,
|
||||
"Grid topology is unexpected, play it safe and send to X AND Y"
|
||||
);
|
||||
RequiredRouting::GridXY
|
||||
}, /* if the grid works as expected, this
|
||||
* shouldn't happen. */
|
||||
}
|
||||
}
|
||||
|
||||
/// Get a filter function based on this topology and the required routing
|
||||
/// which returns `true` for peers that are within the required routing set
|
||||
/// and false otherwise.
|
||||
pub fn route_to_peer(&self, required_routing: RequiredRouting, peer: &PeerId) -> bool {
|
||||
match required_routing {
|
||||
RequiredRouting::All => true,
|
||||
RequiredRouting::GridX => self.peers_x.contains(peer),
|
||||
RequiredRouting::GridY => self.peers_y.contains(peer),
|
||||
RequiredRouting::GridXY => self.peers_x.contains(peer) || self.peers_y.contains(peer),
|
||||
RequiredRouting::None | RequiredRouting::PendingTopology => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the difference between this and the `other` topology as a vector of peers
|
||||
pub fn peers_diff(&self, other: &Self) -> Vec<PeerId> {
|
||||
self.peers_x
|
||||
.iter()
|
||||
.chain(self.peers_y.iter())
|
||||
.filter(|peer_id| !(other.peers_x.contains(peer_id) || other.peers_y.contains(peer_id)))
|
||||
.cloned()
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
|
||||
/// A convenience method that returns total number of peers in the topology
|
||||
pub fn len(&self) -> usize {
|
||||
self.peers_x.len().saturating_add(self.peers_y.len())
|
||||
}
|
||||
}
|
||||
|
||||
/// An entry tracking a session grid topology and some cached local neighbors.
#[derive(Debug)]
pub struct SessionGridTopologyEntry {
    /// The full topology of the session.
    topology: SessionGridTopology,
    /// Cached grid neighbors computed from `topology` for `local_index`; empty when they could
    /// not be computed.
    local_neighbors: GridNeighbors,
    /// The validator index the cached neighbors are computed for, if any.
    local_index: Option<ValidatorIndex>,
}
|
||||
|
||||
impl SessionGridTopologyEntry {
|
||||
/// Access the local grid neighbors.
|
||||
pub fn local_grid_neighbors(&self) -> &GridNeighbors {
|
||||
&self.local_neighbors
|
||||
}
|
||||
|
||||
/// Access the local grid neighbors mutably.
|
||||
pub fn local_grid_neighbors_mut(&mut self) -> &mut GridNeighbors {
|
||||
&mut self.local_neighbors
|
||||
}
|
||||
|
||||
/// Access the underlying topology.
|
||||
pub fn get(&self) -> &SessionGridTopology {
|
||||
&self.topology
|
||||
}
|
||||
|
||||
/// Tells if a given peer id is validator in a session
|
||||
pub fn is_validator(&self, peer: &PeerId) -> bool {
|
||||
self.topology.is_validator(peer)
|
||||
}
|
||||
|
||||
/// Returns the list of peers to route based on the required routing.
|
||||
pub fn peers_to_route(&self, required_routing: RequiredRouting) -> Vec<PeerId> {
|
||||
match required_routing {
|
||||
RequiredRouting::All => self.topology.peer_ids.iter().copied().collect(),
|
||||
RequiredRouting::GridX => self.local_neighbors.peers_x.iter().copied().collect(),
|
||||
RequiredRouting::GridY => self.local_neighbors.peers_y.iter().copied().collect(),
|
||||
RequiredRouting::GridXY => self
|
||||
.local_neighbors
|
||||
.peers_x
|
||||
.iter()
|
||||
.chain(self.local_neighbors.peers_y.iter())
|
||||
.copied()
|
||||
.collect(),
|
||||
RequiredRouting::None | RequiredRouting::PendingTopology => Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Updates the known peer ids for the passed authorities ids.
|
||||
pub fn update_authority_ids(
|
||||
&mut self,
|
||||
peer_id: PeerId,
|
||||
ids: &HashSet<AuthorityDiscoveryId>,
|
||||
) -> bool {
|
||||
let peer_id_updated = self.topology.update_authority_ids(peer_id, ids);
|
||||
// If we added a new peer id we need to recompute the grid neighbors, so that
|
||||
// neighbors_x and neighbors_y reflect the right peer ids.
|
||||
if peer_id_updated {
|
||||
if let Some(local_index) = self.local_index.as_ref() {
|
||||
if let Some(new_grid) = self.topology.compute_grid_neighbors_for(*local_index) {
|
||||
self.local_neighbors = new_grid;
|
||||
}
|
||||
}
|
||||
}
|
||||
peer_id_updated
|
||||
}
|
||||
}
|
||||
|
||||
/// A set of topologies indexed by session
#[derive(Default)]
pub struct SessionGridTopologies {
    /// Per session: the topology entry (`None` until `insert_topology` provides it) and a
    /// reference counter maintained by `inc_session_refs` / `dec_session_refs`.
    inner: HashMap<SessionIndex, (Option<SessionGridTopologyEntry>, usize)>,
}
|
||||
|
||||
impl SessionGridTopologies {
|
||||
/// Returns a topology for the specific session index
|
||||
pub fn get_topology(&self, session: SessionIndex) -> Option<&SessionGridTopologyEntry> {
|
||||
self.inner.get(&session).and_then(|val| val.0.as_ref())
|
||||
}
|
||||
|
||||
/// Updates the known peer ids for the passed authorities ids.
|
||||
pub fn update_authority_ids(
|
||||
&mut self,
|
||||
peer_id: PeerId,
|
||||
ids: &HashSet<AuthorityDiscoveryId>,
|
||||
) -> bool {
|
||||
self.inner
|
||||
.iter_mut()
|
||||
.map(|(_, topology)| {
|
||||
topology.0.as_mut().map(|topology| topology.update_authority_ids(peer_id, ids))
|
||||
})
|
||||
.any(|updated| updated.unwrap_or_default())
|
||||
}
|
||||
|
||||
/// Increase references counter for a specific topology
|
||||
pub fn inc_session_refs(&mut self, session: SessionIndex) {
|
||||
self.inner.entry(session).or_insert((None, 0)).1 += 1;
|
||||
}
|
||||
|
||||
/// Decrease references counter for a specific topology
|
||||
pub fn dec_session_refs(&mut self, session: SessionIndex) {
|
||||
if let hash_map::Entry::Occupied(mut occupied) = self.inner.entry(session) {
|
||||
occupied.get_mut().1 = occupied.get().1.saturating_sub(1);
|
||||
if occupied.get().1 == 0 {
|
||||
let _ = occupied.remove();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Insert a new topology, no-op if already present.
|
||||
pub fn insert_topology(
|
||||
&mut self,
|
||||
session: SessionIndex,
|
||||
topology: SessionGridTopology,
|
||||
local_index: Option<ValidatorIndex>,
|
||||
) {
|
||||
let entry = self.inner.entry(session).or_insert((None, 0));
|
||||
if entry.0.is_none() {
|
||||
let local_neighbors = local_index
|
||||
.and_then(|l| topology.compute_grid_neighbors_for(l))
|
||||
.unwrap_or_else(GridNeighbors::empty);
|
||||
|
||||
entry.0 = Some(SessionGridTopologyEntry { topology, local_neighbors, local_index });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A simple storage for a topology and the corresponding session index
#[derive(Debug)]
struct GridTopologySessionBound {
    /// The topology entry (topology plus cached local neighbors).
    entry: SessionGridTopologyEntry,
    /// The session `entry` belongs to.
    session_index: SessionIndex,
}
|
||||
|
||||
/// A storage for the current and maybe previous topology
#[derive(Debug)]
pub struct SessionBoundGridTopologyStorage {
    /// The most recently provided topology.
    current_topology: GridTopologySessionBound,
    /// The topology that was current before the last `update_topology` call, if any.
    prev_topology: Option<GridTopologySessionBound>,
}
|
||||
|
||||
impl Default for SessionBoundGridTopologyStorage {
|
||||
fn default() -> Self {
|
||||
// having this struct be `Default` is objectively stupid
|
||||
// but used in a few places
|
||||
SessionBoundGridTopologyStorage {
|
||||
current_topology: GridTopologySessionBound {
|
||||
// session 0 is valid so we should use the upper bound
|
||||
// as the default instead of the lower bound.
|
||||
session_index: SessionIndex::max_value(),
|
||||
entry: SessionGridTopologyEntry {
|
||||
topology: SessionGridTopology {
|
||||
shuffled_indices: Vec::new(),
|
||||
canonical_shuffling: Vec::new(),
|
||||
peer_ids: Default::default(),
|
||||
},
|
||||
local_neighbors: GridNeighbors::empty(),
|
||||
local_index: None,
|
||||
},
|
||||
},
|
||||
prev_topology: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl SessionBoundGridTopologyStorage {
|
||||
/// Return a grid topology based on the session index:
|
||||
/// If we need a previous session and it is registered in the storage, then return that session.
|
||||
/// Otherwise, return a current session to have some grid topology in any case
|
||||
pub fn get_topology_or_fallback(&self, idx: SessionIndex) -> &SessionGridTopologyEntry {
|
||||
self.get_topology(idx).unwrap_or(&self.current_topology.entry)
|
||||
}
|
||||
|
||||
/// Return the grid topology for the specific session index, if no such a session is stored
|
||||
/// returns `None`.
|
||||
pub fn get_topology(&self, idx: SessionIndex) -> Option<&SessionGridTopologyEntry> {
|
||||
if let Some(prev_topology) = &self.prev_topology {
|
||||
if idx == prev_topology.session_index {
|
||||
return Some(&prev_topology.entry);
|
||||
}
|
||||
}
|
||||
if self.current_topology.session_index == idx {
|
||||
return Some(&self.current_topology.entry);
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// Update the current topology preserving the previous one
|
||||
pub fn update_topology(
|
||||
&mut self,
|
||||
session_index: SessionIndex,
|
||||
topology: SessionGridTopology,
|
||||
local_index: Option<ValidatorIndex>,
|
||||
) {
|
||||
let local_neighbors = local_index
|
||||
.and_then(|l| topology.compute_grid_neighbors_for(l))
|
||||
.unwrap_or_else(GridNeighbors::empty);
|
||||
|
||||
let old_current = std::mem::replace(
|
||||
&mut self.current_topology,
|
||||
GridTopologySessionBound {
|
||||
entry: SessionGridTopologyEntry { topology, local_neighbors, local_index },
|
||||
session_index,
|
||||
},
|
||||
);
|
||||
self.prev_topology.replace(old_current);
|
||||
}
|
||||
|
||||
/// Returns a current grid topology
|
||||
pub fn get_current_topology(&self) -> &SessionGridTopologyEntry {
|
||||
&self.current_topology.entry
|
||||
}
|
||||
|
||||
/// Returns the current session index.
|
||||
pub fn get_current_session_index(&self) -> SessionIndex {
|
||||
self.current_topology.session_index
|
||||
}
|
||||
|
||||
/// Access the current grid topology mutably. Dangerous and intended
|
||||
/// to be used in tests.
|
||||
pub fn get_current_topology_mut(&mut self) -> &mut SessionGridTopologyEntry {
|
||||
&mut self.current_topology.entry
|
||||
}
|
||||
}
|
||||
|
||||
/// A representation of routing based on sample
///
/// Tracks how many random sends are still wanted: sampling stops succeeding once `sent`
/// reaches `target`.
#[derive(Debug, Clone, Copy)]
pub struct RandomRouting {
    /// The number of peers to target.
    target: usize,
    /// The number of peers this has been sent to.
    sent: usize,
    /// Sampling rate
    /// Used by `sample` as the numerator of the pick probability `sample_rate / n_peers_total`.
    sample_rate: usize,
}
|
||||
|
||||
impl Default for RandomRouting {
|
||||
fn default() -> Self {
|
||||
RandomRouting {
|
||||
target: DEFAULT_RANDOM_CIRCULATION,
|
||||
sent: 0_usize,
|
||||
sample_rate: DEFAULT_RANDOM_SAMPLE_RATE,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl RandomRouting {
|
||||
/// Perform random sampling for a specific peer
|
||||
/// Returns `true` for a lucky peer
|
||||
pub fn sample(&self, n_peers_total: usize, rng: &mut (impl CryptoRng + Rng)) -> bool {
|
||||
if n_peers_total == 0 || self.sent >= self.target {
|
||||
false
|
||||
} else if self.sample_rate > n_peers_total {
|
||||
true
|
||||
} else {
|
||||
rng.gen_ratio(self.sample_rate as _, n_peers_total as _)
|
||||
}
|
||||
}
|
||||
|
||||
/// Increase number of messages being sent
|
||||
pub fn inc_sent(&mut self) {
|
||||
self.sent += 1
|
||||
}
|
||||
|
||||
/// Returns `true` if we already took all the necessary samples.
|
||||
pub fn is_complete(&self) -> bool {
|
||||
self.sent >= self.target
|
||||
}
|
||||
}
|
||||
|
||||
/// Routing mode
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum RequiredRouting {
    /// We don't know yet, because we're waiting for topology info
    /// (race condition between learning about the first blocks in a new session
    /// and getting the topology for that session)
    PendingTopology,
    /// Propagate to all peers of any kind.
    All,
    /// Propagate to all peers sharing either the X or Y dimension of the grid.
    GridXY,
    /// Propagate to all peers sharing the X dimension of the grid.
    GridX,
    /// Propagate to all peers sharing the Y dimension of the grid.
    GridY,
    /// No required propagation.
    None,
}

impl RequiredRouting {
    /// Whether the required routing set is definitely empty.
    pub fn is_empty(self) -> bool {
        matches!(self, Self::PendingTopology | Self::None)
    }

    /// Combine two required routing sets into one that would cover both routing modes.
    ///
    /// `All` dominates everything; otherwise the result covers each grid axis requested by
    /// either side. When neither side requests an axis, `PendingTopology` wins over `None`.
    pub fn combine(self, other: Self) -> Self {
        let either = |wanted: Self| self == wanted || other == wanted;

        if either(Self::All) {
            return Self::All;
        }

        let covers_x = either(Self::GridX) || either(Self::GridXY);
        let covers_y = either(Self::GridY) || either(Self::GridXY);

        match (covers_x, covers_y) {
            (true, true) => Self::GridXY,
            (true, false) => Self::GridX,
            (false, true) => Self::GridY,
            // Only `None` / `PendingTopology` are left on both sides.
            (false, false) if either(Self::PendingTopology) => Self::PendingTopology,
            (false, false) => Self::None,
        }
    }
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use rand::SeedableRng;
|
||||
use rand_chacha::ChaCha12Rng;
|
||||
|
||||
fn dummy_rng() -> ChaCha12Rng {
|
||||
rand_chacha::ChaCha12Rng::seed_from_u64(12345)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_required_routing_combine() {
|
||||
assert_eq!(RequiredRouting::All.combine(RequiredRouting::None), RequiredRouting::All);
|
||||
assert_eq!(RequiredRouting::All.combine(RequiredRouting::GridXY), RequiredRouting::All);
|
||||
assert_eq!(RequiredRouting::GridXY.combine(RequiredRouting::All), RequiredRouting::All);
|
||||
assert_eq!(RequiredRouting::None.combine(RequiredRouting::All), RequiredRouting::All);
|
||||
assert_eq!(RequiredRouting::None.combine(RequiredRouting::None), RequiredRouting::None);
|
||||
assert_eq!(
|
||||
RequiredRouting::PendingTopology.combine(RequiredRouting::GridX),
|
||||
RequiredRouting::GridX
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
RequiredRouting::GridX.combine(RequiredRouting::PendingTopology),
|
||||
RequiredRouting::GridX
|
||||
);
|
||||
assert_eq!(RequiredRouting::GridX.combine(RequiredRouting::GridY), RequiredRouting::GridXY);
|
||||
assert_eq!(RequiredRouting::GridY.combine(RequiredRouting::GridX), RequiredRouting::GridXY);
|
||||
assert_eq!(
|
||||
RequiredRouting::GridXY.combine(RequiredRouting::GridXY),
|
||||
RequiredRouting::GridXY
|
||||
);
|
||||
assert_eq!(RequiredRouting::GridX.combine(RequiredRouting::GridX), RequiredRouting::GridX);
|
||||
assert_eq!(RequiredRouting::GridY.combine(RequiredRouting::GridY), RequiredRouting::GridY);
|
||||
|
||||
assert_eq!(RequiredRouting::None.combine(RequiredRouting::GridY), RequiredRouting::GridY);
|
||||
assert_eq!(RequiredRouting::None.combine(RequiredRouting::GridX), RequiredRouting::GridX);
|
||||
assert_eq!(RequiredRouting::None.combine(RequiredRouting::GridXY), RequiredRouting::GridXY);
|
||||
|
||||
assert_eq!(RequiredRouting::GridY.combine(RequiredRouting::None), RequiredRouting::GridY);
|
||||
assert_eq!(RequiredRouting::GridX.combine(RequiredRouting::None), RequiredRouting::GridX);
|
||||
assert_eq!(RequiredRouting::GridXY.combine(RequiredRouting::None), RequiredRouting::GridXY);
|
||||
|
||||
assert_eq!(
|
||||
RequiredRouting::PendingTopology.combine(RequiredRouting::None),
|
||||
RequiredRouting::PendingTopology
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
RequiredRouting::None.combine(RequiredRouting::PendingTopology),
|
||||
RequiredRouting::PendingTopology
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_random_routing_sample() {
|
||||
// This test is fragile as it relies on a specific ChaCha12Rng
|
||||
// sequence that might be implementation defined even for a static seed
|
||||
let mut rng = dummy_rng();
|
||||
let mut random_routing = RandomRouting { target: 4, sent: 0, sample_rate: 8 };
|
||||
|
||||
assert_eq!(random_routing.sample(16, &mut rng), true);
|
||||
random_routing.inc_sent();
|
||||
assert_eq!(random_routing.sample(16, &mut rng), false);
|
||||
assert_eq!(random_routing.sample(16, &mut rng), false);
|
||||
assert_eq!(random_routing.sample(16, &mut rng), true);
|
||||
random_routing.inc_sent();
|
||||
assert_eq!(random_routing.sample(16, &mut rng), true);
|
||||
random_routing.inc_sent();
|
||||
assert_eq!(random_routing.sample(16, &mut rng), false);
|
||||
assert_eq!(random_routing.sample(16, &mut rng), false);
|
||||
assert_eq!(random_routing.sample(16, &mut rng), false);
|
||||
assert_eq!(random_routing.sample(16, &mut rng), true);
|
||||
random_routing.inc_sent();
|
||||
|
||||
for _ in 0..16 {
|
||||
assert_eq!(random_routing.sample(16, &mut rng), false);
|
||||
}
|
||||
}
|
||||
|
||||
fn run_random_routing(
|
||||
random_routing: &mut RandomRouting,
|
||||
rng: &mut (impl CryptoRng + Rng),
|
||||
npeers: usize,
|
||||
iters: usize,
|
||||
) -> usize {
|
||||
let mut ret = 0_usize;
|
||||
|
||||
for _ in 0..iters {
|
||||
if random_routing.sample(npeers, rng) {
|
||||
random_routing.inc_sent();
|
||||
ret += 1;
|
||||
}
|
||||
}
|
||||
|
||||
ret
|
||||
}
|
||||
|
||||
#[test]
fn test_random_routing_distribution() {
	let mut rng = dummy_rng();

	// (target, sample_rate, npeers, iters, expected number of successful draws).
	// All cases share the same RNG and run in this exact order.
	let cases = [
		(4, 8, 100, 10000, 4),
		(8, 100, 100, 10000, 8),
		(0, 100, 100, 10000, 0),
		(10, 10, 10, 100, 10),
	];

	for (target, sample_rate, npeers, iters, expected) in cases {
		let mut random_routing = RandomRouting { target, sent: 0, sample_rate };
		assert_eq!(run_random_routing(&mut random_routing, &mut rng, npeers, iters), expected);
	}
}
|
||||
|
||||
#[test]
fn test_matrix_neighbors() {
	// (our_index, len, expected row neighbors, expected column neighbors)
	let cases = vec![
		(0usize, 1usize, vec![], vec![]),
		(1, 2, vec![], vec![0usize]),
		(0, 9, vec![1, 2], vec![3, 6]),
		(9, 10, vec![], vec![0, 3, 6]),
		(10, 11, vec![9], vec![1, 4, 7]),
		(7, 11, vec![6, 8], vec![1, 4, 10]),
	];

	for (our_index, len, expected_row, expected_column) in cases {
		let neighbors = matrix_neighbors(our_index, len).unwrap();

		// The expectations are listed in ascending order, so sort before comparing.
		let mut row: Vec<_> = neighbors.row_neighbors.collect();
		row.sort();
		let mut column: Vec<_> = neighbors.column_neighbors.collect();
		column.sort();

		assert_eq!(row, expected_row);
		assert_eq!(column, expected_column);
	}
}
|
||||
}
|
||||
@@ -0,0 +1,773 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! Network protocol types for teyrchains.
|
||||
|
||||
#![deny(unused_crate_dependencies)]
|
||||
#![warn(missing_docs)]
|
||||
|
||||
use codec::{Decode, Encode};
|
||||
use pezkuwi_primitives::{BlockNumber, Hash};
|
||||
use std::fmt;
|
||||
|
||||
#[doc(hidden)]
|
||||
pub use sc_network::IfDisconnected;
|
||||
pub use sc_network_types::PeerId;
|
||||
#[doc(hidden)]
|
||||
pub use std::sync::Arc;
|
||||
|
||||
mod reputation;
|
||||
pub use self::reputation::{ReputationChange, UnifiedReputationChange};
|
||||
|
||||
/// Peer-sets and protocols used for teyrchains.
|
||||
pub mod peer_set;
|
||||
|
||||
/// Request/response protocols used in Pezkuwi.
|
||||
pub mod request_response;
|
||||
|
||||
/// Accessing authority discovery service
|
||||
pub mod authority_discovery;
|
||||
/// Grid topology support module
|
||||
pub mod grid_topology;
|
||||
|
||||
/// The minimum number of peers to send gossip messages to.
|
||||
pub const MIN_GOSSIP_PEERS: usize = 25;
|
||||
|
||||
/// An error indicating that the over-arching message type had the wrong variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct WrongVariant;

impl fmt::Display for WrongVariant {
	/// Writes the fixed, human-readable description of this error.
	fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
		// No formatting arguments are involved, so write the literal directly.
		formatter.write_str("Wrong message variant")
	}
}

impl std::error::Error for WrongVariant {}
|
||||
|
||||
/// The advertised role of a node.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ObservedRole {
	/// A light node.
	Light,
	/// A full node.
	Full,
	/// A node claiming to be an authority (unauthenticated).
	Authority,
}
|
||||
|
||||
impl From<sc_network::ObservedRole> for ObservedRole {
|
||||
fn from(role: sc_network::ObservedRole) -> ObservedRole {
|
||||
match role {
|
||||
sc_network::ObservedRole::Light => ObservedRole::Light,
|
||||
sc_network::ObservedRole::Authority => ObservedRole::Authority,
|
||||
sc_network::ObservedRole::Full => ObservedRole::Full,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Into<sc_network::ObservedRole> for ObservedRole {
|
||||
fn into(self) -> sc_network::ObservedRole {
|
||||
match self {
|
||||
ObservedRole::Light => sc_network::ObservedRole::Light,
|
||||
ObservedRole::Full => sc_network::ObservedRole::Full,
|
||||
ObservedRole::Authority => sc_network::ObservedRole::Authority,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Specialized wrapper around [`View`].
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub struct OurView {
|
||||
view: View,
|
||||
}
|
||||
|
||||
impl OurView {
|
||||
/// Creates a new instance.
|
||||
pub fn new(heads: impl IntoIterator<Item = Hash>, finalized_number: BlockNumber) -> Self {
|
||||
let view = View::new(heads, finalized_number);
|
||||
Self { view }
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialEq for OurView {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.view == other.view
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::Deref for OurView {
|
||||
type Target = View;
|
||||
|
||||
fn deref(&self) -> &View {
|
||||
&self.view
|
||||
}
|
||||
}
|
||||
|
||||
/// Construct a new [`OurView`] with the given chain heads and finalized number 0.
///
/// Each head expression is cloned into the view. A trailing comma is accepted.
///
/// NOTE: Use for tests only.
///
/// # Example
///
/// ```
/// # use pezkuwi_node_network_protocol::our_view;
/// # use pezkuwi_primitives::Hash;
/// let our_view = our_view![Hash::repeat_byte(1), Hash::repeat_byte(2)];
/// ```
#[macro_export]
macro_rules! our_view {
	( $( $hash:expr ),* $(,)? ) => {
		// `OurView::new` accepts any `IntoIterator<Item = Hash>`, so the `Vec` is passed
		// directly; no identity `map` is needed.
		$crate::OurView::new(vec![ $( $hash.clone() ),* ], 0)
	};
}
|
||||
|
||||
/// A succinct representation of a peer's view. This consists of a bounded amount of chain heads
|
||||
/// and the highest known finalized block number.
|
||||
///
|
||||
/// Up to `N` (5?) chain heads.
|
||||
#[derive(Default, Debug, Clone, PartialEq, Eq, Encode, Decode)]
|
||||
pub struct View {
|
||||
/// A bounded amount of chain heads.
|
||||
/// Invariant: Sorted.
|
||||
heads: Vec<Hash>,
|
||||
/// The highest known finalized block number.
|
||||
pub finalized_number: BlockNumber,
|
||||
}
|
||||
|
||||
/// Construct a new view with the given chain heads and finalized number 0.
///
/// Each head expression is cloned into the view. A trailing comma is accepted.
///
/// NOTE: Use for tests only.
///
/// # Example
///
/// ```
/// # use pezkuwi_node_network_protocol::view;
/// # use pezkuwi_primitives::Hash;
/// let view = view![Hash::repeat_byte(1), Hash::repeat_byte(2)];
/// ```
#[macro_export]
macro_rules! view {
	( $( $hash:expr ),* $(,)? ) => {{
		let heads = vec![ $( $hash.clone() ),* ];
		$crate::View::new(heads, 0)
	}};
}
|
||||
|
||||
impl View {
|
||||
/// Construct a new view based on heads and a finalized block number.
|
||||
pub fn new(heads: impl IntoIterator<Item = Hash>, finalized_number: BlockNumber) -> Self {
|
||||
let mut heads = heads.into_iter().collect::<Vec<Hash>>();
|
||||
heads.sort();
|
||||
Self { heads, finalized_number }
|
||||
}
|
||||
|
||||
/// Start with no heads, but only a finalized block number.
|
||||
pub fn with_finalized(finalized_number: BlockNumber) -> Self {
|
||||
Self { heads: Vec::new(), finalized_number }
|
||||
}
|
||||
|
||||
/// Obtain the number of heads that are in view.
|
||||
pub fn len(&self) -> usize {
|
||||
self.heads.len()
|
||||
}
|
||||
|
||||
/// Check if the number of heads contained, is null.
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.heads.is_empty()
|
||||
}
|
||||
|
||||
/// Obtain an iterator over all heads.
|
||||
pub fn iter(&self) -> impl Iterator<Item = &Hash> {
|
||||
self.heads.iter()
|
||||
}
|
||||
|
||||
/// Obtain an iterator over all heads.
|
||||
pub fn into_iter(self) -> impl Iterator<Item = Hash> {
|
||||
self.heads.into_iter()
|
||||
}
|
||||
|
||||
/// Replace `self` with `new`.
|
||||
///
|
||||
/// Returns an iterator that will yield all elements of `new` that were not part of `self`.
|
||||
pub fn replace_difference(&mut self, new: View) -> impl Iterator<Item = &Hash> {
|
||||
let old = std::mem::replace(self, new);
|
||||
|
||||
self.heads.iter().filter(move |h| !old.contains(h))
|
||||
}
|
||||
|
||||
/// Returns an iterator of the hashes present in `Self` but not in `other`.
|
||||
pub fn difference<'a>(&'a self, other: &'a View) -> impl Iterator<Item = &'a Hash> + 'a {
|
||||
self.heads.iter().filter(move |h| !other.contains(h))
|
||||
}
|
||||
|
||||
/// An iterator containing hashes present in both `Self` and in `other`.
|
||||
pub fn intersection<'a>(&'a self, other: &'a View) -> impl Iterator<Item = &'a Hash> + 'a {
|
||||
self.heads.iter().filter(move |h| other.contains(h))
|
||||
}
|
||||
|
||||
/// Whether the view contains a given hash.
|
||||
pub fn contains(&self, hash: &Hash) -> bool {
|
||||
self.heads.contains(hash)
|
||||
}
|
||||
|
||||
/// Check if two views have the same heads.
|
||||
///
|
||||
/// Equivalent to the `PartialEq` function,
|
||||
/// but ignores the `finalized_number` field.
|
||||
pub fn check_heads_eq(&self, other: &Self) -> bool {
|
||||
self.heads == other.heads
|
||||
}
|
||||
}
|
||||
|
||||
/// A protocol-versioned type for validation.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ValidationProtocols<V3> {
	/// V3 type.
	V3(V3),
}

impl<V3: Clone> ValidationProtocols<&'_ V3> {
	/// Convert to a fully-owned version of the message by cloning the borrowed payload.
	pub fn clone_inner(&self) -> ValidationProtocols<V3> {
		match self {
			Self::V3(borrowed) => ValidationProtocols::V3(V3::clone(borrowed)),
		}
	}
}

/// A protocol-versioned type for collation.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CollationProtocols<V1, V2> {
	/// V1 type.
	V1(V1),
	/// V2 type.
	V2(V2),
}

impl<V1: Clone, V2: Clone> CollationProtocols<&'_ V1, &'_ V2> {
	/// Convert to a fully-owned version of the message by cloning the borrowed payload.
	pub fn clone_inner(&self) -> CollationProtocols<V1, V2> {
		match self {
			Self::V1(borrowed) => CollationProtocols::V1(V1::clone(borrowed)),
			Self::V2(borrowed) => CollationProtocols::V2(V2::clone(borrowed)),
		}
	}
}
|
||||
|
||||
/// All supported versions of the validation protocol message.
|
||||
pub type VersionedValidationProtocol = ValidationProtocols<v3::ValidationProtocol>;
|
||||
|
||||
impl From<v3::ValidationProtocol> for VersionedValidationProtocol {
|
||||
fn from(v3: v3::ValidationProtocol) -> Self {
|
||||
VersionedValidationProtocol::V3(v3)
|
||||
}
|
||||
}
|
||||
|
||||
/// All supported versions of the collation protocol message.
|
||||
pub type VersionedCollationProtocol =
|
||||
CollationProtocols<v1::CollationProtocol, v2::CollationProtocol>;
|
||||
|
||||
impl From<v1::CollationProtocol> for VersionedCollationProtocol {
|
||||
fn from(v1: v1::CollationProtocol) -> Self {
|
||||
VersionedCollationProtocol::V1(v1)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<v2::CollationProtocol> for VersionedCollationProtocol {
|
||||
fn from(v2: v2::CollationProtocol) -> Self {
|
||||
VersionedCollationProtocol::V2(v2)
|
||||
}
|
||||
}
|
||||
|
||||
/// Implement `From<$from> for $out`, converting the payload of the `V3` variant with `Into`.
///
/// The third argument is accepted at the call site but is not used by the expansion.
macro_rules! impl_versioned_validation_full_protocol_from {
	($from:ty, $out:ty, $variant:ident) => {
		impl From<$from> for $out {
			fn from(message: $from) -> Self {
				match message {
					ValidationProtocols::V3(inner) => ValidationProtocols::V3(inner.into()),
				}
			}
		}
	};
}
|
||||
|
||||
/// Implement `From<$from> for $out`, converting the payload of the `V1` or `V2` variant with
/// `Into` while keeping the version.
///
/// The third argument is accepted at the call site but is not used by the expansion.
macro_rules! impl_versioned_collation_full_protocol_from {
	($from:ty, $out:ty, $variant:ident) => {
		impl From<$from> for $out {
			fn from(message: $from) -> Self {
				match message {
					CollationProtocols::V1(inner) => CollationProtocols::V1(inner.into()),
					CollationProtocols::V2(inner) => CollationProtocols::V2(inner.into()),
				}
			}
		}
	};
}
|
||||
|
||||
/// Implement `TryFrom<$from>` and `TryFrom<&$from>` for the versioned validation type `$out`.
///
/// A `V3` value whose payload matches `$v3_pat` converts to `Ok(ValidationProtocols::V3($v3_out))`
/// (the by-reference impl clones `$v3_out`); anything else yields `WrongVariant`.
macro_rules! impl_versioned_validation_try_from {
	(
		$from:ty,
		$out:ty,
		$v3_pat:pat => $v3_out:expr
	) => {
		impl TryFrom<$from> for $out {
			type Error = crate::WrongVariant;

			fn try_from(versioned: $from) -> Result<Self, Self::Error> {
				#[allow(unreachable_patterns)] // when there is only one variant
				match versioned {
					ValidationProtocols::V3($v3_pat) => Ok(ValidationProtocols::V3($v3_out)),
					_ => Err(crate::WrongVariant),
				}
			}
		}

		impl<'a> TryFrom<&'a $from> for $out {
			type Error = crate::WrongVariant;

			fn try_from(versioned: &'a $from) -> Result<Self, Self::Error> {
				#[allow(unreachable_patterns)] // when there is only one variant
				match versioned {
					ValidationProtocols::V3($v3_pat) =>
						Ok(ValidationProtocols::V3($v3_out.clone())),
					_ => Err(crate::WrongVariant),
				}
			}
		}
	};
}
|
||||
|
||||
/// Implement `TryFrom<$from>` and `TryFrom<&$from>` for the versioned collation type `$out`.
///
/// A `V1` value whose payload matches `$v1_pat` converts to `Ok(CollationProtocols::V1($v1_out))`,
/// and likewise for `V2` (the by-reference impl clones the output expressions); anything else
/// yields `WrongVariant`.
macro_rules! impl_versioned_collation_try_from {
	(
		$from:ty,
		$out:ty,
		$v1_pat:pat => $v1_out:expr,
		$v2_pat:pat => $v2_out:expr
	) => {
		impl TryFrom<$from> for $out {
			type Error = crate::WrongVariant;

			fn try_from(versioned: $from) -> Result<Self, Self::Error> {
				#[allow(unreachable_patterns)] // when there is only one variant
				match versioned {
					CollationProtocols::V1($v1_pat) => Ok(CollationProtocols::V1($v1_out)),
					CollationProtocols::V2($v2_pat) => Ok(CollationProtocols::V2($v2_out)),
					_ => Err(crate::WrongVariant),
				}
			}
		}

		impl<'a> TryFrom<&'a $from> for $out {
			type Error = crate::WrongVariant;

			fn try_from(versioned: &'a $from) -> Result<Self, Self::Error> {
				#[allow(unreachable_patterns)] // when there is only one variant
				match versioned {
					CollationProtocols::V1($v1_pat) => Ok(CollationProtocols::V1($v1_out.clone())),
					CollationProtocols::V2($v2_pat) => Ok(CollationProtocols::V2($v2_out.clone())),
					_ => Err(crate::WrongVariant),
				}
			}
		}
	};
}
|
||||
|
||||
/// Version-annotated messages used by the bitfield distribution subsystem.
|
||||
pub type BitfieldDistributionMessage = ValidationProtocols<v3::BitfieldDistributionMessage>;
|
||||
impl_versioned_validation_full_protocol_from!(
|
||||
BitfieldDistributionMessage,
|
||||
VersionedValidationProtocol,
|
||||
BitfieldDistribution
|
||||
);
|
||||
impl_versioned_validation_try_from!(
|
||||
VersionedValidationProtocol,
|
||||
BitfieldDistributionMessage,
|
||||
v3::ValidationProtocol::BitfieldDistribution(x) => x
|
||||
);
|
||||
|
||||
/// Version-annotated messages used by the statement distribution subsystem.
|
||||
pub type StatementDistributionMessage = ValidationProtocols<v3::StatementDistributionMessage>;
|
||||
impl_versioned_validation_full_protocol_from!(
|
||||
StatementDistributionMessage,
|
||||
VersionedValidationProtocol,
|
||||
StatementDistribution
|
||||
);
|
||||
impl_versioned_validation_try_from!(
|
||||
VersionedValidationProtocol,
|
||||
StatementDistributionMessage,
|
||||
v3::ValidationProtocol::StatementDistribution(x) => x
|
||||
);
|
||||
|
||||
/// Version-annotated messages used by the approval distribution subsystem.
|
||||
pub type ApprovalDistributionMessage = ValidationProtocols<v3::ApprovalDistributionMessage>;
|
||||
impl_versioned_validation_full_protocol_from!(
|
||||
ApprovalDistributionMessage,
|
||||
VersionedValidationProtocol,
|
||||
ApprovalDistribution
|
||||
);
|
||||
impl_versioned_validation_try_from!(
|
||||
VersionedValidationProtocol,
|
||||
ApprovalDistributionMessage,
|
||||
v3::ValidationProtocol::ApprovalDistribution(x) => x
|
||||
|
||||
);
|
||||
|
||||
/// Version-annotated messages used by the gossip-support subsystem (this is void).
|
||||
pub type GossipSupportNetworkMessage = ValidationProtocols<v3::GossipSupportNetworkMessage>;
|
||||
|
||||
// This is a void enum placeholder, so never gets sent over the wire.
|
||||
impl TryFrom<VersionedValidationProtocol> for GossipSupportNetworkMessage {
|
||||
type Error = WrongVariant;
|
||||
fn try_from(_: VersionedValidationProtocol) -> Result<Self, Self::Error> {
|
||||
Err(WrongVariant)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> TryFrom<&'a VersionedValidationProtocol> for GossipSupportNetworkMessage {
|
||||
type Error = WrongVariant;
|
||||
fn try_from(_: &'a VersionedValidationProtocol) -> Result<Self, Self::Error> {
|
||||
Err(WrongVariant)
|
||||
}
|
||||
}
|
||||
|
||||
/// Version-annotated messages used by the collator protocol subsystem.
|
||||
pub type CollatorProtocolMessage =
|
||||
CollationProtocols<v1::CollatorProtocolMessage, v2::CollatorProtocolMessage>;
|
||||
impl_versioned_collation_full_protocol_from!(
|
||||
CollatorProtocolMessage,
|
||||
VersionedCollationProtocol,
|
||||
CollatorProtocol
|
||||
);
|
||||
impl_versioned_collation_try_from!(
|
||||
VersionedCollationProtocol,
|
||||
CollatorProtocolMessage,
|
||||
v1::CollationProtocol::CollatorProtocol(x) => x,
|
||||
v2::CollationProtocol::CollatorProtocol(x) => x
|
||||
);
|
||||
|
||||
/// v1 notification protocol types.
|
||||
pub mod v1 {
|
||||
use codec::{Decode, Encode};
|
||||
|
||||
use pezkuwi_primitives::{CollatorId, CollatorSignature, Hash, Id as ParaId};
|
||||
|
||||
use pezkuwi_node_primitives::UncheckedSignedFullStatement;
|
||||
|
||||
/// Network messages used by the collator protocol subsystem
|
||||
#[derive(Debug, Clone, Encode, Decode, PartialEq, Eq)]
|
||||
pub enum CollatorProtocolMessage {
|
||||
/// Declare the intent to advertise collations under a collator ID, attaching a
|
||||
/// signature of the `PeerId` of the node using the given collator ID key.
|
||||
#[codec(index = 0)]
|
||||
Declare(CollatorId, ParaId, CollatorSignature),
|
||||
/// Advertise a collation to a validator. Can only be sent once the peer has
|
||||
/// declared that they are a collator with given ID.
|
||||
#[codec(index = 1)]
|
||||
AdvertiseCollation(Hash),
|
||||
/// A collation sent to a validator was seconded.
|
||||
#[codec(index = 4)]
|
||||
CollationSeconded(Hash, UncheckedSignedFullStatement),
|
||||
}
|
||||
|
||||
/// All network messages on the collation peer-set.
|
||||
#[derive(Debug, Clone, Encode, Decode, PartialEq, Eq, derive_more::From)]
|
||||
pub enum CollationProtocol {
|
||||
/// Collator protocol messages
|
||||
#[codec(index = 0)]
|
||||
#[from]
|
||||
CollatorProtocol(CollatorProtocolMessage),
|
||||
}
|
||||
|
||||
/// Get the payload that should be signed and included in a `Declare` message.
|
||||
///
|
||||
/// The payload is the local peer id of the node, which serves to prove that it
|
||||
/// controls the collator key it is declaring an intention to collate under.
|
||||
pub fn declare_signature_payload(peer_id: &sc_network_types::PeerId) -> Vec<u8> {
|
||||
let mut payload = peer_id.to_bytes();
|
||||
payload.extend_from_slice(b"COLL");
|
||||
payload
|
||||
}
|
||||
}
|
||||
|
||||
/// v2 network protocol types.
|
||||
pub mod v2 {
|
||||
use codec::{Decode, Encode};
|
||||
|
||||
use pezkuwi_primitives::{CandidateHash, CollatorId, CollatorSignature, Hash, Id as ParaId};
|
||||
|
||||
use pezkuwi_node_primitives::UncheckedSignedFullStatement;
|
||||
|
||||
/// This parts of the protocol did not change from v1, so just alias them in v2.
|
||||
pub use super::v1::declare_signature_payload;
|
||||
|
||||
/// Network messages used by the collator protocol subsystem
|
||||
#[derive(Debug, Clone, Encode, Decode, PartialEq, Eq)]
|
||||
pub enum CollatorProtocolMessage {
|
||||
/// Declare the intent to advertise collations under a collator ID, attaching a
|
||||
/// signature of the `PeerId` of the node using the given collator ID key.
|
||||
#[codec(index = 0)]
|
||||
Declare(CollatorId, ParaId, CollatorSignature),
|
||||
/// Advertise a collation to a validator. Can only be sent once the peer has
|
||||
/// declared that they are a collator with given ID.
|
||||
#[codec(index = 1)]
|
||||
AdvertiseCollation {
|
||||
/// Hash of the relay parent advertised collation is based on.
|
||||
relay_parent: Hash,
|
||||
/// Candidate hash.
|
||||
candidate_hash: CandidateHash,
|
||||
/// Teyrchain head data hash before candidate execution.
|
||||
parent_head_data_hash: Hash,
|
||||
},
|
||||
/// A collation sent to a validator was seconded.
|
||||
#[codec(index = 4)]
|
||||
CollationSeconded(Hash, UncheckedSignedFullStatement),
|
||||
}
|
||||
|
||||
/// All network messages on the collation peer-set.
|
||||
#[derive(Debug, Clone, Encode, Decode, PartialEq, Eq, derive_more::From)]
|
||||
pub enum CollationProtocol {
|
||||
/// Collator protocol messages
|
||||
#[codec(index = 0)]
|
||||
#[from]
|
||||
CollatorProtocol(CollatorProtocolMessage),
|
||||
}
|
||||
}
|
||||
|
||||
/// v3 network protocol types.
|
||||
/// Purpose is for changing ApprovalDistributionMessage to
|
||||
/// include more than one assignment and approval in a message.
|
||||
pub mod v3 {
|
||||
use bitvec::{order::Lsb0, slice::BitSlice, vec::BitVec};
|
||||
use codec::{Decode, Encode};
|
||||
|
||||
use pezkuwi_primitives::{
|
||||
CandidateHash, GroupIndex, Hash, Id as ParaId, UncheckedSignedAvailabilityBitfield,
|
||||
UncheckedSignedStatement,
|
||||
};
|
||||
|
||||
use pezkuwi_node_primitives::approval::v2::{
|
||||
CandidateBitfield, IndirectAssignmentCertV2, IndirectSignedApprovalVoteV2,
|
||||
};
|
||||
|
||||
/// This parts of the protocol did not change from v2, so just alias them in v3.
|
||||
pub use super::v2::declare_signature_payload;
|
||||
|
||||
/// Network messages used by the bitfield distribution subsystem.
|
||||
#[derive(Debug, Clone, Encode, Decode, PartialEq, Eq)]
|
||||
pub enum BitfieldDistributionMessage {
|
||||
/// A signed availability bitfield for a given relay-parent hash.
|
||||
#[codec(index = 0)]
|
||||
Bitfield(Hash, UncheckedSignedAvailabilityBitfield),
|
||||
}
|
||||
|
||||
/// Bitfields indicating the statements that are known or undesired
|
||||
/// about a candidate.
|
||||
#[derive(Debug, Clone, Encode, Decode, PartialEq, Eq)]
|
||||
pub struct StatementFilter {
|
||||
/// Seconded statements. '1' is known or undesired.
|
||||
pub seconded_in_group: BitVec<u8, Lsb0>,
|
||||
/// Valid statements. '1' is known or undesired.
|
||||
pub validated_in_group: BitVec<u8, Lsb0>,
|
||||
}
|
||||
|
||||
impl StatementFilter {
|
||||
/// Create a new blank filter with the given group size.
|
||||
pub fn blank(group_size: usize) -> Self {
|
||||
StatementFilter {
|
||||
seconded_in_group: BitVec::repeat(false, group_size),
|
||||
validated_in_group: BitVec::repeat(false, group_size),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a new full filter with the given group size.
|
||||
pub fn full(group_size: usize) -> Self {
|
||||
StatementFilter {
|
||||
seconded_in_group: BitVec::repeat(true, group_size),
|
||||
validated_in_group: BitVec::repeat(true, group_size),
|
||||
}
|
||||
}
|
||||
|
||||
/// Whether the filter has a specific expected length, consistent across both
|
||||
/// bitfields.
|
||||
pub fn has_len(&self, len: usize) -> bool {
|
||||
self.seconded_in_group.len() == len && self.validated_in_group.len() == len
|
||||
}
|
||||
|
||||
/// Determine the number of backing validators in the statement filter.
|
||||
pub fn backing_validators(&self) -> usize {
|
||||
self.seconded_in_group
|
||||
.iter()
|
||||
.by_vals()
|
||||
.zip(self.validated_in_group.iter().by_vals())
|
||||
.filter(|&(s, v)| s || v) // no double-counting
|
||||
.count()
|
||||
}
|
||||
|
||||
/// Whether the statement filter has at least one seconded statement.
|
||||
pub fn has_seconded(&self) -> bool {
|
||||
self.seconded_in_group.iter().by_vals().any(|x| x)
|
||||
}
|
||||
|
||||
/// Mask out `Seconded` statements in `self` according to the provided
|
||||
/// bitvec. Bits appearing in `mask` will not appear in `self` afterwards.
|
||||
pub fn mask_seconded(&mut self, mask: &BitSlice<u8, Lsb0>) {
|
||||
for (mut x, mask) in self
|
||||
.seconded_in_group
|
||||
.iter_mut()
|
||||
.zip(mask.iter().by_vals().chain(std::iter::repeat(false)))
|
||||
{
|
||||
// (x, mask) => x
|
||||
// (true, true) => false
|
||||
// (true, false) => true
|
||||
// (false, true) => false
|
||||
// (false, false) => false
|
||||
*x = *x && !mask;
|
||||
}
|
||||
}
|
||||
|
||||
/// Mask out `Valid` statements in `self` according to the provided
|
||||
/// bitvec. Bits appearing in `mask` will not appear in `self` afterwards.
|
||||
pub fn mask_valid(&mut self, mask: &BitSlice<u8, Lsb0>) {
|
||||
for (mut x, mask) in self
|
||||
.validated_in_group
|
||||
.iter_mut()
|
||||
.zip(mask.iter().by_vals().chain(std::iter::repeat(false)))
|
||||
{
|
||||
// (x, mask) => x
|
||||
// (true, true) => false
|
||||
// (true, false) => true
|
||||
// (false, true) => false
|
||||
// (false, false) => false
|
||||
*x = *x && !mask;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A manifest of a known backed candidate, along with a description
|
||||
/// of the statements backing it.
|
||||
#[derive(Debug, Clone, Encode, Decode, PartialEq, Eq)]
|
||||
pub struct BackedCandidateManifest {
|
||||
/// The relay-parent of the candidate.
|
||||
pub relay_parent: Hash,
|
||||
/// The hash of the candidate.
|
||||
pub candidate_hash: CandidateHash,
|
||||
/// The group index backing the candidate at the relay-parent.
|
||||
pub group_index: GroupIndex,
|
||||
/// The para ID of the candidate. It is illegal for this to
|
||||
/// be a para ID which is not assigned to the group indicated
|
||||
/// in this manifest.
|
||||
pub para_id: ParaId,
|
||||
/// The head-data corresponding to the candidate.
|
||||
pub parent_head_data_hash: Hash,
|
||||
/// A statement filter which indicates which validators in the
|
||||
/// para's group at the relay-parent have validated this candidate
|
||||
/// and issued statements about it, to the advertiser's knowledge.
|
||||
///
|
||||
/// This MUST have exactly the minimum amount of bytes
|
||||
/// necessary to represent the number of validators in the assigned
|
||||
/// backing group as-of the relay-parent.
|
||||
pub statement_knowledge: StatementFilter,
|
||||
}
|
||||
|
||||
/// An acknowledgement of a backed candidate being known.
|
||||
#[derive(Debug, Clone, Encode, Decode, PartialEq, Eq)]
|
||||
pub struct BackedCandidateAcknowledgement {
|
||||
/// The hash of the candidate.
|
||||
pub candidate_hash: CandidateHash,
|
||||
/// A statement filter which indicates which validators in the
|
||||
/// para's group at the relay-parent have validated this candidate
|
||||
/// and issued statements about it, to the advertiser's knowledge.
|
||||
///
|
||||
/// This MUST have exactly the minimum amount of bytes
|
||||
/// necessary to represent the number of validators in the assigned
|
||||
/// backing group as-of the relay-parent.
|
||||
pub statement_knowledge: StatementFilter,
|
||||
}
|
||||
|
||||
/// Network messages used by the statement distribution subsystem.
|
||||
#[derive(Debug, Clone, Encode, Decode, PartialEq, Eq)]
|
||||
pub enum StatementDistributionMessage {
|
||||
/// A notification of a signed statement in compact form, for a given relay parent.
|
||||
#[codec(index = 0)]
|
||||
Statement(Hash, UncheckedSignedStatement),
|
||||
|
||||
/// A notification of a backed candidate being known by the
|
||||
/// sending node, for the purpose of being requested by the receiving node
|
||||
/// if needed.
|
||||
#[codec(index = 1)]
|
||||
BackedCandidateManifest(BackedCandidateManifest),
|
||||
|
||||
/// A notification of a backed candidate being known by the sending node,
|
||||
/// for the purpose of informing a receiving node which already has the candidate.
|
||||
#[codec(index = 2)]
|
||||
BackedCandidateKnown(BackedCandidateAcknowledgement),
|
||||
}
|
||||
|
||||
/// Network messages used by the approval distribution subsystem.
|
||||
#[derive(Debug, Clone, Encode, Decode, PartialEq, Eq)]
|
||||
pub enum ApprovalDistributionMessage {
|
||||
/// Assignments for candidates in recent, unfinalized blocks.
|
||||
/// We use a bitfield to reference claimed candidates, where the bit index is equal to
|
||||
/// candidate index.
|
||||
///
|
||||
/// Actually checking the assignment may yield a different result.
|
||||
///
|
||||
/// TODO at next protocol upgrade opportunity:
|
||||
/// - remove redundancy `candidate_index` vs `core_index`
|
||||
/// - `<https://github.com/pezkuwichain/pezkuwi-sdk/issues/106>`
|
||||
#[codec(index = 0)]
|
||||
Assignments(Vec<(IndirectAssignmentCertV2, CandidateBitfield)>),
|
||||
/// Approvals for candidates in some recent, unfinalized block.
|
||||
#[codec(index = 1)]
|
||||
Approvals(Vec<IndirectSignedApprovalVoteV2>),
|
||||
}
|
||||
|
||||
/// Dummy network message type, so we will receive connect/disconnect events.
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum GossipSupportNetworkMessage {}
|
||||
|
||||
/// All network messages on the validation peer-set.
|
||||
#[derive(Debug, Clone, Encode, Decode, PartialEq, Eq, derive_more::From)]
|
||||
pub enum ValidationProtocol {
|
||||
/// Bitfield distribution messages
|
||||
#[codec(index = 1)]
|
||||
#[from]
|
||||
BitfieldDistribution(BitfieldDistributionMessage),
|
||||
/// Statement distribution messages
|
||||
#[codec(index = 3)]
|
||||
#[from]
|
||||
StatementDistribution(StatementDistributionMessage),
|
||||
/// Approval distribution messages
|
||||
#[codec(index = 4)]
|
||||
#[from]
|
||||
ApprovalDistribution(ApprovalDistributionMessage),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the subset of `peers` with the specified `version`.
|
||||
pub fn filter_by_peer_version(
|
||||
peers: &[(PeerId, peer_set::ProtocolVersion)],
|
||||
version: peer_set::ProtocolVersion,
|
||||
) -> Vec<PeerId> {
|
||||
peers.iter().filter(|(_, v)| v == &version).map(|(p, _)| *p).collect::<Vec<_>>()
|
||||
}
|
||||
@@ -0,0 +1,616 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! All peersets and protocols used for teyrchains.
|
||||
|
||||
use derive_more::Display;
|
||||
use pezkuwi_primitives::Hash;
|
||||
use sc_network::{
|
||||
config::SetConfig, peer_store::PeerStoreProvider, service::NotificationMetrics,
|
||||
types::ProtocolName, NetworkBackend, NotificationService,
|
||||
};
|
||||
use sp_runtime::traits::Block;
|
||||
use std::{
|
||||
collections::{hash_map::Entry, HashMap},
|
||||
ops::{Index, IndexMut},
|
||||
sync::Arc,
|
||||
};
|
||||
use strum::{EnumIter, IntoEnumIterator};
|
||||
|
||||
/// On-the-wire name of the legacy collation protocol. It exists for version 1 only.
const LEGACY_COLLATION_PROTOCOL_V1: &str = "/pezkuwi/collation/1";

/// Version number associated with the legacy collation protocol name; always 1.
const LEGACY_COLLATION_PROTOCOL_VERSION_V1: u32 = 1;

/// Maximum notification size (`100 * 1024`). Currently the same constant for every peer set.
pub const MAX_NOTIFICATION_SIZE: u64 = 100 * 1024;

/// Upper bound on incoming connection streams that a validator (authority) node accepts on the
/// collation protocol.
pub const MAX_AUTHORITY_INCOMING_STREAMS: u32 = 100;
|
||||
|
||||
/// The peer-sets and thus the protocols which are used for the network.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, EnumIter)]
|
||||
pub enum PeerSet {
|
||||
/// The validation peer-set is responsible for all messages related to candidate validation and
|
||||
/// communication among validators.
|
||||
Validation,
|
||||
/// The collation peer-set is used for validator<>collator communication.
|
||||
Collation,
|
||||
}
|
||||
|
||||
/// Flag telling whether the local node is an authority.
///
/// The peer set configuration is adjusted depending on this value.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum IsAuthority {
	/// The node is an authority.
	Yes,
	/// The node is not an authority.
	No,
}
|
||||
|
||||
impl PeerSet {
|
||||
/// Get `sc_network` peer set configurations for each peerset on the default version.
|
||||
///
|
||||
/// Those should be used in the network configuration to register the protocols with the
|
||||
/// network service.
|
||||
pub fn get_info<B: Block, N: NetworkBackend<B, <B as Block>::Hash>>(
|
||||
self,
|
||||
is_authority: IsAuthority,
|
||||
peerset_protocol_names: &PeerSetProtocolNames,
|
||||
metrics: NotificationMetrics,
|
||||
peer_store_handle: Arc<dyn PeerStoreProvider>,
|
||||
) -> (N::NotificationProtocolConfig, (PeerSet, Box<dyn NotificationService>)) {
|
||||
// Networking layer relies on `get_main_name()` being the main name of the protocol
|
||||
// for peersets and connection management.
|
||||
let protocol = peerset_protocol_names.get_main_name(self);
|
||||
let fallback_names = PeerSetProtocolNames::get_fallback_names(
|
||||
self,
|
||||
&peerset_protocol_names.genesis_hash,
|
||||
peerset_protocol_names.fork_id.as_deref(),
|
||||
);
|
||||
let max_notification_size = self.get_max_notification_size(is_authority);
|
||||
|
||||
match self {
|
||||
PeerSet::Validation => {
|
||||
let (config, notification_service) = N::notification_config(
|
||||
protocol,
|
||||
fallback_names,
|
||||
max_notification_size,
|
||||
None,
|
||||
SetConfig {
|
||||
// we allow full nodes to connect to validators for gossip
|
||||
// to ensure any `MIN_GOSSIP_PEERS` always include reserved peers
|
||||
// we limit the amount of non-reserved slots to be less
|
||||
// than `MIN_GOSSIP_PEERS` in total
|
||||
in_peers: super::MIN_GOSSIP_PEERS as u32 / 2 - 1,
|
||||
out_peers: super::MIN_GOSSIP_PEERS as u32 / 2 - 1,
|
||||
reserved_nodes: Vec::new(),
|
||||
non_reserved_mode: sc_network::config::NonReservedPeerMode::Accept,
|
||||
},
|
||||
metrics,
|
||||
peer_store_handle,
|
||||
);
|
||||
|
||||
(config, (PeerSet::Validation, notification_service))
|
||||
},
|
||||
PeerSet::Collation => {
|
||||
let (config, notification_service) = N::notification_config(
|
||||
protocol,
|
||||
fallback_names,
|
||||
max_notification_size,
|
||||
None,
|
||||
SetConfig {
|
||||
// Non-authority nodes don't need to accept incoming connections on this
|
||||
// peer set:
|
||||
in_peers: if is_authority == IsAuthority::Yes {
|
||||
MAX_AUTHORITY_INCOMING_STREAMS
|
||||
} else {
|
||||
0
|
||||
},
|
||||
out_peers: 0,
|
||||
reserved_nodes: Vec::new(),
|
||||
non_reserved_mode: if is_authority == IsAuthority::Yes {
|
||||
sc_network::config::NonReservedPeerMode::Accept
|
||||
} else {
|
||||
sc_network::config::NonReservedPeerMode::Deny
|
||||
},
|
||||
},
|
||||
metrics,
|
||||
peer_store_handle,
|
||||
);
|
||||
|
||||
(config, (PeerSet::Collation, notification_service))
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the main protocol version for this peer set.
|
||||
///
|
||||
/// Networking layer relies on `get_main_version()` being the version
|
||||
/// of the main protocol name reported by [`PeerSetProtocolNames::get_main_name()`].
|
||||
pub fn get_main_version(self) -> ProtocolVersion {
|
||||
match self {
|
||||
PeerSet::Validation => ValidationVersion::V3.into(),
|
||||
PeerSet::Collation => CollationVersion::V2.into(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the max notification size for this peer set.
|
||||
pub fn get_max_notification_size(self, _: IsAuthority) -> u64 {
|
||||
MAX_NOTIFICATION_SIZE
|
||||
}
|
||||
|
||||
/// Get the peer set label for metrics reporting.
|
||||
pub fn get_label(self) -> &'static str {
|
||||
match self {
|
||||
PeerSet::Validation => "validation",
|
||||
PeerSet::Collation => "collation",
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the protocol label for metrics reporting.
|
||||
pub fn get_protocol_label(self, version: ProtocolVersion) -> Option<&'static str> {
|
||||
// Unfortunately, labels must be static strings, so we must manually cover them
|
||||
// for all protocol versions here.
|
||||
match self {
|
||||
PeerSet::Validation =>
|
||||
if version == ValidationVersion::V3.into() {
|
||||
Some("validation/3")
|
||||
} else {
|
||||
None
|
||||
},
|
||||
PeerSet::Collation =>
|
||||
if version == CollationVersion::V1.into() {
|
||||
Some("collation/1")
|
||||
} else if version == CollationVersion::V2.into() {
|
||||
Some("collation/2")
|
||||
} else {
|
||||
None
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A small and nifty collection that allows to store data pertaining to each peer set.
|
||||
#[derive(Debug, Default)]
|
||||
pub struct PerPeerSet<T> {
|
||||
validation: T,
|
||||
collation: T,
|
||||
}
|
||||
|
||||
impl<T> Index<PeerSet> for PerPeerSet<T> {
|
||||
type Output = T;
|
||||
fn index(&self, index: PeerSet) -> &T {
|
||||
match index {
|
||||
PeerSet::Validation => &self.validation,
|
||||
PeerSet::Collation => &self.collation,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> IndexMut<PeerSet> for PerPeerSet<T> {
|
||||
fn index_mut(&mut self, index: PeerSet) -> &mut T {
|
||||
match index {
|
||||
PeerSet::Validation => &mut self.validation,
|
||||
PeerSet::Collation => &mut self.collation,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Get `NonDefaultSetConfig`s for all available peer sets, at their default versions.
|
||||
///
|
||||
/// Should be used during network configuration (added to `NetworkConfiguration::extra_sets`)
|
||||
/// or shortly after startup to register the protocols with the network service.
|
||||
pub fn peer_sets_info<B: Block, N: NetworkBackend<B, <B as Block>::Hash>>(
|
||||
is_authority: IsAuthority,
|
||||
peerset_protocol_names: &PeerSetProtocolNames,
|
||||
metrics: NotificationMetrics,
|
||||
peer_store_handle: Arc<dyn PeerStoreProvider>,
|
||||
) -> Vec<(N::NotificationProtocolConfig, (PeerSet, Box<dyn NotificationService>))> {
|
||||
PeerSet::iter()
|
||||
.map(|s| {
|
||||
s.get_info::<B, N>(
|
||||
is_authority,
|
||||
&peerset_protocol_names,
|
||||
metrics.clone(),
|
||||
Arc::clone(&peer_store_handle),
|
||||
)
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// A generic version of the protocol. This struct must not be created directly.
|
||||
#[derive(Debug, Clone, Copy, Display, PartialEq, Eq, Hash)]
|
||||
pub struct ProtocolVersion(u32);
|
||||
|
||||
impl From<ProtocolVersion> for u32 {
|
||||
fn from(version: ProtocolVersion) -> u32 {
|
||||
version.0
|
||||
}
|
||||
}
|
||||
|
||||
/// Supported validation protocol versions. Only versions defined here must be used in the codebase.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, EnumIter)]
|
||||
pub enum ValidationVersion {
|
||||
/// The third version.
|
||||
V3 = 3,
|
||||
}
|
||||
|
||||
/// Supported collation protocol versions. Only versions defined here must be used in the codebase.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, EnumIter)]
|
||||
pub enum CollationVersion {
|
||||
/// The first version.
|
||||
V1 = 1,
|
||||
/// The second version.
|
||||
V2 = 2,
|
||||
}
|
||||
|
||||
/// Marker type signalling that a protocol version is not known.
///
/// Used as the error of the `TryFrom<ProtocolVersion>` conversions into the version enums.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct UnknownVersion;
|
||||
|
||||
impl TryFrom<ProtocolVersion> for ValidationVersion {
|
||||
type Error = UnknownVersion;
|
||||
|
||||
fn try_from(p: ProtocolVersion) -> Result<Self, UnknownVersion> {
|
||||
for v in Self::iter() {
|
||||
if v as u32 == p.0 {
|
||||
return Ok(v);
|
||||
}
|
||||
}
|
||||
|
||||
Err(UnknownVersion)
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<ProtocolVersion> for CollationVersion {
|
||||
type Error = UnknownVersion;
|
||||
|
||||
fn try_from(p: ProtocolVersion) -> Result<Self, UnknownVersion> {
|
||||
for v in Self::iter() {
|
||||
if v as u32 == p.0 {
|
||||
return Ok(v);
|
||||
}
|
||||
}
|
||||
|
||||
Err(UnknownVersion)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ValidationVersion> for ProtocolVersion {
|
||||
fn from(version: ValidationVersion) -> ProtocolVersion {
|
||||
ProtocolVersion(version as u32)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<CollationVersion> for ProtocolVersion {
|
||||
fn from(version: CollationVersion) -> ProtocolVersion {
|
||||
ProtocolVersion(version as u32)
|
||||
}
|
||||
}
|
||||
|
||||
/// On the wire protocol name to [`PeerSet`] mapping.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct PeerSetProtocolNames {
|
||||
protocols: HashMap<ProtocolName, (PeerSet, ProtocolVersion)>,
|
||||
names: HashMap<(PeerSet, ProtocolVersion), ProtocolName>,
|
||||
genesis_hash: Hash,
|
||||
fork_id: Option<String>,
|
||||
}
|
||||
|
||||
impl PeerSetProtocolNames {
|
||||
/// Construct [`PeerSetProtocolNames`] using `genesis_hash` and `fork_id`.
|
||||
pub fn new(genesis_hash: Hash, fork_id: Option<&str>) -> Self {
|
||||
let mut protocols = HashMap::new();
|
||||
let mut names = HashMap::new();
|
||||
for protocol in PeerSet::iter() {
|
||||
match protocol {
|
||||
PeerSet::Validation =>
|
||||
for version in ValidationVersion::iter() {
|
||||
Self::register_main_protocol(
|
||||
&mut protocols,
|
||||
&mut names,
|
||||
protocol,
|
||||
version.into(),
|
||||
&genesis_hash,
|
||||
fork_id,
|
||||
);
|
||||
},
|
||||
PeerSet::Collation => {
|
||||
for version in CollationVersion::iter() {
|
||||
Self::register_main_protocol(
|
||||
&mut protocols,
|
||||
&mut names,
|
||||
protocol,
|
||||
version.into(),
|
||||
&genesis_hash,
|
||||
fork_id,
|
||||
);
|
||||
}
|
||||
Self::register_legacy_collation_protocol(&mut protocols, protocol);
|
||||
},
|
||||
}
|
||||
}
|
||||
Self { protocols, names, genesis_hash, fork_id: fork_id.map(|fork_id| fork_id.into()) }
|
||||
}
|
||||
|
||||
/// Helper function to register main protocol.
|
||||
fn register_main_protocol(
|
||||
protocols: &mut HashMap<ProtocolName, (PeerSet, ProtocolVersion)>,
|
||||
names: &mut HashMap<(PeerSet, ProtocolVersion), ProtocolName>,
|
||||
protocol: PeerSet,
|
||||
version: ProtocolVersion,
|
||||
genesis_hash: &Hash,
|
||||
fork_id: Option<&str>,
|
||||
) {
|
||||
let protocol_name = Self::generate_name(genesis_hash, fork_id, protocol, version);
|
||||
names.insert((protocol, version), protocol_name.clone());
|
||||
Self::insert_protocol_or_panic(protocols, protocol_name, protocol, version);
|
||||
}
|
||||
|
||||
/// Helper function to register legacy collation protocol.
|
||||
fn register_legacy_collation_protocol(
|
||||
protocols: &mut HashMap<ProtocolName, (PeerSet, ProtocolVersion)>,
|
||||
protocol: PeerSet,
|
||||
) {
|
||||
Self::insert_protocol_or_panic(
|
||||
protocols,
|
||||
LEGACY_COLLATION_PROTOCOL_V1.into(),
|
||||
protocol,
|
||||
ProtocolVersion(LEGACY_COLLATION_PROTOCOL_VERSION_V1),
|
||||
)
|
||||
}
|
||||
|
||||
/// Helper function to make sure no protocols have the same name.
|
||||
fn insert_protocol_or_panic(
|
||||
protocols: &mut HashMap<ProtocolName, (PeerSet, ProtocolVersion)>,
|
||||
name: ProtocolName,
|
||||
protocol: PeerSet,
|
||||
version: ProtocolVersion,
|
||||
) {
|
||||
match protocols.entry(name) {
|
||||
Entry::Vacant(entry) => {
|
||||
entry.insert((protocol, version));
|
||||
},
|
||||
Entry::Occupied(entry) => {
|
||||
panic!(
|
||||
"Protocol {:?} (version {}) has the same on-the-wire name as protocol {:?} (version {}): `{}`.",
|
||||
protocol,
|
||||
version,
|
||||
entry.get().0,
|
||||
entry.get().1,
|
||||
entry.key(),
|
||||
);
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Lookup the protocol using its on the wire name.
|
||||
pub fn try_get_protocol(&self, name: &ProtocolName) -> Option<(PeerSet, ProtocolVersion)> {
|
||||
self.protocols.get(name).map(ToOwned::to_owned)
|
||||
}
|
||||
|
||||
/// Get the main protocol name. It's used by the networking for keeping track
|
||||
/// of peersets and connections.
|
||||
pub fn get_main_name(&self, protocol: PeerSet) -> ProtocolName {
|
||||
self.get_name(protocol, protocol.get_main_version())
|
||||
}
|
||||
|
||||
/// Get the protocol name for specific version.
|
||||
pub fn get_name(&self, protocol: PeerSet, version: ProtocolVersion) -> ProtocolName {
|
||||
self.names
|
||||
.get(&(protocol, version))
|
||||
.expect("Protocols & versions are specified via enums defined above, and they are all registered in `new()`; qed")
|
||||
.clone()
|
||||
}
|
||||
|
||||
/// The protocol name of this protocol based on `genesis_hash` and `fork_id`.
|
||||
fn generate_name(
|
||||
genesis_hash: &Hash,
|
||||
fork_id: Option<&str>,
|
||||
protocol: PeerSet,
|
||||
version: ProtocolVersion,
|
||||
) -> ProtocolName {
|
||||
let prefix = if let Some(fork_id) = fork_id {
|
||||
format!("/{}/{}", hex::encode(genesis_hash), fork_id)
|
||||
} else {
|
||||
format!("/{}", hex::encode(genesis_hash))
|
||||
};
|
||||
|
||||
let short_name = match protocol {
|
||||
PeerSet::Validation => "validation",
|
||||
PeerSet::Collation => "collation",
|
||||
};
|
||||
|
||||
format!("{}/{}/{}", prefix, short_name, version).into()
|
||||
}
|
||||
|
||||
/// Get the protocol fallback names. Currently, it only holds
|
||||
/// the legacy name for the collation protocol version 1.
|
||||
fn get_fallback_names(
|
||||
protocol: PeerSet,
|
||||
_genesis_hash: &Hash,
|
||||
_fork_id: Option<&str>,
|
||||
) -> Vec<ProtocolName> {
|
||||
let mut fallbacks = vec![];
|
||||
match protocol {
|
||||
PeerSet::Validation => {
|
||||
// The validation protocol no longer supports protocol versions 1 and 2,
|
||||
// and only version 3 is used. Therefore, fallback protocols remain empty.
|
||||
},
|
||||
PeerSet::Collation => {
|
||||
fallbacks.push(LEGACY_COLLATION_PROTOCOL_V1.into());
|
||||
},
|
||||
};
|
||||
fallbacks
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
	use super::{
		CollationVersion, Hash, PeerSet, PeerSetProtocolNames, ProtocolVersion, ValidationVersion,
	};
	use strum::IntoEnumIterator;

	/// Arbitrary raw version number, convertible into a `ProtocolVersion` for tests only.
	struct TestVersion(u32);

	impl From<TestVersion> for ProtocolVersion {
		fn from(version: TestVersion) -> ProtocolVersion {
			ProtocolVersion(version.0)
		}
	}

	/// The fixed genesis hash shared by all tests in this module.
	fn genesis_hash() -> Hash {
		Hash::from([
			122, 200, 116, 29, 232, 183, 20, 109, 138, 86, 23, 253, 70, 41, 20, 85, 127, 230, 60,
			38, 90, 127, 28, 16, 231, 218, 227, 40, 88, 238, 187, 128,
		])
	}

	#[test]
	fn protocol_names_are_correctly_generated() {
		let genesis_hash = genesis_hash();
		let fork_id = Some("test-fork");

		// (fork id, peer set, raw version, expected on-the-wire name)
		let cases = [
			(
				None,
				PeerSet::Validation,
				3,
				"/7ac8741de8b7146d8a5617fd462914557fe63c265a7f1c10e7dae32858eebb80/validation/3",
			),
			(
				None,
				PeerSet::Collation,
				5,
				"/7ac8741de8b7146d8a5617fd462914557fe63c265a7f1c10e7dae32858eebb80/collation/5",
			),
			(
				fork_id,
				PeerSet::Validation,
				7,
				"/7ac8741de8b7146d8a5617fd462914557fe63c265a7f1c10e7dae32858eebb80/test-fork/validation/7",
			),
			(
				fork_id,
				PeerSet::Collation,
				11,
				"/7ac8741de8b7146d8a5617fd462914557fe63c265a7f1c10e7dae32858eebb80/test-fork/collation/11",
			),
		];

		for (fork_id, peer_set, raw_version, expected) in cases {
			let name = PeerSetProtocolNames::generate_name(
				&genesis_hash,
				fork_id,
				peer_set,
				TestVersion(raw_version).into(),
			);
			assert_eq!(name, expected.into());
		}
	}

	#[test]
	fn all_protocol_names_are_known() {
		let protocol_names = PeerSetProtocolNames::new(genesis_hash(), None);
		let lookup =
			|wire_name: &'static str| protocol_names.try_get_protocol(&wire_name.into());

		assert_eq!(
			lookup("/7ac8741de8b7146d8a5617fd462914557fe63c265a7f1c10e7dae32858eebb80/validation/3"),
			Some((PeerSet::Validation, TestVersion(3).into())),
		);

		// There is no legacy name for the validation protocol.
		assert!(lookup("/pezkuwi/validation/1").is_none());

		assert_eq!(
			lookup("/7ac8741de8b7146d8a5617fd462914557fe63c265a7f1c10e7dae32858eebb80/collation/1"),
			Some((PeerSet::Collation, TestVersion(1).into())),
		);

		assert_eq!(
			lookup("/pezkuwi/collation/1"),
			Some((PeerSet::Collation, TestVersion(1).into())),
		);
	}

	#[test]
	fn all_protocol_versions_are_registered() {
		let genesis_hash = genesis_hash();
		let protocol_names = PeerSetProtocolNames::new(genesis_hash, None);

		let assert_registered = |peer_set: PeerSet, version: ProtocolVersion| {
			assert_eq!(
				protocol_names.get_name(peer_set, version),
				PeerSetProtocolNames::generate_name(&genesis_hash, None, peer_set, version),
			);
		};

		for protocol in PeerSet::iter() {
			match protocol {
				PeerSet::Validation => ValidationVersion::iter()
					.for_each(|version| assert_registered(protocol, version.into())),
				PeerSet::Collation => CollationVersion::iter()
					.for_each(|version| assert_registered(protocol, version.into())),
			}
		}
	}

	#[test]
	fn all_protocol_versions_have_labels() {
		for protocol in PeerSet::iter() {
			match protocol {
				PeerSet::Validation => ValidationVersion::iter().for_each(|version| {
					protocol
						.get_protocol_label(version.into())
						.expect("All validation protocol versions must have a label.");
				}),
				PeerSet::Collation => CollationVersion::iter().for_each(|version| {
					protocol
						.get_protocol_label(version.into())
						.expect("All collation protocol versions must have a label.");
				}),
			}
		}
	}
}
|
||||
@@ -0,0 +1,90 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
pub use sc_network::ReputationChange;
|
||||
|
||||
/// Unified set of reputation costs (for annoying behavior) and benefits (for good behavior).
///
/// Every variant carries a static description; the numeric weight of a variant is given by
/// [`UnifiedReputationChange::cost_or_benefit`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(missing_docs)]
pub enum UnifiedReputationChange {
	CostMajor(&'static str),
	CostMinor(&'static str),
	CostMajorRepeated(&'static str),
	CostMinorRepeated(&'static str),
	Malicious(&'static str),
	BenefitMinorFirst(&'static str),
	BenefitMinor(&'static str),
	BenefitMajorFirst(&'static str),
	BenefitMajor(&'static str),
}

impl UnifiedReputationChange {
	/// Numeric cost (negative) or benefit (positive) associated with the variant.
	///
	/// Order of magnitude rationale:
	///
	/// * the peerset will not connect to a peer whose reputation is below a fixed value
	/// * `max(2% * rep, 1)` is the delta of convergence towards a reputation of 0
	///
	/// The whole range of an `i32` should be used, so order of magnitude of
	/// something malicious should be `1<<20` (give or take).
	pub const fn cost_or_benefit(&self) -> i32 {
		match self {
			// Costs, harshest first.
			Self::Malicious(_) => i32::MIN,
			Self::CostMajorRepeated(_) => -600_000,
			Self::CostMajor(_) => -300_000,
			Self::CostMinorRepeated(_) => -200_000,
			Self::CostMinor(_) => -100_000,
			// Benefits, smallest first.
			Self::BenefitMinor(_) => 10_000,
			Self::BenefitMinorFirst(_) => 15_000,
			Self::BenefitMajor(_) => 200_000,
			Self::BenefitMajorFirst(_) => 300_000,
		}
	}

	/// The static description carried by the variant.
	pub const fn description(&self) -> &'static str {
		match *self {
			Self::CostMinor(text) |
			Self::CostMajor(text) |
			Self::CostMinorRepeated(text) |
			Self::CostMajorRepeated(text) |
			Self::Malicious(text) |
			Self::BenefitMajorFirst(text) |
			Self::BenefitMajor(text) |
			Self::BenefitMinorFirst(text) |
			Self::BenefitMinor(text) => text,
		}
	}

	/// `true` for the four `Benefit*` variants, i.e. when the change rewards good behavior.
	pub const fn is_benefit(&self) -> bool {
		matches!(
			self,
			Self::BenefitMajorFirst(_) |
				Self::BenefitMajor(_) |
				Self::BenefitMinorFirst(_) |
				Self::BenefitMinor(_)
		)
	}
}
|
||||
|
||||
impl From<UnifiedReputationChange> for ReputationChange {
|
||||
fn from(value: UnifiedReputationChange) -> Self {
|
||||
ReputationChange::new(value.cost_or_benefit(), value.description())
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,41 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! Error handling related code and Error/Result definitions.
|
||||
|
||||
use sc_network_types::PeerId;
|
||||
|
||||
use codec::Error as DecodingError;
|
||||
|
||||
#[allow(missing_docs)]
|
||||
#[fatality::fatality(splitable)]
|
||||
pub enum Error {
|
||||
// Incoming request stream exhausted. Should only happen on shutdown.
|
||||
#[fatal]
|
||||
#[error("Incoming request channel got closed.")]
|
||||
RequestChannelExhausted,
|
||||
|
||||
/// Decoding failed, we were able to change the peer's reputation accordingly.
|
||||
#[error("Decoding request failed for peer {0}.")]
|
||||
DecodingError(PeerId, #[source] DecodingError),
|
||||
|
||||
/// Decoding failed, but sending reputation change failed.
|
||||
#[error("Decoding request failed for peer {0}, and changing reputation failed.")]
|
||||
DecodingErrorNoReputationChange(PeerId, #[source] DecodingError),
|
||||
}
|
||||
|
||||
/// General result based on above `Error`.
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
@@ -0,0 +1,232 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use std::marker::PhantomData;
|
||||
|
||||
use futures::{channel::oneshot, StreamExt};
|
||||
|
||||
use codec::{Decode, Encode};
|
||||
|
||||
use sc_network::{config as netconfig, NetworkBackend};
|
||||
use sc_network_types::PeerId;
|
||||
use sp_runtime::traits::Block;
|
||||
|
||||
use super::{IsRequest, ReqProtocolNames};
|
||||
use crate::UnifiedReputationChange;
|
||||
|
||||
mod error;
|
||||
pub use error::{Error, FatalError, JfyiError, Result};
|
||||
|
||||
/// A request coming in, including a sender for sending responses.
|
||||
///
|
||||
/// Typed `IncomingRequest`s, see `IncomingRequest::get_config_receiver` and substrate
|
||||
/// `NetworkConfiguration` for more information.
|
||||
#[derive(Debug)]
|
||||
pub struct IncomingRequest<Req> {
|
||||
/// `PeerId` of sending peer.
|
||||
pub peer: PeerId,
|
||||
/// The sent request.
|
||||
pub payload: Req,
|
||||
/// Sender for sending response back.
|
||||
pub pending_response: OutgoingResponseSender<Req>,
|
||||
}
|
||||
|
||||
impl<Req> IncomingRequest<Req>
|
||||
where
|
||||
Req: IsRequest + Decode + Encode,
|
||||
Req::Response: Encode,
|
||||
{
|
||||
/// Create configuration for `NetworkConfiguration::request_response_protocols` and a
|
||||
/// corresponding typed receiver.
|
||||
///
|
||||
/// This Register that config with substrate networking and receive incoming requests via the
|
||||
/// returned `IncomingRequestReceiver`.
|
||||
pub fn get_config_receiver<B: Block, N: NetworkBackend<B, <B as Block>::Hash>>(
|
||||
req_protocol_names: &ReqProtocolNames,
|
||||
) -> (IncomingRequestReceiver<Req>, N::RequestResponseProtocolConfig) {
|
||||
let (raw, cfg) = Req::PROTOCOL.get_config::<B, N>(req_protocol_names);
|
||||
(IncomingRequestReceiver { raw, phantom: PhantomData {} }, cfg)
|
||||
}
|
||||
|
||||
/// Create new `IncomingRequest`.
|
||||
pub fn new(
|
||||
peer: PeerId,
|
||||
payload: Req,
|
||||
pending_response: oneshot::Sender<netconfig::OutgoingResponse>,
|
||||
) -> Self {
|
||||
Self {
|
||||
peer,
|
||||
payload,
|
||||
pending_response: OutgoingResponseSender { pending_response, phantom: PhantomData {} },
|
||||
}
|
||||
}
|
||||
|
||||
/// Try building from raw substrate request.
|
||||
///
|
||||
/// This function will fail if the request cannot be decoded and will apply passed in
|
||||
/// reputation changes in that case.
|
||||
///
|
||||
/// Params:
|
||||
/// - The raw request to decode
|
||||
/// - Reputation changes to apply for the peer in case decoding fails.
|
||||
fn try_from_raw(
|
||||
raw: sc_network::config::IncomingRequest,
|
||||
reputation_changes: Vec<UnifiedReputationChange>,
|
||||
) -> std::result::Result<Self, JfyiError> {
|
||||
let sc_network::config::IncomingRequest { payload, peer, pending_response } = raw;
|
||||
let payload = match Req::decode(&mut payload.as_ref()) {
|
||||
Ok(payload) => payload,
|
||||
Err(err) => {
|
||||
let reputation_changes = reputation_changes.into_iter().map(|r| r.into()).collect();
|
||||
let response = sc_network::config::OutgoingResponse {
|
||||
result: Err(()),
|
||||
reputation_changes,
|
||||
sent_feedback: None,
|
||||
};
|
||||
|
||||
if let Err(_) = pending_response.send(response) {
|
||||
return Err(JfyiError::DecodingErrorNoReputationChange(peer, err));
|
||||
}
|
||||
return Err(JfyiError::DecodingError(peer, err));
|
||||
},
|
||||
};
|
||||
Ok(Self::new(peer, payload, pending_response))
|
||||
}
|
||||
|
||||
/// Convert into raw untyped substrate `IncomingRequest`.
|
||||
///
|
||||
/// This is mostly useful for testing.
|
||||
pub fn into_raw(self) -> sc_network::config::IncomingRequest {
|
||||
sc_network::config::IncomingRequest {
|
||||
peer: self.peer,
|
||||
payload: self.payload.encode(),
|
||||
pending_response: self.pending_response.pending_response,
|
||||
}
|
||||
}
|
||||
|
||||
/// Send the response back.
|
||||
///
|
||||
/// Calls [`OutgoingResponseSender::send_response`].
|
||||
pub fn send_response(self, resp: Req::Response) -> std::result::Result<(), Req::Response> {
|
||||
self.pending_response.send_response(resp)
|
||||
}
|
||||
|
||||
/// Send response with additional options.
|
||||
///
|
||||
/// Calls [`OutgoingResponseSender::send_outgoing_response`].
|
||||
pub fn send_outgoing_response(
|
||||
self,
|
||||
resp: OutgoingResponse<<Req as IsRequest>::Response>,
|
||||
) -> std::result::Result<(), ()> {
|
||||
self.pending_response.send_outgoing_response(resp)
|
||||
}
|
||||
}
|
||||
|
||||
/// Sender for sending back responses on an `IncomingRequest`.
|
||||
#[derive(Debug)]
|
||||
pub struct OutgoingResponseSender<Req> {
|
||||
pending_response: oneshot::Sender<netconfig::OutgoingResponse>,
|
||||
phantom: PhantomData<Req>,
|
||||
}
|
||||
|
||||
impl<Req> OutgoingResponseSender<Req>
|
||||
where
|
||||
Req: IsRequest + Decode,
|
||||
Req::Response: Encode,
|
||||
{
|
||||
/// Send the response back.
|
||||
///
|
||||
/// On success we return `Ok(())`, on error we return the not sent `Response`.
|
||||
///
|
||||
/// `netconfig::OutgoingResponse` exposes a way of modifying the peer's reputation. If needed we
|
||||
/// can change this function to expose this feature as well.
|
||||
pub fn send_response(self, resp: Req::Response) -> std::result::Result<(), Req::Response> {
|
||||
self.pending_response
|
||||
.send(netconfig::OutgoingResponse {
|
||||
result: Ok(resp.encode()),
|
||||
reputation_changes: Vec::new(),
|
||||
sent_feedback: None,
|
||||
})
|
||||
.map_err(|_| resp)
|
||||
}
|
||||
|
||||
/// Send response with additional options.
|
||||
///
|
||||
/// This variant allows for waiting for the response to be sent out, allows for changing peer's
|
||||
/// reputation and allows for not sending a response at all (for only changing the peer's
|
||||
/// reputation).
|
||||
pub fn send_outgoing_response(
|
||||
self,
|
||||
resp: OutgoingResponse<<Req as IsRequest>::Response>,
|
||||
) -> std::result::Result<(), ()> {
|
||||
let OutgoingResponse { result, reputation_changes, sent_feedback } = resp;
|
||||
|
||||
let response = netconfig::OutgoingResponse {
|
||||
result: result.map(|v| v.encode()),
|
||||
reputation_changes: reputation_changes.into_iter().map(|c| c.into()).collect(),
|
||||
sent_feedback,
|
||||
};
|
||||
|
||||
self.pending_response.send(response).map_err(|_| ())
|
||||
}
|
||||
}
|
||||
|
||||
/// Typed variant of [`netconfig::OutgoingResponse`].
|
||||
///
|
||||
/// Responses to `IncomingRequest`s.
|
||||
pub struct OutgoingResponse<Response> {
|
||||
/// The payload of the response.
|
||||
///
|
||||
/// `Err(())` if none is available e.g. due to an error while handling the request.
|
||||
pub result: std::result::Result<Response, ()>,
|
||||
|
||||
/// Reputation changes accrued while handling the request. To be applied to the reputation of
|
||||
/// the peer sending the request.
|
||||
pub reputation_changes: Vec<UnifiedReputationChange>,
|
||||
|
||||
/// If provided, the `oneshot::Sender` will be notified when the request has been sent to the
|
||||
/// peer.
|
||||
pub sent_feedback: Option<oneshot::Sender<()>>,
|
||||
}
|
||||
|
||||
/// Receiver for incoming requests.
|
||||
///
|
||||
/// Takes care of decoding and handling of invalid encoded requests.
|
||||
pub struct IncomingRequestReceiver<Req> {
|
||||
raw: async_channel::Receiver<netconfig::IncomingRequest>,
|
||||
phantom: PhantomData<Req>,
|
||||
}
|
||||
|
||||
impl<Req> IncomingRequestReceiver<Req>
|
||||
where
|
||||
Req: IsRequest + Decode + Encode,
|
||||
Req::Response: Encode,
|
||||
{
|
||||
/// Try to receive the next incoming request.
|
||||
///
|
||||
/// Any received request will be decoded, on decoding errors the provided reputation changes
|
||||
/// will be applied and an error will be reported.
|
||||
pub async fn recv<F>(&mut self, reputation_changes: F) -> Result<IncomingRequest<Req>>
|
||||
where
|
||||
F: FnOnce() -> Vec<UnifiedReputationChange>,
|
||||
{
|
||||
let req = match self.raw.next().await {
|
||||
None => return Err(FatalError::RequestChannelExhausted.into()),
|
||||
Some(raw) => IncomingRequest::<Req>::try_from_raw(raw, reputation_changes())?,
|
||||
};
|
||||
Ok(req)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,377 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! Overview of requests/responses as used in `Pezkuwi`.
|
||||
//!
|
||||
//! `enum Protocol` .... List of all supported protocols.
|
||||
//!
|
||||
//! `enum Requests` .... List of all supported requests, each entry matches one in protocols, but
|
||||
//! has the actual request as payload.
|
||||
//!
|
||||
//! `struct IncomingRequest` .... wrapper for incoming requests, containing a sender for sending
|
||||
//! responses.
|
||||
//!
|
||||
//! `struct OutgoingRequest` .... wrapper for outgoing requests, containing a sender used by the
|
||||
//! networking code for delivering responses/delivery errors.
|
||||
//!
|
||||
//! `trait IsRequest` .... A trait describing a particular request. It is used for gathering meta
|
||||
//! data, like what is the corresponding response type.
|
||||
//!
|
||||
//! ## Versioning
|
||||
//!
|
||||
//! Versioning for request-response protocols can be done in multiple ways.
|
||||
//!
|
||||
//! If you're just changing the protocol name but the binary payloads are the same, just add a new
|
||||
//! `fallback_name` to the protocol config.
|
||||
//!
|
||||
//! One way in which versioning has historically been achieved for req-response protocols is to
|
||||
//! bundle the new req-resp version with an upgrade of a notifications protocol. The subsystem would
|
||||
//! then know which request version to use based on stored data about the peer's notifications
|
||||
//! protocol version.
|
||||
//!
|
||||
//! When bumping a notifications protocol version is not needed/desirable, you may add a new
|
||||
//! req-resp protocol and set the old request as a fallback (see
|
||||
//! `OutgoingRequest::new_with_fallback`). A request with the new version will be attempted and if
|
||||
//! the protocol is refused by the peer, the fallback protocol request will be used.
|
||||
//! Information about the actually used protocol will be returned alongside the raw response, so
|
||||
//! that you know how to decode it.
|
||||
|
||||
use std::{collections::HashMap, time::Duration, u64};
|
||||
|
||||
use pezkuwi_primitives::MAX_CODE_SIZE;
|
||||
use sc_network::{NetworkBackend, MAX_RESPONSE_SIZE};
|
||||
use sp_runtime::traits::Block;
|
||||
use strum::{EnumIter, IntoEnumIterator};
|
||||
|
||||
pub use sc_network::{config as network, config::RequestResponseConfig, ProtocolName};
|
||||
|
||||
/// Everything related to handling of incoming requests.
|
||||
pub mod incoming;
|
||||
/// Everything related to handling of outgoing requests.
|
||||
pub mod outgoing;
|
||||
|
||||
pub use incoming::{IncomingRequest, IncomingRequestReceiver};
|
||||
|
||||
pub use outgoing::{OutgoingRequest, OutgoingResult, Recipient, Requests, ResponseSender};
|
||||
|
||||
///// Multiplexer for incoming requests.
|
||||
// pub mod multiplexer;
|
||||
|
||||
/// Actual versioned requests and responses that are sent over the wire.
|
||||
pub mod v1;
|
||||
|
||||
/// Actual versioned requests and responses that are sent over the wire.
|
||||
pub mod v2;
|
||||
|
||||
/// A protocol per subsystem seems to make the most sense, this way we don't need any dispatching
|
||||
/// within protocols.
|
||||
#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, EnumIter)]
|
||||
pub enum Protocol {
|
||||
/// Protocol for chunk fetching, used by availability distribution and availability recovery.
|
||||
ChunkFetchingV1,
|
||||
/// Protocol for fetching collations from collators.
|
||||
CollationFetchingV1,
|
||||
/// Protocol for fetching collations from collators when async backing is enabled.
|
||||
CollationFetchingV2,
|
||||
/// Protocol for fetching seconded PoVs from validators of the same group.
|
||||
PoVFetchingV1,
|
||||
/// Protocol for fetching available data.
|
||||
AvailableDataFetchingV1,
|
||||
/// Sending of dispute statements with application level confirmations.
|
||||
DisputeSendingV1,
|
||||
|
||||
/// Protocol for requesting candidates with attestations in statement distribution
|
||||
/// when async backing is enabled.
|
||||
AttestedCandidateV2,
|
||||
|
||||
/// Protocol for chunk fetching version 2, used by availability distribution and availability
|
||||
/// recovery.
|
||||
ChunkFetchingV2,
|
||||
}
|
||||
|
||||
/// Minimum bandwidth we expect for validators - 500Mbit/s is the recommendation, so approximately
|
||||
/// 50MB per second:
|
||||
const MIN_BANDWIDTH_BYTES: u64 = 50 * 1024 * 1024;
|
||||
|
||||
/// Default request timeout in seconds.
|
||||
///
|
||||
/// When decreasing this value, take into account that the very first request might need to open a
|
||||
/// connection, which can be slow. If this causes problems, we should ensure connectivity via peer
|
||||
/// sets.
|
||||
#[allow(dead_code)]
|
||||
const DEFAULT_REQUEST_TIMEOUT: Duration = Duration::from_secs(3);
|
||||
|
||||
/// Request timeout where we can assume the connection is already open (e.g. we have peers in a
|
||||
/// peer set as well).
|
||||
const DEFAULT_REQUEST_TIMEOUT_CONNECTED: Duration = Duration::from_secs(1);
|
||||
|
||||
/// Timeout for requesting availability chunks.
|
||||
pub const CHUNK_REQUEST_TIMEOUT: Duration = DEFAULT_REQUEST_TIMEOUT_CONNECTED;
|
||||
|
||||
/// This timeout is based on the following parameters, assuming we use asynchronous backing with no
|
||||
/// time budget within a relay block:
|
||||
/// - 500 Mbit/s networking speed
|
||||
/// - 10 MB PoV
|
||||
/// - 10 parallel executions
|
||||
const POV_REQUEST_TIMEOUT_CONNECTED: Duration = Duration::from_millis(2000);
|
||||
|
||||
/// We want attested candidate requests to time out relatively fast,
|
||||
/// because slow requests will bottleneck the backing system. Ideally, we'd have
|
||||
/// an adaptive timeout based on the candidate size, because there will be a lot of variance
|
||||
/// in candidate sizes: candidates with no code and no messages vs candidates with code
|
||||
/// and messages.
|
||||
///
|
||||
/// We supply leniency because there are often large candidates and asynchronous
|
||||
/// backing allows them to be included over a longer window of time. Exponential back-off
|
||||
/// up to a maximum of 10 seconds would be ideal, but isn't supported by the
|
||||
/// infrastructure here yet: see https://github.com/paritytech/polkadot/issues/6009
|
||||
const ATTESTED_CANDIDATE_TIMEOUT: Duration = Duration::from_millis(2500);
|
||||
|
||||
/// We don't want a slow peer to slow down all the others, at the same time we want to get out the
|
||||
/// data quickly in full to at least some peers (as this will reduce load on us as they then can
|
||||
/// start serving the data). So this value is a tradeoff. 5 seems to be sensible. So we would need
|
||||
/// to have 5 slow nodes connected, to delay transfer for others by `ATTESTED_CANDIDATE_TIMEOUT`.
|
||||
pub const MAX_PARALLEL_ATTESTED_CANDIDATE_REQUESTS: u32 = 5;
|
||||
|
||||
/// Response size limit for responses of POV like data.
|
||||
///
|
||||
/// Same as what we use in substrate networking.
|
||||
const POV_RESPONSE_SIZE: u64 = MAX_RESPONSE_SIZE;
|
||||
|
||||
/// Maximum response sizes for `AttestedCandidateV2`.
|
||||
///
|
||||
/// This is `MAX_CODE_SIZE` plus some additional space for protocol overhead and
|
||||
/// additional backing statements.
|
||||
const ATTESTED_CANDIDATE_RESPONSE_SIZE: u64 = MAX_CODE_SIZE as u64 + 100_000;
|
||||
|
||||
/// We can have relative large timeouts here, there is no value of hitting a
|
||||
/// timeout as we want to get statements through to each node in any case.
|
||||
pub const DISPUTE_REQUEST_TIMEOUT: Duration = Duration::from_secs(12);
|
||||
|
||||
impl Protocol {
|
||||
/// Get a configuration for a given Request response protocol.
|
||||
///
|
||||
/// Returns a `ProtocolConfig` for this protocol.
|
||||
/// Use this if you plan only to send requests for this protocol.
|
||||
pub fn get_outbound_only_config<B: Block, N: NetworkBackend<B, <B as Block>::Hash>>(
|
||||
self,
|
||||
req_protocol_names: &ReqProtocolNames,
|
||||
) -> N::RequestResponseProtocolConfig {
|
||||
self.create_config::<B, N>(req_protocol_names, None)
|
||||
}
|
||||
|
||||
/// Get a configuration for a given Request response protocol.
|
||||
///
|
||||
/// Returns a receiver for messages received on this protocol and the requested
|
||||
/// `ProtocolConfig`.
|
||||
pub fn get_config<B: Block, N: NetworkBackend<B, <B as Block>::Hash>>(
|
||||
self,
|
||||
req_protocol_names: &ReqProtocolNames,
|
||||
) -> (async_channel::Receiver<network::IncomingRequest>, N::RequestResponseProtocolConfig) {
|
||||
let (tx, rx) = async_channel::bounded(self.get_channel_size());
|
||||
let cfg = self.create_config::<B, N>(req_protocol_names, Some(tx));
|
||||
(rx, cfg)
|
||||
}
|
||||
|
||||
fn create_config<B: Block, N: NetworkBackend<B, <B as Block>::Hash>>(
|
||||
self,
|
||||
req_protocol_names: &ReqProtocolNames,
|
||||
tx: Option<async_channel::Sender<network::IncomingRequest>>,
|
||||
) -> N::RequestResponseProtocolConfig {
|
||||
let name = req_protocol_names.get_name(self);
|
||||
let legacy_names = self.get_legacy_name().into_iter().map(Into::into).collect();
|
||||
match self {
|
||||
Protocol::ChunkFetchingV1 | Protocol::ChunkFetchingV2 => N::request_response_config(
|
||||
name,
|
||||
legacy_names,
|
||||
1_000,
|
||||
POV_RESPONSE_SIZE,
|
||||
// We are connected to all validators:
|
||||
CHUNK_REQUEST_TIMEOUT,
|
||||
tx,
|
||||
),
|
||||
Protocol::CollationFetchingV1 | Protocol::CollationFetchingV2 => {
|
||||
N::request_response_config(
|
||||
name,
|
||||
legacy_names,
|
||||
1_000,
|
||||
POV_RESPONSE_SIZE,
|
||||
// Taken from initial implementation in collator protocol:
|
||||
POV_REQUEST_TIMEOUT_CONNECTED,
|
||||
tx,
|
||||
)
|
||||
},
|
||||
Protocol::PoVFetchingV1 => N::request_response_config(
|
||||
name,
|
||||
legacy_names,
|
||||
1_000,
|
||||
POV_RESPONSE_SIZE,
|
||||
POV_REQUEST_TIMEOUT_CONNECTED,
|
||||
tx,
|
||||
),
|
||||
Protocol::AvailableDataFetchingV1 => N::request_response_config(
|
||||
name,
|
||||
legacy_names,
|
||||
1_000,
|
||||
// Available data size is dominated by the PoV size.
|
||||
POV_RESPONSE_SIZE,
|
||||
POV_REQUEST_TIMEOUT_CONNECTED,
|
||||
tx,
|
||||
),
|
||||
Protocol::DisputeSendingV1 => N::request_response_config(
|
||||
name,
|
||||
legacy_names,
|
||||
1_000,
|
||||
// Responses are just confirmation, in essence not even a bit. So 100 seems
|
||||
// plenty.
|
||||
100,
|
||||
DISPUTE_REQUEST_TIMEOUT,
|
||||
tx,
|
||||
),
|
||||
Protocol::AttestedCandidateV2 => N::request_response_config(
|
||||
name,
|
||||
legacy_names,
|
||||
1_000,
|
||||
ATTESTED_CANDIDATE_RESPONSE_SIZE,
|
||||
ATTESTED_CANDIDATE_TIMEOUT,
|
||||
tx,
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
// Channel sizes for the supported protocols.
|
||||
fn get_channel_size(self) -> usize {
|
||||
match self {
|
||||
// Hundreds of validators will start requesting their chunks once they see a candidate
|
||||
// awaiting availability on chain. Given that they will see that block at different
|
||||
// times (due to network delays), 100 seems big enough to accommodate for "bursts",
|
||||
// assuming we can service requests relatively quickly, which would need to be measured
|
||||
// as well.
|
||||
Protocol::ChunkFetchingV1 | Protocol::ChunkFetchingV2 => 100,
|
||||
// 10 seems reasonable, considering group sizes of max 10 validators.
|
||||
Protocol::CollationFetchingV1 | Protocol::CollationFetchingV2 => 10,
|
||||
// 10 seems reasonable, considering group sizes of max 10 validators.
|
||||
Protocol::PoVFetchingV1 => 10,
|
||||
// Validators are constantly self-selecting to request available data which may lead
|
||||
// to constant load and occasional burstiness.
|
||||
Protocol::AvailableDataFetchingV1 => 100,
|
||||
// Incoming requests can get bursty, we should also be able to handle them fast on
|
||||
// average, so something in the ballpark of 100 should be fine. Nodes will retry on
|
||||
// failure, so having a good value here is mostly about performance tuning.
|
||||
Protocol::DisputeSendingV1 => 100,
|
||||
|
||||
Protocol::AttestedCandidateV2 => {
|
||||
// We assume we can utilize up to 70% of the available bandwidth for statements.
|
||||
// This is just a guess/estimate, with the following considerations: If we are
|
||||
// faster than that, queue size will stay low anyway, even if not - requesters will
|
||||
// get an immediate error, but if we are slower, requesters will run in a timeout -
|
||||
// wasting precious time.
|
||||
let available_bandwidth = 7 * MIN_BANDWIDTH_BYTES / 10;
|
||||
let size = u64::saturating_sub(
|
||||
ATTESTED_CANDIDATE_TIMEOUT.as_millis() as u64 * available_bandwidth /
|
||||
(1000 * MAX_CODE_SIZE as u64),
|
||||
MAX_PARALLEL_ATTESTED_CANDIDATE_REQUESTS as u64,
|
||||
);
|
||||
debug_assert!(
|
||||
size > 0,
|
||||
"We should have a channel size greater zero, otherwise we won't accept any requests."
|
||||
);
|
||||
size as usize
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Legacy protocol name associated with each peer set, if any.
|
||||
/// The request will be tried on this legacy protocol name if the remote refuses to speak the
|
||||
/// protocol.
|
||||
const fn get_legacy_name(self) -> Option<&'static str> {
|
||||
match self {
|
||||
Protocol::ChunkFetchingV1 => Some("/pezkuwi/req_chunk/1"),
|
||||
Protocol::CollationFetchingV1 => Some("/pezkuwi/req_collation/1"),
|
||||
Protocol::PoVFetchingV1 => Some("/pezkuwi/req_pov/1"),
|
||||
Protocol::AvailableDataFetchingV1 => Some("/pezkuwi/req_available_data/1"),
|
||||
Protocol::DisputeSendingV1 => Some("/pezkuwi/send_dispute/1"),
|
||||
|
||||
// Introduced after legacy names became legacy.
|
||||
Protocol::AttestedCandidateV2 => None,
|
||||
Protocol::CollationFetchingV2 => None,
|
||||
Protocol::ChunkFetchingV2 => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Common properties of any `Request`.
|
||||
pub trait IsRequest {
|
||||
/// Each request has a corresponding `Response`.
|
||||
type Response;
|
||||
|
||||
/// What protocol this `Request` implements.
|
||||
const PROTOCOL: Protocol;
|
||||
}
|
||||
|
||||
/// Type for getting on the wire [`Protocol`] names using genesis hash & fork id.
|
||||
#[derive(Clone)]
|
||||
pub struct ReqProtocolNames {
|
||||
names: HashMap<Protocol, ProtocolName>,
|
||||
}
|
||||
|
||||
impl ReqProtocolNames {
|
||||
/// Construct [`ReqProtocolNames`] from `genesis_hash` and `fork_id`.
|
||||
pub fn new<Hash: AsRef<[u8]>>(genesis_hash: Hash, fork_id: Option<&str>) -> Self {
|
||||
let mut names = HashMap::new();
|
||||
for protocol in Protocol::iter() {
|
||||
names.insert(protocol, Self::generate_name(protocol, &genesis_hash, fork_id));
|
||||
}
|
||||
Self { names }
|
||||
}
|
||||
|
||||
/// Get on the wire [`Protocol`] name.
|
||||
pub fn get_name(&self, protocol: Protocol) -> ProtocolName {
|
||||
self.names
|
||||
.get(&protocol)
|
||||
.expect("All `Protocol` enum variants are added above via `strum`; qed")
|
||||
.clone()
|
||||
}
|
||||
|
||||
/// Protocol name of this protocol based on `genesis_hash` and `fork_id`.
|
||||
fn generate_name<Hash: AsRef<[u8]>>(
|
||||
protocol: Protocol,
|
||||
genesis_hash: &Hash,
|
||||
fork_id: Option<&str>,
|
||||
) -> ProtocolName {
|
||||
let prefix = if let Some(fork_id) = fork_id {
|
||||
format!("/{}/{}", hex::encode(genesis_hash), fork_id)
|
||||
} else {
|
||||
format!("/{}", hex::encode(genesis_hash))
|
||||
};
|
||||
|
||||
let short_name = match protocol {
|
||||
// V1:
|
||||
Protocol::ChunkFetchingV1 => "/req_chunk/1",
|
||||
Protocol::CollationFetchingV1 => "/req_collation/1",
|
||||
Protocol::PoVFetchingV1 => "/req_pov/1",
|
||||
Protocol::AvailableDataFetchingV1 => "/req_available_data/1",
|
||||
Protocol::DisputeSendingV1 => "/send_dispute/1",
|
||||
|
||||
// V2:
|
||||
Protocol::CollationFetchingV2 => "/req_collation/2",
|
||||
Protocol::AttestedCandidateV2 => "/req_attested_candidate/2",
|
||||
Protocol::ChunkFetchingV2 => "/req_chunk/2",
|
||||
};
|
||||
|
||||
format!("{}{}", prefix, short_name).into()
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,205 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use futures::{channel::oneshot, prelude::Future, FutureExt};
|
||||
|
||||
use codec::{Decode, Encode, Error as DecodingError};
|
||||
use network::ProtocolName;
|
||||
|
||||
use sc_network as network;
|
||||
use sc_network_types::PeerId;
|
||||
|
||||
use pezkuwi_primitives::AuthorityDiscoveryId;
|
||||
|
||||
use super::{v1, v2, IsRequest, Protocol};
|
||||
|
||||
/// All requests that can be sent to the network bridge via `NetworkBridgeTxMessage::SendRequest`.
|
||||
#[derive(Debug)]
|
||||
pub enum Requests {
|
||||
/// Request an availability chunk from a node.
|
||||
ChunkFetching(OutgoingRequest<v2::ChunkFetchingRequest, v1::ChunkFetchingRequest>),
|
||||
/// Fetch a collation from a collator which previously announced it.
|
||||
CollationFetchingV1(OutgoingRequest<v1::CollationFetchingRequest>),
|
||||
/// Fetch a PoV from a validator which previously sent out a seconded statement.
|
||||
PoVFetchingV1(OutgoingRequest<v1::PoVFetchingRequest>),
|
||||
/// Request full available data from a node.
|
||||
AvailableDataFetchingV1(OutgoingRequest<v1::AvailableDataFetchingRequest>),
|
||||
/// Requests for notifying about an ongoing dispute.
|
||||
DisputeSendingV1(OutgoingRequest<v1::DisputeRequest>),
|
||||
|
||||
/// Request a candidate and attestations.
|
||||
AttestedCandidateV2(OutgoingRequest<v2::AttestedCandidateRequest>),
|
||||
/// Fetch a collation from a collator which previously announced it.
|
||||
/// Compared to V1 it requires specifying which candidate is requested by its hash.
|
||||
CollationFetchingV2(OutgoingRequest<v2::CollationFetchingRequest>),
|
||||
}
|
||||
|
||||
impl Requests {
|
||||
/// Encode the request.
|
||||
///
|
||||
/// The corresponding protocol is returned as well, as we are now leaving typed territory.
|
||||
///
|
||||
/// Note: `Requests` is just an enum collecting all supported requests supported by network
|
||||
/// bridge, it is never sent over the wire. This function just encodes the individual requests
|
||||
/// contained in the `enum`.
|
||||
pub fn encode_request(self) -> (Protocol, OutgoingRequest<Vec<u8>>) {
|
||||
match self {
|
||||
Self::ChunkFetching(r) => r.encode_request(),
|
||||
Self::CollationFetchingV1(r) => r.encode_request(),
|
||||
Self::CollationFetchingV2(r) => r.encode_request(),
|
||||
Self::PoVFetchingV1(r) => r.encode_request(),
|
||||
Self::AvailableDataFetchingV1(r) => r.encode_request(),
|
||||
Self::DisputeSendingV1(r) => r.encode_request(),
|
||||
Self::AttestedCandidateV2(r) => r.encode_request(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Used by the network to send us a response to a request.
|
||||
pub type ResponseSender = oneshot::Sender<Result<(Vec<u8>, ProtocolName), network::RequestFailure>>;
|
||||
|
||||
/// Any error that can occur when sending a request.
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum RequestError {
|
||||
/// Response could not be decoded.
|
||||
#[error("Response could not be decoded: {0}")]
|
||||
InvalidResponse(#[from] DecodingError),
|
||||
|
||||
/// Some error in substrate/libp2p happened.
|
||||
#[error("{0}")]
|
||||
NetworkError(#[from] network::RequestFailure),
|
||||
|
||||
/// Response got canceled by networking.
|
||||
#[error("Response channel got canceled")]
|
||||
Canceled(#[from] oneshot::Canceled),
|
||||
}
|
||||
|
||||
impl RequestError {
|
||||
/// Whether the error represents some kind of timeout condition.
|
||||
pub fn is_timed_out(&self) -> bool {
|
||||
match self {
|
||||
Self::Canceled(_) |
|
||||
Self::NetworkError(network::RequestFailure::Obsolete) |
|
||||
Self::NetworkError(network::RequestFailure::Network(
|
||||
network::OutboundFailure::Timeout,
|
||||
)) => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A request to be sent to the network bridge, including a sender for sending responses/failures.
|
||||
///
|
||||
/// The network implementation will make use of that sender for informing the requesting subsystem
|
||||
/// about responses/errors.
|
||||
///
|
||||
/// When using `Recipient::Peer`, keep in mind that no address (as in IP address and port) might
|
||||
/// be known for that specific peer. You are encouraged to use `Peer` for peers that you are
|
||||
/// expected to be already connected to.
|
||||
/// When using `Recipient::Authority`, the addresses can be found thanks to the authority
|
||||
/// discovery system.
|
||||
#[derive(Debug)]
|
||||
pub struct OutgoingRequest<Req, FallbackReq = Req> {
|
||||
/// Intended recipient of this request.
|
||||
pub peer: Recipient,
|
||||
/// The actual request to send over the wire.
|
||||
pub payload: Req,
|
||||
/// Optional fallback request and protocol.
|
||||
pub fallback_request: Option<(FallbackReq, Protocol)>,
|
||||
/// Sender which is used by networking to get us back a response.
|
||||
pub pending_response: ResponseSender,
|
||||
}
|
||||
|
||||
/// Potential recipients of an outgoing request.
|
||||
#[derive(Debug, Eq, Hash, PartialEq, Clone)]
|
||||
pub enum Recipient {
|
||||
/// Recipient is a regular peer and we know its peer id.
|
||||
Peer(PeerId),
|
||||
/// Recipient is a validator, we address it via this `AuthorityDiscoveryId`.
|
||||
Authority(AuthorityDiscoveryId),
|
||||
}
|
||||
|
||||
/// Responses received for an `OutgoingRequest`.
|
||||
pub type OutgoingResult<Res> = Result<Res, RequestError>;
|
||||
|
||||
impl<Req, FallbackReq> OutgoingRequest<Req, FallbackReq>
|
||||
where
|
||||
Req: IsRequest + Encode,
|
||||
Req::Response: Decode,
|
||||
FallbackReq: IsRequest + Encode,
|
||||
FallbackReq::Response: Decode,
|
||||
{
|
||||
/// Create a new `OutgoingRequest`.
|
||||
///
|
||||
/// It will contain a sender that is used by the networking for sending back responses. The
|
||||
/// connected receiver is returned as the second element in the returned tuple.
|
||||
pub fn new(
|
||||
peer: Recipient,
|
||||
payload: Req,
|
||||
) -> (Self, impl Future<Output = OutgoingResult<Req::Response>>) {
|
||||
let (tx, rx) = oneshot::channel();
|
||||
let r = Self { peer, payload, pending_response: tx, fallback_request: None };
|
||||
(r, receive_response::<Req>(rx.map(|r| r.map(|r| r.map(|(resp, _)| resp)))))
|
||||
}
|
||||
|
||||
/// Create a new `OutgoingRequest` with a fallback in case the remote does not support this
|
||||
/// protocol. Useful when adding a new version of a req-response protocol, to achieve
|
||||
/// compatibility with the older version.
|
||||
///
|
||||
/// Returns a raw `Vec<u8>` response over the channel. Use the associated `ProtocolName` to know
|
||||
/// which request was the successful one and appropriately decode the response.
|
||||
pub fn new_with_fallback(
|
||||
peer: Recipient,
|
||||
payload: Req,
|
||||
fallback_request: FallbackReq,
|
||||
) -> (Self, impl Future<Output = OutgoingResult<(Vec<u8>, ProtocolName)>>) {
|
||||
let (tx, rx) = oneshot::channel();
|
||||
let r = Self {
|
||||
peer,
|
||||
payload,
|
||||
pending_response: tx,
|
||||
fallback_request: Some((fallback_request, FallbackReq::PROTOCOL)),
|
||||
};
|
||||
(r, async { Ok(rx.await??) })
|
||||
}
|
||||
|
||||
/// Encode a request into a `Vec<u8>`.
|
||||
///
|
||||
/// As this throws away type information, we also return the `Protocol` this encoded request
|
||||
/// adheres to.
|
||||
pub fn encode_request(self) -> (Protocol, OutgoingRequest<Vec<u8>>) {
|
||||
let OutgoingRequest { peer, payload, pending_response, fallback_request } = self;
|
||||
let encoded = OutgoingRequest {
|
||||
peer,
|
||||
payload: payload.encode(),
|
||||
fallback_request: fallback_request.map(|(r, p)| (r.encode(), p)),
|
||||
pending_response,
|
||||
};
|
||||
(Req::PROTOCOL, encoded)
|
||||
}
|
||||
}
|
||||
|
||||
/// Future for actually receiving a typed response for an `OutgoingRequest`.
|
||||
async fn receive_response<Req>(
|
||||
rec: impl Future<Output = Result<Result<Vec<u8>, network::RequestFailure>, oneshot::Canceled>>,
|
||||
) -> OutgoingResult<Req::Response>
|
||||
where
|
||||
Req: IsRequest,
|
||||
Req::Response: Decode,
|
||||
{
|
||||
let raw = rec.await??;
|
||||
Ok(Decode::decode(&mut raw.as_ref())?)
|
||||
}
|
||||
@@ -0,0 +1,214 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! Requests and responses as sent over the wire for the individual protocols.
|
||||
|
||||
use codec::{Decode, Encode};
|
||||
|
||||
use pezkuwi_node_primitives::{
|
||||
AvailableData, DisputeMessage, ErasureChunk, PoV, Proof, UncheckedDisputeMessage,
|
||||
};
|
||||
use pezkuwi_primitives::{
|
||||
CandidateHash, CandidateReceiptV2 as CandidateReceipt, Hash, HeadData, Id as ParaId,
|
||||
ValidatorIndex,
|
||||
};
|
||||
|
||||
use super::{IsRequest, Protocol};
|
||||
|
||||
/// Request an availability chunk.
|
||||
#[derive(Debug, Copy, Clone, Encode, Decode)]
|
||||
pub struct ChunkFetchingRequest {
|
||||
/// Hash of candidate we want a chunk for.
|
||||
pub candidate_hash: CandidateHash,
|
||||
/// The validator index we are requesting from. This must be identical to the index of the
|
||||
/// chunk we'll receive. For v2, this may not be the case.
|
||||
pub index: ValidatorIndex,
|
||||
}
|
||||
|
||||
/// Receive a requested erasure chunk.
|
||||
#[derive(Debug, Clone, Encode, Decode)]
|
||||
pub enum ChunkFetchingResponse {
|
||||
/// The requested chunk data.
|
||||
#[codec(index = 0)]
|
||||
Chunk(ChunkResponse),
|
||||
/// Node was not in possession of the requested chunk.
|
||||
#[codec(index = 1)]
|
||||
NoSuchChunk,
|
||||
}
|
||||
|
||||
impl From<Option<ChunkResponse>> for ChunkFetchingResponse {
|
||||
fn from(x: Option<ChunkResponse>) -> Self {
|
||||
match x {
|
||||
Some(c) => ChunkFetchingResponse::Chunk(c),
|
||||
None => ChunkFetchingResponse::NoSuchChunk,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ChunkFetchingResponse> for Option<ChunkResponse> {
|
||||
fn from(x: ChunkFetchingResponse) -> Self {
|
||||
match x {
|
||||
ChunkFetchingResponse::Chunk(c) => Some(c),
|
||||
ChunkFetchingResponse::NoSuchChunk => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Skimmed down variant of `ErasureChunk`.
|
||||
///
|
||||
/// Instead of transmitting a full `ErasureChunk` we transmit `ChunkResponse` in
|
||||
/// `ChunkFetchingResponse`, which omits the chunk's index. The index is already known by
|
||||
/// the requester and by not transmitting it, we ensure the requester is going to use his index
|
||||
/// value for validating the response, thus making sure he got what he requested.
|
||||
#[derive(Debug, Clone, Encode, Decode)]
|
||||
pub struct ChunkResponse {
|
||||
/// The erasure-encoded chunk of data belonging to the candidate block.
|
||||
pub chunk: Vec<u8>,
|
||||
/// Proof for this chunk's branch in the Merkle tree.
|
||||
pub proof: Proof,
|
||||
}
|
||||
|
||||
impl From<ErasureChunk> for ChunkResponse {
|
||||
fn from(ErasureChunk { chunk, index: _, proof }: ErasureChunk) -> Self {
|
||||
ChunkResponse { chunk, proof }
|
||||
}
|
||||
}
|
||||
|
||||
impl ChunkResponse {
|
||||
/// Re-build an `ErasureChunk` from response and request.
|
||||
pub fn recombine_into_chunk(self, req: &ChunkFetchingRequest) -> ErasureChunk {
|
||||
ErasureChunk { chunk: self.chunk, proof: self.proof, index: req.index.into() }
|
||||
}
|
||||
}
|
||||
|
||||
impl IsRequest for ChunkFetchingRequest {
|
||||
type Response = ChunkFetchingResponse;
|
||||
const PROTOCOL: Protocol = Protocol::ChunkFetchingV1;
|
||||
}
|
||||
|
||||
/// Request the advertised collation at that relay-parent.
|
||||
#[derive(Debug, Clone, Encode, Decode)]
|
||||
pub struct CollationFetchingRequest {
|
||||
/// Relay parent we want a collation for.
|
||||
pub relay_parent: Hash,
|
||||
/// The `ParaId` of the collation.
|
||||
pub para_id: ParaId,
|
||||
}
|
||||
|
||||
/// Responses as sent by collators.
|
||||
#[derive(Debug, Clone, Encode, Decode)]
|
||||
pub enum CollationFetchingResponse {
|
||||
/// Deliver requested collation.
|
||||
#[codec(index = 0)]
|
||||
Collation(CandidateReceipt, PoV),
|
||||
|
||||
/// Deliver requested collation along with parent head data.
|
||||
#[codec(index = 1)]
|
||||
CollationWithParentHeadData {
|
||||
/// The receipt of the candidate.
|
||||
receipt: CandidateReceipt,
|
||||
/// Candidate's proof of validity.
|
||||
pov: PoV,
|
||||
/// The head data of the candidate's parent.
|
||||
/// This is needed for elastic scaling to work.
|
||||
parent_head_data: HeadData,
|
||||
},
|
||||
}
|
||||
|
||||
impl IsRequest for CollationFetchingRequest {
|
||||
type Response = CollationFetchingResponse;
|
||||
const PROTOCOL: Protocol = Protocol::CollationFetchingV1;
|
||||
}
|
||||
|
||||
/// Request the advertised collation at that relay-parent.
|
||||
#[derive(Debug, Clone, Encode, Decode)]
|
||||
pub struct PoVFetchingRequest {
|
||||
/// Candidate we want a PoV for.
|
||||
pub candidate_hash: CandidateHash,
|
||||
}
|
||||
|
||||
/// Responses to `PoVFetchingRequest`.
|
||||
#[derive(Debug, Clone, Encode, Decode)]
|
||||
pub enum PoVFetchingResponse {
|
||||
/// Deliver requested PoV.
|
||||
#[codec(index = 0)]
|
||||
PoV(PoV),
|
||||
/// PoV was not found in store.
|
||||
#[codec(index = 1)]
|
||||
NoSuchPoV,
|
||||
}
|
||||
|
||||
impl IsRequest for PoVFetchingRequest {
|
||||
type Response = PoVFetchingResponse;
|
||||
const PROTOCOL: Protocol = Protocol::PoVFetchingV1;
|
||||
}
|
||||
|
||||
/// Request the entire available data for a candidate.
|
||||
#[derive(Debug, Clone, Encode, Decode)]
|
||||
pub struct AvailableDataFetchingRequest {
|
||||
/// The candidate hash to get the available data for.
|
||||
pub candidate_hash: CandidateHash,
|
||||
}
|
||||
|
||||
/// Receive a requested available data.
|
||||
#[derive(Debug, Clone, Encode, Decode)]
|
||||
pub enum AvailableDataFetchingResponse {
|
||||
/// The requested data.
|
||||
#[codec(index = 0)]
|
||||
AvailableData(AvailableData),
|
||||
/// Node was not in possession of the requested data.
|
||||
#[codec(index = 1)]
|
||||
NoSuchData,
|
||||
}
|
||||
|
||||
impl From<Option<AvailableData>> for AvailableDataFetchingResponse {
    /// Converts optional data into a response: `Some(data)` becomes
    /// `AvailableDataFetchingResponse::AvailableData(data)`, `None` becomes
    /// `AvailableDataFetchingResponse::NoSuchData`.
    fn from(x: Option<AvailableData>) -> Self {
        x.map_or(Self::NoSuchData, Self::AvailableData)
    }
}
|
||||
|
||||
impl IsRequest for AvailableDataFetchingRequest {
|
||||
type Response = AvailableDataFetchingResponse;
|
||||
const PROTOCOL: Protocol = Protocol::AvailableDataFetchingV1;
|
||||
}
|
||||
|
||||
/// A dispute request.
|
||||
///
|
||||
/// Contains an invalid vote a valid one for a particular candidate in a given session.
|
||||
#[derive(Clone, Encode, Decode, Debug)]
|
||||
pub struct DisputeRequest(pub UncheckedDisputeMessage);
|
||||
|
||||
impl From<DisputeMessage> for DisputeRequest {
|
||||
fn from(msg: DisputeMessage) -> Self {
|
||||
Self(msg.into())
|
||||
}
|
||||
}
|
||||
|
||||
/// Possible responses to a `DisputeRequest`.
|
||||
#[derive(Encode, Decode, Debug, PartialEq, Eq)]
|
||||
pub enum DisputeResponse {
|
||||
/// Recipient successfully processed the dispute request.
|
||||
#[codec(index = 0)]
|
||||
Confirmed,
|
||||
}
|
||||
|
||||
impl IsRequest for DisputeRequest {
|
||||
type Response = DisputeResponse;
|
||||
const PROTOCOL: Protocol = Protocol::DisputeSendingV1;
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user