Do not run unneeded subsystems on collator and its alongside node (#3061)

Currently, collators and the nodes running alongside them spin up a
full-scale overseer running a number of subsystems that are not needed if
the node is not a validator. That was considered harmless; however, we
have seen unused subsystems stall for reasons that are not yet
understood, resulting in the overseer exiting and bringing down the
whole node.

This PR aims to only run needed subsystems on such nodes, replacing the
rest with `DummySubsystem`.

It also enables the collator-optimized implementation of the
availability recovery subsystem.

Partially solves #1730.
This commit is contained in:
s0me0ne-unkn0wn
2024-01-29 10:53:51 +01:00
committed by GitHub
parent 987edd8864
commit 3e8139e7de
16 changed files with 429 additions and 220 deletions
@@ -295,7 +295,7 @@ fn build_polkadot_full_node(
workers_path: None,
workers_names: None,
overseer_gen: polkadot_service::RealOverseerGen,
overseer_gen: polkadot_service::CollatorOverseerGen,
overseer_message_channel_capacity_override: None,
malus_finality_delay: None,
hwbench,
+1
View File
@@ -271,6 +271,7 @@ async fn build_relay_chain_interface(
polkadot_service::IsParachainNode::Collator(CollatorPair::generate().0)
},
None,
polkadot_service::CollatorOverseerGen,
)
.map_err(|e| RelayChainError::Application(Box::new(e) as Box<_>))?,
cumulus_client_cli::RelayChainMode::ExternalRpc(rpc_target_urls) =>
+1 -1
View File
@@ -299,7 +299,7 @@ pub fn run() -> Result<()> {
match &cli.subcommand {
None => run_node_inner(
cli,
service::RealOverseerGen,
service::ValidatorOverseerGen,
None,
polkadot_node_metrics::logger_hook(),
),
+1 -1
View File
@@ -29,7 +29,7 @@ mod error;
pub use service::{self, Block, CoreApi, IdentifyVariant, ProvideRuntimeApi, TFullClient};
#[cfg(feature = "malus")]
pub use service::overseer::prepared_overseer_builder;
pub use service::overseer::validator_overseer_builder;
#[cfg(feature = "cli")]
pub use cli::*;
+6 -11
View File
@@ -49,29 +49,24 @@ use self::{
/// PVF pre-checking subsystem.
pub struct PvfCheckerSubsystem {
enabled: bool,
keystore: KeystorePtr,
metrics: Metrics,
}
impl PvfCheckerSubsystem {
pub fn new(enabled: bool, keystore: KeystorePtr, metrics: Metrics) -> Self {
PvfCheckerSubsystem { enabled, keystore, metrics }
pub fn new(keystore: KeystorePtr, metrics: Metrics) -> Self {
PvfCheckerSubsystem { keystore, metrics }
}
}
#[overseer::subsystem(PvfChecker, error=SubsystemError, prefix = self::overseer)]
impl<Context> PvfCheckerSubsystem {
fn start(self, ctx: Context) -> SpawnedSubsystem {
if self.enabled {
let future = run(ctx, self.keystore, self.metrics)
.map_err(|e| SubsystemError::with_origin("pvf-checker", e))
.boxed();
let future = run(ctx, self.keystore, self.metrics)
.map_err(|e| SubsystemError::with_origin("pvf-checker", e))
.boxed();
SpawnedSubsystem { name: "pvf-checker-subsystem", future }
} else {
polkadot_overseer::DummySubsystem.start(ctx)
}
SpawnedSubsystem { name: "pvf-checker-subsystem", future }
}
}
@@ -19,13 +19,12 @@
//! candidates.
use polkadot_cli::{
prepared_overseer_builder,
service::{
AuthorityDiscoveryApi, AuxStore, BabeApi, Block, Error, HeaderBackend, Overseer,
OverseerConnector, OverseerGen, OverseerGenArgs, OverseerHandle, ParachainHost,
ProvideRuntimeApi,
AuthorityDiscoveryApi, AuxStore, BabeApi, Block, Error, ExtendedOverseerGenArgs,
HeaderBackend, Overseer, OverseerConnector, OverseerGen, OverseerGenArgs, OverseerHandle,
ParachainHost, ProvideRuntimeApi,
},
Cli,
validator_overseer_builder, Cli,
};
use polkadot_node_subsystem::SpawnGlue;
use polkadot_node_subsystem_types::DefaultSubsystemClient;
@@ -63,6 +62,7 @@ impl OverseerGen for BackGarbageCandidates {
&self,
connector: OverseerConnector,
args: OverseerGenArgs<'_, Spawner, RuntimeClient>,
ext_args: Option<ExtendedOverseerGenArgs>,
) -> Result<
(Overseer<SpawnGlue<Spawner>, Arc<DefaultSubsystemClient<RuntimeClient>>>, OverseerHandle),
Error,
@@ -80,11 +80,14 @@ impl OverseerGen for BackGarbageCandidates {
SpawnGlue(spawner),
);
prepared_overseer_builder(args)?
.replace_candidate_validation(move |cv_subsystem| {
InterceptedSubsystem::new(cv_subsystem, validation_filter)
})
.build_with_connector(connector)
.map_err(|e| e.into())
validator_overseer_builder(
args,
ext_args.expect("Extended arguments required to build validator overseer are provided"),
)?
.replace_candidate_validation(move |cv_subsystem| {
InterceptedSubsystem::new(cv_subsystem, validation_filter)
})
.build_with_connector(connector)
.map_err(|e| e.into())
}
}
@@ -33,13 +33,12 @@
use futures::channel::oneshot;
use polkadot_cli::{
prepared_overseer_builder,
service::{
AuthorityDiscoveryApi, AuxStore, BabeApi, Block, Error, HeaderBackend, Overseer,
OverseerConnector, OverseerGen, OverseerGenArgs, OverseerHandle, ParachainHost,
ProvideRuntimeApi,
AuthorityDiscoveryApi, AuxStore, BabeApi, Block, Error, ExtendedOverseerGenArgs,
HeaderBackend, Overseer, OverseerConnector, OverseerGen, OverseerGenArgs, OverseerHandle,
ParachainHost, ProvideRuntimeApi,
},
Cli,
validator_overseer_builder, Cli,
};
use polkadot_node_subsystem::{messages::ApprovalVotingMessage, SpawnGlue};
use polkadot_node_subsystem_types::{DefaultSubsystemClient, OverseerSignal};
@@ -237,6 +236,7 @@ impl OverseerGen for DisputeFinalizedCandidates {
&self,
connector: OverseerConnector,
args: OverseerGenArgs<'_, Spawner, RuntimeClient>,
ext_args: Option<ExtendedOverseerGenArgs>,
) -> Result<
(Overseer<SpawnGlue<Spawner>, Arc<DefaultSubsystemClient<RuntimeClient>>>, OverseerHandle),
Error,
@@ -257,9 +257,12 @@ impl OverseerGen for DisputeFinalizedCandidates {
dispute_offset: self.dispute_offset,
};
prepared_overseer_builder(args)?
.replace_approval_voting(move |cb| InterceptedSubsystem::new(cb, ancestor_disputer))
.build_with_connector(connector)
.map_err(|e| e.into())
validator_overseer_builder(
args,
ext_args.expect("Extended arguments required to build validator overseer are provided"),
)?
.replace_approval_voting(move |cb| InterceptedSubsystem::new(cb, ancestor_disputer))
.build_with_connector(connector)
.map_err(|e| e.into())
}
}
@@ -23,13 +23,12 @@
#![allow(missing_docs)]
use polkadot_cli::{
prepared_overseer_builder,
service::{
AuthorityDiscoveryApi, AuxStore, BabeApi, Block, Error, HeaderBackend, Overseer,
OverseerConnector, OverseerGen, OverseerGenArgs, OverseerHandle, ParachainHost,
ProvideRuntimeApi,
AuthorityDiscoveryApi, AuxStore, BabeApi, Block, Error, ExtendedOverseerGenArgs,
HeaderBackend, Overseer, OverseerConnector, OverseerGen, OverseerGenArgs, OverseerHandle,
ParachainHost, ProvideRuntimeApi,
},
Cli,
validator_overseer_builder, Cli,
};
use polkadot_node_subsystem::SpawnGlue;
use polkadot_node_subsystem_types::DefaultSubsystemClient;
@@ -80,6 +79,7 @@ impl OverseerGen for DisputeValidCandidates {
&self,
connector: OverseerConnector,
args: OverseerGenArgs<'_, Spawner, RuntimeClient>,
ext_args: Option<ExtendedOverseerGenArgs>,
) -> Result<
(Overseer<SpawnGlue<Spawner>, Arc<DefaultSubsystemClient<RuntimeClient>>>, OverseerHandle),
Error,
@@ -97,11 +97,14 @@ impl OverseerGen for DisputeValidCandidates {
SpawnGlue(spawner.clone()),
);
prepared_overseer_builder(args)?
.replace_candidate_validation(move |cv_subsystem| {
InterceptedSubsystem::new(cv_subsystem, validation_filter)
})
.build_with_connector(connector)
.map_err(|e| e.into())
validator_overseer_builder(
args,
ext_args.expect("Extended arguments required to build validator overseer are provided"),
)?
.replace_candidate_validation(move |cv_subsystem| {
InterceptedSubsystem::new(cv_subsystem, validation_filter)
})
.build_with_connector(connector)
.map_err(|e| e.into())
}
}
@@ -23,13 +23,12 @@
#![allow(missing_docs)]
use polkadot_cli::{
prepared_overseer_builder,
service::{
AuthorityDiscoveryApi, AuxStore, BabeApi, Block, Error, HeaderBackend, Overseer,
OverseerConnector, OverseerGen, OverseerGenArgs, OverseerHandle, ParachainHost,
ProvideRuntimeApi,
AuthorityDiscoveryApi, AuxStore, BabeApi, Block, Error, ExtendedOverseerGenArgs,
HeaderBackend, Overseer, OverseerConnector, OverseerGen, OverseerGenArgs, OverseerHandle,
ParachainHost, ProvideRuntimeApi,
},
Cli,
validator_overseer_builder, Cli,
};
use polkadot_node_core_candidate_validation::find_validation_data;
use polkadot_node_primitives::{AvailableData, BlockData, PoV};
@@ -266,6 +265,7 @@ impl OverseerGen for SuggestGarbageCandidates {
&self,
connector: OverseerConnector,
args: OverseerGenArgs<'_, Spawner, RuntimeClient>,
ext_args: Option<ExtendedOverseerGenArgs>,
) -> Result<
(Overseer<SpawnGlue<Spawner>, Arc<DefaultSubsystemClient<RuntimeClient>>>, OverseerHandle),
Error,
@@ -293,12 +293,13 @@ impl OverseerGen for SuggestGarbageCandidates {
SpawnGlue(args.spawner.clone()),
);
prepared_overseer_builder(args)?
.replace_candidate_backing(move |cb| InterceptedSubsystem::new(cb, note_candidate))
.replace_candidate_validation(move |cb| {
InterceptedSubsystem::new(cb, validation_filter)
})
.build_with_connector(connector)
.map_err(|e| e.into())
validator_overseer_builder(
args,
ext_args.expect("Extended arguments required to build validator overseer are provided"),
)?
.replace_candidate_backing(move |cb| InterceptedSubsystem::new(cb, note_candidate))
.replace_candidate_validation(move |cb| InterceptedSubsystem::new(cb, validation_filter))
.build_with_connector(connector)
.map_err(|e| e.into())
}
}
@@ -18,13 +18,12 @@
//! to always return an empty set of disabled validators.
use polkadot_cli::{
prepared_overseer_builder,
service::{
AuthorityDiscoveryApi, AuxStore, BabeApi, Block, Error, HeaderBackend, Overseer,
OverseerConnector, OverseerGen, OverseerGenArgs, OverseerHandle, ParachainHost,
ProvideRuntimeApi,
AuthorityDiscoveryApi, AuxStore, BabeApi, Block, Error, ExtendedOverseerGenArgs,
HeaderBackend, Overseer, OverseerConnector, OverseerGen, OverseerGenArgs, OverseerHandle,
ParachainHost, ProvideRuntimeApi,
},
Cli,
validator_overseer_builder, Cli,
};
use polkadot_node_subsystem::SpawnGlue;
use polkadot_node_subsystem_types::DefaultSubsystemClient;
@@ -50,6 +49,7 @@ impl OverseerGen for SupportDisabled {
&self,
connector: OverseerConnector,
args: OverseerGenArgs<'_, Spawner, RuntimeClient>,
ext_args: Option<ExtendedOverseerGenArgs>,
) -> Result<
(Overseer<SpawnGlue<Spawner>, Arc<DefaultSubsystemClient<RuntimeClient>>>, OverseerHandle),
Error,
@@ -59,12 +59,15 @@ impl OverseerGen for SupportDisabled {
RuntimeClient::Api: ParachainHost<Block> + BabeApi<Block> + AuthorityDiscoveryApi<Block>,
Spawner: 'static + SpawnNamed + Clone + Unpin,
{
prepared_overseer_builder(args)?
.replace_runtime_api(move |ra_subsystem| {
InterceptedSubsystem::new(ra_subsystem, IgnoreDisabled)
})
.build_with_connector(connector)
.map_err(|e| e.into())
validator_overseer_builder(
args,
ext_args.expect("Extended arguments required to build validator overseer are provided"),
)?
.replace_runtime_api(move |ra_subsystem| {
InterceptedSubsystem::new(ra_subsystem, IgnoreDisabled)
})
.build_with_connector(connector)
.map_err(|e| e.into())
}
}
+70 -63
View File
@@ -31,7 +31,10 @@ pub mod overseer;
pub mod workers;
#[cfg(feature = "full-node")]
pub use self::overseer::{OverseerGen, OverseerGenArgs, RealOverseerGen};
pub use self::overseer::{
CollatorOverseerGen, ExtendedOverseerGenArgs, OverseerGen, OverseerGenArgs,
ValidatorOverseerGen,
};
#[cfg(test)]
mod tests;
@@ -775,8 +778,6 @@ pub fn new_full<OverseerGenerator: OverseerGen>(
let keystore = basics.keystore_container.local_keystore();
let auth_or_collator = role.is_authority() || is_parachain_node.is_collator();
// We only need to enable the pvf checker when this is a validator.
let pvf_checker_enabled = role.is_authority();
let select_chain = if auth_or_collator {
let metrics =
@@ -867,10 +868,6 @@ pub fn new_full<OverseerGenerator: OverseerGen>(
let req_protocol_names = ReqProtocolNames::new(&genesis_hash, config.chain_spec.fork_id());
let (pov_req_receiver, cfg) = IncomingRequest::get_config_receiver(&req_protocol_names);
net_config.add_request_response_protocol(cfg);
let (chunk_req_receiver, cfg) = IncomingRequest::get_config_receiver(&req_protocol_names);
net_config.add_request_response_protocol(cfg);
let (collation_req_v1_receiver, cfg) =
IncomingRequest::get_config_receiver(&req_protocol_names);
net_config.add_request_response_protocol(cfg);
@@ -880,12 +877,9 @@ pub fn new_full<OverseerGenerator: OverseerGen>(
let (available_data_req_receiver, cfg) =
IncomingRequest::get_config_receiver(&req_protocol_names);
net_config.add_request_response_protocol(cfg);
let (statement_req_receiver, cfg) = IncomingRequest::get_config_receiver(&req_protocol_names);
let (pov_req_receiver, cfg) = IncomingRequest::get_config_receiver(&req_protocol_names);
net_config.add_request_response_protocol(cfg);
let (candidate_req_v2_receiver, cfg) =
IncomingRequest::get_config_receiver(&req_protocol_names);
net_config.add_request_response_protocol(cfg);
let (dispute_req_receiver, cfg) = IncomingRequest::get_config_receiver(&req_protocol_names);
let (chunk_req_receiver, cfg) = IncomingRequest::get_config_receiver(&req_protocol_names);
net_config.add_request_response_protocol(cfg);
let grandpa_hard_forks = if config.chain_spec.is_kusama() {
@@ -900,6 +894,69 @@ pub fn new_full<OverseerGenerator: OverseerGen>(
grandpa_hard_forks,
));
let ext_overseer_args = if is_parachain_node.is_running_alongside_parachain_node() {
None
} else {
let parachains_db = open_database(&config.database)?;
let candidate_validation_config = if role.is_authority() {
let (prep_worker_path, exec_worker_path) = workers::determine_workers_paths(
workers_path,
workers_names,
node_version.clone(),
)?;
log::info!("🚀 Using prepare-worker binary at: {:?}", prep_worker_path);
log::info!("🚀 Using execute-worker binary at: {:?}", exec_worker_path);
Some(CandidateValidationConfig {
artifacts_cache_path: config
.database
.path()
.ok_or(Error::DatabasePathRequired)?
.join("pvf-artifacts"),
node_version,
secure_validator_mode,
prep_worker_path,
exec_worker_path,
})
} else {
None
};
let (statement_req_receiver, cfg) =
IncomingRequest::get_config_receiver(&req_protocol_names);
net_config.add_request_response_protocol(cfg);
let (candidate_req_v2_receiver, cfg) =
IncomingRequest::get_config_receiver(&req_protocol_names);
net_config.add_request_response_protocol(cfg);
let (dispute_req_receiver, cfg) = IncomingRequest::get_config_receiver(&req_protocol_names);
net_config.add_request_response_protocol(cfg);
let approval_voting_config = ApprovalVotingConfig {
col_approval_data: parachains_db::REAL_COLUMNS.col_approval_data,
slot_duration_millis: slot_duration.as_millis() as u64,
};
let dispute_coordinator_config = DisputeCoordinatorConfig {
col_dispute_data: parachains_db::REAL_COLUMNS.col_dispute_coordinator_data,
};
let chain_selection_config = ChainSelectionConfig {
col_data: parachains_db::REAL_COLUMNS.col_chain_selection_data,
stagnant_check_interval: Default::default(),
stagnant_check_mode: chain_selection_subsystem::StagnantCheckMode::PruneOnly,
};
Some(ExtendedOverseerGenArgs {
keystore,
parachains_db,
candidate_validation_config,
availability_config: AVAILABILITY_CONFIG,
pov_req_receiver,
chunk_req_receiver,
statement_req_receiver,
candidate_req_v2_receiver,
approval_voting_config,
dispute_req_receiver,
dispute_coordinator_config,
chain_selection_config,
})
};
let (network, system_rpc_tx, tx_handler_controller, network_starter, sync_service) =
service::build_network(service::BuildNetworkParams {
config: &config,
@@ -936,44 +993,6 @@ pub fn new_full<OverseerGenerator: OverseerGen>(
);
}
let parachains_db = open_database(&config.database)?;
let approval_voting_config = ApprovalVotingConfig {
col_approval_data: parachains_db::REAL_COLUMNS.col_approval_data,
slot_duration_millis: slot_duration.as_millis() as u64,
};
let candidate_validation_config = if role.is_authority() {
let (prep_worker_path, exec_worker_path) =
workers::determine_workers_paths(workers_path, workers_names, node_version.clone())?;
log::info!("🚀 Using prepare-worker binary at: {:?}", prep_worker_path);
log::info!("🚀 Using execute-worker binary at: {:?}", exec_worker_path);
Some(CandidateValidationConfig {
artifacts_cache_path: config
.database
.path()
.ok_or(Error::DatabasePathRequired)?
.join("pvf-artifacts"),
node_version,
secure_validator_mode,
prep_worker_path,
exec_worker_path,
})
} else {
None
};
let chain_selection_config = ChainSelectionConfig {
col_data: parachains_db::REAL_COLUMNS.col_chain_selection_data,
stagnant_check_interval: Default::default(),
stagnant_check_mode: chain_selection_subsystem::StagnantCheckMode::PruneOnly,
};
let dispute_coordinator_config = DisputeCoordinatorConfig {
col_dispute_data: parachains_db::REAL_COLUMNS.col_dispute_coordinator_data,
};
let rpc_handlers = service::spawn_tasks(service::SpawnTasksParams {
config,
backend: backend.clone(),
@@ -1067,29 +1086,16 @@ pub fn new_full<OverseerGenerator: OverseerGen>(
.generate::<service::SpawnTaskHandle, FullClient>(
overseer_connector,
OverseerGenArgs {
keystore,
runtime_client: overseer_client.clone(),
parachains_db,
network_service: network.clone(),
sync_service: sync_service.clone(),
authority_discovery_service,
pov_req_receiver,
chunk_req_receiver,
collation_req_v1_receiver,
collation_req_v2_receiver,
available_data_req_receiver,
statement_req_receiver,
candidate_req_v2_receiver,
dispute_req_receiver,
registry: prometheus_registry.as_ref(),
spawner,
is_parachain_node,
approval_voting_config,
availability_config: AVAILABILITY_CONFIG,
candidate_validation_config,
chain_selection_config,
dispute_coordinator_config,
pvf_checker_enabled,
overseer_message_channel_capacity_override,
req_protocol_names,
peerset_protocol_names,
@@ -1098,6 +1104,7 @@ pub fn new_full<OverseerGenerator: OverseerGen>(
),
notification_services,
},
ext_overseer_args,
)
.map_err(|e| {
gum::error!("Failed to init overseer: {}", e);
+245 -78
View File
@@ -16,6 +16,7 @@
use super::{AuthorityDiscoveryApi, Block, Error, Hash, IsParachainNode, Registry};
use polkadot_node_subsystem_types::DefaultSubsystemClient;
use polkadot_overseer::{DummySubsystem, InitializedOverseerBuilder, SubsystemError};
use sc_transaction_pool_api::OffchainTransactionPoolFactory;
use sp_core::traits::SpawnNamed;
@@ -32,13 +33,10 @@ use polkadot_node_network_protocol::{
},
};
#[cfg(any(feature = "malus", test))]
pub use polkadot_overseer::{
dummy::{dummy_overseer_builder, DummySubsystem},
HeadSupportsParachains,
};
pub use polkadot_overseer::{dummy::dummy_overseer_builder, HeadSupportsParachains};
use polkadot_overseer::{
metrics::Metrics as OverseerMetrics, InitializedOverseerBuilder, MetricsTrait, Overseer,
OverseerConnector, OverseerHandle, SpawnGlue,
metrics::Metrics as OverseerMetrics, MetricsTrait, Overseer, OverseerConnector, OverseerHandle,
SpawnGlue,
};
use parking_lot::Mutex;
@@ -86,22 +84,14 @@ where
RuntimeClient::Api: ParachainHost<Block> + BabeApi<Block> + AuthorityDiscoveryApi<Block>,
Spawner: 'static + SpawnNamed + Clone + Unpin,
{
/// The keystore to use for i.e. validator keys.
pub keystore: Arc<LocalKeystore>,
/// Runtime client generic, providing the `ProvideRuntimeApi` trait besides others.
pub runtime_client: Arc<RuntimeClient>,
/// The underlying key value store for the parachains.
pub parachains_db: Arc<dyn polkadot_node_subsystem_util::database::Database>,
/// Underlying network service implementation.
pub network_service: Arc<sc_network::NetworkService<Block, Hash>>,
/// Underlying syncing service implementation.
pub sync_service: Arc<sc_network_sync::SyncingService<Block>>,
/// Underlying authority discovery service.
pub authority_discovery_service: AuthorityDiscoveryService,
/// POV request receiver.
pub pov_req_receiver: IncomingRequestReceiver<request_v1::PoVFetchingRequest>,
/// Erasure chunks request receiver.
pub chunk_req_receiver: IncomingRequestReceiver<request_v1::ChunkFetchingRequest>,
/// Collations request receiver for network protocol v1.
pub collation_req_v1_receiver: IncomingRequestReceiver<request_v1::CollationFetchingRequest>,
/// Collations request receiver for network protocol v2.
@@ -109,30 +99,12 @@ where
/// Receiver for available data requests.
pub available_data_req_receiver:
IncomingRequestReceiver<request_v1::AvailableDataFetchingRequest>,
/// Receiver for incoming large statement requests.
pub statement_req_receiver: IncomingRequestReceiver<request_v1::StatementFetchingRequest>,
/// Receiver for incoming candidate requests.
pub candidate_req_v2_receiver: IncomingRequestReceiver<request_v2::AttestedCandidateRequest>,
/// Receiver for incoming disputes.
pub dispute_req_receiver: IncomingRequestReceiver<request_v1::DisputeRequest>,
/// Prometheus registry, commonly used for production systems, less so for test.
pub registry: Option<&'a Registry>,
/// Task spawner to be used throughout the overseer and the APIs it provides.
pub spawner: Spawner,
/// Determines the behavior of the collator.
pub is_parachain_node: IsParachainNode,
/// Configuration for the approval voting subsystem.
pub approval_voting_config: ApprovalVotingConfig,
/// Configuration for the availability store subsystem.
pub availability_config: AvailabilityConfig,
/// Configuration for the candidate validation subsystem.
pub candidate_validation_config: Option<CandidateValidationConfig>,
/// Configuration for the chain selection subsystem.
pub chain_selection_config: ChainSelectionConfig,
/// Configuration for the dispute coordinator subsystem.
pub dispute_coordinator_config: DisputeCoordinatorConfig,
/// Enable PVF pre-checking
pub pvf_checker_enabled: bool,
/// Overseer channel capacity override.
pub overseer_message_channel_capacity_override: Option<usize>,
/// Request-response protocol names source.
@@ -145,39 +117,66 @@ where
pub notification_services: HashMap<PeerSet, Box<dyn NotificationService>>,
}
/// Obtain a prepared `OverseerBuilder`, that is initialized
/// with all default values.
pub fn prepared_overseer_builder<Spawner, RuntimeClient>(
/// Additional arguments for overseer generation, kept separate from
/// `OverseerGenArgs`.
///
/// `validator_overseer_builder` consumes this struct in addition to
/// `OverseerGenArgs`, whereas `collator_overseer_builder` takes
/// `OverseerGenArgs` only. `OverseerGen::generate` therefore receives it as an
/// `Option`.
pub struct ExtendedOverseerGenArgs {
/// The keystore to use, e.g. for validator keys.
pub keystore: Arc<LocalKeystore>,
/// The underlying key value store for the parachains.
pub parachains_db: Arc<dyn polkadot_node_subsystem_util::database::Database>,
/// Configuration for the candidate validation subsystem.
pub candidate_validation_config: Option<CandidateValidationConfig>,
/// Configuration for the availability store subsystem.
pub availability_config: AvailabilityConfig,
/// POV request receiver.
pub pov_req_receiver: IncomingRequestReceiver<request_v1::PoVFetchingRequest>,
/// Erasure chunks request receiver.
pub chunk_req_receiver: IncomingRequestReceiver<request_v1::ChunkFetchingRequest>,
/// Receiver for incoming large statement requests.
pub statement_req_receiver: IncomingRequestReceiver<request_v1::StatementFetchingRequest>,
/// Receiver for incoming candidate requests.
pub candidate_req_v2_receiver: IncomingRequestReceiver<request_v2::AttestedCandidateRequest>,
/// Configuration for the approval voting subsystem.
pub approval_voting_config: ApprovalVotingConfig,
/// Receiver for incoming disputes.
pub dispute_req_receiver: IncomingRequestReceiver<request_v1::DisputeRequest>,
/// Configuration for the dispute coordinator subsystem.
pub dispute_coordinator_config: DisputeCoordinatorConfig,
/// Configuration for the chain selection subsystem.
pub chain_selection_config: ChainSelectionConfig,
}
/// Obtain a prepared validator `Overseer`, that is initialized with all default values.
pub fn validator_overseer_builder<Spawner, RuntimeClient>(
OverseerGenArgs {
keystore,
runtime_client,
parachains_db,
network_service,
sync_service,
authority_discovery_service,
pov_req_receiver,
chunk_req_receiver,
collation_req_v1_receiver,
collation_req_v2_receiver,
collation_req_v1_receiver: _,
collation_req_v2_receiver: _,
available_data_req_receiver,
statement_req_receiver,
candidate_req_v2_receiver,
dispute_req_receiver,
registry,
spawner,
is_parachain_node,
approval_voting_config,
availability_config,
candidate_validation_config,
chain_selection_config,
dispute_coordinator_config,
pvf_checker_enabled,
overseer_message_channel_capacity_override,
req_protocol_names,
peerset_protocol_names,
offchain_transaction_pool_factory,
notification_services,
}: OverseerGenArgs<Spawner, RuntimeClient>,
ExtendedOverseerGenArgs {
keystore,
parachains_db,
candidate_validation_config,
availability_config,
pov_req_receiver,
chunk_req_receiver,
statement_req_receiver,
candidate_req_v2_receiver,
approval_voting_config,
dispute_req_receiver,
dispute_coordinator_config,
chain_selection_config,
}: ExtendedOverseerGenArgs,
) -> Result<
InitializedOverseerBuilder<
SpawnGlue<Spawner>,
@@ -280,23 +279,15 @@ where
Metrics::register(registry)?, // candidate-validation metrics
Metrics::register(registry)?, // validation host metrics
))
.pvf_checker(PvfCheckerSubsystem::new(
pvf_checker_enabled,
keystore.clone(),
Metrics::register(registry)?,
))
.pvf_checker(PvfCheckerSubsystem::new(keystore.clone(), Metrics::register(registry)?))
.chain_api(ChainApiSubsystem::new(runtime_client.clone(), Metrics::register(registry)?))
.collation_generation(CollationGenerationSubsystem::new(Metrics::register(registry)?))
.collator_protocol({
let side = match is_parachain_node {
IsParachainNode::Collator(collator_pair) => ProtocolSide::Collator {
peer_id: network_service.local_peer_id(),
collator_pair,
request_receiver_v1: collation_req_v1_receiver,
request_receiver_v2: collation_req_v2_receiver,
metrics: Metrics::register(registry)?,
},
IsParachainNode::FullNode => ProtocolSide::None,
IsParachainNode::Collator(_) | IsParachainNode::FullNode =>
return Err(Error::Overseer(SubsystemError::Context(
"build validator overseer for parachain node".to_owned(),
))),
IsParachainNode::No => ProtocolSide::Validator {
keystore: keystore.clone(),
eviction_policy: Default::default(),
@@ -352,23 +343,173 @@ where
.metrics(metrics)
.spawner(spawner);
if let Some(capacity) = overseer_message_channel_capacity_override {
Ok(builder.message_channel_capacity(capacity))
let builder = if let Some(capacity) = overseer_message_channel_capacity_override {
builder.message_channel_capacity(capacity)
} else {
Ok(builder)
}
builder
};
Ok(builder)
}
/// Obtain a prepared collator overseer builder (an `InitializedOverseerBuilder`, not yet a
/// built `Overseer`), initialized with all default values.
///
/// Only the network bridge (rx/tx), availability recovery, chain API, collation generation,
/// collator protocol, runtime API and prospective parachains subsystems are real; every other
/// subsystem slot is filled with `DummySubsystem`.
///
/// Unlike `validator_overseer_builder`, this takes `OverseerGenArgs` only and needs no
/// `ExtendedOverseerGenArgs`.
///
/// # Errors
///
/// - Returns `Error::Overseer(SubsystemError::Context(..))` when `is_parachain_node` is
///   `IsParachainNode::No`, i.e. when asked to build a parachain-node overseer for a validator.
/// - Propagates any error from the `Metrics::register(registry)?` calls.
pub fn collator_overseer_builder<Spawner, RuntimeClient>(
OverseerGenArgs {
runtime_client,
network_service,
sync_service,
authority_discovery_service,
collation_req_v1_receiver,
collation_req_v2_receiver,
available_data_req_receiver,
registry,
spawner,
is_parachain_node,
overseer_message_channel_capacity_override,
req_protocol_names,
peerset_protocol_names,
offchain_transaction_pool_factory,
notification_services,
}: OverseerGenArgs<Spawner, RuntimeClient>,
) -> Result<
InitializedOverseerBuilder<
SpawnGlue<Spawner>,
Arc<DefaultSubsystemClient<RuntimeClient>>,
DummySubsystem,
DummySubsystem,
DummySubsystem,
DummySubsystem,
DummySubsystem,
AvailabilityRecoverySubsystem,
DummySubsystem,
DummySubsystem,
DummySubsystem,
RuntimeApiSubsystem<DefaultSubsystemClient<RuntimeClient>>,
DummySubsystem,
NetworkBridgeRxSubsystem<
Arc<sc_network::NetworkService<Block, Hash>>,
AuthorityDiscoveryService,
>,
NetworkBridgeTxSubsystem<
Arc<sc_network::NetworkService<Block, Hash>>,
AuthorityDiscoveryService,
>,
ChainApiSubsystem<RuntimeClient>,
CollationGenerationSubsystem,
CollatorProtocolSubsystem,
DummySubsystem,
DummySubsystem,
DummySubsystem,
DummySubsystem,
DummySubsystem,
DummySubsystem,
ProspectiveParachainsSubsystem,
>,
Error,
>
where
RuntimeClient: 'static + ProvideRuntimeApi<Block> + HeaderBackend<Block> + AuxStore,
RuntimeClient::Api: ParachainHost<Block> + BabeApi<Block> + AuthorityDiscoveryApi<Block>,
Spawner: 'static + SpawnNamed + Clone + Unpin,
{
use polkadot_node_subsystem_util::metrics::Metrics;
// Overseer-level metrics; registered first, before any subsystem metrics.
let metrics = <OverseerMetrics as MetricsTrait>::register(registry)?;
// One sink map, handed (as clones of the same `Arc`) to both network bridge halves below.
let notification_sinks = Arc::new(Mutex::new(HashMap::new()));
let spawner = SpawnGlue(spawner);
// A single metrics instance is likewise passed to both network bridge subsystems.
let network_bridge_metrics: NetworkBridgeMetrics = Metrics::register(registry)?;
// Used by the runtime API subsystem and, at the end of the chain, for `supports_parachains`.
let runtime_api_client = Arc::new(DefaultSubsystemClient::new(
runtime_client.clone(),
offchain_transaction_pool_factory,
));
let builder = Overseer::builder()
.network_bridge_tx(NetworkBridgeTxSubsystem::new(
network_service.clone(),
authority_discovery_service.clone(),
network_bridge_metrics.clone(),
req_protocol_names,
peerset_protocol_names.clone(),
notification_sinks.clone(),
))
.network_bridge_rx(NetworkBridgeRxSubsystem::new(
network_service.clone(),
authority_discovery_service.clone(),
Box::new(sync_service.clone()),
network_bridge_metrics,
peerset_protocol_names,
notification_services,
notification_sinks,
))
.availability_distribution(DummySubsystem)
// Availability recovery is built through its collator-specific constructor.
.availability_recovery(AvailabilityRecoverySubsystem::for_collator(
available_data_req_receiver,
Metrics::register(registry)?,
))
.availability_store(DummySubsystem)
.bitfield_distribution(DummySubsystem)
.bitfield_signing(DummySubsystem)
.candidate_backing(DummySubsystem)
.candidate_validation(DummySubsystem)
.pvf_checker(DummySubsystem)
.chain_api(ChainApiSubsystem::new(runtime_client.clone(), Metrics::register(registry)?))
.collation_generation(CollationGenerationSubsystem::new(Metrics::register(registry)?))
.collator_protocol({
let side = match is_parachain_node {
// This builder is for parachain nodes only; a validator must not end up here.
IsParachainNode::No =>
return Err(Error::Overseer(SubsystemError::Context(
"build parachain node overseer for validator".to_owned(),
))),
IsParachainNode::Collator(collator_pair) => ProtocolSide::Collator {
peer_id: network_service.local_peer_id(),
collator_pair,
request_receiver_v1: collation_req_v1_receiver,
request_receiver_v2: collation_req_v2_receiver,
metrics: Metrics::register(registry)?,
},
IsParachainNode::FullNode => ProtocolSide::None,
};
CollatorProtocolSubsystem::new(side)
})
.provisioner(DummySubsystem)
.runtime_api(RuntimeApiSubsystem::new(
runtime_api_client.clone(),
Metrics::register(registry)?,
spawner.clone(),
))
.statement_distribution(DummySubsystem)
.approval_distribution(DummySubsystem)
.approval_voting(DummySubsystem)
.gossip_support(DummySubsystem)
.dispute_coordinator(DummySubsystem)
.dispute_distribution(DummySubsystem)
.chain_selection(DummySubsystem)
.prospective_parachains(ProspectiveParachainsSubsystem::new(Metrics::register(registry)?))
.activation_external_listeners(Default::default())
.span_per_active_leaf(Default::default())
.active_leaves(Default::default())
.supports_parachains(runtime_api_client)
.metrics(metrics)
.spawner(spawner);
// Apply the message channel capacity override only when one was supplied.
let builder = if let Some(capacity) = overseer_message_channel_capacity_override {
builder.message_channel_capacity(capacity)
} else {
builder
};
Ok(builder)
}
/// Trait for the `fn` generating the overseer.
///
/// Default behavior is to create an unmodified overseer, as `RealOverseerGen`
/// would do.
pub trait OverseerGen {
/// Overwrite the full generation of the overseer, including the subsystems.
fn generate<Spawner, RuntimeClient>(
&self,
connector: OverseerConnector,
args: OverseerGenArgs<Spawner, RuntimeClient>,
ext_args: Option<ExtendedOverseerGenArgs>,
) -> Result<
(Overseer<SpawnGlue<Spawner>, Arc<DefaultSubsystemClient<RuntimeClient>>>, OverseerHandle),
Error,
@@ -376,24 +517,22 @@ pub trait OverseerGen {
where
RuntimeClient: 'static + ProvideRuntimeApi<Block> + HeaderBackend<Block> + AuxStore,
RuntimeClient::Api: ParachainHost<Block> + BabeApi<Block> + AuthorityDiscoveryApi<Block>,
Spawner: 'static + SpawnNamed + Clone + Unpin,
{
let gen = RealOverseerGen;
RealOverseerGen::generate::<Spawner, RuntimeClient>(&gen, connector, args)
}
Spawner: 'static + SpawnNamed + Clone + Unpin;
// It would be nice to make `create_subsystems` part of this trait,
// but the number of generic arguments that would be required as
// a consequence makes this rather annoying to implement and use.
}
/// The regular set of subsystems.
pub struct RealOverseerGen;
pub struct ValidatorOverseerGen;
impl OverseerGen for RealOverseerGen {
impl OverseerGen for ValidatorOverseerGen {
fn generate<Spawner, RuntimeClient>(
&self,
connector: OverseerConnector,
args: OverseerGenArgs<Spawner, RuntimeClient>,
ext_args: Option<ExtendedOverseerGenArgs>,
) -> Result<
(Overseer<SpawnGlue<Spawner>, Arc<DefaultSubsystemClient<RuntimeClient>>>, OverseerHandle),
Error,
@@ -403,7 +542,35 @@ impl OverseerGen for RealOverseerGen {
RuntimeClient::Api: ParachainHost<Block> + BabeApi<Block> + AuthorityDiscoveryApi<Block>,
Spawner: 'static + SpawnNamed + Clone + Unpin,
{
prepared_overseer_builder(args)?
let ext_args = ext_args.ok_or(Error::Overseer(SubsystemError::Context(
"create validator overseer as mandatory extended arguments were not provided"
.to_owned(),
)))?;
validator_overseer_builder(args, ext_args)?
.build_with_connector(connector)
.map_err(|e| e.into())
}
}
/// Reduced set of subsystems, to use in collator and collator's full node.
///
/// The `OverseerGen` implementation for this type builds the overseer through
/// `collator_overseer_builder` and does not use the optional extended overseer
/// arguments passed to `generate`.
pub struct CollatorOverseerGen;
impl OverseerGen for CollatorOverseerGen {
/// Generates the overseer for a collator or a collator's full node.
///
/// The overseer builder is obtained from `collator_overseer_builder(args)` and
/// then built with the supplied `connector`. `_ext_args` is accepted only to
/// satisfy the trait signature and is never read.
///
/// # Errors
///
/// Returns an error if `collator_overseer_builder` fails, or if building the
/// overseer with the connector fails (that error is converted into `Error`).
fn generate<Spawner, RuntimeClient>(
    &self,
    connector: OverseerConnector,
    args: OverseerGenArgs<Spawner, RuntimeClient>,
    _ext_args: Option<ExtendedOverseerGenArgs>,
) -> Result<
    (Overseer<SpawnGlue<Spawner>, Arc<DefaultSubsystemClient<RuntimeClient>>>, OverseerHandle),
    Error,
>
where
    RuntimeClient: 'static + ProvideRuntimeApi<Block> + HeaderBackend<Block> + AuxStore,
    RuntimeClient::Api: ParachainHost<Block> + BabeApi<Block> + AuthorityDiscoveryApi<Block>,
    Spawner: 'static + SpawnNamed + Clone + Unpin,
{
    // Assemble the collator-specific builder first; any failure propagates via `?`.
    let builder = collator_overseer_builder(args)?;

    // Finish the build with the connector, converting a build failure into `Error`.
    match builder.build_with_connector(connector) {
        Ok(overseer_and_handle) => Ok(overseer_and_handle),
        Err(err) => Err(err.into()),
    }
}
+20 -9
View File
@@ -28,7 +28,9 @@ use polkadot_overseer::Handle;
use polkadot_primitives::{Balance, CollatorPair, HeadData, Id as ParaId, ValidationCode};
use polkadot_runtime_common::BlockHashCount;
use polkadot_runtime_parachains::paras::{ParaGenesisArgs, ParaKind};
use polkadot_service::{Error, FullClient, IsParachainNode, NewFull, PrometheusConfig};
use polkadot_service::{
Error, FullClient, IsParachainNode, NewFull, OverseerGen, PrometheusConfig,
};
use polkadot_test_runtime::{
ParasCall, ParasSudoWrapperCall, Runtime, SignedExtra, SignedPayload, SudoCall,
UncheckedExtrinsic, VERSION,
@@ -69,10 +71,11 @@ pub use polkadot_service::{FullBackend, GetLastTimestamp};
/// Create a new full node.
#[sc_tracing::logging::prefix_logs_with(config.network.node_name.as_str())]
pub fn new_full(
pub fn new_full<OverseerGenerator: OverseerGen>(
config: Configuration,
is_parachain_node: IsParachainNode,
workers_path: Option<PathBuf>,
overseer_gen: OverseerGenerator,
) -> Result<NewFull, Error> {
let workers_path = Some(workers_path.unwrap_or_else(get_relative_workers_path_for_test));
@@ -88,7 +91,7 @@ pub fn new_full(
secure_validator_mode: false,
workers_path,
workers_names: None,
overseer_gen: polkadot_service::RealOverseerGen,
overseer_gen,
overseer_message_channel_capacity_override: None,
malus_finality_delay: None,
hwbench: None,
@@ -207,9 +210,13 @@ pub fn run_validator_node(
worker_program_path: Option<PathBuf>,
) -> PolkadotTestNode {
let multiaddr = config.network.listen_addresses[0].clone();
let NewFull { task_manager, client, network, rpc_handlers, overseer_handle, .. } =
new_full(config, IsParachainNode::No, worker_program_path)
.expect("could not create Polkadot test service");
let NewFull { task_manager, client, network, rpc_handlers, overseer_handle, .. } = new_full(
config,
IsParachainNode::No,
worker_program_path,
polkadot_service::ValidatorOverseerGen,
)
.expect("could not create Polkadot test service");
let overseer_handle = overseer_handle.expect("test node must have an overseer handle");
let peer_id = network.local_peer_id();
@@ -239,9 +246,13 @@ pub fn run_collator_node(
) -> PolkadotTestNode {
let config = node_config(storage_update_func, tokio_handle, key, boot_nodes, false);
let multiaddr = config.network.listen_addresses[0].clone();
let NewFull { task_manager, client, network, rpc_handlers, overseer_handle, .. } =
new_full(config, IsParachainNode::Collator(collator_pair), None)
.expect("could not create Polkadot test service");
let NewFull { task_manager, client, network, rpc_handlers, overseer_handle, .. } = new_full(
config,
IsParachainNode::Collator(collator_pair),
None,
polkadot_service::CollatorOverseerGen,
)
.expect("could not create Polkadot test service");
let overseer_handle = overseer_handle.expect("test node must have an overseer handle");
let peer_id = network.local_peer_id();
@@ -91,7 +91,7 @@ fn main() -> Result<()> {
workers_path: None,
workers_names: None,
overseer_gen: polkadot_service::RealOverseerGen,
overseer_gen: polkadot_service::CollatorOverseerGen,
overseer_message_channel_capacity_override: None,
malus_finality_delay: None,
hwbench: None,
@@ -93,7 +93,7 @@ fn main() -> Result<()> {
workers_path: None,
workers_names: None,
overseer_gen: polkadot_service::RealOverseerGen,
overseer_gen: polkadot_service::CollatorOverseerGen,
overseer_message_channel_capacity_override: None,
malus_finality_delay: None,
hwbench: None,
+15
View File
@@ -0,0 +1,15 @@
# Schema: Polkadot SDK PRDoc Schema (prdoc) v1.0.0
# See doc at https://raw.githubusercontent.com/paritytech/polkadot-sdk/master/prdoc/schema_user.json
title: Do not run unneeded subsystems on collator and its alongside node
doc:
- audience: Node Dev
description: |
Optimizes overseer building strategy to only include subsystems needed to run the given
type of node. Reduces overseer overhead and also solves the problem with unused subsystems
getting stalled from time to time.
crates:
- name: polkadot-overseer
- name: polkadot-service