Plumbing to increase pvf workers configuration based on chain id (#4252)

As part of https://github.com/paritytech/polkadot-sdk/issues/4126, we want
to safely increase execute_workers_max_num gradually from chain to
chain and assess whether there are any negative impacts.

This PR performs the necessary plumbing to be able to increase it based
on the chain id. It increases the number of execution workers from 2 to 4
on test networks but leaves Kusama and Polkadot unchanged until we gather
more data.

---------

Signed-off-by: Alexandru Gheorghe <alexandru.gheorghe@parity.io>
This commit is contained in:
Alexandru Gheorghe
2024-04-24 09:15:39 +03:00
committed by GitHub
parent 0a56d071c7
commit 9a0049d0da
12 changed files with 95 additions and 3 deletions
@@ -312,6 +312,9 @@ fn build_polkadot_full_node(
overseer_message_channel_capacity_override: None,
malus_finality_delay: None,
hwbench,
execute_workers_max_num: None,
prepare_workers_hard_max_num: None,
prepare_workers_soft_max_num: None,
},
)?;
+17
View File
@@ -131,6 +131,23 @@ pub struct RunCmd {
#[arg(long, value_name = "PATH")]
pub workers_path: Option<PathBuf>,
/// Override the maximum number of pvf execute workers.
///
/// **Dangerous!** Do not touch unless explicitly advised to.
#[arg(long)]
pub execute_workers_max_num: Option<usize>,
/// Override the maximum number of pvf workers that can be spawned in the pvf prepare
/// pool for tasks with the priority below critical.
///
/// **Dangerous!** Do not touch unless explicitly advised to.
#[arg(long)]
pub prepare_workers_soft_max_num: Option<usize>,
/// Override the absolute number of pvf workers that can be spawned in the pvf prepare pool.
///
/// **Dangerous!** Do not touch unless explicitly advised to.
#[arg(long)]
pub prepare_workers_hard_max_num: Option<usize>,
/// TESTING ONLY: disable the version check between nodes and workers.
#[arg(long, hide = true)]
pub disable_worker_version_check: bool,
+3
View File
@@ -253,6 +253,9 @@ where
.overseer_channel_capacity_override,
malus_finality_delay: maybe_malus_finality_delay,
hwbench,
execute_workers_max_num: cli.run.execute_workers_max_num,
prepare_workers_hard_max_num: cli.run.prepare_workers_hard_max_num,
prepare_workers_soft_max_num: cli.run.prepare_workers_soft_max_num,
},
)
.map(|full| full.task_manager)?;
@@ -100,6 +100,13 @@ pub struct Config {
pub prep_worker_path: PathBuf,
/// Path to the execution worker binary
pub exec_worker_path: PathBuf,
/// The maximum number of pvf execution workers.
pub pvf_execute_workers_max_num: usize,
/// The maximum number of pvf workers that can be spawned in the pvf prepare pool for tasks
/// with the priority below critical.
pub pvf_prepare_workers_soft_max_num: usize,
/// The absolute number of pvf workers that can be spawned in the pvf prepare pool.
pub pvf_prepare_workers_hard_max_num: usize,
}
/// The candidate validation subsystem.
@@ -224,6 +231,9 @@ async fn run<Context>(
secure_validator_mode,
prep_worker_path,
exec_worker_path,
pvf_execute_workers_max_num,
pvf_prepare_workers_soft_max_num,
pvf_prepare_workers_hard_max_num,
}: Config,
) -> SubsystemResult<()> {
let (validation_host, task) = polkadot_node_core_pvf::start(
@@ -233,6 +243,9 @@ async fn run<Context>(
secure_validator_mode,
prep_worker_path,
exec_worker_path,
pvf_execute_workers_max_num,
pvf_prepare_workers_soft_max_num,
pvf_prepare_workers_hard_max_num,
),
pvf_metrics,
)
@@ -48,6 +48,9 @@ impl TestHost {
false,
prepare_worker_path,
execute_worker_path,
2,
1,
2,
);
f(&mut config);
let (host, task) = start(config, Metrics::default()).await.unwrap();
+6 -3
View File
@@ -188,6 +188,9 @@ impl Config {
secure_validator_mode: bool,
prepare_worker_program_path: PathBuf,
execute_worker_program_path: PathBuf,
execute_workers_max_num: usize,
prepare_workers_soft_max_num: usize,
prepare_workers_hard_max_num: usize,
) -> Self {
Self {
cache_path,
@@ -196,12 +199,12 @@ impl Config {
prepare_worker_program_path,
prepare_worker_spawn_timeout: Duration::from_secs(3),
prepare_workers_soft_max_num: 1,
prepare_workers_hard_max_num: 2,
prepare_workers_soft_max_num,
prepare_workers_hard_max_num,
execute_worker_program_path,
execute_worker_spawn_timeout: Duration::from_secs(3),
execute_workers_max_num: 2,
execute_workers_max_num,
}
}
}
+3
View File
@@ -63,6 +63,9 @@ impl TestHost {
false,
prepare_worker_path,
execute_worker_path,
2,
1,
2,
);
f(&mut config);
let (host, task) = start(config, Metrics::default()).await.unwrap();
+20
View File
@@ -643,6 +643,13 @@ pub struct NewFullParams<OverseerGenerator: OverseerGen> {
pub workers_path: Option<std::path::PathBuf>,
/// Optional custom names for the prepare and execute workers.
pub workers_names: Option<(String, String)>,
/// An optional override for the maximum number of pvf execute workers.
pub execute_workers_max_num: Option<usize>,
/// An optional maximum number of pvf workers that can be spawned in the pvf prepare pool for
/// tasks with the priority below critical.
pub prepare_workers_soft_max_num: Option<usize>,
/// An optional absolute number of pvf workers that can be spawned in the pvf prepare pool.
pub prepare_workers_hard_max_num: Option<usize>,
pub overseer_gen: OverseerGenerator,
pub overseer_message_channel_capacity_override: Option<usize>,
#[allow(dead_code)]
@@ -738,6 +745,9 @@ pub fn new_full<
overseer_message_channel_capacity_override,
malus_finality_delay: _malus_finality_delay,
hwbench,
execute_workers_max_num,
prepare_workers_soft_max_num,
prepare_workers_hard_max_num,
}: NewFullParams<OverseerGenerator>,
) -> Result<NewFull, Error> {
use polkadot_node_network_protocol::request_response::IncomingRequest;
@@ -943,6 +953,16 @@ pub fn new_full<
secure_validator_mode,
prep_worker_path,
exec_worker_path,
pvf_execute_workers_max_num: execute_workers_max_num.unwrap_or_else(
|| match config.chain_spec.identify_chain() {
// The intention is to use this logic to gradually increase this configuration
// from 2 to 4, chain by chain, until it reaches the production chains.
Chain::Polkadot | Chain::Kusama => 2,
Chain::Rococo | Chain::Westend | Chain::Unknown => 4,
},
),
pvf_prepare_workers_soft_max_num: prepare_workers_soft_max_num.unwrap_or(1),
pvf_prepare_workers_hard_max_num: prepare_workers_hard_max_num.unwrap_or(2),
})
} else {
None
+6
View File
@@ -97,6 +97,9 @@ pub fn new_full<OverseerGenerator: OverseerGen>(
overseer_message_channel_capacity_override: None,
malus_finality_delay: None,
hwbench: None,
execute_workers_max_num: None,
prepare_workers_hard_max_num: None,
prepare_workers_soft_max_num: None,
},
),
sc_network::config::NetworkBackendType::Litep2p =>
@@ -116,6 +119,9 @@ pub fn new_full<OverseerGenerator: OverseerGen>(
overseer_message_channel_capacity_override: None,
malus_finality_delay: None,
hwbench: None,
execute_workers_max_num: None,
prepare_workers_hard_max_num: None,
prepare_workers_soft_max_num: None,
},
),
}
@@ -95,6 +95,9 @@ fn main() -> Result<()> {
overseer_message_channel_capacity_override: None,
malus_finality_delay: None,
hwbench: None,
execute_workers_max_num: None,
prepare_workers_hard_max_num: None,
prepare_workers_soft_max_num: None,
},
)
.map_err(|e| e.to_string())?;
@@ -97,6 +97,9 @@ fn main() -> Result<()> {
overseer_message_channel_capacity_override: None,
malus_finality_delay: None,
hwbench: None,
execute_workers_max_num: None,
prepare_workers_hard_max_num: None,
prepare_workers_soft_max_num: None,
},
)
.map_err(|e| e.to_string())?;
+15
View File
@@ -0,0 +1,15 @@
title: "Add logic to increase pvf worker based on chain"
doc:
- audience: Node Operator
description: |
New logic and CLI parameters were added to allow increasing the number of pvf
workers based on the chain-id.
crates:
- name: polkadot-node-core-candidate-validation
bump: minor
- name: polkadot-cli
bump: minor
- name: polkadot-service
bump: minor