mirror of
https://github.com/pezkuwichain/pezkuwi-subxt.git
synced 2026-06-14 23:51:05 +00:00
Use same fmt and clippy configs as in Substrate (#7611)
* Use same rustfmt.toml as Substrate Signed-off-by: Oliver Tale-Yazdi <oliver.tale-yazdi@parity.io> * format format file Signed-off-by: Oliver Tale-Yazdi <oliver.tale-yazdi@parity.io> * Format with new config Signed-off-by: Oliver Tale-Yazdi <oliver.tale-yazdi@parity.io> * Add Substrate Clippy config Signed-off-by: Oliver Tale-Yazdi <oliver.tale-yazdi@parity.io> * Print Clippy version in CI Otherwise its difficult to reproduce locally. Signed-off-by: Oliver Tale-Yazdi <oliver.tale-yazdi@parity.io> * Make fmt happy Signed-off-by: Oliver Tale-Yazdi <oliver.tale-yazdi@parity.io> * Update node/core/pvf/src/error.rs Co-authored-by: Tsvetomir Dimitrov <tsvetomir@parity.io> * Update node/core/pvf/src/error.rs Co-authored-by: Tsvetomir Dimitrov <tsvetomir@parity.io> --------- Signed-off-by: Oliver Tale-Yazdi <oliver.tale-yazdi@parity.io> Co-authored-by: Tsvetomir Dimitrov <tsvetomir@parity.io>
This commit is contained in:
committed by
GitHub
parent
ac435c96cf
commit
342d720573
@@ -42,8 +42,8 @@ pub enum RequiredTranches {
|
||||
/// assignments that are before the local time.
|
||||
maximum_broadcast: DelayTranche,
|
||||
/// The clock drift, in ticks, to apply to the local clock when determining whether
|
||||
/// to broadcast an assignment or when to schedule a wakeup. The local clock should be treated
|
||||
/// as though it is `clock_drift` ticks earlier.
|
||||
/// to broadcast an assignment or when to schedule a wakeup. The local clock should be
|
||||
/// treated as though it is `clock_drift` ticks earlier.
|
||||
clock_drift: Tick,
|
||||
},
|
||||
/// An exact number of required tranches and a number of no-shows. This indicates that
|
||||
@@ -55,8 +55,8 @@ pub enum RequiredTranches {
|
||||
/// The amount of missing votes that should be tolerated.
|
||||
tolerated_missing: usize,
|
||||
/// When the next no-show would be, if any. This is used to schedule the next wakeup in the
|
||||
/// event that there are some assignments that don't have corresponding approval votes. If this
|
||||
/// is `None`, all assignments have approvals.
|
||||
/// event that there are some assignments that don't have corresponding approval votes. If
|
||||
/// this is `None`, all assignments have approvals.
|
||||
next_no_show: Option<Tick>,
|
||||
/// The last tick at which a needed assignment was received.
|
||||
last_assignment_tick: Option<Tick>,
|
||||
|
||||
@@ -218,13 +218,14 @@ impl AssignmentCriteria for RealAssignmentCriteria {
|
||||
}
|
||||
|
||||
/// Compute the assignments for a given block. Returns a map containing all assignments to cores in
|
||||
/// the block. If more than one assignment targets the given core, only the earliest assignment is kept.
|
||||
/// the block. If more than one assignment targets the given core, only the earliest assignment is
|
||||
/// kept.
|
||||
///
|
||||
/// The `leaving_cores` parameter indicates all cores within the block where a candidate was included,
|
||||
/// as well as the group index backing those.
|
||||
/// The `leaving_cores` parameter indicates all cores within the block where a candidate was
|
||||
/// included, as well as the group index backing those.
|
||||
///
|
||||
/// The current description of the protocol assigns every validator to check every core. But at different times.
|
||||
/// The idea is that most assignments are never triggered and fall by the wayside.
|
||||
/// The current description of the protocol assigns every validator to check every core. But at
|
||||
/// different times. The idea is that most assignments are never triggered and fall by the wayside.
|
||||
///
|
||||
/// This will not assign to anything the local validator was part of the backing group for.
|
||||
pub(crate) fn compute_assignments(
|
||||
@@ -463,8 +464,8 @@ pub(crate) enum InvalidAssignmentReason {
|
||||
/// * Sample is out of bounds
|
||||
/// * Validator is present in backing group.
|
||||
///
|
||||
/// This function does not check whether the core is actually a valid assignment or not. That should be done
|
||||
/// outside the scope of this function.
|
||||
/// This function does not check whether the core is actually a valid assignment or not. That should
|
||||
/// be done outside the scope of this function.
|
||||
pub(crate) fn check_assignment_cert(
|
||||
claimed_core_index: CoreIndex,
|
||||
validator_index: ValidatorIndex,
|
||||
|
||||
@@ -104,7 +104,8 @@ enum ImportedBlockInfoError {
|
||||
VrfInfoUnavailable,
|
||||
}
|
||||
|
||||
/// Computes information about the imported block. Returns an error if the info couldn't be extracted.
|
||||
/// Computes information about the imported block. Returns an error if the info couldn't be
|
||||
/// extracted.
|
||||
#[overseer::contextbounds(ApprovalVoting, prefix = self::overseer)]
|
||||
async fn imported_block_info<Context>(
|
||||
ctx: &mut Context,
|
||||
@@ -181,20 +182,21 @@ async fn imported_block_info<Context>(
|
||||
// It's not obvious whether to use the hash or the parent hash for this, intuitively. We
|
||||
// want to use the block hash itself, and here's why:
|
||||
//
|
||||
// First off, 'epoch' in BABE means 'session' in other places. 'epoch' is the terminology from
|
||||
// the paper, which we fulfill using 'session's, which are a Substrate consensus concept.
|
||||
// First off, 'epoch' in BABE means 'session' in other places. 'epoch' is the terminology
|
||||
// from the paper, which we fulfill using 'session's, which are a Substrate consensus
|
||||
// concept.
|
||||
//
|
||||
// In BABE, the on-chain and off-chain view of the current epoch can differ at epoch boundaries
|
||||
// because epochs change precisely at a slot. When a block triggers a new epoch, the state of
|
||||
// its parent will still have the old epoch. Conversely, we have the invariant that every
|
||||
// block in BABE has the epoch _it was authored in_ within its post-state. So we use the
|
||||
// block, and not its parent.
|
||||
// In BABE, the on-chain and off-chain view of the current epoch can differ at epoch
|
||||
// boundaries because epochs change precisely at a slot. When a block triggers a new epoch,
|
||||
// the state of its parent will still have the old epoch. Conversely, we have the invariant
|
||||
// that every block in BABE has the epoch _it was authored in_ within its post-state. So we
|
||||
// use the block, and not its parent.
|
||||
//
|
||||
// It's worth noting that Polkadot session changes, at least for the purposes of parachains,
|
||||
// would function the same way, except for the fact that they're always delayed by one block.
|
||||
// This gives us the opposite invariant for sessions - the parent block's post-state gives
|
||||
// us the canonical information about the session index for any of its children, regardless
|
||||
// of which slot number they might be produced at.
|
||||
// It's worth noting that Polkadot session changes, at least for the purposes of
|
||||
// parachains, would function the same way, except for the fact that they're always delayed
|
||||
// by one block. This gives us the opposite invariant for sessions - the parent block's
|
||||
// post-state gives us the canonical information about the session index for any of its
|
||||
// children, regardless of which slot number they might be produced at.
|
||||
ctx.send_message(RuntimeApiMessage::Request(
|
||||
block_hash,
|
||||
RuntimeApiRequest::CurrentBabeEpoch(s_tx),
|
||||
|
||||
@@ -1232,8 +1232,8 @@ async fn handle_from_overseer<Context>(
|
||||
);
|
||||
|
||||
// Our first wakeup will just be the tranche of our assignment,
|
||||
// if any. This will likely be superseded by incoming assignments
|
||||
// and approvals which trigger rescheduling.
|
||||
// if any. This will likely be superseded by incoming
|
||||
// assignments and approvals which trigger rescheduling.
|
||||
actions.push(Action::ScheduleWakeup {
|
||||
block_hash: block_batch.block_hash,
|
||||
block_number: block_batch.block_number,
|
||||
@@ -1256,12 +1256,14 @@ async fn handle_from_overseer<Context>(
|
||||
crate::ops::canonicalize(db, block_number, block_hash)
|
||||
.map_err(|e| SubsystemError::with_origin("db", e))?;
|
||||
|
||||
// `prune_finalized_wakeups` prunes all finalized block hashes. We prune spans accordingly.
|
||||
// `prune_finalized_wakeups` prunes all finalized block hashes. We prune spans
|
||||
// accordingly.
|
||||
wakeups.prune_finalized_wakeups(block_number, &mut state.spans);
|
||||
|
||||
// // `prune_finalized_wakeups` prunes all finalized block hashes. We prune spans accordingly.
|
||||
// let hash_set = wakeups.block_numbers.values().flatten().collect::<HashSet<_>>();
|
||||
// state.spans.retain(|hash, _| hash_set.contains(hash));
|
||||
// // `prune_finalized_wakeups` prunes all finalized block hashes. We prune spans
|
||||
// accordingly. let hash_set =
|
||||
// wakeups.block_numbers.values().flatten().collect::<HashSet<_>>(); state.spans.
|
||||
// retain(|hash, _| hash_set.contains(hash));
|
||||
|
||||
Vec::new()
|
||||
},
|
||||
@@ -1403,8 +1405,8 @@ async fn get_approval_signatures_for_candidate<Context>(
|
||||
tx_distribution,
|
||||
));
|
||||
|
||||
// Because of the unbounded sending and the nature of the call (just fetching data from state),
|
||||
// this should not block long:
|
||||
// Because of the unbounded sending and the nature of the call (just fetching data from
|
||||
// state), this should not block long:
|
||||
match rx_distribution.timeout(WAIT_FOR_SIGS_TIMEOUT).await {
|
||||
None => {
|
||||
gum::warn!(
|
||||
@@ -2117,9 +2119,10 @@ impl ApprovalStateTransition {
|
||||
}
|
||||
}
|
||||
|
||||
// Advance the approval state, either by importing an approval vote which is already checked to be valid and corresponding to an assigned
|
||||
// validator on the candidate and block, or by noting that there are no further wakeups or tranches needed. This updates the block entry and candidate entry as
|
||||
// necessary and schedules any further wakeups.
|
||||
// Advance the approval state, either by importing an approval vote which is already checked to be
|
||||
// valid and corresponding to an assigned validator on the candidate and block, or by noting that
|
||||
// there are no further wakeups or tranches needed. This updates the block entry and candidate entry
|
||||
// as necessary and schedules any further wakeups.
|
||||
async fn advance_approval_state<Sender>(
|
||||
sender: &mut Sender,
|
||||
state: &State,
|
||||
@@ -2251,7 +2254,8 @@ where
|
||||
// 1. This is not a local approval, as we don't store anything new in the approval entry.
|
||||
// 2. The candidate is not newly approved, as we haven't altered the approval entry's
|
||||
// approved flag with `mark_approved` above.
|
||||
// 3. The approver, if any, had already approved the candidate, as we haven't altered the bitfield.
|
||||
// 3. The approver, if any, had already approved the candidate, as we haven't altered the
|
||||
// bitfield.
|
||||
if transition.is_local_approval() || newly_approved || !already_approved_by.unwrap_or(true)
|
||||
{
|
||||
// In all other cases, we need to write the candidate entry.
|
||||
@@ -2279,7 +2283,8 @@ fn should_trigger_assignment(
|
||||
&approval_entry,
|
||||
RequiredTranches::All,
|
||||
)
|
||||
.is_approved(Tick::max_value()), // when all are required, we are just waiting for the first 1/3+
|
||||
// when all are required, we are just waiting for the first 1/3+
|
||||
.is_approved(Tick::max_value()),
|
||||
RequiredTranches::Pending { maximum_broadcast, clock_drift, .. } => {
|
||||
let drifted_tranche_now =
|
||||
tranche_now.saturating_sub(clock_drift as DelayTranche);
|
||||
@@ -2615,8 +2620,8 @@ async fn launch_approval<Context>(
|
||||
match val_rx.await {
|
||||
Err(_) => return ApprovalState::failed(validator_index, candidate_hash),
|
||||
Ok(Ok(ValidationResult::Valid(_, _))) => {
|
||||
// Validation checked out. Issue an approval command. If the underlying service is unreachable,
|
||||
// then there isn't anything we can do.
|
||||
// Validation checked out. Issue an approval command. If the underlying service is
|
||||
// unreachable, then there isn't anything we can do.
|
||||
|
||||
gum::trace!(target: LOG_TARGET, ?candidate_hash, ?para_id, "Candidate Valid");
|
||||
|
||||
|
||||
@@ -161,7 +161,8 @@ pub fn canonicalize(
|
||||
}
|
||||
}
|
||||
|
||||
// Update all blocks-at-height keys, deleting all those which now have empty `block_assignments`.
|
||||
// Update all blocks-at-height keys, deleting all those which now have empty
|
||||
// `block_assignments`.
|
||||
for (h, at) in visited_heights.into_iter() {
|
||||
if at.is_empty() {
|
||||
overlay_db.delete_blocks_at_height(h);
|
||||
@@ -170,8 +171,8 @@ pub fn canonicalize(
|
||||
}
|
||||
}
|
||||
|
||||
// due to the fork pruning, this range actually might go too far above where our actual highest block is,
|
||||
// if a relatively short fork is canonicalized.
|
||||
// due to the fork pruning, this range actually might go too far above where our actual highest
|
||||
// block is, if a relatively short fork is canonicalized.
|
||||
// TODO https://github.com/paritytech/polkadot/issues/3389
|
||||
let new_range = StoredBlockRange(canon_number + 1, std::cmp::max(range.1, canon_number + 2));
|
||||
|
||||
|
||||
@@ -67,8 +67,8 @@ const META_PREFIX: &[u8; 4] = b"meta";
|
||||
const UNFINALIZED_PREFIX: &[u8; 11] = b"unfinalized";
|
||||
const PRUNE_BY_TIME_PREFIX: &[u8; 13] = b"prune_by_time";
|
||||
|
||||
// We have some keys we want to map to empty values because existence of the key is enough. We use this because
|
||||
// rocksdb doesn't support empty values.
|
||||
// We have some keys we want to map to empty values because existence of the key is enough. We use
|
||||
// this because rocksdb doesn't support empty values.
|
||||
const TOMBSTONE_VALUE: &[u8] = b" ";
|
||||
|
||||
/// Unavailable blocks are kept for 1 hour.
|
||||
@@ -139,10 +139,11 @@ enum State {
|
||||
/// Candidate data was first observed at the given time but is not available in any block.
|
||||
#[codec(index = 0)]
|
||||
Unavailable(BETimestamp),
|
||||
/// The candidate was first observed at the given time and was included in the given list of unfinalized blocks, which may be
|
||||
/// empty. The timestamp here is not used for pruning. Either one of these blocks will be finalized or the state will regress to
|
||||
/// `State::Unavailable`, in which case the same timestamp will be reused. Blocks are sorted ascending first by block number and
|
||||
/// then hash.
|
||||
/// The candidate was first observed at the given time and was included in the given list of
|
||||
/// unfinalized blocks, which may be empty. The timestamp here is not used for pruning. Either
|
||||
/// one of these blocks will be finalized or the state will regress to `State::Unavailable`, in
|
||||
/// which case the same timestamp will be reused. Blocks are sorted ascending first by block
|
||||
/// number and then hash.
|
||||
#[codec(index = 1)]
|
||||
Unfinalized(BETimestamp, Vec<(BEBlockNumber, Hash)>),
|
||||
/// Candidate data has appeared in a finalized block and did so at the given time.
|
||||
@@ -820,8 +821,8 @@ fn note_block_included(
|
||||
|
||||
match load_meta(db, config, &candidate_hash)? {
|
||||
None => {
|
||||
// This is alarming. We've observed a block being included without ever seeing it backed.
|
||||
// Warn and ignore.
|
||||
// This is alarming. We've observed a block being included without ever seeing it
|
||||
// backed. Warn and ignore.
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
?candidate_hash,
|
||||
@@ -894,9 +895,9 @@ async fn process_block_finalized<Context>(
|
||||
let mut db_transaction = DBTransaction::new();
|
||||
let (start_prefix, end_prefix) = finalized_block_range(finalized_number);
|
||||
|
||||
// We have to do some juggling here of the `iter` to make sure it doesn't cross the `.await` boundary
|
||||
// as it is not `Send`. That is why we create the iterator once within this loop, drop it,
|
||||
// do an asynchronous request, and then instantiate the exact same iterator again.
|
||||
// We have to do some juggling here of the `iter` to make sure it doesn't cross the `.await`
|
||||
// boundary as it is not `Send`. That is why we create the iterator once within this loop,
|
||||
// drop it, do an asynchronous request, and then instantiate the exact same iterator again.
|
||||
let batch_num = {
|
||||
let mut iter = subsystem
|
||||
.db
|
||||
@@ -961,8 +962,9 @@ async fn process_block_finalized<Context>(
|
||||
|
||||
update_blocks_at_finalized_height(&subsystem, &mut db_transaction, batch, batch_num, now)?;
|
||||
|
||||
// We need to write at the end of the loop so the prefix iterator doesn't pick up the same values again
|
||||
// in the next iteration. Another unfortunate effect of having to re-initialize the iterator.
|
||||
// We need to write at the end of the loop so the prefix iterator doesn't pick up the same
|
||||
// values again in the next iteration. Another unfortunate effect of having to re-initialize
|
||||
// the iterator.
|
||||
subsystem.db.write(db_transaction)?;
|
||||
}
|
||||
|
||||
@@ -1215,7 +1217,8 @@ fn process_message(
|
||||
// We do not bubble up internal errors to caller subsystems, instead the
|
||||
// tx channel is dropped and that error is caught by the caller subsystem.
|
||||
//
|
||||
// We bubble up the specific error here so `av-store` logs still tell what happened.
|
||||
// We bubble up the specific error here so `av-store` logs still tell what
|
||||
// happened.
|
||||
return Err(e.into())
|
||||
},
|
||||
}
|
||||
@@ -1298,8 +1301,8 @@ fn store_available_data(
|
||||
.with_candidate(candidate_hash)
|
||||
.with_pov(&available_data.pov);
|
||||
|
||||
// Important note: This check below is critical for consensus and the `backing` subsystem relies on it to
|
||||
// ensure candidate validity.
|
||||
// Important note: This check below is critical for consensus and the `backing` subsystem relies
|
||||
// on it to ensure candidate validity.
|
||||
let chunks = erasure::obtain_chunks_v1(n_validators, &available_data)?;
|
||||
let branches = erasure::branches(chunks.as_ref());
|
||||
|
||||
|
||||
@@ -422,7 +422,8 @@ struct CandidateBackingJob<Context> {
|
||||
awaiting_validation: HashSet<CandidateHash>,
|
||||
/// Data needed for retrying in case of `ValidatedCandidateCommand::AttestNoPoV`.
|
||||
fallbacks: HashMap<CandidateHash, (AttestingData, Option<jaeger::Span>)>,
|
||||
/// `Some(h)` if this job has already issued `Seconded` statement for some candidate with `h` hash.
|
||||
/// `Some(h)` if this job has already issued `Seconded` statement for some candidate with `h`
|
||||
/// hash.
|
||||
seconded: Option<CandidateHash>,
|
||||
/// The candidates that are includable, by hash. Each entry here indicates
|
||||
/// that we've sent the provisioner the backed candidate.
|
||||
@@ -562,9 +563,10 @@ async fn store_available_data(
|
||||
expected_erasure_root: Hash,
|
||||
) -> Result<(), Error> {
|
||||
let (tx, rx) = oneshot::channel();
|
||||
// Important: the `av-store` subsystem will check if the erasure root of the `available_data` matches `expected_erasure_root`
|
||||
// which was provided by the collator in the `CandidateReceipt`. This check is consensus critical and the `backing` subsystem
|
||||
// relies on it for ensuring candidate validity.
|
||||
// Important: the `av-store` subsystem will check if the erasure root of the `available_data`
|
||||
// matches `expected_erasure_root` which was provided by the collator in the `CandidateReceipt`.
|
||||
// This check is consensus critical and the `backing` subsystem relies on it for ensuring
|
||||
// candidate validity.
|
||||
sender
|
||||
.send_message(AvailabilityStoreMessage::StoreAvailableData {
|
||||
candidate_hash,
|
||||
@@ -582,8 +584,8 @@ async fn store_available_data(
|
||||
|
||||
// Make a `PoV` available.
|
||||
//
|
||||
// This calls the AV store to write the available data to storage. The AV store also checks the erasure root matches
|
||||
// the `expected_erasure_root`.
|
||||
// This calls the AV store to write the available data to storage. The AV store also checks the
|
||||
// erasure root matches the `expected_erasure_root`.
|
||||
// This returns `Err()` on erasure root mismatch or due to any AV store subsystem error.
|
||||
//
|
||||
// Otherwise, it returns either `Ok(())`
|
||||
|
||||
@@ -54,7 +54,8 @@ impl Metrics {
|
||||
self.0.as_ref().map(|metrics| metrics.process_statement.start_timer())
|
||||
}
|
||||
|
||||
/// Provide a timer for handling `CandidateBackingMessage::GetBackedCandidates` which observes on drop.
|
||||
/// Provide a timer for handling `CandidateBackingMessage::GetBackedCandidates` which observes
|
||||
/// on drop.
|
||||
pub fn time_get_backed_candidates(
|
||||
&self,
|
||||
) -> Option<metrics::prometheus::prometheus::HistogramTimer> {
|
||||
|
||||
@@ -84,7 +84,8 @@ impl Default for TestState {
|
||||
];
|
||||
|
||||
let keystore = Arc::new(sc_keystore::LocalKeystore::in_memory());
|
||||
// Make sure `Alice` key is in the keystore, so this mocked node will be a parachain validator.
|
||||
// Make sure `Alice` key is in the keystore, so this mocked node will be a parachain
|
||||
// validator.
|
||||
Keystore::sr25519_generate_new(&*keystore, ValidatorId::ID, Some(&validators[0].to_seed()))
|
||||
.expect("Insert key into keystore");
|
||||
|
||||
|
||||
@@ -137,8 +137,8 @@ async fn get_availability_cores(
|
||||
|
||||
/// - get the list of core states from the runtime
|
||||
/// - for each core, concurrently determine chunk availability (see `get_core_availability`)
|
||||
/// - return the bitfield if there were no errors at any point in this process
|
||||
/// (otherwise, it's prone to false negatives)
|
||||
/// - return the bitfield if there were no errors at any point in this process (otherwise, it's
|
||||
/// prone to false negatives)
|
||||
async fn construct_availability_bitfield(
|
||||
relay_parent: Hash,
|
||||
span: &jaeger::Span,
|
||||
|
||||
@@ -67,15 +67,15 @@ mod tests;
|
||||
|
||||
const LOG_TARGET: &'static str = "parachain::candidate-validation";
|
||||
|
||||
/// The amount of time to wait before retrying after a retry-able backing validation error. We use a lower value for the
|
||||
/// backing case, to fit within the lower backing timeout.
|
||||
/// The amount of time to wait before retrying after a retry-able backing validation error. We use a
|
||||
/// lower value for the backing case, to fit within the lower backing timeout.
|
||||
#[cfg(not(test))]
|
||||
const PVF_BACKING_EXECUTION_RETRY_DELAY: Duration = Duration::from_millis(500);
|
||||
#[cfg(test)]
|
||||
const PVF_BACKING_EXECUTION_RETRY_DELAY: Duration = Duration::from_millis(200);
|
||||
/// The amount of time to wait before retrying after a retry-able approval validation error. We use a higher value for
|
||||
/// the approval case since we have more time, and if we wait longer it is more likely that transient conditions will
|
||||
/// resolve.
|
||||
/// The amount of time to wait before retrying after a retry-able approval validation error. We use
|
||||
/// a higher value for the approval case since we have more time, and if we wait longer it is more
|
||||
/// likely that transient conditions will resolve.
|
||||
#[cfg(not(test))]
|
||||
const PVF_APPROVAL_EXECUTION_RETRY_DELAY: Duration = Duration::from_secs(3);
|
||||
#[cfg(test)]
|
||||
@@ -451,9 +451,9 @@ where
|
||||
const ASSUMPTIONS: &[OccupiedCoreAssumption] = &[
|
||||
OccupiedCoreAssumption::Included,
|
||||
OccupiedCoreAssumption::TimedOut,
|
||||
// `TimedOut` and `Free` both don't perform any speculation and therefore should be the same
|
||||
// for our purposes here. In other words, if `TimedOut` matched then the `Free` must be
|
||||
// matched as well.
|
||||
// `TimedOut` and `Free` both don't perform any speculation and therefore should be the
|
||||
// same for our purposes here. In other words, if `TimedOut` matched then the `Free` must
|
||||
// be matched as well.
|
||||
];
|
||||
|
||||
// Consider running these checks in parallel to reduce validation latency.
|
||||
@@ -482,9 +482,10 @@ where
|
||||
AssumptionCheckOutcome::Matches(validation_data, validation_code) =>
|
||||
Ok(Some((validation_data, validation_code))),
|
||||
AssumptionCheckOutcome::DoesNotMatch => {
|
||||
// If neither the assumption of the occupied core having the para included or the assumption
|
||||
// of the occupied core timing out are valid, then the persisted_validation_data_hash in the descriptor
|
||||
// is not based on the relay parent and is thus invalid.
|
||||
// If neither the assumption of the occupied core having the para included or the
|
||||
// assumption of the occupied core timing out are valid, then the
|
||||
// persisted_validation_data_hash in the descriptor is not based on the relay parent and
|
||||
// is thus invalid.
|
||||
Ok(None)
|
||||
},
|
||||
AssumptionCheckOutcome::BadRequest =>
|
||||
@@ -704,7 +705,8 @@ where
|
||||
"Invalid candidate (commitments hash)"
|
||||
);
|
||||
|
||||
// If validation produced a new set of commitments, we treat the candidate as invalid.
|
||||
// If validation produced a new set of commitments, we treat the candidate as
|
||||
// invalid.
|
||||
Ok(ValidationResult::Invalid(InvalidCandidate::CommitmentsHashMismatch))
|
||||
} else {
|
||||
Ok(ValidationResult::Valid(outputs, persisted_validation_data))
|
||||
@@ -744,7 +746,8 @@ trait ValidationBackend {
|
||||
prep_timeout,
|
||||
PrepareJobKind::Compilation,
|
||||
);
|
||||
// We keep track of the total time that has passed and stop retrying if we are taking too long.
|
||||
// We keep track of the total time that has passed and stop retrying if we are taking too
|
||||
// long.
|
||||
let total_time_start = Instant::now();
|
||||
|
||||
let mut validation_result =
|
||||
@@ -780,8 +783,8 @@ trait ValidationBackend {
|
||||
_ => break,
|
||||
}
|
||||
|
||||
// If we got a possibly transient error, retry once after a brief delay, on the assumption
|
||||
// that the conditions that caused this error may have resolved on their own.
|
||||
// If we got a possibly transient error, retry once after a brief delay, on the
|
||||
// assumption that the conditions that caused this error may have resolved on their own.
|
||||
{
|
||||
// Wait a brief delay before retrying.
|
||||
futures_timer::Delay::new(retry_delay).await;
|
||||
|
||||
@@ -44,13 +44,15 @@ mod tree;
|
||||
mod tests;
|
||||
|
||||
const LOG_TARGET: &str = "parachain::chain-selection";
|
||||
/// Timestamp based on the 1 Jan 1970 UNIX base, which is persistent across node restarts and OS reboots.
|
||||
/// Timestamp based on the 1 Jan 1970 UNIX base, which is persistent across node restarts and OS
|
||||
/// reboots.
|
||||
type Timestamp = u64;
|
||||
|
||||
// If a block isn't approved in 120 seconds, nodes will abandon it
|
||||
// and begin building on another chain.
|
||||
const STAGNANT_TIMEOUT: Timestamp = 120;
|
||||
// Delay pruning of the stagnant keys in prune only mode by 25 hours to avoid interception with the finality
|
||||
// Delay pruning of the stagnant keys in prune only mode by 25 hours to avoid interception with the
|
||||
// finality
|
||||
const STAGNANT_PRUNE_DELAY: Timestamp = 25 * 60 * 60;
|
||||
// Maximum number of stagnant entries cleaned during one `STAGNANT_TIMEOUT` iteration
|
||||
const MAX_STAGNANT_ENTRIES: usize = 1000;
|
||||
|
||||
@@ -52,8 +52,8 @@ const CLEANED_VOTES_WATERMARK_KEY: &[u8; 23] = b"cleaned-votes-watermark";
|
||||
/// this should not be done at once, but rather in smaller batches so nodes won't get stalled by
|
||||
/// this.
|
||||
///
|
||||
/// 300 is with session duration of 1 hour and 30 parachains around <3_000_000 key purges in the worst
|
||||
/// case. Which is already quite a lot, at the same time we have around 21_000 sessions on
|
||||
/// 300 is with session duration of 1 hour and 30 parachains around <3_000_000 key purges in the
|
||||
/// worst case. Which is already quite a lot, at the same time we have around 21_000 sessions on
|
||||
/// Kusama. This means at 300 purged sessions per session, cleaning everything up will take
|
||||
/// around 3 days. Depending on how severe disk usage becomes, we might want to bump the batch
|
||||
/// size, at the cost of risking issues at session boundaries (performance).
|
||||
@@ -346,7 +346,8 @@ pub(crate) fn note_earliest_session(
|
||||
|
||||
if pruned_disputes.len() != 0 {
|
||||
overlay_db.write_recent_disputes(new_recent_disputes);
|
||||
// Note: Deleting old candidate votes is handled in `write` based on the earliest session.
|
||||
// Note: Deleting old candidate votes is handled in `write` based on the
|
||||
// earliest session.
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
@@ -19,12 +19,12 @@
|
||||
//! This module encapsulates the actual logic for importing new votes and provides easy access of
|
||||
//! the current state for votes for a particular candidate.
|
||||
//!
|
||||
//! In particular there is `CandidateVoteState` which tells what can be concluded for a particular set of
|
||||
//! votes. E.g. whether a dispute is ongoing, whether it is confirmed, concluded, ..
|
||||
//! In particular there is `CandidateVoteState` which tells what can be concluded for a particular
|
||||
//! set of votes. E.g. whether a dispute is ongoing, whether it is confirmed, concluded, ..
|
||||
//!
|
||||
//! Then there is `ImportResult` which reveals information about what changed once additional votes
|
||||
//! got imported on top of an existing `CandidateVoteState` and reveals "dynamic" information, like whether
|
||||
//! due to the import a dispute was raised/got confirmed, ...
|
||||
//! got imported on top of an existing `CandidateVoteState` and reveals "dynamic" information, like
|
||||
//! whether due to the import a dispute was raised/got confirmed, ...
|
||||
|
||||
use std::collections::{BTreeMap, HashMap, HashSet};
|
||||
|
||||
|
||||
@@ -92,8 +92,8 @@ pub struct InitialData {
|
||||
pub(crate) struct Initialized {
|
||||
keystore: Arc<LocalKeystore>,
|
||||
runtime_info: RuntimeInfo,
|
||||
/// This is the highest `SessionIndex` seen via `ActiveLeavesUpdate`. It doesn't matter if it was
|
||||
/// cached successfully or not. It is used to detect ancient disputes.
|
||||
/// This is the highest `SessionIndex` seen via `ActiveLeavesUpdate`. It doesn't matter if it
|
||||
/// was cached successfully or not. It is used to detect ancient disputes.
|
||||
highest_session_seen: SessionIndex,
|
||||
/// Will be set to `true` if an error occurred during the last caching attempt
|
||||
gaps_in_cache: bool,
|
||||
@@ -308,8 +308,8 @@ impl Initialized {
|
||||
Ok(session_idx)
|
||||
if self.gaps_in_cache || session_idx > self.highest_session_seen =>
|
||||
{
|
||||
// Fetch the last `DISPUTE_WINDOW` number of sessions unless there are no gaps in
|
||||
// cache and we are not missing too many `SessionInfo`s
|
||||
// Fetch the last `DISPUTE_WINDOW` number of sessions unless there are no gaps
|
||||
// in cache and we are not missing too many `SessionInfo`s
|
||||
let mut lower_bound = session_idx.saturating_sub(DISPUTE_WINDOW.get() - 1);
|
||||
if !self.gaps_in_cache && self.highest_session_seen > lower_bound {
|
||||
lower_bound = self.highest_session_seen + 1
|
||||
@@ -1133,8 +1133,8 @@ impl Initialized {
|
||||
}
|
||||
|
||||
// Participate in dispute if we did not cast a vote before and actually have keys to cast a
|
||||
// local vote. Disputes should fall in one of the categories below, otherwise we will refrain
|
||||
// from participation:
|
||||
// local vote. Disputes should fall in one of the categories below, otherwise we will
|
||||
// refrain from participation:
|
||||
// - `is_included` lands in prioritised queue
|
||||
// - `is_confirmed` | `is_backed` lands in best effort queue
|
||||
// We don't participate in disputes on finalized candidates.
|
||||
|
||||
@@ -17,12 +17,13 @@
|
||||
//! Implements the dispute coordinator subsystem.
|
||||
//!
|
||||
//! This is the central subsystem of the node-side components which participate in disputes.
|
||||
//! This subsystem wraps a database which tracks all statements observed by all validators over some window of sessions.
|
||||
//! Votes older than this session window are pruned.
|
||||
//! This subsystem wraps a database which tracks all statements observed by all validators over some
|
||||
//! window of sessions. Votes older than this session window are pruned.
|
||||
//!
|
||||
//! This subsystem will be the point which produce dispute votes, either positive or negative, based on locally-observed
|
||||
//! validation results as well as a sink for votes received by other subsystems. When importing a dispute vote from
|
||||
//! another node, this will trigger dispute participation to recover and validate the block.
|
||||
//! This subsystem will be the point which produce dispute votes, either positive or negative, based
|
||||
//! on locally-observed validation results as well as a sink for votes received by other subsystems.
|
||||
//! When importing a dispute vote from another node, this will trigger dispute participation to
|
||||
//! recover and validate the block.
|
||||
|
||||
use std::{num::NonZeroUsize, sync::Arc};
|
||||
|
||||
@@ -92,10 +93,10 @@ mod spam_slots;
|
||||
|
||||
/// Handling of participation requests via `Participation`.
|
||||
///
|
||||
/// `Participation` provides an API (`Participation::queue_participation`) for queuing of dispute participations and will process those
|
||||
/// participation requests, such that most important/urgent disputes will be resolved and processed
|
||||
/// first and more importantly it will order requests in a way so disputes will get resolved, even
|
||||
/// if there are lots of them.
|
||||
/// `Participation` provides an API (`Participation::queue_participation`) for queuing of dispute
|
||||
/// participations and will process those participation requests, such that most important/urgent
|
||||
/// disputes will be resolved and processed first and more importantly it will order requests in a
|
||||
/// way so disputes will get resolved, even if there are lots of them.
|
||||
pub(crate) mod participation;
|
||||
|
||||
/// Pure processing of vote imports.
|
||||
|
||||
@@ -294,8 +294,8 @@ impl Queues {
|
||||
return Self::pop_impl(&mut self.priority)
|
||||
}
|
||||
|
||||
// `pop_best_effort` and `pop_priority` do the same but on different `BTreeMap`s. This function has
|
||||
// the extracted implementation
|
||||
// `pop_best_effort` and `pop_priority` do the same but on different `BTreeMap`s. This function
|
||||
// has the extracted implementation
|
||||
fn pop_impl(
|
||||
target: &mut BTreeMap<CandidateComparator, ParticipationRequest>,
|
||||
) -> Option<(CandidateComparator, ParticipationRequest)> {
|
||||
@@ -331,9 +331,10 @@ impl Queues {
|
||||
#[derive(Copy, Clone)]
|
||||
#[cfg_attr(test, derive(Debug))]
|
||||
struct CandidateComparator {
|
||||
/// Block number of the relay parent. It's wrapped in an `Option<>` because there are cases when
|
||||
/// it can't be obtained. For example when the node is lagging behind and new leaves are received
|
||||
/// with a slight delay. Candidates with unknown relay parent are treated with the lowest priority.
|
||||
/// Block number of the relay parent. It's wrapped in an `Option<>` because there are cases
|
||||
/// when it can't be obtained. For example when the node is lagging behind and new leaves are
|
||||
/// received with a slight delay. Candidates with unknown relay parent are treated with the
|
||||
/// lowest priority.
|
||||
///
|
||||
/// The order enforced by `CandidateComparator` is important because we want to participate in
|
||||
/// the oldest disputes first.
|
||||
@@ -346,9 +347,10 @@ struct CandidateComparator {
|
||||
/// that is not stable. If a new fork appears after the fact, we would start ordering the same
|
||||
/// candidate differently, which would result in the same candidate getting queued twice.
|
||||
relay_parent_block_number: Option<BlockNumber>,
|
||||
/// By adding the `CandidateHash`, we can guarantee a unique ordering across candidates with the
|
||||
/// same relay parent block number. Candidates without `relay_parent_block_number` are ordered by
|
||||
/// the `candidate_hash` (and treated with the lowest priority, as already mentioned).
|
||||
/// By adding the `CandidateHash`, we can guarantee a unique ordering across candidates with
|
||||
/// the same relay parent block number. Candidates without `relay_parent_block_number` are
|
||||
/// ordered by the `candidate_hash` (and treated with the lowest priority, as already
|
||||
/// mentioned).
|
||||
candidate_hash: CandidateHash,
|
||||
}
|
||||
|
||||
@@ -364,11 +366,11 @@ impl CandidateComparator {
|
||||
/// Create a candidate comparator for a given candidate.
|
||||
///
|
||||
/// Returns:
|
||||
/// - `Ok(CandidateComparator{Some(relay_parent_block_number), candidate_hash})` when the
|
||||
/// - `Ok(CandidateComparator{Some(relay_parent_block_number), candidate_hash})` when the
|
||||
/// relay parent can be obtained. This is the happy case.
|
||||
/// - `Ok(CandidateComparator{None, candidate_hash})` in case the candidate's relay parent
|
||||
/// can't be obtained.
|
||||
/// - `FatalError` in case the chain API call fails with an unexpected error.
|
||||
/// - `FatalError` in case the chain API call fails with an unexpected error.
|
||||
pub async fn new(
|
||||
sender: &mut impl overseer::DisputeCoordinatorSenderTrait,
|
||||
candidate: &CandidateReceipt,
|
||||
|
||||
@@ -53,8 +53,8 @@ fn clone_request(request: &ParticipationRequest) -> ParticipationRequest {
|
||||
/// Check that dequeuing acknowledges order.
|
||||
///
|
||||
/// Any priority item will be dequeued before any best effort items, priority and best effort with
|
||||
/// known parent block number items will be processed in order. Best effort items without known parent
|
||||
/// block number should be treated with lowest priority.
|
||||
/// known parent block number items will be processed in order. Best effort items without known
|
||||
/// parent block number should be treated with lowest priority.
|
||||
#[test]
|
||||
fn ordering_works_as_expected() {
|
||||
let metrics = Metrics::default();
|
||||
|
||||
@@ -305,7 +305,8 @@ fn reqs_get_queued_on_no_recent_block() {
|
||||
|
||||
// Responds to messages from the test and verifies its behaviour
|
||||
let request_handler = async {
|
||||
// If we receive `BlockNumber` request this implicitly proves that the participation is queued
|
||||
// If we receive `BlockNumber` request this implicitly proves that the participation is
|
||||
// queued
|
||||
assert_matches!(
|
||||
ctx_handle.recv().await,
|
||||
AllMessages::ChainApi(ChainApiMessage::BlockNumber(_, tx)) => {
|
||||
|
||||
@@ -98,7 +98,8 @@ mod ref_counted_candidates_tests {
|
||||
/// Keeps track of scraped candidates. Supports `insert`, `remove_up_to_height` and `contains`
|
||||
/// operations.
|
||||
pub struct ScrapedCandidates {
|
||||
/// Main data structure which keeps the candidates we know about. `contains` does lookups only here.
|
||||
/// Main data structure which keeps the candidates we know about. `contains` does lookups only
|
||||
/// here.
|
||||
candidates: RefCountedCandidates,
|
||||
/// Keeps track at which block number a candidate was inserted. Used in `remove_up_to_height`.
|
||||
/// Without this tracking we won't be able to remove all candidates before block X.
|
||||
@@ -117,7 +118,8 @@ impl ScrapedCandidates {
|
||||
self.candidates.contains(candidate_hash)
|
||||
}
|
||||
|
||||
// Removes all candidates up to a given height. The candidates at the block height are NOT removed.
|
||||
// Removes all candidates up to a given height. The candidates at the block height are NOT
|
||||
// removed.
|
||||
pub fn remove_up_to_height(&mut self, height: &BlockNumber) -> HashSet<CandidateHash> {
|
||||
let mut candidates_modified: HashSet<CandidateHash> = HashSet::new();
|
||||
let not_stale = self.candidates_by_block_number.split_off(&height);
|
||||
|
||||
@@ -120,7 +120,8 @@ impl Inclusions {
|
||||
) {
|
||||
for candidate in candidates_modified {
|
||||
if let Some(blocks_including) = self.inclusions_inner.get_mut(&candidate) {
|
||||
// Returns everything after the given key, including the key. This works because the blocks are sorted in ascending order.
|
||||
// Returns everything after the given key, including the key. This works because the
|
||||
// blocks are sorted in ascending order.
|
||||
*blocks_including = blocks_including.split_off(height);
|
||||
}
|
||||
}
|
||||
@@ -150,8 +151,8 @@ impl Inclusions {
|
||||
///
|
||||
/// Concretely:
|
||||
///
|
||||
/// - Monitors for `CandidateIncluded` events to keep track of candidates that have been
|
||||
/// included on chains.
|
||||
/// - Monitors for `CandidateIncluded` events to keep track of candidates that have been included on
|
||||
/// chains.
|
||||
/// - Monitors for `CandidateBacked` events to keep track of all backed candidates.
|
||||
/// - Calls `FetchOnChainVotes` for each block to gather potentially missed votes from chain.
|
||||
///
|
||||
@@ -294,11 +295,11 @@ impl ChainScraper {
|
||||
|
||||
/// Prune finalized candidates.
|
||||
///
|
||||
/// We keep each candidate for `DISPUTE_CANDIDATE_LIFETIME_AFTER_FINALIZATION` blocks after finalization.
|
||||
/// After that we treat it as low priority.
|
||||
/// We keep each candidate for `DISPUTE_CANDIDATE_LIFETIME_AFTER_FINALIZATION` blocks after
|
||||
/// finalization. After that we treat it as low priority.
|
||||
pub fn process_finalized_block(&mut self, finalized_block_number: &BlockNumber) {
|
||||
// `DISPUTE_CANDIDATE_LIFETIME_AFTER_FINALIZATION - 1` because `finalized_block_number`counts to the
|
||||
// candidate lifetime.
|
||||
// `DISPUTE_CANDIDATE_LIFETIME_AFTER_FINALIZATION - 1` because
|
||||
// `finalized_block_number` counts to the candidate lifetime.
|
||||
match finalized_block_number.checked_sub(DISPUTE_CANDIDATE_LIFETIME_AFTER_FINALIZATION - 1)
|
||||
{
|
||||
Some(key_to_prune) => {
|
||||
|
||||
@@ -183,7 +183,8 @@ fn get_backed_candidate_event(block_number: BlockNumber) -> Vec<CandidateEvent>
|
||||
GroupIndex::from(0),
|
||||
)]
|
||||
}
|
||||
/// Hash for a 'magic' candidate. This is meant to be a special candidate used to verify special cases.
|
||||
/// Hash for a 'magic' candidate. This is meant to be a special candidate used to verify special
|
||||
/// cases.
|
||||
fn get_magic_candidate_hash() -> Hash {
|
||||
BlakeTwo256::hash(&"abc".encode())
|
||||
}
|
||||
@@ -425,7 +426,7 @@ fn scraper_requests_candidates_of_non_finalized_ancestors() {
|
||||
&chain,
|
||||
finalized_block_number,
|
||||
BLOCKS_TO_SKIP -
|
||||
(finalized_block_number - DISPUTE_CANDIDATE_LIFETIME_AFTER_FINALIZATION) as usize, // Expect the provider not to go past finalized block.
|
||||
(finalized_block_number - DISPUTE_CANDIDATE_LIFETIME_AFTER_FINALIZATION) as usize, /* Expect the provider not to go past finalized block. */
|
||||
get_backed_and_included_candidate_events,
|
||||
);
|
||||
join(process_active_leaves_update(ctx.sender(), &mut ordering, next_update), overseer_fut)
|
||||
@@ -468,7 +469,8 @@ fn scraper_prunes_finalized_candidates() {
|
||||
|
||||
let candidate = make_candidate_receipt(get_block_number_hash(TEST_TARGET_BLOCK_NUMBER));
|
||||
|
||||
// After `DISPUTE_CANDIDATE_LIFETIME_AFTER_FINALIZATION` blocks the candidate should be removed
|
||||
// After `DISPUTE_CANDIDATE_LIFETIME_AFTER_FINALIZATION` blocks the candidate should be
|
||||
// removed
|
||||
finalized_block_number =
|
||||
TEST_TARGET_BLOCK_NUMBER + DISPUTE_CANDIDATE_LIFETIME_AFTER_FINALIZATION;
|
||||
process_finalized_block(&mut scraper, &finalized_block_number);
|
||||
@@ -518,8 +520,9 @@ fn scraper_handles_backed_but_not_included_candidate() {
|
||||
finalized_block_number += 1;
|
||||
process_finalized_block(&mut scraper, &finalized_block_number);
|
||||
|
||||
// `FIRST_TEST_BLOCK` is finalized, which is within `BACKED_CANDIDATE_LIFETIME_AFTER_FINALIZATION` window.
|
||||
// The candidate should still be backed.
|
||||
// `FIRST_TEST_BLOCK` is finalized, which is within
|
||||
// `BACKED_CANDIDATE_LIFETIME_AFTER_FINALIZATION` window. The candidate should still be
|
||||
// backed.
|
||||
let candidate = make_candidate_receipt(get_block_number_hash(TEST_TARGET_BLOCK_NUMBER));
|
||||
assert!(!scraper.is_candidate_included(&candidate.hash()));
|
||||
assert!(scraper.is_candidate_backed(&candidate.hash()));
|
||||
@@ -576,7 +579,8 @@ fn scraper_handles_the_same_candidate_incuded_in_two_different_block_heights() {
|
||||
.await;
|
||||
|
||||
// Finalize blocks to enforce pruning of scraped events.
|
||||
// The magic candidate was added twice, so it shouldn't be removed if we finalize two more blocks.
|
||||
// The magic candidate was added twice, so it shouldn't be removed if we finalize two more
|
||||
// blocks.
|
||||
finalized_block_number = test_targets.first().expect("there are two block nums") +
|
||||
DISPUTE_CANDIDATE_LIFETIME_AFTER_FINALIZATION;
|
||||
process_finalized_block(&mut scraper, &finalized_block_number);
|
||||
@@ -641,7 +645,8 @@ fn inclusions_per_candidate_properly_adds_and_prunes() {
|
||||
])
|
||||
);
|
||||
|
||||
// After `DISPUTE_CANDIDATE_LIFETIME_AFTER_FINALIZATION` blocks the earlier inclusion should be removed
|
||||
// After `DISPUTE_CANDIDATE_LIFETIME_AFTER_FINALIZATION` blocks the earlier inclusion should
|
||||
// be removed
|
||||
finalized_block_number =
|
||||
TEST_TARGET_BLOCK_NUMBER + DISPUTE_CANDIDATE_LIFETIME_AFTER_FINALIZATION;
|
||||
process_finalized_block(&mut scraper, &finalized_block_number);
|
||||
|
||||
@@ -734,8 +734,9 @@ fn too_many_unconfirmed_statements_are_considered_spam() {
|
||||
.await;
|
||||
|
||||
// Participation has to fail here, otherwise the dispute will be confirmed. However
|
||||
// participation won't happen at all because the dispute is neither backed, not confirmed
|
||||
// nor the candidate is included. Or in other words - we'll refrain from participation.
|
||||
// participation won't happen at all because the dispute is neither backed, nor
|
||||
// confirmed nor the candidate is included. Or in other words - we'll refrain from
|
||||
// participation.
|
||||
|
||||
{
|
||||
let (tx, rx) = oneshot::channel();
|
||||
@@ -2050,7 +2051,8 @@ fn concluded_supermajority_against_non_active_after_time() {
|
||||
ImportStatementsResult::ValidImport => {}
|
||||
);
|
||||
|
||||
// Use a different expected commitments hash to ensure the candidate validation returns invalid.
|
||||
// Use a different expected commitments hash to ensure the candidate validation returns
|
||||
// invalid.
|
||||
participation_with_distribution(
|
||||
&mut virtual_overseer,
|
||||
&candidate_hash,
|
||||
@@ -2351,7 +2353,8 @@ fn resume_dispute_with_local_statement() {
|
||||
|
||||
assert_eq!(messages.len(), 1, "A message should have gone out.");
|
||||
|
||||
// Assert that subsystem is not sending Participation messages because we issued a local statement
|
||||
// Assert that subsystem is not sending Participation messages because we issued a local
|
||||
// statement
|
||||
assert!(virtual_overseer.recv().timeout(TEST_TIMEOUT).await.is_none());
|
||||
|
||||
virtual_overseer.send(FromOrchestra::Signal(OverseerSignal::Conclude)).await;
|
||||
@@ -2445,7 +2448,8 @@ fn resume_dispute_without_local_statement_or_local_key() {
|
||||
Box::pin(async move {
|
||||
test_state.handle_resume_sync(&mut virtual_overseer, session).await;
|
||||
|
||||
// Assert that subsystem is not sending Participation messages because we issued a local statement
|
||||
// Assert that subsystem is not sending Participation messages because we issued a
|
||||
// local statement
|
||||
assert!(virtual_overseer.recv().timeout(TEST_TIMEOUT).await.is_none());
|
||||
|
||||
virtual_overseer.send(FromOrchestra::Signal(OverseerSignal::Conclude)).await;
|
||||
@@ -2751,7 +2755,8 @@ fn redundant_votes_ignored() {
|
||||
}
|
||||
|
||||
#[test]
|
||||
/// Make sure no disputes are recorded when there are no opposing votes, even if we reached supermajority.
|
||||
/// Make sure no disputes are recorded when there are no opposing votes, even if we reached
|
||||
/// supermajority.
|
||||
fn no_onesided_disputes() {
|
||||
test_harness(|mut test_state, mut virtual_overseer| {
|
||||
Box::pin(async move {
|
||||
@@ -3124,16 +3129,17 @@ fn participation_requests_reprioritized_for_newly_included() {
|
||||
candidate_receipt.descriptor.pov_hash = Hash::from(
|
||||
[repetition; 32], // Altering this receipt so its hash will be changed
|
||||
);
|
||||
// Set consecutive parents (starting from zero). They will order the candidates for participation.
|
||||
// Set consecutive parents (starting from zero). They will order the candidates for
|
||||
// participation.
|
||||
let parent_block_num: BlockNumber = repetition as BlockNumber - 1;
|
||||
candidate_receipt.descriptor.relay_parent =
|
||||
test_state.block_num_to_header.get(&parent_block_num).unwrap().clone();
|
||||
receipts.push(candidate_receipt.clone());
|
||||
}
|
||||
|
||||
// Mark all candidates as backed, so their participation requests make it to best effort.
|
||||
// These calls must all occur before including the candidates due to test overseer
|
||||
// oddities.
|
||||
// Mark all candidates as backed, so their participation requests make it to best
|
||||
// effort. These calls must all occur before including the candidates due to test
|
||||
// overseer oddities.
|
||||
let mut candidate_events = Vec::new();
|
||||
for r in receipts.iter() {
|
||||
candidate_events.push(make_candidate_backed_event(r.clone()))
|
||||
@@ -3172,7 +3178,8 @@ fn participation_requests_reprioritized_for_newly_included() {
|
||||
.await;
|
||||
|
||||
// Handle corresponding messages to unblock import
|
||||
// we need to handle `ApprovalVotingMessage::GetApprovalSignaturesForCandidate` for import
|
||||
// we need to handle `ApprovalVotingMessage::GetApprovalSignaturesForCandidate` for
|
||||
// import
|
||||
handle_approval_vote_request(
|
||||
&mut virtual_overseer,
|
||||
&candidate_hash,
|
||||
@@ -3180,8 +3187,9 @@ fn participation_requests_reprioritized_for_newly_included() {
|
||||
)
|
||||
.await;
|
||||
|
||||
// We'll trigger participation for the first `MAX_PARALLEL_PARTICIPATIONS` candidates.
|
||||
// The rest will be queued => we need to handle `ChainApiMessage::BlockNumber` for them.
|
||||
// We'll trigger participation for the first `MAX_PARALLEL_PARTICIPATIONS`
|
||||
// candidates. The rest will be queued => we need to handle
|
||||
// `ChainApiMessage::BlockNumber` for them.
|
||||
if idx >= crate::participation::MAX_PARALLEL_PARTICIPATIONS {
|
||||
// We send the `idx` as parent block number, because it is used for ordering.
|
||||
// This way we get predictable ordering and participation.
|
||||
@@ -3201,11 +3209,13 @@ fn participation_requests_reprioritized_for_newly_included() {
|
||||
)
|
||||
.await;
|
||||
|
||||
// NB: The checks below are a bit racy. In theory candidate 2 can be processed even before candidate 0 and this is okay. If any
|
||||
// of the asserts in the two functions after this comment fail -> rework `participation_with_distribution` to expect a set of
|
||||
// NB: The checks below are a bit racy. In theory candidate 2 can be processed even
|
||||
// before candidate 0 and this is okay. If any of the asserts in the two functions after
|
||||
// this comment fail -> rework `participation_with_distribution` to expect a set of
|
||||
// commitment hashes instead of just one.
|
||||
|
||||
// This is the candidate for which participation was started initially (`MAX_PARALLEL_PARTICIPATIONS` threshold was not yet hit)
|
||||
// This is the candidate for which participation was started initially
|
||||
// (`MAX_PARALLEL_PARTICIPATIONS` threshold was not yet hit)
|
||||
participation_with_distribution(
|
||||
&mut virtual_overseer,
|
||||
&receipts.get(0).expect("There is more than one candidate").hash(),
|
||||
@@ -3326,7 +3336,8 @@ fn informs_chain_selection_when_dispute_concluded_against() {
|
||||
ImportStatementsResult::ValidImport => {}
|
||||
);
|
||||
|
||||
// Use a different expected commitments hash to ensure the candidate validation returns invalid.
|
||||
// Use a different expected commitments hash to ensure the candidate validation returns
|
||||
// invalid.
|
||||
participation_with_distribution(
|
||||
&mut virtual_overseer,
|
||||
&candidate_hash,
|
||||
@@ -3440,7 +3451,8 @@ fn session_info_is_requested_only_once() {
|
||||
|
||||
test_state.handle_resume_sync(&mut virtual_overseer, session).await;
|
||||
|
||||
// This leaf activation shouldn't fetch `SessionInfo` because the session is already cached
|
||||
// This leaf activation shouldn't fetch `SessionInfo` because the session is already
|
||||
// cached
|
||||
test_state
|
||||
.activate_leaf_at_session(
|
||||
&mut virtual_overseer,
|
||||
@@ -3475,8 +3487,8 @@ fn session_info_is_requested_only_once() {
|
||||
});
|
||||
}
|
||||
|
||||
// Big jump means the new session we see with a leaf update is at least a `DISPUTE_WINDOW` bigger than
|
||||
// the already known one. In this case The whole `DISPUTE_WINDOW` should be fetched.
|
||||
// Big jump means the new session we see with a leaf update is at least a `DISPUTE_WINDOW` bigger
|
||||
// than the already known one. In this case the whole `DISPUTE_WINDOW` should be fetched.
|
||||
#[test]
|
||||
fn session_info_big_jump_works() {
|
||||
test_harness(|mut test_state, mut virtual_overseer| {
|
||||
@@ -3485,7 +3497,8 @@ fn session_info_big_jump_works() {
|
||||
|
||||
test_state.handle_resume_sync(&mut virtual_overseer, session_on_startup).await;
|
||||
|
||||
// This leaf activation shouldn't fetch `SessionInfo` because the session is already cached
|
||||
// This leaf activation shouldn't fetch `SessionInfo` because the session is already
|
||||
// cached
|
||||
test_state
|
||||
.activate_leaf_at_session(
|
||||
&mut virtual_overseer,
|
||||
@@ -3525,8 +3538,8 @@ fn session_info_big_jump_works() {
|
||||
});
|
||||
}
|
||||
|
||||
// Small jump means the new session we see with a leaf update is at less than last known one + `DISPUTE_WINDOW`. In this
|
||||
// case fetching should start from last known one + 1.
|
||||
// Small jump means the new session we see with a leaf update is at less than last known one +
|
||||
// `DISPUTE_WINDOW`. In this case fetching should start from last known one + 1.
|
||||
#[test]
|
||||
fn session_info_small_jump_works() {
|
||||
test_harness(|mut test_state, mut virtual_overseer| {
|
||||
@@ -3535,7 +3548,8 @@ fn session_info_small_jump_works() {
|
||||
|
||||
test_state.handle_resume_sync(&mut virtual_overseer, session_on_startup).await;
|
||||
|
||||
// This leaf activation shouldn't fetch `SessionInfo` because the session is already cached
|
||||
// This leaf activation shouldn't fetch `SessionInfo` because the session is already
|
||||
// cached
|
||||
test_state
|
||||
.activate_leaf_at_session(
|
||||
&mut virtual_overseer,
|
||||
|
||||
@@ -16,11 +16,12 @@
|
||||
|
||||
//! The parachain inherent data provider
|
||||
//!
|
||||
//! Parachain backing and approval is an off-chain process, but the parachain needs to progress on chain as well. To
|
||||
//! make it progress on chain a block producer needs to forward information about the state of a parachain to the
|
||||
//! runtime. This information is forwarded through an inherent to the runtime. Here we provide the
|
||||
//! [`ParachainInherentDataProvider`] that requests the relevant data from the provisioner subsystem and creates the
|
||||
//! the inherent data that the runtime will use to create an inherent.
|
||||
//! Parachain backing and approval is an off-chain process, but the parachain needs to progress on
|
||||
//! chain as well. To make it progress on chain a block producer needs to forward information about
|
||||
//! the state of a parachain to the runtime. This information is forwarded through an inherent to
|
||||
//! the runtime. Here we provide the [`ParachainInherentDataProvider`] that requests the relevant
|
||||
//! data from the provisioner subsystem and creates the inherent data that the runtime will use
|
||||
//! to create an inherent.
|
||||
|
||||
#![deny(unused_crate_dependencies, unused_results)]
|
||||
|
||||
|
||||
@@ -14,7 +14,8 @@
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! The disputes module is responsible for selecting dispute votes to be sent with the inherent data.
|
||||
//! The disputes module is responsible for selecting dispute votes to be sent with the inherent
|
||||
//! data.
|
||||
|
||||
use crate::LOG_TARGET;
|
||||
use futures::channel::oneshot;
|
||||
@@ -22,7 +23,8 @@ use polkadot_node_primitives::CandidateVotes;
|
||||
use polkadot_node_subsystem::{messages::DisputeCoordinatorMessage, overseer};
|
||||
use polkadot_primitives::{CandidateHash, SessionIndex};
|
||||
|
||||
/// Request the relevant dispute statements for a set of disputes identified by `CandidateHash` and the `SessionIndex`.
|
||||
/// Request the relevant dispute statements for a set of disputes identified by `CandidateHash` and
|
||||
/// the `SessionIndex`.
|
||||
async fn request_votes(
|
||||
sender: &mut impl overseer::ProvisionerSenderTrait,
|
||||
disputes_to_query: Vec<(SessionIndex, CandidateHash)>,
|
||||
|
||||
@@ -48,7 +48,8 @@ pub const MAX_DISPUTE_VOTES_FORWARDED_TO_RUNTIME: usize = 200;
|
||||
/// Controls how much dispute votes to be fetched from the `dispute-coordinator` per iteration in
|
||||
/// `fn vote_selection`. The purpose is to fetch the votes in batches until
|
||||
/// `MAX_DISPUTE_VOTES_FORWARDED_TO_RUNTIME` is reached. If all votes are fetched in single call
|
||||
/// we might fetch votes which we never use. This will create unnecessary load on `dispute-coordinator`.
|
||||
/// we might fetch votes which we never use. This will create unnecessary load on
|
||||
/// `dispute-coordinator`.
|
||||
///
|
||||
/// This value should be less than `MAX_DISPUTE_VOTES_FORWARDED_TO_RUNTIME`. Increase it in case
|
||||
/// `provisioner` sends too many `QueryCandidateVotes` messages to `dispute-coordinator`.
|
||||
@@ -68,22 +69,23 @@ const VOTES_SELECTION_BATCH_SIZE: usize = 11;
|
||||
/// * Offchain vs Onchain
|
||||
/// * Concluded onchain vs Unconcluded onchain
|
||||
///
|
||||
/// Provisioner fetches all disputes from `dispute-coordinator` and separates them in multiple partitions.
|
||||
/// Please refer to `struct PartitionedDisputes` for details about the actual partitions.
|
||||
/// Each partition has got a priority implicitly assigned to it and the disputes are selected based on this
|
||||
/// priority (e.g. disputes in partition 1, then if there is space - disputes from partition 2 and so on).
|
||||
/// Provisioner fetches all disputes from `dispute-coordinator` and separates them in multiple
|
||||
/// partitions. Please refer to `struct PartitionedDisputes` for details about the actual
|
||||
/// partitions. Each partition has got a priority implicitly assigned to it and the disputes are
|
||||
/// selected based on this priority (e.g. disputes in partition 1, then if there is space - disputes
|
||||
/// from partition 2 and so on).
|
||||
///
|
||||
/// # Votes selection
|
||||
///
|
||||
/// Besides the prioritization described above the votes in each partition are filtered too. Provisioner
|
||||
/// fetches all onchain votes and filters them out from all partitions. As a result the Runtime receives
|
||||
/// only fresh votes (votes it didn't know about).
|
||||
/// Besides the prioritization described above the votes in each partition are filtered too.
|
||||
/// Provisioner fetches all onchain votes and filters them out from all partitions. As a result the
|
||||
/// Runtime receives only fresh votes (votes it didn't know about).
|
||||
///
|
||||
/// # How the onchain votes are fetched
|
||||
///
|
||||
/// The logic outlined above relies on `RuntimeApiRequest::Disputes` message from the Runtime. The user
|
||||
/// check the Runtime version before calling `select_disputes`. If the function is used with old runtime
|
||||
/// an error is logged and the logic will continue with empty onchain votes `HashMap`.
|
||||
/// The logic outlined above relies on `RuntimeApiRequest::Disputes` message from the Runtime. The
|
||||
/// user check the Runtime version before calling `select_disputes`. If the function is used with
|
||||
/// old runtime an error is logged and the logic will continue with empty onchain votes `HashMap`.
|
||||
pub async fn select_disputes<Sender>(
|
||||
sender: &mut Sender,
|
||||
metrics: &metrics::Metrics,
|
||||
@@ -110,7 +112,8 @@ where
|
||||
r
|
||||
},
|
||||
Err(GetOnchainDisputesError::NotSupported(runtime_api_err, relay_parent)) => {
|
||||
// Runtime version is checked before calling this method, so the error below should never happen!
|
||||
// Runtime version is checked before calling this method, so the error below should
|
||||
// never happen!
|
||||
gum::error!(
|
||||
target: LOG_TARGET,
|
||||
?runtime_api_err,
|
||||
@@ -152,7 +155,8 @@ where
|
||||
gum::trace!(target: LOG_TARGET, ?leaf, "Filtering recent disputes");
|
||||
|
||||
// Filter out unconfirmed disputes. However if the dispute is already onchain - don't skip it.
|
||||
// In this case we'd better push as much fresh votes as possible to bring it to conclusion faster.
|
||||
// In this case we'd better push as much fresh votes as possible to bring it to conclusion
|
||||
// faster.
|
||||
let recent_disputes = recent_disputes
|
||||
.into_iter()
|
||||
.filter(|d| d.2.is_confirmed_concluded() || onchain.contains_key(&(d.0, d.1)))
|
||||
@@ -178,9 +182,9 @@ where
|
||||
make_multi_dispute_statement_set(metrics, result)
|
||||
}
|
||||
|
||||
/// Selects dispute votes from `PartitionedDisputes` which should be sent to the runtime. Votes which
|
||||
/// are already onchain are filtered out. Result should be sorted by `(SessionIndex, CandidateHash)`
|
||||
/// which is enforced by the `BTreeMap`. This is a requirement from the runtime.
|
||||
/// Selects dispute votes from `PartitionedDisputes` which should be sent to the runtime. Votes
|
||||
/// which are already onchain are filtered out. Result should be sorted by `(SessionIndex,
|
||||
/// CandidateHash)` which is enforced by the `BTreeMap`. This is a requirement from the runtime.
|
||||
async fn vote_selection<Sender>(
|
||||
sender: &mut Sender,
|
||||
partitioned: PartitionedDisputes,
|
||||
@@ -237,9 +241,9 @@ where
|
||||
for (session_index, candidate_hash, selected_votes) in votes {
|
||||
let votes_len = selected_votes.valid.raw().len() + selected_votes.invalid.len();
|
||||
if votes_len + total_votes_len > MAX_DISPUTE_VOTES_FORWARDED_TO_RUNTIME {
|
||||
// we are done - no more votes can be added. Importantly, we don't add any votes for a dispute here
|
||||
// if we can't fit them all. This gives us an important invariant, that backing votes for
|
||||
// disputes make it into the provisioned vote set.
|
||||
// we are done - no more votes can be added. Importantly, we don't add any votes for
|
||||
// a dispute here if we can't fit them all. This gives us an important invariant,
|
||||
// that backing votes for disputes make it into the provisioned vote set.
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
?request_votes_counter,
|
||||
@@ -483,7 +487,8 @@ fn make_multi_dispute_statement_set(
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Gets the on-chain disputes at a given block number and returns them as a `HashMap` so that searching in them is cheap.
|
||||
/// Gets the on-chain disputes at a given block number and returns them as a `HashMap` so that
|
||||
/// searching in them is cheap.
|
||||
pub async fn get_onchain_disputes<Sender>(
|
||||
sender: &mut Sender,
|
||||
relay_parent: Hash,
|
||||
|
||||
@@ -237,21 +237,22 @@ fn partitioning_happy_case() {
|
||||
);
|
||||
}
|
||||
|
||||
// This test verifies the double voting behavior. Currently we don't care if a supermajority is achieved with or
|
||||
// without the 'help' of a double vote (a validator voting for and against at the same time). This makes the test
|
||||
// a bit pointless but anyway I'm leaving it here to make this decision explicit and have the test code ready in
|
||||
// case this behavior needs to be further tested in the future.
|
||||
// Link to the PR with the discussions: https://github.com/paritytech/polkadot/pull/5567
|
||||
// This test verifies the double voting behavior. Currently we don't care if a supermajority is
|
||||
// achieved with or without the 'help' of a double vote (a validator voting for and against at the
|
||||
// same time). This makes the test a bit pointless but anyway I'm leaving it here to make this
|
||||
// decision explicit and have the test code ready in case this behavior needs to be further tested
|
||||
// in the future. Link to the PR with the discussions: https://github.com/paritytech/polkadot/pull/5567
|
||||
#[test]
|
||||
fn partitioning_doubled_onchain_vote() {
|
||||
let mut input = Vec::<(SessionIndex, CandidateHash, DisputeStatus)>::new();
|
||||
let mut onchain = HashMap::<(u32, CandidateHash), DisputeState>::new();
|
||||
|
||||
// Dispute A relies on a 'double onchain vote' to conclude. Validator with index 0 has voted both `for` and `against`.
|
||||
// Despite that this dispute should be considered 'can conclude onchain'.
|
||||
// Dispute A relies on a 'double onchain vote' to conclude. Validator with index 0 has voted
|
||||
// both `for` and `against`. Despite that this dispute should be considered 'can conclude
|
||||
// onchain'.
|
||||
let dispute_a = (3, CandidateHash(Hash::random()), DisputeStatus::Active);
|
||||
// Dispute B has supermajority + 1 votes, so the doubled onchain vote doesn't affect it. It should be considered
|
||||
// as 'can conclude onchain'.
|
||||
// Dispute B has supermajority + 1 votes, so the doubled onchain vote doesn't affect it. It
|
||||
// should be considered as 'can conclude onchain'.
|
||||
let dispute_b = (4, CandidateHash(Hash::random()), DisputeStatus::Active);
|
||||
input.push(dispute_a.clone());
|
||||
input.push(dispute_b.clone());
|
||||
|
||||
@@ -81,7 +81,8 @@ pub enum Error {
|
||||
OverseerExited(SubsystemError),
|
||||
}
|
||||
|
||||
/// Used by `get_onchain_disputes` to represent errors related to fetching on-chain disputes from the Runtime
|
||||
/// Used by `get_onchain_disputes` to represent errors related to fetching on-chain disputes from
|
||||
/// the Runtime
|
||||
#[allow(dead_code)] // Remove when promoting to stable
|
||||
#[fatality::fatality]
|
||||
pub enum GetOnchainDisputesError {
|
||||
|
||||
@@ -466,11 +466,11 @@ async fn send_inherent_data(
|
||||
/// - not more than one per validator
|
||||
/// - each 1 bit must correspond to an occupied core
|
||||
///
|
||||
/// If we have too many, an arbitrary selection policy is fine. For purposes of maximizing availability,
|
||||
/// we pick the one with the greatest number of 1 bits.
|
||||
/// If we have too many, an arbitrary selection policy is fine. For purposes of maximizing
|
||||
/// availability, we pick the one with the greatest number of 1 bits.
|
||||
///
|
||||
/// Note: This does not enforce any sorting precondition on the output; the ordering there will be unrelated
|
||||
/// to the sorting of the input.
|
||||
/// Note: This does not enforce any sorting precondition on the output; the ordering there will be
|
||||
/// unrelated to the sorting of the input.
|
||||
fn select_availability_bitfields(
|
||||
cores: &[CoreState],
|
||||
bitfields: &[SignedAvailabilityBitfield],
|
||||
@@ -532,7 +532,8 @@ fn select_availability_bitfields(
|
||||
selected.into_values().collect()
|
||||
}
|
||||
|
||||
/// Determine which cores are free, and then to the degree possible, pick a candidate appropriate to each free core.
|
||||
/// Determine which cores are free, and then to the degree possible, pick a candidate appropriate to
|
||||
/// each free core.
|
||||
async fn select_candidates(
|
||||
availability_cores: &[CoreState],
|
||||
bitfields: &[SignedAvailabilityBitfield],
|
||||
@@ -593,7 +594,8 @@ async fn select_candidates(
|
||||
|
||||
let computed_validation_data_hash = validation_data.hash();
|
||||
|
||||
// we arbitrarily pick the first of the backed candidates which match the appropriate selection criteria
|
||||
// we arbitrarily pick the first of the backed candidates which match the appropriate
|
||||
// selection criteria
|
||||
if let Some(candidate) = candidates.iter().find(|backed_candidate| {
|
||||
let descriptor = &backed_candidate.descriptor;
|
||||
descriptor.para_id == scheduled_core.para_id &&
|
||||
@@ -628,12 +630,12 @@ async fn select_candidates(
|
||||
gum::trace!(target: LOG_TARGET, leaf_hash=?relay_parent,
|
||||
"Got {} backed candidates", candidates.len());
|
||||
|
||||
// `selected_candidates` is generated in ascending order by core index, and `GetBackedCandidates`
|
||||
// _should_ preserve that property, but let's just make sure.
|
||||
// `selected_candidates` is generated in ascending order by core index, and
|
||||
// `GetBackedCandidates` _should_ preserve that property, but let's just make sure.
|
||||
//
|
||||
// We can't easily map from `BackedCandidate` to `core_idx`, but we know that every selected candidate
|
||||
// maps to either 0 or 1 backed candidate, and the hashes correspond. Therefore, by checking them
|
||||
// in order, we can ensure that the backed candidates are also in order.
|
||||
// We can't easily map from `BackedCandidate` to `core_idx`, but we know that every selected
|
||||
// candidate maps to either 0 or 1 backed candidate, and the hashes correspond. Therefore, by
|
||||
// checking them in order, we can ensure that the backed candidates are also in order.
|
||||
let mut backed_idx = 0;
|
||||
for selected in selected_candidates {
|
||||
if selected ==
|
||||
@@ -705,8 +707,9 @@ fn bitfields_indicate_availability(
|
||||
let validator_idx = bitfield.validator_index().0 as usize;
|
||||
match availability.get_mut(validator_idx) {
|
||||
None => {
|
||||
// in principle, this function might return a `Result<bool, Error>` so that we can more clearly express this error condition
|
||||
// however, in practice, that would just push off an error-handling routine which would look a whole lot like this one.
|
||||
// in principle, this function might return a `Result<bool, Error>` so that we can
|
||||
// more clearly express this error condition; however, in practice, that would just
|
||||
// push off an error-handling routine which would look a whole lot like this one.
|
||||
// simpler to just handle the error internally here.
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
@@ -726,8 +729,8 @@ fn bitfields_indicate_availability(
|
||||
3 * availability.count_ones() >= 2 * availability.len()
|
||||
}
|
||||
|
||||
// If we have to be absolutely precise here, this method gets the version of the `ParachainHost` api.
|
||||
// For brevity we'll just call it 'runtime version'.
|
||||
// If we have to be absolutely precise here, this method gets the version of the `ParachainHost`
|
||||
// api. For brevity we'll just call it 'runtime version'.
|
||||
async fn has_required_runtime(
|
||||
sender: &mut impl overseer::ProvisionerSenderTrait,
|
||||
relay_parent: Hash,
|
||||
|
||||
@@ -28,9 +28,10 @@ struct MetricsInner {
|
||||
/// Bitfields array length in `ProvisionerInherentData` (the result for `RequestInherentData`)
|
||||
inherent_data_response_bitfields: prometheus::Histogram,
|
||||
|
||||
/// The following metrics track how many disputes/votes the runtime will have to process. These will count
|
||||
/// all recent statements meaning every dispute from last sessions: 10 min on Rococo, 60 min on Kusama and
|
||||
/// 4 hours on Polkadot. The metrics are updated only when the node authors a block, so values vary across nodes.
|
||||
/// The following metrics track how many disputes/votes the runtime will have to process. These
|
||||
/// will count all recent statements meaning every dispute from last sessions: 10 min on
|
||||
/// Rococo, 60 min on Kusama and 4 hours on Polkadot. The metrics are updated only when the
|
||||
/// node authors a block, so values vary across nodes.
|
||||
inherent_data_dispute_statement_sets: prometheus::Counter<prometheus::U64>,
|
||||
inherent_data_dispute_statements: prometheus::CounterVec<prometheus::U64>,
|
||||
|
||||
|
||||
@@ -90,7 +90,8 @@ mod select_availability_bitfields {
|
||||
let cores = vec![occupied_core(0), occupied_core(1)];
|
||||
|
||||
// we pass in three bitfields with two validators
|
||||
// this helps us check the postcondition that we get two bitfields back, for which the validators differ
|
||||
// this helps us check the postcondition that we get two bitfields back, for which the
|
||||
// validators differ
|
||||
let bitfields = vec![
|
||||
signed_bitfield(&keystore, bitvec.clone(), ValidatorIndex(0)),
|
||||
signed_bitfield(&keystore, bitvec.clone(), ValidatorIndex(1)),
|
||||
|
||||
@@ -110,8 +110,8 @@ struct State {
|
||||
///
|
||||
/// Here are some fun facts about these futures:
|
||||
///
|
||||
/// - Pre-checking can take quite some time, in the matter of tens of seconds, so the futures here
|
||||
/// can soak for quite some time.
|
||||
/// - Pre-checking can take quite some time, in the matter of tens of seconds, so the futures
|
||||
/// here can soak for quite some time.
|
||||
/// - Pre-checking of one PVF can take drastically more time than pre-checking of another PVF.
|
||||
/// This leads to results coming out of order.
|
||||
///
|
||||
|
||||
@@ -110,8 +110,8 @@ impl TestState {
|
||||
Self { leaves, sessions, last_session_index }
|
||||
}
|
||||
|
||||
/// A convenience function to receive a message from the overseer and returning `None` if nothing
|
||||
/// was received within a reasonable (for local tests anyway) timeout.
|
||||
/// A convenience function to receive a message from the overseer and returning `None` if
|
||||
/// nothing was received within a reasonable (for local tests anyway) timeout.
|
||||
async fn recv_timeout(&mut self, handle: &mut VirtualOverseer) -> Option<AllMessages> {
|
||||
futures::select! {
|
||||
msg = handle.recv().fuse() => {
|
||||
|
||||
@@ -18,8 +18,8 @@ use crate::prepare::PrepareStats;
|
||||
use parity_scale_codec::{Decode, Encode};
|
||||
use std::fmt;
|
||||
|
||||
/// Result of PVF preparation performed by the validation host. Contains stats about the preparation if
|
||||
/// successful
|
||||
/// Result of PVF preparation performed by the validation host. Contains stats about the preparation
|
||||
/// if successful
|
||||
pub type PrepareResult = Result<PrepareStats, PrepareError>;
|
||||
|
||||
/// An error that occurred during the prepare part of the PVF pipeline.
|
||||
@@ -35,13 +35,15 @@ pub enum PrepareError {
|
||||
Panic(String),
|
||||
/// Failed to prepare the PVF due to the time limit.
|
||||
TimedOut,
|
||||
/// An IO error occurred. This state is reported by either the validation host or by the worker.
|
||||
/// An IO error occurred. This state is reported by either the validation host or by the
|
||||
/// worker.
|
||||
IoErr(String),
|
||||
/// The temporary file for the artifact could not be created at the given cache path. This state is reported by the
|
||||
/// validation host (not by the worker).
|
||||
/// The temporary file for the artifact could not be created at the given cache path. This
|
||||
/// state is reported by the validation host (not by the worker).
|
||||
CreateTmpFileErr(String),
|
||||
/// The response from the worker is received, but the file cannot be renamed (moved) to the final destination
|
||||
/// location. This state is reported by the validation host (not by the worker).
|
||||
/// The response from the worker is received, but the file cannot be renamed (moved) to the
|
||||
/// final destination location. This state is reported by the validation host (not by the
|
||||
/// worker).
|
||||
RenameTmpFileErr(String),
|
||||
}
|
||||
|
||||
@@ -81,15 +83,16 @@ impl fmt::Display for PrepareError {
|
||||
|
||||
/// Some internal error occurred.
|
||||
///
|
||||
/// Should only ever be used for validation errors independent of the candidate and PVF, or for errors we ruled out
|
||||
/// during pre-checking (so preparation errors are fine).
|
||||
/// Should only ever be used for validation errors independent of the candidate and PVF, or for
|
||||
/// errors we ruled out during pre-checking (so preparation errors are fine).
|
||||
#[derive(Debug, Clone, Encode, Decode)]
|
||||
pub enum InternalValidationError {
|
||||
/// Some communication error occurred with the host.
|
||||
HostCommunication(String),
|
||||
/// Could not find or open compiled artifact file.
|
||||
CouldNotOpenFile(String),
|
||||
/// An error occurred in the CPU time monitor thread. Should be totally unrelated to validation.
|
||||
/// An error occurred in the CPU time monitor thread. Should be totally unrelated to
|
||||
/// validation.
|
||||
CpuTimeMonitorThread(String),
|
||||
/// Some non-deterministic preparation error occurred.
|
||||
NonDeterministicPrepareError(PrepareError),
|
||||
|
||||
@@ -35,10 +35,10 @@ use std::any::{Any, TypeId};
|
||||
// left for the stack; this is, of course, overridable at link time when compiling the runtime)
|
||||
// plus the number of pages specified in the `extra_heap_pages` passed to the executor.
|
||||
//
|
||||
// By default, rustc (or `lld` specifically) should allocate 1 MiB for the shadow stack, or 16 pages.
|
||||
// The data section for runtimes are typically rather small and can fit in a single digit number of
|
||||
// WASM pages, so let's say an extra 16 pages. Thus let's assume that 32 pages or 2 MiB are used for
|
||||
// these needs by default.
|
||||
// By default, rustc (or `lld` specifically) should allocate 1 MiB for the shadow stack, or 16
|
||||
// pages. The data section for runtimes is typically rather small and can fit in a single digit
|
||||
// number of WASM pages, so let's say an extra 16 pages. Thus let's assume that 32 pages or 2 MiB
|
||||
// are used for these needs by default.
|
||||
const DEFAULT_HEAP_PAGES_ESTIMATE: u32 = 32;
|
||||
const EXTRA_HEAP_PAGES: u32 = 2048;
|
||||
|
||||
@@ -65,9 +65,9 @@ pub const DEFAULT_CONFIG: Config = Config {
|
||||
//
|
||||
// Here is how the values below were chosen.
|
||||
//
|
||||
// At the moment of writing, the default native stack size limit is 1 MiB. Assuming a logical item
|
||||
// (see the docs about the field and the instrumentation algorithm) is 8 bytes, 1 MiB can
|
||||
// fit 2x 65536 logical items.
|
||||
// At the moment of writing, the default native stack size limit is 1 MiB. Assuming a
|
||||
// logical item (see the docs about the field and the instrumentation algorithm) is 8 bytes,
|
||||
// 1 MiB can fit 2x 65536 logical items.
|
||||
//
|
||||
// Since reaching the native stack limit is undesirable, we halve the logical item limit and
|
||||
// also increase the native 256x. This hopefully should preclude wasm code from reaching
|
||||
@@ -113,7 +113,7 @@ pub fn params_to_wasmtime_semantics(par: &ExecutorParams) -> Result<Semantics, S
|
||||
ExecutorParam::WasmExtBulkMemory => sem.wasm_bulk_memory = true,
|
||||
// TODO: Not implemented yet; <https://github.com/paritytech/polkadot/issues/6472>.
|
||||
ExecutorParam::PrecheckingMaxMemory(_) => (),
|
||||
ExecutorParam::PvfPrepTimeout(_, _) | ExecutorParam::PvfExecTimeout(_, _) => (), // Not used here
|
||||
ExecutorParam::PvfPrepTimeout(_, _) | ExecutorParam::PvfExecTimeout(_, _) => (), /* Not used here */
|
||||
}
|
||||
}
|
||||
sem.deterministic_stack_limit = Some(stack_limit);
|
||||
@@ -135,8 +135,8 @@ impl Executor {
|
||||
Ok(Self { config })
|
||||
}
|
||||
|
||||
/// Executes the given PVF in the form of a compiled artifact and returns the result of execution
|
||||
/// upon success.
|
||||
/// Executes the given PVF in the form of a compiled artifact and returns the result of
|
||||
/// execution upon success.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
|
||||
@@ -251,9 +251,9 @@ pub mod thread {
|
||||
Arc::new((Mutex::new(WaitOutcome::Pending), Condvar::new()))
|
||||
}
|
||||
|
||||
/// Runs a worker thread. Will first enable security features, and afterwards notify the threads waiting on the
|
||||
/// condvar. Catches panics during execution and resumes the panics after triggering the condvar, so that the
|
||||
/// waiting thread is notified on panics.
|
||||
/// Runs a worker thread. Will first enable security features, and afterwards notify the threads
|
||||
/// waiting on the condvar. Catches panics during execution and resumes the panics after
|
||||
/// triggering the condvar, so that the waiting thread is notified on panics.
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
|
||||
@@ -239,7 +239,8 @@ pub fn worker_entrypoint(
|
||||
WaitOutcome::TimedOut => {
|
||||
match cpu_time_monitor_thread.join() {
|
||||
Ok(Some(cpu_time_elapsed)) => {
|
||||
// Log if we exceed the timeout and the other thread hasn't finished.
|
||||
// Log if we exceed the timeout and the other thread hasn't
|
||||
// finished.
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
%worker_pid,
|
||||
|
||||
@@ -190,8 +190,9 @@ pub fn worker_entrypoint(
|
||||
|
||||
// If we are pre-checking, check for runtime construction errors.
|
||||
//
|
||||
// As pre-checking is more strict than just preparation in terms of memory and
|
||||
// time, it is okay to do extra checks here. This takes negligible time anyway.
|
||||
// As pre-checking is more strict than just preparation in terms of memory
|
||||
// and time, it is okay to do extra checks here. This takes negligible time
|
||||
// anyway.
|
||||
if let PrepareJobKind::Prechecking = prepare_job_kind {
|
||||
result = result.and_then(|output| {
|
||||
runtime_construction_check(output.0.as_ref(), executor_params)?;
|
||||
@@ -253,10 +254,11 @@ pub fn worker_entrypoint(
|
||||
|
||||
// Write the serialized artifact into a temp file.
|
||||
//
|
||||
// PVF host only keeps artifacts statuses in its memory, successfully
|
||||
// compiled code gets stored on the disk (and consequently deserialized
|
||||
// by execute-workers). The prepare worker is only required to send `Ok`
|
||||
// to the pool to indicate the success.
|
||||
// PVF host only keeps artifacts statuses in its memory,
|
||||
// successfully compiled code gets stored on the disk (and
|
||||
// consequently deserialized by execute-workers). The prepare worker
|
||||
// is only required to send `Ok` to the pool to indicate the
|
||||
// success.
|
||||
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
@@ -275,7 +277,8 @@ pub fn worker_entrypoint(
|
||||
WaitOutcome::TimedOut => {
|
||||
match cpu_time_monitor_thread.join() {
|
||||
Ok(Some(cpu_time_elapsed)) => {
|
||||
// Log if we exceed the timeout and the other thread hasn't finished.
|
||||
// Log if we exceed the timeout and the other thread hasn't
|
||||
// finished.
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
%worker_pid,
|
||||
|
||||
@@ -83,8 +83,8 @@ pub mod memory_tracker {
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// For simplicity, any errors are returned as a string. As this is not a critical component, errors
|
||||
/// are used for informational purposes (logging) only.
|
||||
/// For simplicity, any errors are returned as a string. As this is not a critical component,
|
||||
/// errors are used for informational purposes (logging) only.
|
||||
pub fn memory_tracker_loop(condvar: thread::Cond) -> Result<MemoryAllocationStats, String> {
|
||||
// NOTE: This doesn't need to be too fine-grained since preparation currently takes 3-10s or
|
||||
// more. Apart from that, there is not really a science to this number.
|
||||
|
||||
@@ -224,7 +224,8 @@ impl Artifacts {
|
||||
.is_none());
|
||||
}
|
||||
|
||||
/// Remove and retrieve the artifacts from the table that are older than the supplied Time-To-Live.
|
||||
/// Remove and retrieve the artifacts from the table that are older than the supplied
|
||||
/// Time-To-Live.
|
||||
pub fn prune(&mut self, artifact_ttl: Duration) -> Vec<ArtifactId> {
|
||||
let now = SystemTime::now();
|
||||
|
||||
|
||||
@@ -38,29 +38,30 @@ pub enum InvalidCandidate {
|
||||
/// The worker has died during validation of a candidate. That may fall in one of the following
|
||||
/// categories, which we cannot distinguish programmatically:
|
||||
///
|
||||
/// (a) Some sort of transient glitch caused the worker process to abort. An example would be that
|
||||
/// the host machine ran out of free memory and the OOM killer started killing the processes,
|
||||
/// and in order to save the parent it will "sacrifice child" first.
|
||||
/// (a) Some sort of transient glitch caused the worker process to abort. An example would be
|
||||
/// that the host machine ran out of free memory and the OOM killer started killing the
|
||||
/// processes, and in order to save the parent it will "sacrifice child" first.
|
||||
///
|
||||
/// (b) The candidate triggered a code path that has led to the process death. For example,
|
||||
/// the PVF found a way to consume unbounded amount of resources and then it either exceeded
|
||||
/// an `rlimit` (if set) or, again, invited OOM killer. Another possibility is a bug in
|
||||
/// wasmtime allowed the PVF to gain control over the execution worker.
|
||||
/// the PVF found a way to consume unbounded amount of resources and then it either
|
||||
/// exceeded an `rlimit` (if set) or, again, invited OOM killer. Another possibility is a
|
||||
/// bug in wasmtime allowed the PVF to gain control over the execution worker.
|
||||
///
|
||||
/// We attribute such an event to an *invalid candidate* in either case.
|
||||
///
|
||||
/// The rationale for this is that a glitch may lead to unfair rejecting candidate by a single
|
||||
/// validator. If the glitch is somewhat more persistent the validator will reject all candidate
|
||||
/// thrown at it and hopefully the operator notices it by decreased reward performance of the
|
||||
/// validator. On the other hand, if the worker died because of (b) we would have better chances
|
||||
/// to stop the attack.
|
||||
/// validator. If the glitch is somewhat more persistent the validator will reject all
|
||||
/// candidates thrown at it and hopefully the operator notices it by decreased reward
|
||||
/// performance of the validator. On the other hand, if the worker died because of (b) we would
|
||||
/// have better chances to stop the attack.
|
||||
AmbiguousWorkerDeath,
|
||||
/// PVF execution (compilation is not included) took more time than was allotted.
|
||||
HardTimeout,
|
||||
/// A panic occurred and we can't be sure whether the candidate is really invalid or some internal glitch occurred.
|
||||
/// Whenever we are unsure, we can never treat an error as internal as we would abstain from voting. This is bad
|
||||
/// because if the issue was due to the candidate, then all validators would abstain, stalling finality on the
|
||||
/// chain. So we will first retry the candidate, and if the issue persists we are forced to vote invalid.
|
||||
/// A panic occurred and we can't be sure whether the candidate is really invalid or some
|
||||
/// internal glitch occurred. Whenever we are unsure, we can never treat an error as internal
|
||||
/// as we would abstain from voting. This is bad because if the issue was due to the candidate,
|
||||
/// then all validators would abstain, stalling finality on the chain. So we will first retry
|
||||
/// the candidate, and if the issue persists we are forced to vote invalid.
|
||||
Panic(String),
|
||||
}
|
||||
|
||||
|
||||
@@ -419,7 +419,8 @@ fn spawn_extra_worker(queue: &mut Queue, job: ExecuteJob) {
|
||||
/// beforehand. In such a way, a race condition is avoided: during the worker being spawned,
|
||||
/// another job in the queue, with an incompatible execution environment, may become stale, and
|
||||
/// the queue would have to kill a newly started worker and spawn another one.
|
||||
/// Nevertheless, if the worker finishes executing the job, it becomes idle and may be used to execute other jobs with a compatible execution environment.
|
||||
/// Nevertheless, if the worker finishes executing the job, it becomes idle and may be used to
|
||||
/// execute other jobs with a compatible execution environment.
|
||||
async fn spawn_worker_task(
|
||||
program_path: PathBuf,
|
||||
job: ExecuteJob,
|
||||
|
||||
@@ -74,8 +74,9 @@ pub enum Outcome {
|
||||
/// PVF execution completed successfully and the result is returned. The worker is ready for
|
||||
/// another job.
|
||||
Ok { result_descriptor: ValidationResult, duration: Duration, idle_worker: IdleWorker },
|
||||
/// The candidate validation failed. It may be for example because the wasm execution triggered a trap.
|
||||
/// Errors related to the preparation process are not expected to be encountered by the execution workers.
|
||||
/// The candidate validation failed. It may be for example because the wasm execution triggered
|
||||
/// a trap. Errors related to the preparation process are not expected to be encountered by the
|
||||
/// execution workers.
|
||||
InvalidCandidate { err: String, idle_worker: IdleWorker },
|
||||
/// An internal error happened during the validation. Such an error is most likely related to
|
||||
/// some transient glitch.
|
||||
@@ -95,7 +96,8 @@ pub enum Outcome {
|
||||
/// Given the idle token of a worker and parameters of work, communicates with the worker and
|
||||
/// returns the outcome.
|
||||
///
|
||||
/// NOTE: Not returning the idle worker token in `Outcome` will trigger the child process being killed.
|
||||
/// NOTE: Not returning the idle worker token in `Outcome` will trigger the child process being
|
||||
/// killed.
|
||||
pub async fn start_work(
|
||||
worker: IdleWorker,
|
||||
artifact: ArtifactPathId,
|
||||
|
||||
@@ -455,8 +455,8 @@ async fn handle_precheck_pvf(
|
||||
ArtifactState::Preparing { waiting_for_response, num_failures: _ } =>
|
||||
waiting_for_response.push(result_sender),
|
||||
ArtifactState::FailedToProcess { error, .. } => {
|
||||
// Do not retry failed preparation if another pre-check request comes in. We do not retry pre-checking,
|
||||
// anyway.
|
||||
// Do not retry failed preparation if another pre-check request comes in. We do not
|
||||
// retry pre-checking, anyway.
|
||||
let _ = result_sender.send(PrepareResult::Err(error.clone()));
|
||||
},
|
||||
}
|
||||
@@ -470,8 +470,8 @@ async fn handle_precheck_pvf(
|
||||
|
||||
/// Handles PVF execution.
|
||||
///
|
||||
/// This will try to prepare the PVF, if a prepared artifact does not already exist. If there is already a
|
||||
/// preparation job, we coalesce the two preparation jobs.
|
||||
/// This will try to prepare the PVF, if a prepared artifact does not already exist. If there is
|
||||
/// already a preparation job, we coalesce the two preparation jobs.
|
||||
///
|
||||
/// If the prepare job succeeded previously, we will enqueue an execute job right away.
|
||||
///
|
||||
@@ -521,7 +521,8 @@ async fn handle_execute_pvf(
|
||||
"handle_execute_pvf: Re-queuing PVF preparation for prepared artifact with missing file."
|
||||
);
|
||||
|
||||
// The artifact has been prepared previously but the file is missing, prepare it again.
|
||||
// The artifact has been prepared previously but the file is missing, prepare it
|
||||
// again.
|
||||
*state = ArtifactState::Preparing {
|
||||
waiting_for_response: Vec::new(),
|
||||
num_failures: 0,
|
||||
@@ -721,8 +722,8 @@ async fn handle_prepare_done(
|
||||
pending_requests
|
||||
{
|
||||
if result_tx.is_canceled() {
|
||||
// Preparation could've taken quite a bit of time and the requester may be not interested
|
||||
// in execution anymore, in which case we just skip the request.
|
||||
// Preparation could've taken quite a bit of time and the requester may be not
|
||||
// interested in execution anymore, in which case we just skip the request.
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -855,8 +856,8 @@ fn can_retry_prepare_after_failure(
|
||||
return false
|
||||
}
|
||||
|
||||
// Retry if the retry cooldown has elapsed and if we have already retried less than `NUM_PREPARE_RETRIES` times. IO
|
||||
// errors may resolve themselves.
|
||||
// Retry if the retry cooldown has elapsed and if we have already retried less than
|
||||
// `NUM_PREPARE_RETRIES` times. IO errors may resolve themselves.
|
||||
SystemTime::now() >= last_time_failed + PREPARE_FAILURE_COOLDOWN &&
|
||||
num_failures <= NUM_PREPARE_RETRIES
|
||||
}
|
||||
|
||||
@@ -32,26 +32,26 @@
|
||||
//! (a) PVF pre-checking. This takes the `Pvf` code and tries to prepare it (verify and
|
||||
//! compile) in order to pre-check its validity.
|
||||
//!
|
||||
//! (b) PVF execution. This accepts the PVF [`params`][`polkadot_parachain::primitives::ValidationParams`]
|
||||
//! and the `Pvf` code, prepares (verifies and compiles) the code, and then executes PVF
|
||||
//! with the `params`.
|
||||
//! (b) PVF execution. This accepts the PVF
|
||||
//! [`params`][`polkadot_parachain::primitives::ValidationParams`] and the `Pvf` code, prepares
|
||||
//! (verifies and compiles) the code, and then executes PVF with the `params`.
|
||||
//!
|
||||
//! (c) Heads up. This request allows to signal that the given PVF may be needed soon and that it
|
||||
//! should be prepared for execution.
|
||||
//!
|
||||
//! The preparation results are cached for some time after they either used or was signaled in heads up.
|
||||
//! All requests that depends on preparation of the same PVF are bundled together and will be executed
|
||||
//! as soon as the artifact is prepared.
|
||||
//! The preparation results are cached for some time after they are either used or signaled in heads
|
||||
//! up. All requests that depend on preparation of the same PVF are bundled together and will be
|
||||
//! executed as soon as the artifact is prepared.
|
||||
//!
|
||||
//! # Priority
|
||||
//!
|
||||
//! PVF execution requests can specify the [priority][`Priority`] with which the given request should
|
||||
//! be handled. Different priority levels have different effects. This is discussed below.
|
||||
//! PVF execution requests can specify the [priority][`Priority`] with which the given request
|
||||
//! should be handled. Different priority levels have different effects. This is discussed below.
|
||||
//!
|
||||
//! Preparation started by a heads up signal always starts with the background priority. If there
|
||||
//! is already a request for that PVF preparation under way the priority is inherited. If after heads
|
||||
//! up, a new PVF execution request comes in with a higher priority, then the original task's priority
|
||||
//! will be adjusted to match the new one if it's larger.
|
||||
//! is already a request for that PVF preparation under way the priority is inherited. If after
|
||||
//! heads up, a new PVF execution request comes in with a higher priority, then the original task's
|
||||
//! priority will be adjusted to match the new one if it's larger.
|
||||
//!
|
||||
//! Priority can never go down, only up.
|
||||
//!
|
||||
@@ -63,11 +63,11 @@
|
||||
//! dissimilar to actors. Each of such "processes" is a future task that contains an event loop that
|
||||
//! processes incoming messages, potentially delegating sub-tasks to other "processes".
|
||||
//!
|
||||
//! Two of these processes are queues. The first one is for preparation jobs and the second one is for
|
||||
//! execution. Both of the queues are backed by separate pools of workers of different kinds.
|
||||
//! Two of these processes are queues. The first one is for preparation jobs and the second one is
|
||||
//! for execution. Both of the queues are backed by separate pools of workers of different kinds.
|
||||
//!
|
||||
//! Preparation workers handle preparation requests by prevalidating and instrumenting PVF wasm code,
|
||||
//! and then passing it into the compiler, to prepare the artifact.
|
||||
//! Preparation workers handle preparation requests by prevalidating and instrumenting PVF wasm
|
||||
//! code, and then passing it into the compiler, to prepare the artifact.
|
||||
//!
|
||||
//! ## Artifacts
|
||||
//!
|
||||
|
||||
@@ -85,7 +85,8 @@ impl Metrics {
|
||||
|
||||
#[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))]
|
||||
if let Some(tracker_stats) = memory_stats.memory_tracker_stats {
|
||||
// We convert these stats from B to KB to match the unit of `ru_maxrss` from `getrusage`.
|
||||
// We convert these stats from B to KB to match the unit of `ru_maxrss` from
|
||||
// `getrusage`.
|
||||
let max_resident_kb = (tracker_stats.resident / 1024) as f64;
|
||||
let max_allocated_kb = (tracker_stats.allocated / 1024) as f64;
|
||||
|
||||
|
||||
@@ -61,9 +61,9 @@ pub enum ToPool {
|
||||
|
||||
/// Request the given worker to start working on the given code.
|
||||
///
|
||||
/// Once the job either succeeded or failed, a [`FromPool::Concluded`] message will be sent back.
|
||||
/// It's also possible that the worker dies before handling the message in which case [`FromPool::Rip`]
|
||||
/// will be sent back.
|
||||
/// Once the job either succeeded or failed, a [`FromPool::Concluded`] message will be sent
|
||||
/// back. It's also possible that the worker dies before handling the message in which case
|
||||
/// [`FromPool::Rip`] will be sent back.
|
||||
///
|
||||
/// In either case, the worker is considered busy and no further `StartWork` messages should be
|
||||
/// sent until either `Concluded` or `Rip` message is received.
|
||||
@@ -237,8 +237,8 @@ fn handle_to_pool(
|
||||
);
|
||||
} else {
|
||||
// idle token is present after spawn and after a job is concluded;
|
||||
// the precondition for `StartWork` is it should be sent only if all previous work
|
||||
// items concluded;
|
||||
// the precondition for `StartWork` is it should be sent only if all previous
|
||||
// work items concluded;
|
||||
// thus idle token is Some;
|
||||
// qed.
|
||||
never!("unexpected absence of the idle token in prepare pool");
|
||||
@@ -311,7 +311,8 @@ fn handle_mux(
|
||||
match outcome {
|
||||
Outcome::Concluded { worker: idle, result } =>
|
||||
handle_concluded_no_rip(from_pool, spawned, worker, idle, result),
|
||||
// Return `Concluded`, but do not kill the worker since the error was on the host side.
|
||||
// Return `Concluded`, but do not kill the worker since the error was on the host
|
||||
// side.
|
||||
Outcome::CreateTmpFileErr { worker: idle, err } => handle_concluded_no_rip(
|
||||
from_pool,
|
||||
spawned,
|
||||
@@ -319,7 +320,8 @@ fn handle_mux(
|
||||
idle,
|
||||
Err(PrepareError::CreateTmpFileErr(err)),
|
||||
),
|
||||
// Return `Concluded`, but do not kill the worker since the error was on the host side.
|
||||
// Return `Concluded`, but do not kill the worker since the error was on the host
|
||||
// side.
|
||||
Outcome::RenameTmpFileErr { worker: idle, result: _, err } =>
|
||||
handle_concluded_no_rip(
|
||||
from_pool,
|
||||
|
||||
@@ -96,8 +96,9 @@ impl WorkerData {
|
||||
}
|
||||
}
|
||||
|
||||
/// A queue structured like this is prone to starving, however, we don't care that much since we expect
|
||||
/// there is going to be a limited number of critical jobs and we don't really care if background jobs starve.
|
||||
/// A queue structured like this is prone to starving, however, we don't care that much since we
|
||||
/// expect there is going to be a limited number of critical jobs and we don't really care if
|
||||
/// background jobs starve.
|
||||
#[derive(Default)]
|
||||
struct Unscheduled {
|
||||
normal: VecDeque<Job>,
|
||||
|
||||
@@ -247,8 +247,8 @@ where
|
||||
|
||||
let outcome = f(tmp_file.clone(), stream).await;
|
||||
|
||||
// The function called above is expected to move `tmp_file` to a new location upon success. However,
|
||||
// the function may as well fail and in that case we should remove the tmp file here.
|
||||
// The function called above is expected to move `tmp_file` to a new location upon success.
|
||||
// However, the function may as well fail and in that case we should remove the tmp file here.
|
||||
//
|
||||
// In any case, we try to remove the file here so that there are no leftovers. We only report
|
||||
// errors that are different from the `NotFound`.
|
||||
|
||||
@@ -196,13 +196,15 @@ pub enum SpawnErr {
|
||||
Handshake,
|
||||
}
|
||||
|
||||
/// This is a representation of a potentially running worker. Drop it and the process will be killed.
|
||||
/// This is a representation of a potentially running worker. Drop it and the process will be
|
||||
/// killed.
|
||||
///
|
||||
/// A worker's handle is also a future that resolves when it's detected that the worker's process
|
||||
/// has been terminated. Since the worker is running in another process it is obviously not
|
||||
/// necessary to poll this future to make the worker run, it's only for termination detection.
|
||||
///
|
||||
/// This future relies on the fact that a child process's stdout `fd` is closed upon its termination.
|
||||
/// This future relies on the fact that a child process's stdout `fd` is closed upon its
|
||||
/// termination.
|
||||
#[pin_project]
|
||||
pub struct WorkerHandle {
|
||||
child: process::Child,
|
||||
@@ -240,15 +242,15 @@ impl WorkerHandle {
|
||||
child_id,
|
||||
stdout,
|
||||
program: program.as_ref().to_path_buf(),
|
||||
// We don't expect the bytes to be ever read. But in case we do, we should not use a buffer
|
||||
// of a small size, because otherwise if the child process does return any data we will end up
|
||||
// issuing a syscall for each byte. We also prefer not to allocate that on the stack, since
|
||||
// each poll the buffer will be allocated and initialized (and that's because `poll_read` takes `&mut [u8]`
|
||||
// and there are no guarantees that a `poll_read` won't ever read from there even though that's
|
||||
// unlikely).
|
||||
// We don't expect the bytes to be ever read. But in case we do, we should not use a
|
||||
// buffer of a small size, because otherwise if the child process does return any data
|
||||
// we will end up issuing a syscall for each byte. We also prefer not to allocate
|
||||
// that on the stack, since each poll the buffer will be allocated and initialized (and
|
||||
// that's because `poll_read` takes `&mut [u8]` and there are no guarantees that a `poll_read`
|
||||
// won't ever read from there even though that's unlikely).
|
||||
//
|
||||
// OTOH, we also don't want to be super smart here and we could just afford to allocate a buffer
|
||||
// for that here.
|
||||
// OTOH, we also don't want to be super smart here and we could just afford to allocate
|
||||
// a buffer for that here.
|
||||
drop_box: vec![0; 8192].into_boxed_slice(),
|
||||
})
|
||||
}
|
||||
@@ -280,8 +282,8 @@ impl futures::Future for WorkerHandle {
|
||||
}
|
||||
},
|
||||
Err(err) => {
|
||||
// The implementation is guaranteed not to return `WouldBlock` or `Interrupted`. This
|
||||
// leaves us with legit errors which we suppose were due to termination.
|
||||
// The implementation is guaranteed not to return `WouldBlock` or `Interrupted`.
|
||||
// This leaves us with legit errors which we suppose were due to termination.
|
||||
|
||||
// Log the status code.
|
||||
gum::debug!(
|
||||
|
||||
@@ -321,7 +321,8 @@ where
|
||||
return futures::pending!()
|
||||
}
|
||||
|
||||
// If there are active requests, this will always resolve to `Some(_)` when a request is finished.
|
||||
// If there are active requests, this will always resolve to `Some(_)` when a request is
|
||||
// finished.
|
||||
if let Some(Ok(Some(result))) = self.active_requests.next().await {
|
||||
self.store_cache(result);
|
||||
}
|
||||
@@ -343,10 +344,10 @@ where
|
||||
{
|
||||
loop {
|
||||
// Let's add some back pressure when the subsystem is running at `MAX_PARALLEL_REQUESTS`.
|
||||
// This can never block forever, because `active_requests` is owned by this task and any mutations
|
||||
// happen either in `poll_requests` or `spawn_request` - so if `is_busy` returns true, then
|
||||
// even if all of the requests finish before us calling `poll_requests` the `active_requests` length
|
||||
// remains invariant.
|
||||
// This can never block forever, because `active_requests` is owned by this task and any
|
||||
// mutations happen either in `poll_requests` or `spawn_request` - so if `is_busy` returns
|
||||
// true, then even if all of the requests finish before us calling `poll_requests` the
|
||||
// `active_requests` length remains invariant.
|
||||
if subsystem.is_busy() {
|
||||
// Since we are not using any internal waiting queues, we need to wait for exactly
|
||||
// one request to complete before we can read the next one from the overseer channel.
|
||||
|
||||
@@ -895,7 +895,8 @@ fn multiple_requests_in_parallel_are_working() {
|
||||
receivers.push(rx);
|
||||
}
|
||||
|
||||
// The backpressure from reaching `MAX_PARALLEL_REQUESTS` will make the test block, we need to drop the lock.
|
||||
// The backpressure from reaching `MAX_PARALLEL_REQUESTS` will make the test block, we need
|
||||
// to drop the lock.
|
||||
drop(lock);
|
||||
|
||||
for _ in 0..MAX_PARALLEL_REQUESTS * 100 {
|
||||
|
||||
Reference in New Issue
Block a user