collation-generation + collator-protocol: collate on multiple assigned cores (#3795)

This works only for collators that implement the `collator_fn` allowing `collation-generation` subsystem to pull collations triggered on new heads. Also enables `request_v2::CollationFetchingResponse::CollationWithParentHeadData` for test adder/undying collators. TODO: - [x] fix tests - [x] new tests - [x] PR doc --------- Signed-off-by: Andrei Sandu <andrei-mihail@parity.io>
2026-06-15 19:41:05 +00:00 · 2024-03-27 16:44:10 +02:00
parent 25af0adf78
commit 417c54c61c
15 changed files with 556 additions and 140 deletions
@@ -44,8 +44,8 @@ use polkadot_node_subsystem::{
 };
 use polkadot_node_subsystem_util::{
 	has_required_runtime, request_async_backing_params, request_availability_cores,
-	request_claim_queue, request_persisted_validation_data, request_validation_code,
-	request_validation_code_hash, request_validators,
+	request_claim_queue, request_para_backing_state, request_persisted_validation_data,
+	request_validation_code, request_validation_code_hash, request_validators,
 };
 use polkadot_primitives::{
 	collator_signature_payload, CandidateCommitments, CandidateDescriptor, CandidateReceipt,
@@ -212,6 +212,7 @@ async fn handle_new_activations<Context>(
 	if config.collator.is_none() {
 		return Ok(())
 	}
+	let para_id = config.para_id;

 	let _overall_timer = metrics.time_new_activations();

@@ -225,25 +226,23 @@ async fn handle_new_activations<Context>(
 		);

 		let availability_cores = availability_cores??;
-		let n_validators = validators??.len();
 		let async_backing_params = async_backing_params?.ok();
+		let n_validators = validators??.len();
 		let maybe_claim_queue = fetch_claim_queue(ctx.sender(), relay_parent).await?;

-		for (core_idx, core) in availability_cores.into_iter().enumerate() {
-			let _availability_core_timer = metrics.time_new_activations_availability_core();
+		// The loop bellow will fill in cores that the para is allowed to build on.
+		let mut cores_to_build_on = Vec::new();

-			let (scheduled_core, assumption) = match core {
-				CoreState::Scheduled(scheduled_core) =>
-					(scheduled_core, OccupiedCoreAssumption::Free),
+		for (core_idx, core) in availability_cores.into_iter().enumerate() {
+			let scheduled_core = match core {
+				CoreState::Scheduled(scheduled_core) => scheduled_core,
 				CoreState::Occupied(occupied_core) => match async_backing_params {
 					Some(params) if params.max_candidate_depth >= 1 => {
 						// maximum candidate depth when building on top of a block
 						// pending availability is necessarily 1 - the depth of the
 						// pending block is 0 so the child has depth 1.

-						// TODO [now]: this assumes that next up == current.
-						// in practice we should only set `OccupiedCoreAssumption::Included`
-						// when the candidate occupying the core is also of the same para.
+						// Use claim queue if available, or fallback to `next_up_on_available`
 						let res = match maybe_claim_queue {
 							Some(ref claim_queue) => {
 								// read what's in the claim queue for this core
@@ -257,8 +256,7 @@ async fn handle_new_activations<Context>(
 								// `next_up_on_available`
 								occupied_core.next_up_on_available
 							},
-						}
-						.map(|scheduled| (scheduled, OccupiedCoreAssumption::Included));
+						};

 						match res {
 							Some(res) => res,
@@ -279,7 +277,7 @@ async fn handle_new_activations<Context>(
 					gum::trace!(
 						target: LOG_TARGET,
 						core_idx = %core_idx,
-						"core is free. Keep going.",
+						"core is not assigned to any para. Keep going.",
 					);
 					continue
 				},
@@ -294,64 +292,90 @@ async fn handle_new_activations<Context>(
 					their_para = %scheduled_core.para_id,
 					"core is not assigned to our para. Keep going.",
 				);
-				continue
+			} else {
+				// Accumulate cores for building collation(s) outside the loop.
+				cores_to_build_on.push(CoreIndex(core_idx as u32));
 			}
+		}

-			// we get validation data and validation code synchronously for each core instead of
-			// within the subtask loop, because we have only a single mutable handle to the
-			// context, so the work can't really be distributed
+		// Skip to next relay parent if there is no core assigned to us.
+		if cores_to_build_on.is_empty() {
+			continue
+		}

-			let validation_data = match request_persisted_validation_data(
-				relay_parent,
-				scheduled_core.para_id,
-				assumption,
-				ctx.sender(),
-			)
-			.await
-			.await??
-			{
-				Some(v) => v,
-				None => {
-					gum::trace!(
-						target: LOG_TARGET,
-						core_idx = %core_idx,
-						relay_parent = ?relay_parent,
-						our_para = %config.para_id,
-						their_para = %scheduled_core.para_id,
-						"validation data is not available",
-					);
-					continue
-				},
-			};
+		let para_backing_state =
+			request_para_backing_state(relay_parent, config.para_id, ctx.sender())
+				.await
+				.await??
+				.ok_or(crate::error::Error::MissingParaBackingState)?;

-			let validation_code_hash = match obtain_validation_code_hash_with_assumption(
-				relay_parent,
-				scheduled_core.para_id,
-				assumption,
-				ctx.sender(),
-			)
-			.await?
-			{
-				Some(v) => v,
-				None => {
-					gum::trace!(
-						target: LOG_TARGET,
-						core_idx = %core_idx,
-						relay_parent = ?relay_parent,
-						our_para = %config.para_id,
-						their_para = %scheduled_core.para_id,
-						"validation code hash is not found.",
-					);
-					continue
-				},
-			};
+		// We are being very optimistic here, but one of the cores could pend availability some more
+		// block, ore even time out.
+		// For timeout assumption the collator can't really know because it doesn't receive bitfield
+		// gossip.
+		let assumption = if para_backing_state.pending_availability.is_empty() {
+			OccupiedCoreAssumption::Free
+		} else {
+			OccupiedCoreAssumption::Included
+		};

-			let task_config = config.clone();
-			let metrics = metrics.clone();
-			let mut task_sender = ctx.sender().clone();
-			ctx.spawn(
-				"collation-builder",
-				Box::pin(async move {
+		gum::debug!(
+			target: LOG_TARGET,
+			relay_parent = ?relay_parent,
+			our_para = %config.para_id,
+			?assumption,
+			"Occupied core(s) assumption",
+		);
+
+		let mut validation_data = match request_persisted_validation_data(
+			relay_parent,
+			config.para_id,
+			assumption,
+			ctx.sender(),
+		)
+		.await
+		.await??
+		{
+			Some(v) => v,
+			None => {
+				gum::debug!(
+					target: LOG_TARGET,
+					relay_parent = ?relay_parent,
+					our_para = %config.para_id,
+					"validation data is not available",
+				);
+				continue
+			},
+		};
+
+		let validation_code_hash = match obtain_validation_code_hash_with_assumption(
+			relay_parent,
+			config.para_id,
+			assumption,
+			ctx.sender(),
+		)
+		.await?
+		{
+			Some(v) => v,
+			None => {
+				gum::debug!(
+					target: LOG_TARGET,
+					relay_parent = ?relay_parent,
+					our_para = %config.para_id,
+					"validation code hash is not found.",
+				);
+				continue
+			},
+		};
+
+		let task_config = config.clone();
+		let metrics = metrics.clone();
+		let mut task_sender = ctx.sender().clone();
+
+		ctx.spawn(
+			"chained-collation-builder",
+			Box::pin(async move {
+				for core_index in cores_to_build_on {
 					let collator_fn = match task_config.collator.as_ref() {
 						Some(x) => x,
 						None => return,
@@ -363,21 +387,23 @@ async fn handle_new_activations<Context>(
 							None => {
 								gum::debug!(
 									target: LOG_TARGET,
-									para_id = %scheduled_core.para_id,
+									?para_id,
 									"collator returned no collation on collate",
 								);
 								return
 							},
 						};

+					let parent_head = collation.head_data.clone();
 					construct_and_distribute_receipt(
 						PreparedCollation {
 							collation,
-							para_id: scheduled_core.para_id,
+							para_id,
 							relay_parent,
-							validation_data,
+							validation_data: validation_data.clone(),
 							validation_code_hash,
 							n_validators,
+							core_index,
 						},
 						task_config.key.clone(),
 						&mut task_sender,
@@ -385,9 +411,13 @@ async fn handle_new_activations<Context>(
 						&metrics,
 					)
 					.await;
-				}),
-			)?;
-		}
+
+					// Chain the collations. All else stays the same as we build the chained
+					// collation on same relay parent.
+					validation_data.parent_head = parent_head;
+				}
+			}),
+		)?;
 	}

 	Ok(())
@@ -408,6 +438,7 @@ async fn handle_submit_collation<Context>(
 		parent_head,
 		validation_code_hash,
 		result_sender,
+		core_index,
 	} = params;

 	let validators = request_validators(relay_parent, ctx.sender()).await.await??;
@@ -444,6 +475,7 @@ async fn handle_submit_collation<Context>(
 		validation_data,
 		validation_code_hash,
 		n_validators,
+		core_index,
 	};

 	construct_and_distribute_receipt(
@@ -465,6 +497,7 @@ struct PreparedCollation {
 	validation_data: PersistedValidationData,
 	validation_code_hash: ValidationCodeHash,
 	n_validators: usize,
+	core_index: CoreIndex,
 }

 /// Takes a prepared collation, along with its context, and produces a candidate receipt
@@ -483,6 +516,7 @@ async fn construct_and_distribute_receipt(
 		validation_data,
 		validation_code_hash,
 		n_validators,
+		core_index,
 	} = collation;

 	let persisted_validation_data_hash = validation_data.hash();
@@ -578,6 +612,7 @@ async fn construct_and_distribute_receipt(
 			pov,
 			parent_head_data,
 			result_sender,
+			core_index,
 		})
 		.await;
 }