Remove request multiplexer (#3624)

* WIP: Get rid of request multiplexer.

* WIP

* Receiver for handling of incoming requests.

* Get rid of useless `Fault` abstraction.

The things the type system lets us do are not worth abstracting into
their own type. Instead, error handling is going to be merely a pattern.

* Make most things compile again.

* Port availability distribution away from request multiplexer.

* Formatting.

* Port dispute distribution over.

* Fixup statement distribution.

* Handle request directly in collator protocol.

+ Only allow fatal errors at top level.

* Use direct request channel for availability recovery.

* Finally get rid of request multiplexer

Fixes #2842 and paves the way for more back pressure possibilities.

* Fix overseer and statement distribution tests.

* Fix collator protocol and network bridge tests.

* Fix tests in availability recovery.

* Fix availability distribution tests.

* Fix dispute distribution tests.

* Add missing dependency

* Typos.

* Review remarks.

* More remarks.
This commit is contained in:
Robert Klotzner
2021-08-12 13:11:36 +02:00
committed by GitHub
parent ecf71233c3
commit 55154a8d37
51 changed files with 1509 additions and 1746 deletions
@@ -20,28 +20,93 @@ use std::sync::Arc;
use futures::channel::oneshot;
use polkadot_node_network_protocol::request_response::{request::IncomingRequest, v1};
use polkadot_node_network_protocol::{
request_response::{incoming, v1, IncomingRequest, IncomingRequestReceiver},
UnifiedReputationChange as Rep,
};
use polkadot_node_primitives::{AvailableData, ErasureChunk};
use polkadot_primitives::v1::{CandidateHash, ValidatorIndex};
use polkadot_subsystem::{jaeger, messages::AvailabilityStoreMessage, SubsystemContext};
use polkadot_subsystem::{jaeger, messages::AvailabilityStoreMessage, SubsystemSender};
use crate::{
error::{NonFatal, Result},
error::{NonFatal, NonFatalResult, Result},
metrics::{Metrics, FAILED, NOT_FOUND, SUCCEEDED},
LOG_TARGET,
};
/// Major reputation cost, labelled "Received message could not be decoded.".
///
/// Handed to `recv` by the request receiver tasks; presumably applied to peers whose
/// request fails to decode (TODO confirm against `IncomingRequestReceiver::recv`).
const COST_INVALID_REQUEST: Rep = Rep::CostMajor("Received message could not be decoded.");
/// Receiver task to be forked as a separate task to handle PoV requests.
pub async fn run_pov_receiver<Sender>(
mut sender: Sender,
mut receiver: IncomingRequestReceiver<v1::PoVFetchingRequest>,
metrics: Metrics,
) where
Sender: SubsystemSender,
{
loop {
match receiver.recv(|| vec![COST_INVALID_REQUEST]).await {
Ok(msg) => {
answer_pov_request_log(&mut sender, msg, &metrics).await;
},
Err(incoming::Error::Fatal(f)) => {
tracing::debug!(
target: LOG_TARGET,
error = ?f,
"Shutting down POV receiver."
);
return
},
Err(incoming::Error::NonFatal(error)) => {
tracing::debug!(target: LOG_TARGET, ?error, "Error decoding incoming PoV request.");
},
}
}
}
/// Receiver task to be forked as a separate task to handle chunk requests.
pub async fn run_chunk_receiver<Sender>(
mut sender: Sender,
mut receiver: IncomingRequestReceiver<v1::ChunkFetchingRequest>,
metrics: Metrics,
) where
Sender: SubsystemSender,
{
loop {
match receiver.recv(|| vec![COST_INVALID_REQUEST]).await {
Ok(msg) => {
answer_chunk_request_log(&mut sender, msg, &metrics).await;
},
Err(incoming::Error::Fatal(f)) => {
tracing::debug!(
target: LOG_TARGET,
error = ?f,
"Shutting down chunk receiver."
);
return
},
Err(incoming::Error::NonFatal(error)) => {
tracing::debug!(
target: LOG_TARGET,
?error,
"Error decoding incoming chunk request."
);
},
}
}
}
/// Variant of `answer_pov_request` that does Prometheus metric and logging on errors.
///
/// Any errors of `answer_pov_request` will simply be logged.
pub async fn answer_pov_request_log<Context>(
ctx: &mut Context,
pub async fn answer_pov_request_log<Sender>(
sender: &mut Sender,
req: IncomingRequest<v1::PoVFetchingRequest>,
metrics: &Metrics,
) where
Context: SubsystemContext,
Sender: SubsystemSender,
{
let res = answer_pov_request(ctx, req).await;
let res = answer_pov_request(sender, req).await;
match res {
Ok(result) => metrics.on_served_pov(if result { SUCCEEDED } else { NOT_FOUND }),
Err(err) => {
@@ -58,15 +123,15 @@ pub async fn answer_pov_request_log<Context>(
/// Variant of `answer_chunk_request` that does Prometheus metric and logging on errors.
///
/// Any errors of `answer_chunk_request` will simply be logged.
pub async fn answer_chunk_request_log<Context>(
ctx: &mut Context,
pub async fn answer_chunk_request_log<Sender>(
sender: &mut Sender,
req: IncomingRequest<v1::ChunkFetchingRequest>,
metrics: &Metrics,
) -> ()
where
Context: SubsystemContext,
Sender: SubsystemSender,
{
let res = answer_chunk_request(ctx, req).await;
let res = answer_chunk_request(sender, req).await;
match res {
Ok(result) => metrics.on_served_chunk(if result { SUCCEEDED } else { NOT_FOUND }),
Err(err) => {
@@ -83,16 +148,16 @@ where
/// Answer an incoming PoV fetch request by querying the av store.
///
/// Returns: `Ok(true)` if the PoV was found and served.
pub async fn answer_pov_request<Context>(
ctx: &mut Context,
pub async fn answer_pov_request<Sender>(
sender: &mut Sender,
req: IncomingRequest<v1::PoVFetchingRequest>,
) -> Result<bool>
where
Context: SubsystemContext,
Sender: SubsystemSender,
{
let _span = jaeger::Span::new(req.payload.candidate_hash, "answer-pov-request");
let av_data = query_available_data(ctx, req.payload.candidate_hash).await?;
let av_data = query_available_data(sender, req.payload.candidate_hash).await?;
let result = av_data.is_some();
@@ -111,18 +176,18 @@ where
/// Answer an incoming chunk request by querying the av store.
///
/// Returns: `Ok(true)` if chunk was found and served.
pub async fn answer_chunk_request<Context>(
ctx: &mut Context,
pub async fn answer_chunk_request<Sender>(
sender: &mut Sender,
req: IncomingRequest<v1::ChunkFetchingRequest>,
) -> Result<bool>
where
Context: SubsystemContext,
Sender: SubsystemSender,
{
let span = jaeger::Span::new(req.payload.candidate_hash, "answer-chunk-request");
let _child_span = span.child("answer-chunk-request").with_chunk_index(req.payload.index.0);
let chunk = query_chunk(ctx, req.payload.candidate_hash, req.payload.index).await?;
let chunk = query_chunk(sender, req.payload.candidate_hash, req.payload.index).await?;
let result = chunk.is_some();
@@ -145,16 +210,19 @@ where
}
/// Query chunk from the availability store.
async fn query_chunk<Context>(
ctx: &mut Context,
async fn query_chunk<Sender>(
sender: &mut Sender,
candidate_hash: CandidateHash,
validator_index: ValidatorIndex,
) -> Result<Option<ErasureChunk>>
) -> NonFatalResult<Option<ErasureChunk>>
where
Context: SubsystemContext,
Sender: SubsystemSender,
{
let (tx, rx) = oneshot::channel();
ctx.send_message(AvailabilityStoreMessage::QueryChunk(candidate_hash, validator_index, tx))
sender
.send_message(
AvailabilityStoreMessage::QueryChunk(candidate_hash, validator_index, tx).into(),
)
.await;
let result = rx.await.map_err(|e| {
@@ -171,15 +239,16 @@ where
}
/// Query PoV from the availability store.
async fn query_available_data<Context>(
ctx: &mut Context,
async fn query_available_data<Sender>(
sender: &mut Sender,
candidate_hash: CandidateHash,
) -> Result<Option<AvailableData>>
) -> NonFatalResult<Option<AvailableData>>
where
Context: SubsystemContext,
Sender: SubsystemSender,
{
let (tx, rx) = oneshot::channel();
ctx.send_message(AvailabilityStoreMessage::QueryAvailableData(candidate_hash, tx))
sender
.send_message(AvailabilityStoreMessage::QueryAvailableData(candidate_hash, tx).into())
.await;
let result = rx.await.map_err(|e| NonFatal::QueryAvailableDataResponseChannel(e))?;