Req/res optimization for statement distribution (#2803)

* Wip

* Increase proposer timeout.

* WIP.

* Better timeout values now that we are going to be connected to all nodes. (#2778)

* Better timeout values.

* Fix typo.

* Fix validator bandwidth.

* Fix compilation.

* Better and more consistent sizes.

Most importantly, code size is now 5 MB, which is the limit we currently
want to support in statement distribution.

* Introduce statement fetching request.

* WIP

* Statement cache retrieval logic.

* Review remarks by @rphmeier

* Fixes.

* Better requester logic.

* WIP: Handle requester messages.

* Missing dep.

* Fix request launching logic.

* Finish fetching logic.

* Sending logic.

* Redo code size calculations.

Now that the max code size refers to the compressed size.

* Update Cargo.lock (new dep)

* Get request receiver to statement distribution.

* Expose new functionality for responding to requests.

* Cleanup.

* Responder logic.

* Fixes + Cleanup.

* Cargo.lock

* Whitespace.

* Add lost copyright.

* Launch responder task.

* Typo.

* info -> warn

* Typo.

* Fix.

* Fix.

* Update comment.

* Doc fix.

* Better large statement heuristics.

* Fix tests.

* Fix network bridge tests.

* Add test for size estimate.

* Very simple test that checks we get LargeStatement.

* Basic check that fetching of large candidates is performed.

* More tests.

* Basic metrics for responder.

* More metrics.

* Use Encode::encoded_size().

* Some useful spans.

* Get rid of redundant metrics.

* Don't add peer on duplicate.

* Properly check hash

instead of relying on signatures alone.

* Preserve ordering + better flood protection.

* Get rid of redundant clone.

* Don't shutdown responder on failed query.

And add test for this.

* Smaller fixes.

* Quotes.

* Better queue size calculation.

* A bit saner response sizes.

* Fixes.
This commit is contained in:
Robert Klotzner
2021-04-09 23:30:12 +02:00
committed by GitHub
parent 69bd6d8ef2
commit 305375e1e4
19 changed files with 1711 additions and 190 deletions
+61 -3
View File
@@ -28,8 +28,9 @@ use sc_network::Event as NetworkEvent;
use sp_consensus::SyncOracle;
use polkadot_subsystem::{
ActiveLeavesUpdate, ActivatedLeaf, Subsystem, SubsystemContext, SpawnedSubsystem, SubsystemError,
SubsystemResult, SubsystemSender, OverseerSignal, FromOverseer,
ActivatedLeaf, ActiveLeavesUpdate, FromOverseer, OverseerSignal, SpawnedSubsystem,
Subsystem, SubsystemContext, SubsystemError, SubsystemResult, SubsystemSender,
messages::StatementDistributionMessage
};
use polkadot_subsystem::messages::{
NetworkBridgeMessage, AllMessages,
@@ -842,12 +843,16 @@ where
let NetworkBridge {
network_service,
request_multiplexer,
mut request_multiplexer,
authority_discovery_service,
metrics,
sync_oracle,
} = bridge;
let statement_receiver = request_multiplexer
.get_statement_fetching()
.expect("Gets initialized, must be `Some` on startup. qed.");
let (validation_worker_tx, validation_worker_rx) = mpsc::channel(1024);
let (remote, network_event_handler) = handle_network_messages(
@@ -861,6 +866,10 @@ where
ctx.spawn("network-bridge-network-worker", Box::pin(remote)).await?;
ctx.send_message(AllMessages::StatementDistribution(
StatementDistributionMessage::StatementFetchingReceiver(statement_receiver)
)).await;
let subsystem_event_handler = handle_subsystem_messages(
ctx,
network_service,
@@ -1777,6 +1786,13 @@ mod tests {
let view = view![Hash::repeat_byte(1)];
assert_matches!(
virtual_overseer.recv().await,
AllMessages::StatementDistribution(
StatementDistributionMessage::StatementFetchingReceiver(_)
)
);
// bridge will inform about all connected peers.
{
assert_sends_validation_event_to_all(
@@ -1822,6 +1838,13 @@ mod tests {
ObservedRole::Full,
).await;
assert_matches!(
virtual_overseer.recv().await,
AllMessages::StatementDistribution(
StatementDistributionMessage::StatementFetchingReceiver(_)
)
);
// bridge will inform about all connected peers.
{
assert_sends_validation_event_to_all(
@@ -1887,6 +1910,13 @@ mod tests {
network_handle.connect_peer(peer.clone(), PeerSet::Validation, ObservedRole::Full).await;
network_handle.connect_peer(peer.clone(), PeerSet::Collation, ObservedRole::Full).await;
assert_matches!(
virtual_overseer.recv().await,
AllMessages::StatementDistribution(
StatementDistributionMessage::StatementFetchingReceiver(_)
)
);
// bridge will inform about all connected peers.
{
assert_sends_validation_event_to_all(
@@ -1964,6 +1994,13 @@ mod tests {
network_handle.connect_peer(peer_a.clone(), PeerSet::Validation, ObservedRole::Full).await;
network_handle.connect_peer(peer_b.clone(), PeerSet::Collation, ObservedRole::Full).await;
assert_matches!(
virtual_overseer.recv().await,
AllMessages::StatementDistribution(
StatementDistributionMessage::StatementFetchingReceiver(_)
)
);
// bridge will inform about all connected peers.
{
assert_sends_validation_event_to_all(
@@ -2052,6 +2089,13 @@ mod tests {
network_handle.connect_peer(peer.clone(), PeerSet::Validation, ObservedRole::Full).await;
network_handle.connect_peer(peer.clone(), PeerSet::Collation, ObservedRole::Full).await;
assert_matches!(
virtual_overseer.recv().await,
AllMessages::StatementDistribution(
StatementDistributionMessage::StatementFetchingReceiver(_)
)
);
// bridge will inform about all connected peers.
{
assert_sends_validation_event_to_all(
@@ -2205,6 +2249,13 @@ mod tests {
network_handle.connect_peer(peer.clone(), PeerSet::Validation, ObservedRole::Full).await;
network_handle.connect_peer(peer.clone(), PeerSet::Collation, ObservedRole::Full).await;
assert_matches!(
virtual_overseer.recv().await,
AllMessages::StatementDistribution(
StatementDistributionMessage::StatementFetchingReceiver(_)
)
);
// bridge will inform about all connected peers.
{
assert_sends_validation_event_to_all(
@@ -2366,6 +2417,13 @@ mod tests {
0,
);
assert_matches!(
virtual_overseer.recv().await,
AllMessages::StatementDistribution(
StatementDistributionMessage::StatementFetchingReceiver(_)
)
);
assert_sends_validation_event_to_all(
NetworkBridgeEvent::OurViewChange(our_view.clone()),
&mut virtual_overseer,
@@ -37,8 +37,11 @@ use polkadot_subsystem::messages::AllMessages;
/// type, useful for the network bridge to send them via the `Overseer` to other subsystems.
///
/// The resulting stream will end once any of its input ends.
///
/// TODO: Get rid of this: https://github.com/paritytech/polkadot/issues/2842
pub struct RequestMultiplexer {
receivers: Vec<(Protocol, mpsc::Receiver<network::IncomingRequest>)>,
statement_fetching: Option<mpsc::Receiver<network::IncomingRequest>>,
next_poll: usize,
}
@@ -58,21 +61,38 @@ impl RequestMultiplexer {
/// `RequestMultiplexer` from it. The returned `RequestResponseConfig`s must be passed to the
/// network implementation.
pub fn new() -> (Self, Vec<RequestResponseConfig>) {
let (receivers, cfgs): (Vec<_>, Vec<_>) = Protocol::iter()
let (mut receivers, cfgs): (Vec<_>, Vec<_>) = Protocol::iter()
.map(|p| {
let (rx, cfg) = p.get_config();
((p, rx), cfg)
})
.unzip();
let index = receivers.iter().enumerate().find_map(|(i, (p, _))|
if let Protocol::StatementFetching = p {
Some(i)
} else {
None
}
).expect("Statement fetching must be registered. qed.");
let statement_fetching = Some(receivers.remove(index).1);
(
Self {
receivers,
statement_fetching,
next_poll: 0,
},
cfgs,
)
}
/// Get the receiver for handling statement fetching requests.
///
/// This function will only return `Some` once.
pub fn get_statement_fetching(&mut self) -> Option<mpsc::Receiver<network::IncomingRequest>> {
std::mem::take(&mut self.statement_fetching)
}
}
impl Stream for RequestMultiplexer {
@@ -151,6 +171,9 @@ fn multiplex_single(
decode_with_peer::<v1::AvailableDataFetchingRequest>(peer, payload)?,
pending_response,
)),
Protocol::StatementFetching => {
panic!("Statement fetching requests are handled directly. qed.");
}
};
Ok(r)
}