Integrate litep2p into Polkadot SDK (#2944)

[litep2p](https://github.com/altonen/litep2p) is a libp2p-compatible P2P
networking library. It supports all of the features of `rust-libp2p`
that are currently being utilized by Polkadot SDK.

Compared to `rust-libp2p`, `litep2p` has a quite different architecture
which is why the new `litep2p` network backend is only able to use a
little of the existing code in `sc-network`. The design has been mainly
influenced by how we'd wish to structure our networking-related code in
Polkadot SDK: independent higher-levels protocols directly communicating
with the network over links that support bidirectional backpressure. A
good example would be `NotificationHandle`/`RequestResponseHandle`
abstractions which allow, e.g., `SyncingEngine` to directly communicate
with peers to announce/request blocks.

I've tried running `polkadot --network-backend litep2p` with a few
different peer configurations and there is a noticeable reduction in
networking CPU usage. For high load (`--out-peers 200`), networking CPU
usage goes down from ~110% to ~30% (80 pp) and for normal load
(`--out-peers 40`), the usage goes down from ~55% to ~18% (37 pp).

These should not be taken as final numbers because:

a) there are still some low-hanging optimization fruits, such as
enabling [receive window
auto-tuning](https://github.com/libp2p/rust-yamux/pull/176), integrating
`Peerset` more closely with `litep2p` or improving memory usage of the
WebSocket transport
b) fixing bugs/instabilities that incorrectly cause `litep2p` to do less
work will increase the networking CPU usage
c) verification in a more diverse set of tests/conditions is needed

Nevertheless, these numbers should give an early estimate for CPU usage
of the new networking backend.

This PR consists of three separate changes:
* introduce a generic `PeerId` (wrapper around `Multihash`) so that we
don't have use `NetworkService::PeerId` in every part of the code that
uses a `PeerId`
* introduce `NetworkBackend` trait, implement it for the libp2p network
stack and make Polkadot SDK generic over `NetworkBackend`
  * implement `NetworkBackend` for litep2p

The new library should be considered experimental which is why
`rust-libp2p` will remain as the default option for the time being. This
PR currently depends on the master branch of `litep2p` but I'll cut a
new release for the library once all review comments have been
addresses.

---------

Signed-off-by: Alexandru Vasile <alexandru.vasile@parity.io>
Co-authored-by: Dmitry Markin <dmitry@markin.tech>
Co-authored-by: Alexandru Vasile <60601340+lexnv@users.noreply.github.com>
Co-authored-by: Alexandru Vasile <alexandru.vasile@parity.io>
This commit is contained in:
Aaro Altonen
2024-04-08 19:44:13 +03:00
committed by GitHub
parent 9543d31474
commit 80616f6d03
181 changed files with 11055 additions and 1862 deletions
@@ -23,7 +23,8 @@ use async_trait::async_trait;
use sc_authority_discovery::Service as AuthorityDiscoveryService;
use polkadot_primitives::AuthorityDiscoveryId;
use sc_network::{Multiaddr, PeerId};
use sc_network::Multiaddr;
use sc_network_types::PeerId;
/// An abstraction over the authority discovery service.
///
+4 -3
View File
@@ -25,7 +25,8 @@ use std::{collections::HashMap, fmt};
#[doc(hidden)]
pub use polkadot_node_jaeger as jaeger;
pub use sc_network::{IfDisconnected, PeerId};
pub use sc_network::IfDisconnected;
pub use sc_network_types::PeerId;
#[doc(hidden)]
pub use std::sync::Arc;
@@ -610,7 +611,7 @@ pub mod v1 {
///
/// The payload is the local peer id of the node, which serves to prove that it
/// controls the collator key it is declaring an intention to collate under.
pub fn declare_signature_payload(peer_id: &sc_network::PeerId) -> Vec<u8> {
pub fn declare_signature_payload(peer_id: &sc_network_types::PeerId) -> Vec<u8> {
let mut payload = peer_id.to_bytes();
payload.extend_from_slice(b"COLL");
payload
@@ -863,7 +864,7 @@ pub mod v2 {
///
/// The payload is the local peer id of the node, which serves to prove that it
/// controls the collator key it is declaring an intention to collate under.
pub fn declare_signature_payload(peer_id: &sc_network::PeerId) -> Vec<u8> {
pub fn declare_signature_payload(peer_id: &sc_network_types::PeerId) -> Vec<u8> {
let mut payload = peer_id.to_bytes();
payload.extend_from_slice(b"COLL");
payload
+26 -10
View File
@@ -19,13 +19,14 @@
use derive_more::Display;
use polkadot_primitives::Hash;
use sc_network::{
config::{NonDefaultSetConfig, SetConfig},
types::ProtocolName,
NotificationService,
config::SetConfig, peer_store::PeerStoreProvider, service::NotificationMetrics,
types::ProtocolName, NetworkBackend, NotificationService,
};
use sp_runtime::traits::Block;
use std::{
collections::{hash_map::Entry, HashMap},
ops::{Index, IndexMut},
sync::Arc,
};
use strum::{EnumIter, IntoEnumIterator};
@@ -65,11 +66,13 @@ impl PeerSet {
///
/// Those should be used in the network configuration to register the protocols with the
/// network service.
pub fn get_info(
pub fn get_info<B: Block, N: NetworkBackend<B, <B as Block>::Hash>>(
self,
is_authority: IsAuthority,
peerset_protocol_names: &PeerSetProtocolNames,
) -> (NonDefaultSetConfig, (PeerSet, Box<dyn NotificationService>)) {
metrics: NotificationMetrics,
peer_store_handle: Arc<dyn PeerStoreProvider>,
) -> (N::NotificationProtocolConfig, (PeerSet, Box<dyn NotificationService>)) {
// Networking layer relies on `get_main_name()` being the main name of the protocol
// for peersets and connection management.
let protocol = peerset_protocol_names.get_main_name(self);
@@ -82,7 +85,7 @@ impl PeerSet {
match self {
PeerSet::Validation => {
let (config, notification_service) = NonDefaultSetConfig::new(
let (config, notification_service) = N::notification_config(
protocol,
fallback_names,
max_notification_size,
@@ -97,12 +100,14 @@ impl PeerSet {
reserved_nodes: Vec::new(),
non_reserved_mode: sc_network::config::NonReservedPeerMode::Accept,
},
metrics,
peer_store_handle,
);
(config, (PeerSet::Validation, notification_service))
},
PeerSet::Collation => {
let (config, notification_service) = NonDefaultSetConfig::new(
let (config, notification_service) = N::notification_config(
protocol,
fallback_names,
max_notification_size,
@@ -119,6 +124,8 @@ impl PeerSet {
sc_network::config::NonReservedPeerMode::Deny
},
},
metrics,
peer_store_handle,
);
(config, (PeerSet::Collation, notification_service))
@@ -207,12 +214,21 @@ impl<T> IndexMut<PeerSet> for PerPeerSet<T> {
///
/// Should be used during network configuration (added to `NetworkConfiguration::extra_sets`)
/// or shortly after startup to register the protocols with the network service.
pub fn peer_sets_info(
pub fn peer_sets_info<B: Block, N: NetworkBackend<B, <B as Block>::Hash>>(
is_authority: IsAuthority,
peerset_protocol_names: &PeerSetProtocolNames,
) -> Vec<(NonDefaultSetConfig, (PeerSet, Box<dyn NotificationService>))> {
metrics: NotificationMetrics,
peer_store_handle: Arc<dyn PeerStoreProvider>,
) -> Vec<(N::NotificationProtocolConfig, (PeerSet, Box<dyn NotificationService>))> {
PeerSet::iter()
.map(|s| s.get_info(is_authority, &peerset_protocol_names))
.map(|s| {
s.get_info::<B, N>(
is_authority,
&peerset_protocol_names,
metrics.clone(),
Arc::clone(&peer_store_handle),
)
})
.collect()
}
@@ -16,7 +16,7 @@
//! Error handling related code and Error/Result definitions.
use sc_network::PeerId;
use sc_network_types::PeerId;
use parity_scale_codec::Error as DecodingError;
@@ -20,7 +20,9 @@ use futures::{channel::oneshot, StreamExt};
use parity_scale_codec::{Decode, Encode};
use sc_network::{config as netconfig, config::RequestResponseConfig, PeerId};
use sc_network::{config as netconfig, NetworkBackend};
use sc_network_types::PeerId;
use sp_runtime::traits::Block;
use super::{IsRequest, ReqProtocolNames};
use crate::UnifiedReputationChange;
@@ -52,10 +54,10 @@ where
///
/// This Register that config with substrate networking and receive incoming requests via the
/// returned `IncomingRequestReceiver`.
pub fn get_config_receiver(
pub fn get_config_receiver<B: Block, N: NetworkBackend<B, <B as Block>::Hash>>(
req_protocol_names: &ReqProtocolNames,
) -> (IncomingRequestReceiver<Req>, RequestResponseConfig) {
let (raw, cfg) = Req::PROTOCOL.get_config(req_protocol_names);
) -> (IncomingRequestReceiver<Req>, N::RequestResponseProtocolConfig) {
let (raw, cfg) = Req::PROTOCOL.get_config::<B, N>(req_protocol_names);
(IncomingRequestReceiver { raw, phantom: PhantomData {} }, cfg)
}
@@ -52,6 +52,8 @@
use std::{collections::HashMap, time::Duration, u64};
use polkadot_primitives::{MAX_CODE_SIZE, MAX_POV_SIZE};
use sc_network::NetworkBackend;
use sp_runtime::traits::Block;
use strum::{EnumIter, IntoEnumIterator};
pub use sc_network::{config as network, config::RequestResponseConfig, ProtocolName};
@@ -179,76 +181,76 @@ impl Protocol {
///
/// Returns a `ProtocolConfig` for this protocol.
/// Use this if you plan only to send requests for this protocol.
pub fn get_outbound_only_config(
pub fn get_outbound_only_config<B: Block, N: NetworkBackend<B, <B as Block>::Hash>>(
self,
req_protocol_names: &ReqProtocolNames,
) -> RequestResponseConfig {
self.create_config(req_protocol_names, None)
) -> N::RequestResponseProtocolConfig {
self.create_config::<B, N>(req_protocol_names, None)
}
/// Get a configuration for a given Request response protocol.
///
/// Returns a receiver for messages received on this protocol and the requested
/// `ProtocolConfig`.
pub fn get_config(
pub fn get_config<B: Block, N: NetworkBackend<B, <B as Block>::Hash>>(
self,
req_protocol_names: &ReqProtocolNames,
) -> (async_channel::Receiver<network::IncomingRequest>, RequestResponseConfig) {
) -> (async_channel::Receiver<network::IncomingRequest>, N::RequestResponseProtocolConfig) {
let (tx, rx) = async_channel::bounded(self.get_channel_size());
let cfg = self.create_config(req_protocol_names, Some(tx));
let cfg = self.create_config::<B, N>(req_protocol_names, Some(tx));
(rx, cfg)
}
fn create_config(
fn create_config<B: Block, N: NetworkBackend<B, <B as Block>::Hash>>(
self,
req_protocol_names: &ReqProtocolNames,
tx: Option<async_channel::Sender<network::IncomingRequest>>,
) -> RequestResponseConfig {
) -> N::RequestResponseProtocolConfig {
let name = req_protocol_names.get_name(self);
let legacy_names = self.get_legacy_name().into_iter().map(Into::into).collect();
match self {
Protocol::ChunkFetchingV1 => RequestResponseConfig {
Protocol::ChunkFetchingV1 => N::request_response_config(
name,
fallback_names: legacy_names,
max_request_size: 1_000,
max_response_size: POV_RESPONSE_SIZE as u64 * 3,
legacy_names,
1_000,
POV_RESPONSE_SIZE as u64 * 3,
// We are connected to all validators:
request_timeout: CHUNK_REQUEST_TIMEOUT,
inbound_queue: tx,
},
CHUNK_REQUEST_TIMEOUT,
tx,
),
Protocol::CollationFetchingV1 | Protocol::CollationFetchingV2 =>
RequestResponseConfig {
N::request_response_config(
name,
fallback_names: legacy_names,
max_request_size: 1_000,
max_response_size: POV_RESPONSE_SIZE,
legacy_names,
1_000,
POV_RESPONSE_SIZE,
// Taken from initial implementation in collator protocol:
request_timeout: POV_REQUEST_TIMEOUT_CONNECTED,
inbound_queue: tx,
},
Protocol::PoVFetchingV1 => RequestResponseConfig {
POV_REQUEST_TIMEOUT_CONNECTED,
tx,
),
Protocol::PoVFetchingV1 => N::request_response_config(
name,
fallback_names: legacy_names,
max_request_size: 1_000,
max_response_size: POV_RESPONSE_SIZE,
request_timeout: POV_REQUEST_TIMEOUT_CONNECTED,
inbound_queue: tx,
},
Protocol::AvailableDataFetchingV1 => RequestResponseConfig {
legacy_names,
1_000,
POV_RESPONSE_SIZE,
POV_REQUEST_TIMEOUT_CONNECTED,
tx,
),
Protocol::AvailableDataFetchingV1 => N::request_response_config(
name,
fallback_names: legacy_names,
max_request_size: 1_000,
legacy_names,
1_000,
// Available data size is dominated by the PoV size.
max_response_size: POV_RESPONSE_SIZE,
request_timeout: POV_REQUEST_TIMEOUT_CONNECTED,
inbound_queue: tx,
},
Protocol::StatementFetchingV1 => RequestResponseConfig {
POV_RESPONSE_SIZE,
POV_REQUEST_TIMEOUT_CONNECTED,
tx,
),
Protocol::StatementFetchingV1 => N::request_response_config(
name,
fallback_names: legacy_names,
max_request_size: 1_000,
legacy_names,
1_000,
// Available data size is dominated code size.
max_response_size: STATEMENT_RESPONSE_SIZE,
STATEMENT_RESPONSE_SIZE,
// We need statement fetching to be fast and will try our best at the responding
// side to answer requests within that timeout, assuming a bandwidth of 500Mbit/s
// - which is the recommended minimum bandwidth for nodes on Kusama as of April
@@ -258,27 +260,27 @@ impl Protocol {
// waiting for timeout on an overloaded node. Fetches from slow nodes will likely
// fail, but this is desired, so we can quickly move on to a faster one - we should
// also decrease its reputation.
request_timeout: Duration::from_secs(1),
inbound_queue: tx,
},
Protocol::DisputeSendingV1 => RequestResponseConfig {
Duration::from_secs(1),
tx,
),
Protocol::DisputeSendingV1 => N::request_response_config(
name,
fallback_names: legacy_names,
max_request_size: 1_000,
legacy_names,
1_000,
// Responses are just confirmation, in essence not even a bit. So 100 seems
// plenty.
max_response_size: 100,
request_timeout: DISPUTE_REQUEST_TIMEOUT,
inbound_queue: tx,
},
Protocol::AttestedCandidateV2 => RequestResponseConfig {
100,
DISPUTE_REQUEST_TIMEOUT,
tx,
),
Protocol::AttestedCandidateV2 => N::request_response_config(
name,
fallback_names: legacy_names,
max_request_size: 1_000,
max_response_size: ATTESTED_CANDIDATE_RESPONSE_SIZE,
request_timeout: ATTESTED_CANDIDATE_TIMEOUT,
inbound_queue: tx,
},
legacy_names,
1_000,
ATTESTED_CANDIDATE_RESPONSE_SIZE,
ATTESTED_CANDIDATE_TIMEOUT,
tx,
),
}
}
@@ -20,7 +20,7 @@ use network::ProtocolName;
use parity_scale_codec::{Decode, Encode, Error as DecodingError};
use sc_network as network;
use sc_network::PeerId;
use sc_network_types::PeerId;
use polkadot_primitives::AuthorityDiscoveryId;