Rework the event system of sc-network (#1370)

This commit introduces a new concept called `NotificationService` which
allows Polkadot protocols to communicate with the underlying
notification protocol implementation directly, without routing events
through `NetworkWorker`. This implies that each protocol has its own
service which it uses to communicate with remote peers and that each
`NotificationService` is unique with respect to the underlying
notification protocol, meaning `NotificationService` for the transaction
protocol can only be used to send and receive transaction-related
notifications.

The `NotificationService` concept introduces two additional benefits:
  * allow protocols to start using custom handshakes
  * allow protocols to accept/reject inbound peers

Previously the validation of inbound connections was solely the
responsibility of `ProtocolController`. This caused issues with light
peers and `SyncingEngine`, as `ProtocolController` would accept more
peers than `SyncingEngine` could accept, which caused peers to have
differing views of their own states. `SyncingEngine` would reject excess
peers, but these rejections were not properly communicated to those peers,
causing them to assume that they had been accepted.

With `NotificationService`, the local handshake is not sent to the remote
peer if that peer is rejected, which allows the remote peer to detect that
it was rejected.

This commit also deprecates the use of `NetworkEventStream` for all
notification-related events and going forward only DHT events are
provided through `NetworkEventStream`. If protocols wish to follow each
other's events, they must introduce additional abstractions, as is done
for the GRANDPA and transaction protocols, which follow the syncing
protocol through `SyncEventStream`.

Fixes https://github.com/paritytech/polkadot-sdk/issues/512
Fixes https://github.com/paritytech/polkadot-sdk/issues/514
Fixes https://github.com/paritytech/polkadot-sdk/issues/515
Fixes https://github.com/paritytech/polkadot-sdk/issues/554
Fixes https://github.com/paritytech/polkadot-sdk/issues/556

---
These changes are transferred from
https://github.com/paritytech/substrate/pull/14197 but there are no
functional changes compared to that PR

---------

Co-authored-by: Dmitry Markin <dmitry@markin.tech>
Co-authored-by: Alexandru Vasile <60601340+lexnv@users.noreply.github.com>
This commit is contained in:
Aaro Altonen
2023-11-28 20:18:52 +02:00
committed by GitHub
parent ec3a61ed86
commit e71c484d5b
102 changed files with 5694 additions and 2603 deletions
+66 -152
View File
@@ -54,6 +54,7 @@ use crate::{
ReputationChange,
};
use codec::DecodeAll;
use either::Either;
use futures::{channel::oneshot, prelude::*};
#[allow(deprecated)]
@@ -71,10 +72,13 @@ use libp2p::{
Multiaddr, PeerId,
};
use log::{debug, error, info, trace, warn};
use metrics::{Histogram, HistogramVec, MetricSources, Metrics};
use metrics::{Histogram, MetricSources, Metrics};
use parking_lot::Mutex;
use sc_network_common::ExHashT;
use sc_network_common::{
role::{ObservedRole, Roles},
ExHashT,
};
use sc_utils::mpsc::{tracing_unbounded, TracingUnboundedReceiver, TracingUnboundedSender};
use sp_runtime::traits::Block as BlockT;
@@ -118,12 +122,6 @@ pub struct NetworkService<B: BlockT + 'static, H: ExHashT> {
bandwidth: Arc<transport::BandwidthSinks>,
/// Channel that sends messages to the actual worker.
to_worker: TracingUnboundedSender<ServiceToWorkerMsg>,
/// For each peer and protocol combination, an object that allows sending notifications to
/// that peer. Updated by the [`NetworkWorker`].
peers_notifications_sinks: Arc<Mutex<HashMap<(PeerId, ProtocolName), NotificationsSink>>>,
/// Field extracted from the [`Metrics`] struct and necessary to report the
/// notifications-related metrics.
notifications_sizes_metric: Option<HistogramVec>,
/// Protocol name -> `SetId` mapping for notification protocols. The map never changes after
/// initialization.
notification_protocol_ids: HashMap<ProtocolName, SetId>,
@@ -132,6 +130,8 @@ pub struct NetworkService<B: BlockT + 'static, H: ExHashT> {
protocol_handles: Vec<protocol_controller::ProtocolHandle>,
/// Shortcut to sync protocol handle (`protocol_handles[0]`).
sync_protocol_handle: protocol_controller::ProtocolHandle,
/// Handle to `PeerStore`.
peer_store_handle: PeerStoreHandle,
/// Marker to pin the `H` generic. Serves no purpose except to not break backwards
/// compatibility.
_marker: PhantomData<H>,
@@ -199,7 +199,7 @@ where
)?;
for notification_protocol in &notification_protocols {
ensure_addresses_consistent_with_transport(
notification_protocol.set_config.reserved_nodes.iter().map(|x| &x.multiaddr),
notification_protocol.set_config().reserved_nodes.iter().map(|x| &x.multiaddr),
&network_config.transport,
)?;
}
@@ -241,7 +241,7 @@ where
.map(|cfg| usize::try_from(cfg.max_response_size).unwrap_or(usize::MAX));
let notifs_max = notification_protocols
.iter()
.map(|cfg| usize::try_from(cfg.max_notification_size).unwrap_or(usize::MAX));
.map(|cfg| usize::try_from(cfg.max_notification_size()).unwrap_or(usize::MAX));
// A "default" max is added to cover all the other protocols: ping, identify,
// kademlia, block announces, and transactions.
@@ -273,7 +273,7 @@ where
// We must prepend a hardcoded default peer set to notification protocols.
let all_peer_sets_iter = iter::once(&network_config.default_peers_set)
.chain(notification_protocols.iter().map(|protocol| &protocol.set_config));
.chain(notification_protocols.iter().map(|protocol| protocol.set_config()));
let (protocol_handles, protocol_controllers): (Vec<_>, Vec<_>) = all_peer_sets_iter
.enumerate()
@@ -312,21 +312,9 @@ where
iter::once(&params.block_announce_config)
.chain(notification_protocols.iter())
.enumerate()
.map(|(index, protocol)| {
(protocol.notifications_protocol.clone(), SetId::from(index))
})
.map(|(index, protocol)| (protocol.protocol_name().clone(), SetId::from(index)))
.collect();
let protocol = Protocol::new(
From::from(&params.role),
notification_protocols.clone(),
params.block_announce_config,
params.peer_store.clone(),
protocol_handles.clone(),
from_protocol_controllers,
params.tx,
)?;
let known_addresses = {
// Collect all reserved nodes and bootnodes addresses.
let mut addresses: Vec<_> = network_config
@@ -336,7 +324,7 @@ where
.map(|reserved| (reserved.peer_id, reserved.multiaddr.clone()))
.chain(notification_protocols.iter().flat_map(|protocol| {
protocol
.set_config
.set_config()
.reserved_nodes
.iter()
.map(|reserved| (reserved.peer_id, reserved.multiaddr.clone()))
@@ -389,6 +377,16 @@ where
let num_connected = Arc::new(AtomicUsize::new(0));
let external_addresses = Arc::new(Mutex::new(HashSet::new()));
let (protocol, notif_protocol_handles) = Protocol::new(
From::from(&params.role),
&params.metrics_registry,
notification_protocols,
params.block_announce_config,
params.peer_store.clone(),
protocol_handles.clone(),
from_protocol_controllers,
)?;
// Build the swarm.
let (mut swarm, bandwidth): (Swarm<Behaviour<B>>, _) = {
let user_agent =
@@ -508,7 +506,6 @@ where
}
let listen_addresses = Arc::new(Mutex::new(HashSet::new()));
let peers_notifications_sinks = Arc::new(Mutex::new(HashMap::new()));
let service = Arc::new(NetworkService {
bandwidth,
@@ -518,13 +515,10 @@ where
local_peer_id,
local_identity,
to_worker,
peers_notifications_sinks: peers_notifications_sinks.clone(),
notifications_sizes_metric: metrics
.as_ref()
.map(|metrics| metrics.notifications_sizes.clone()),
notification_protocol_ids,
protocol_handles,
sync_protocol_handle,
peer_store_handle: params.peer_store.clone(),
_marker: PhantomData,
_block: Default::default(),
});
@@ -539,8 +533,8 @@ where
metrics,
boot_node_ids,
reported_invalid_boot_nodes: Default::default(),
peers_notifications_sinks,
peer_store_handle: params.peer_store,
notif_protocol_handles,
_marker: Default::default(),
_block: Default::default(),
})
@@ -567,7 +561,7 @@ where
/// Returns the number of peers we're connected to.
pub fn num_connected_peers(&self) -> usize {
self.network_service.behaviour().user_protocol().num_connected_peers()
self.network_service.behaviour().user_protocol().num_sync_peers()
}
/// Adds an address for a node.
@@ -991,6 +985,16 @@ where
/// Returns the value currently held in the `num_connected` atomic counter.
///
/// The counter is read with `Ordering::Relaxed`, so the returned number is a
/// snapshot and implies no ordering with respect to other memory operations.
fn sync_num_connected(&self) -> usize {
self.num_connected.load(Ordering::Relaxed)
}
/// Determines the role of `peer_id` from the handshake bytes it provided.
///
/// The handshake is first decoded as [`Roles`] using `decode_all`. If that
/// succeeds, the decoded value is converted into an [`ObservedRole`] and
/// returned. If decoding fails, the failure is logged at debug level and the
/// role is looked up through `peer_store_handle` instead.
///
/// Returns `Some(role)` when the handshake decodes, otherwise whatever
/// `peer_store_handle.peer_role(&peer_id)` returns.
fn peer_role(&self, peer_id: PeerId, handshake: Vec<u8>) -> Option<ObservedRole> {
// Prefer the role advertised in the handshake itself.
match Roles::decode_all(&mut &handshake[..]) {
Ok(role) => Some(role.into()),
Err(_) => {
log::debug!(target: "sub-libp2p", "handshake doesn't contain peer role: {handshake:?}");
// Handshake did not decode as `Roles`: fall back to the peer store's record.
self.peer_store_handle.peer_role(&peer_id)
},
}
}
}
impl<B, H> NetworkEventStream for NetworkService<B, H>
@@ -1010,68 +1014,20 @@ where
B: BlockT + 'static,
H: ExHashT,
{
fn write_notification(&self, target: PeerId, protocol: ProtocolName, message: Vec<u8>) {
// We clone the `NotificationsSink` in order to be able to unlock the network-wide
// `peers_notifications_sinks` mutex as soon as possible.
let sink = {
let peers_notifications_sinks = self.peers_notifications_sinks.lock();
if let Some(sink) = peers_notifications_sinks.get(&(target, protocol.clone())) {
sink.clone()
} else {
// Notification silently discarded, as documented.
debug!(
target: "sub-libp2p",
"Attempted to send notification on missing or closed substream: {}, {:?}",
target, protocol,
);
return
}
};
if let Some(notifications_sizes_metric) = self.notifications_sizes_metric.as_ref() {
notifications_sizes_metric
.with_label_values(&["out", &protocol])
.observe(message.len() as f64);
}
// Sending is communicated to the `NotificationsSink`.
trace!(
target: "sub-libp2p",
"External API => Notification({:?}, {:?}, {} bytes)",
target, protocol, message.len()
);
trace!(target: "sub-libp2p", "Handler({:?}) <= Sync notification", target);
sink.send_sync_notification(message);
fn write_notification(&self, _target: PeerId, _protocol: ProtocolName, _message: Vec<u8>) {
unimplemented!();
}
fn notification_sender(
&self,
target: PeerId,
protocol: ProtocolName,
_target: PeerId,
_protocol: ProtocolName,
) -> Result<Box<dyn NotificationSenderT>, NotificationSenderError> {
// We clone the `NotificationsSink` in order to be able to unlock the network-wide
// `peers_notifications_sinks` mutex as soon as possible.
let sink = {
let peers_notifications_sinks = self.peers_notifications_sinks.lock();
if let Some(sink) = peers_notifications_sinks.get(&(target, protocol.clone())) {
sink.clone()
} else {
return Err(NotificationSenderError::Closed)
}
};
let notification_size_metric = self
.notifications_sizes_metric
.as_ref()
.map(|histogram| histogram.with_label_values(&["out", &protocol]));
Ok(Box::new(NotificationSender { sink, protocol_name: protocol, notification_size_metric }))
unimplemented!();
}
fn set_notification_handshake(&self, protocol: ProtocolName, handshake: Vec<u8>) {
let _ = self
.to_worker
.unbounded_send(ServiceToWorkerMsg::SetNotificationHandshake(protocol, handshake));
fn set_notification_handshake(&self, _protocol: ProtocolName, _handshake: Vec<u8>) {
unimplemented!();
}
}
@@ -1209,7 +1165,6 @@ enum ServiceToWorkerMsg {
pending_response: oneshot::Sender<Result<NetworkState, RequestFailure>>,
},
DisconnectPeer(PeerId, ProtocolName),
SetNotificationHandshake(ProtocolName, Vec<u8>),
}
/// Main network worker. Must be polled in order for the network to advance.
@@ -1239,11 +1194,10 @@ where
boot_node_ids: Arc<HashMap<PeerId, Vec<Multiaddr>>>,
/// Boot nodes that we already have reported as invalid.
reported_invalid_boot_nodes: HashSet<PeerId>,
/// For each peer and protocol combination, an object that allows sending notifications to
/// that peer. Shared with the [`NetworkService`].
peers_notifications_sinks: Arc<Mutex<HashMap<(PeerId, ProtocolName), NotificationsSink>>>,
/// Peer reputation store handle.
peer_store_handle: PeerStoreHandle,
/// Notification protocol handles.
notif_protocol_handles: Vec<protocol::ProtocolHandle>,
/// Marker to pin the `H` generic. Serves no purpose except to not break backwards
/// compatibility.
_marker: PhantomData<H>,
@@ -1282,8 +1236,7 @@ where
};
// Update the `num_connected` count shared with the `NetworkService`.
let num_connected_peers =
self.network_service.behaviour_mut().user_protocol_mut().num_connected_peers();
let num_connected_peers = self.network_service.behaviour().user_protocol().num_sync_peers();
self.num_connected.store(num_connected_peers, Ordering::Relaxed);
if let Some(metrics) = self.metrics.as_ref() {
@@ -1353,11 +1306,6 @@ where
.behaviour_mut()
.user_protocol_mut()
.disconnect_peer(&who, protocol_name),
ServiceToWorkerMsg::SetNotificationHandshake(protocol, handshake) => self
.network_service
.behaviour_mut()
.user_protocol_mut()
.set_notification_handshake(protocol, handshake),
}
}
@@ -1472,47 +1420,27 @@ where
},
SwarmEvent::Behaviour(BehaviourOut::NotificationStreamOpened {
remote,
protocol,
set_id,
direction,
negotiated_fallback,
notifications_sink,
role,
received_handshake,
}) => {
if let Some(metrics) = self.metrics.as_ref() {
metrics
.notifications_streams_opened_total
.with_label_values(&[&protocol])
.inc();
}
{
let mut peers_notifications_sinks = self.peers_notifications_sinks.lock();
let _previous_value = peers_notifications_sinks
.insert((remote, protocol.clone()), notifications_sink);
debug_assert!(_previous_value.is_none());
}
self.event_streams.send(Event::NotificationStreamOpened {
let _ = self.notif_protocol_handles[usize::from(set_id)].report_substream_opened(
remote,
protocol,
negotiated_fallback,
role,
direction,
received_handshake,
});
negotiated_fallback,
notifications_sink,
);
},
SwarmEvent::Behaviour(BehaviourOut::NotificationStreamReplaced {
remote,
protocol,
set_id,
notifications_sink,
}) => {
let mut peers_notifications_sinks = self.peers_notifications_sinks.lock();
if let Some(s) = peers_notifications_sinks.get_mut(&(remote, protocol)) {
*s = notifications_sink;
} else {
error!(
target: "sub-libp2p",
"NotificationStreamReplaced for non-existing substream"
);
debug_assert!(false);
}
let _ = self.notif_protocol_handles[usize::from(set_id)]
.report_notification_sink_replaced(remote, notifications_sink);
// TODO: Notifications might have been lost as a result of the previous
// connection being dropped, and as a result it would be preferable to notify
@@ -1535,31 +1463,17 @@ where
// role,
// });
},
SwarmEvent::Behaviour(BehaviourOut::NotificationStreamClosed { remote, protocol }) => {
if let Some(metrics) = self.metrics.as_ref() {
metrics
.notifications_streams_closed_total
.with_label_values(&[&protocol[..]])
.inc();
}
self.event_streams
.send(Event::NotificationStreamClosed { remote, protocol: protocol.clone() });
{
let mut peers_notifications_sinks = self.peers_notifications_sinks.lock();
let _previous_value = peers_notifications_sinks.remove(&(remote, protocol));
debug_assert!(_previous_value.is_some());
}
SwarmEvent::Behaviour(BehaviourOut::NotificationStreamClosed { remote, set_id }) => {
let _ = self.notif_protocol_handles[usize::from(set_id)]
.report_substream_closed(remote);
},
SwarmEvent::Behaviour(BehaviourOut::NotificationsReceived { remote, messages }) => {
if let Some(metrics) = self.metrics.as_ref() {
for (protocol, message) in &messages {
metrics
.notifications_sizes
.with_label_values(&["in", protocol])
.observe(message.len() as f64);
}
}
self.event_streams.send(Event::NotificationsReceived { remote, messages });
SwarmEvent::Behaviour(BehaviourOut::NotificationsReceived {
remote,
set_id,
notification,
}) => {
let _ = self.notif_protocol_handles[usize::from(set_id)]
.report_notification_received(remote, notification);
},
SwarmEvent::Behaviour(BehaviourOut::Dht(event, duration)) => {
if let Some(metrics) = self.metrics.as_ref() {