split NetworkBridge into two subsystems (#5616)

* foo

* rolling session window

* fixup

* remove use statemetn

* fmt

* split NetworkBridge into two subsystems

Pending cleanup

* split

* chore: reexport OrchestraError as OverseerError

* chore: silence warnings

* fixup tests

* chore: add default timenout of 30s to subsystem test helper ctx handle

* single item channel

* fixins

* fmt

* cleanup

* remove dead code

* remove sync bounds again

* wire up shared state

* deal with some FIXMEs

* use distinct tags

Co-authored-by: Andrei Sandu <54316454+sandreim@users.noreply.github.com>

* use tag

Co-authored-by: Andrei Sandu <54316454+sandreim@users.noreply.github.com>

* address naming

tx and rx are common in networking and also have an implicit meaning regarding networking
compared to incoming and outgoing which are already used with subsystems themselvesq

* remove unused sync oracle

* remove unneeded state

* fix tests

* chore: fmt

* do not try to register twice

* leak Metrics type

Co-authored-by: Andrei Sandu <54316454+sandreim@users.noreply.github.com>
Co-authored-by: Andronik <write@reusable.software>
This commit is contained in:
Bernhard Schuster
2022-07-12 18:22:36 +02:00
committed by GitHub
parent c11c1f38f4
commit 3240cb5e4d
40 changed files with 1880 additions and 1429 deletions
+301
View File
@@ -0,0 +1,301 @@
// Copyright 2020 Parity Technologies (UK) Ltd.
// This file is part of Polkadot.
// Polkadot is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Polkadot is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
//! The Network Bridge Subsystem - handles _outgoing_ messages, from subsystem to the network.
use super::*;
use polkadot_node_network_protocol::{peer_set::PeerSet, v1 as protocol_v1, PeerId, Versioned};
use polkadot_node_subsystem::{
errors::SubsystemError, messages::NetworkBridgeTxMessage, overseer, FromOrchestra,
OverseerSignal, SpawnedSubsystem,
};
/// Peer set info for network initialization.
///
/// To be added to [`NetworkConfiguration::extra_sets`].
pub use polkadot_node_network_protocol::peer_set::{peer_sets_info, IsAuthority};
use crate::validator_discovery;
/// Actual interfacing to the network based on the `Network` trait.
///
/// Defines the `Network` trait with an implementation for an `Arc<NetworkService>`.
use crate::network::{send_message, Network};
use crate::metrics::Metrics;
#[cfg(test)]
mod tests;
// network bridge log target
const LOG_TARGET: &'static str = "parachain::network-bridge-tx";
/// The network bridge subsystem.
pub struct NetworkBridgeTx<N, AD> {
/// `Network` trait implementing type.
network_service: N,
authority_discovery_service: AD,
metrics: Metrics,
}
impl<N, AD> NetworkBridgeTx<N, AD> {
/// Create a new network bridge subsystem with underlying network service and authority discovery service.
///
/// This assumes that the network service has had the notifications protocol for the network
/// bridge already registered. See [`peers_sets_info`](peers_sets_info).
pub fn new(network_service: N, authority_discovery_service: AD, metrics: Metrics) -> Self {
Self { network_service, authority_discovery_service, metrics }
}
}
#[overseer::subsystem(NetworkBridgeTx, error = SubsystemError, prefix = self::overseer)]
impl<Net, AD, Context> NetworkBridgeTx<Net, AD>
where
Net: Network + Sync,
AD: validator_discovery::AuthorityDiscovery + Clone + Sync,
{
fn start(self, ctx: Context) -> SpawnedSubsystem {
let future = run_network_out(self, ctx)
.map_err(|e| SubsystemError::with_origin("network-bridge", e))
.boxed();
SpawnedSubsystem { name: "network-bridge-subsystem", future }
}
}
#[overseer::contextbounds(NetworkBridgeTx, prefix = self::overseer)]
async fn handle_subsystem_messages<Context, N, AD>(
mut ctx: Context,
mut network_service: N,
mut authority_discovery_service: AD,
metrics: Metrics,
) -> Result<(), Error>
where
N: Network,
AD: validator_discovery::AuthorityDiscovery + Clone,
{
let mut validator_discovery = validator_discovery::Service::<N, AD>::new();
loop {
match ctx.recv().fuse().await? {
FromOrchestra::Signal(OverseerSignal::Conclude) => return Ok(()),
FromOrchestra::Signal(_) => { /* handled by incoming */ },
FromOrchestra::Communication { msg } => {
(network_service, authority_discovery_service) =
handle_incoming_subsystem_communication(
&mut ctx,
network_service,
&mut validator_discovery,
authority_discovery_service.clone(),
msg,
&metrics,
)
.await;
},
}
}
}
#[overseer::contextbounds(NetworkBridgeTx, prefix = self::overseer)]
async fn handle_incoming_subsystem_communication<Context, N, AD>(
_ctx: &mut Context,
mut network_service: N,
validator_discovery: &mut validator_discovery::Service<N, AD>,
mut authority_discovery_service: AD,
msg: NetworkBridgeTxMessage,
metrics: &Metrics,
) -> (N, AD)
where
N: Network,
AD: validator_discovery::AuthorityDiscovery + Clone,
{
match msg {
NetworkBridgeTxMessage::ReportPeer(peer, rep) => {
if !rep.is_benefit() {
gum::debug!(target: LOG_TARGET, ?peer, ?rep, action = "ReportPeer");
}
metrics.on_report_event();
network_service.report_peer(peer, rep);
},
NetworkBridgeTxMessage::DisconnectPeer(peer, peer_set) => {
gum::trace!(
target: LOG_TARGET,
action = "DisconnectPeer",
?peer,
peer_set = ?peer_set,
);
network_service.disconnect_peer(peer, peer_set);
},
NetworkBridgeTxMessage::SendValidationMessage(peers, msg) => {
gum::trace!(
target: LOG_TARGET,
action = "SendValidationMessages",
num_messages = 1usize,
);
match msg {
Versioned::V1(msg) => send_validation_message_v1(
&mut network_service,
peers,
WireMessage::ProtocolMessage(msg),
&metrics,
),
}
},
NetworkBridgeTxMessage::SendValidationMessages(msgs) => {
gum::trace!(
target: LOG_TARGET,
action = "SendValidationMessages",
num_messages = %msgs.len(),
);
for (peers, msg) in msgs {
match msg {
Versioned::V1(msg) => send_validation_message_v1(
&mut network_service,
peers,
WireMessage::ProtocolMessage(msg),
&metrics,
),
}
}
},
NetworkBridgeTxMessage::SendCollationMessage(peers, msg) => {
gum::trace!(
target: LOG_TARGET,
action = "SendCollationMessages",
num_messages = 1usize,
);
match msg {
Versioned::V1(msg) => send_collation_message_v1(
&mut network_service,
peers,
WireMessage::ProtocolMessage(msg),
&metrics,
),
}
},
NetworkBridgeTxMessage::SendCollationMessages(msgs) => {
gum::trace!(
target: LOG_TARGET,
action = "SendCollationMessages",
num_messages = %msgs.len(),
);
for (peers, msg) in msgs {
match msg {
Versioned::V1(msg) => send_collation_message_v1(
&mut network_service,
peers,
WireMessage::ProtocolMessage(msg),
&metrics,
),
}
}
},
NetworkBridgeTxMessage::SendRequests(reqs, if_disconnected) => {
gum::trace!(
target: LOG_TARGET,
action = "SendRequests",
num_requests = %reqs.len(),
);
for req in reqs {
network_service
.start_request(&mut authority_discovery_service, req, if_disconnected)
.await;
}
},
NetworkBridgeTxMessage::ConnectToValidators { validator_ids, peer_set, failed } => {
gum::trace!(
target: LOG_TARGET,
action = "ConnectToValidators",
peer_set = ?peer_set,
ids = ?validator_ids,
"Received a validator connection request",
);
metrics.note_desired_peer_count(peer_set, validator_ids.len());
let (network_service, ads) = validator_discovery
.on_request(
validator_ids,
peer_set,
failed,
network_service,
authority_discovery_service,
)
.await;
return (network_service, ads)
},
NetworkBridgeTxMessage::ConnectToResolvedValidators { validator_addrs, peer_set } => {
gum::trace!(
target: LOG_TARGET,
action = "ConnectToPeers",
peer_set = ?peer_set,
?validator_addrs,
"Received a resolved validator connection request",
);
metrics.note_desired_peer_count(peer_set, validator_addrs.len());
let all_addrs = validator_addrs.into_iter().flatten().collect();
let network_service = validator_discovery
.on_resolved_request(all_addrs, peer_set, network_service)
.await;
return (network_service, authority_discovery_service)
},
}
(network_service, authority_discovery_service)
}
#[overseer::contextbounds(NetworkBridgeTx, prefix = self::overseer)]
async fn run_network_out<N, AD, Context>(
bridge: NetworkBridgeTx<N, AD>,
ctx: Context,
) -> Result<(), Error>
where
N: Network,
AD: validator_discovery::AuthorityDiscovery + Clone + Sync,
{
let NetworkBridgeTx { network_service, authority_discovery_service, metrics } = bridge;
handle_subsystem_messages(ctx, network_service, authority_discovery_service, metrics).await?;
Ok(())
}
fn send_validation_message_v1(
net: &mut impl Network,
peers: Vec<PeerId>,
message: WireMessage<protocol_v1::ValidationProtocol>,
metrics: &Metrics,
) {
send_message(net, peers, PeerSet::Validation, 1, message, metrics);
}
fn send_collation_message_v1(
net: &mut impl Network,
peers: Vec<PeerId>,
message: WireMessage<protocol_v1::CollationProtocol>,
metrics: &Metrics,
) {
send_message(net, peers, PeerSet::Collation, 1, message, metrics)
}
@@ -0,0 +1,298 @@
// Copyright 2020 Parity Technologies (UK) Ltd.
// This file is part of Polkadot.
// Polkadot is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Polkadot is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
use super::*;
use futures::{executor, stream::BoxStream};
use polkadot_node_subsystem_util::TimeoutExt;
use async_trait::async_trait;
use parking_lot::Mutex;
use std::{borrow::Cow, collections::HashSet};
use sc_network::{Event as NetworkEvent, IfDisconnected};
use polkadot_node_network_protocol::{
request_response::outgoing::Requests, ObservedRole, Versioned,
};
use polkadot_node_subsystem::{FromOrchestra, OverseerSignal};
use polkadot_node_subsystem_test_helpers::TestSubsystemContextHandle;
use polkadot_node_subsystem_util::metered;
use polkadot_primitives::v2::AuthorityDiscoveryId;
use polkadot_primitives_test_helpers::dummy_collator_signature;
use sc_network::Multiaddr;
use sp_keyring::Sr25519Keyring;
const TIMEOUT: std::time::Duration = polkadot_node_subsystem_test_helpers::TestSubsystemContextHandle::<NetworkBridgeTxMessage>::TIMEOUT;
use crate::{network::Network, validator_discovery::AuthorityDiscovery, Rep};
#[derive(Debug, PartialEq)]
pub enum NetworkAction {
/// Note a change in reputation for a peer.
ReputationChange(PeerId, Rep),
/// Disconnect a peer from the given peer-set.
DisconnectPeer(PeerId, PeerSet),
/// Write a notification to a given peer on the given peer-set.
WriteNotification(PeerId, PeerSet, Vec<u8>),
}
// The subsystem's view of the network - only supports a single call to `event_stream`.
#[derive(Clone)]
struct TestNetwork {
net_events: Arc<Mutex<Option<metered::MeteredReceiver<NetworkEvent>>>>,
action_tx: Arc<Mutex<metered::UnboundedMeteredSender<NetworkAction>>>,
}
#[derive(Clone, Debug)]
struct TestAuthorityDiscovery;
// The test's view of the network. This receives updates from the subsystem in the form
// of `NetworkAction`s.
struct TestNetworkHandle {
action_rx: metered::UnboundedMeteredReceiver<NetworkAction>,
net_tx: metered::MeteredSender<NetworkEvent>,
}
fn new_test_network() -> (TestNetwork, TestNetworkHandle, TestAuthorityDiscovery) {
let (net_tx, net_rx) = metered::channel(10);
let (action_tx, action_rx) = metered::unbounded();
(
TestNetwork {
net_events: Arc::new(Mutex::new(Some(net_rx))),
action_tx: Arc::new(Mutex::new(action_tx)),
},
TestNetworkHandle { action_rx, net_tx },
TestAuthorityDiscovery,
)
}
#[async_trait]
impl Network for TestNetwork {
fn event_stream(&mut self) -> BoxStream<'static, NetworkEvent> {
self.net_events
.lock()
.take()
.expect("Subsystem made more than one call to `event_stream`")
.boxed()
}
async fn set_reserved_peers(
&mut self,
_protocol: Cow<'static, str>,
_: HashSet<Multiaddr>,
) -> Result<(), String> {
Ok(())
}
async fn remove_from_peers_set(&mut self, _protocol: Cow<'static, str>, _: Vec<PeerId>) {}
async fn start_request<AD: AuthorityDiscovery>(
&self,
_: &mut AD,
_: Requests,
_: IfDisconnected,
) {
}
fn report_peer(&self, who: PeerId, cost_benefit: Rep) {
self.action_tx
.lock()
.unbounded_send(NetworkAction::ReputationChange(who, cost_benefit))
.unwrap();
}
fn disconnect_peer(&self, who: PeerId, peer_set: PeerSet) {
self.action_tx
.lock()
.unbounded_send(NetworkAction::DisconnectPeer(who, peer_set))
.unwrap();
}
fn write_notification(&self, who: PeerId, peer_set: PeerSet, message: Vec<u8>) {
self.action_tx
.lock()
.unbounded_send(NetworkAction::WriteNotification(who, peer_set, message))
.unwrap();
}
}
#[async_trait]
impl validator_discovery::AuthorityDiscovery for TestAuthorityDiscovery {
async fn get_addresses_by_authority_id(
&mut self,
_authority: AuthorityDiscoveryId,
) -> Option<HashSet<Multiaddr>> {
None
}
async fn get_authority_ids_by_peer_id(
&mut self,
_peer_id: PeerId,
) -> Option<HashSet<AuthorityDiscoveryId>> {
None
}
}
impl TestNetworkHandle {
// Get the next network action.
async fn next_network_action(&mut self) -> NetworkAction {
self.action_rx.next().await.expect("subsystem concluded early")
}
async fn connect_peer(&mut self, peer: PeerId, peer_set: PeerSet, role: ObservedRole) {
self.send_network_event(NetworkEvent::NotificationStreamOpened {
remote: peer,
protocol: peer_set.into_default_protocol_name(),
negotiated_fallback: None,
role: role.into(),
})
.await;
}
async fn send_network_event(&mut self, event: NetworkEvent) {
self.net_tx.send(event).await.expect("subsystem concluded early");
}
}
type VirtualOverseer = TestSubsystemContextHandle<NetworkBridgeTxMessage>;
struct TestHarness {
network_handle: TestNetworkHandle,
virtual_overseer: VirtualOverseer,
}
fn test_harness<T: Future<Output = VirtualOverseer>>(test: impl FnOnce(TestHarness) -> T) {
let pool = sp_core::testing::TaskExecutor::new();
let (network, network_handle, discovery) = new_test_network();
let (context, virtual_overseer) =
polkadot_node_subsystem_test_helpers::make_subsystem_context(pool);
let bridge_out = NetworkBridgeTx::new(network, discovery, Metrics(None));
let network_bridge_out_fut = run_network_out(bridge_out, context)
.map_err(|e| panic!("bridge-out subsystem execution failed {:?}", e))
.map(|_| ());
let test_fut = test(TestHarness { network_handle, virtual_overseer });
futures::pin_mut!(test_fut);
futures::pin_mut!(network_bridge_out_fut);
let _ = executor::block_on(future::join(
async move {
let mut virtual_overseer = test_fut.await;
virtual_overseer.send(FromOrchestra::Signal(OverseerSignal::Conclude)).await;
},
network_bridge_out_fut,
));
}
#[test]
fn send_messages_to_peers() {
test_harness(|test_harness| async move {
let TestHarness { mut network_handle, mut virtual_overseer } = test_harness;
let peer = PeerId::random();
network_handle
.connect_peer(peer.clone(), PeerSet::Validation, ObservedRole::Full)
.timeout(TIMEOUT)
.await
.expect("Timeout does not occur");
// the outgoing side does not consume network messages
// so the single item sink has to be free explicitly
network_handle
.connect_peer(peer.clone(), PeerSet::Collation, ObservedRole::Full)
.timeout(TIMEOUT)
.await
.expect("Timeout does not occur");
// send a validation protocol message.
{
let approval_distribution_message =
protocol_v1::ApprovalDistributionMessage::Approvals(Vec::new());
let message_v1 = protocol_v1::ValidationProtocol::ApprovalDistribution(
approval_distribution_message.clone(),
);
virtual_overseer
.send(FromOrchestra::Communication {
msg: NetworkBridgeTxMessage::SendValidationMessage(
vec![peer.clone()],
Versioned::V1(message_v1.clone()),
),
})
.timeout(TIMEOUT)
.await
.expect("Timeout does not occur");
assert_eq!(
network_handle
.next_network_action()
.timeout(TIMEOUT)
.await
.expect("Timeout does not occur"),
NetworkAction::WriteNotification(
peer.clone(),
PeerSet::Validation,
WireMessage::ProtocolMessage(message_v1).encode(),
)
);
}
// send a collation protocol message.
{
let collator_protocol_message = protocol_v1::CollatorProtocolMessage::Declare(
Sr25519Keyring::Alice.public().into(),
0_u32.into(),
dummy_collator_signature(),
);
let message_v1 =
protocol_v1::CollationProtocol::CollatorProtocol(collator_protocol_message.clone());
virtual_overseer
.send(FromOrchestra::Communication {
msg: NetworkBridgeTxMessage::SendCollationMessage(
vec![peer.clone()],
Versioned::V1(message_v1.clone()),
),
})
.await;
assert_eq!(
network_handle
.next_network_action()
.timeout(TIMEOUT)
.await
.expect("Timeout does not occur"),
NetworkAction::WriteNotification(
peer.clone(),
PeerSet::Collation,
WireMessage::ProtocolMessage(message_v1).encode(),
)
);
}
virtual_overseer
});
}