NetworkBridge: validator (authorities) discovery api (#1699)

* stupid, but it compiles

* redo

* cleanup

* add ValidatorDiscovery to msgs

* sketch network bridge code

* ConnectToAuthorities instead of validators

* more stuff

* cleanup

* more stuff

* complete ConnectToAuthoritiesState

* Update node/network/bridge/src/lib.rs

Co-authored-by: Peter Goodspeed-Niklaus <coriolinus@users.noreply.github.com>

* Collator protocol subsystem (#1659)

* WIP

* The initial implementation of the collator side.

* Improve comments

* Multiple collation requests

* Add more tests and comments to validator side

* Add comments, remove dead code

* Apply suggestions from code review

Co-authored-by: Peter Goodspeed-Niklaus <coriolinus@users.noreply.github.com>

* Fix build after suggested changes

* Also connect to the next validator group

* Remove a Future impl and move TimeoutExt to util

* Minor nits

* Fix build

* Change FetchCollations back to FetchCollation

* Try this

* Final fixes

* Fix build

Co-authored-by: Peter Goodspeed-Niklaus <coriolinus@users.noreply.github.com>

* handle multiple in-flight connection requests

* handle cancelled requests

* Update node/core/runtime-api/src/lib.rs

Co-authored-by: Bernhard Schuster <bernhard@ahoi.io>

* redo it again

* more stuff

* redo it again

* update comments

* workaround Future is not Send

* fix trailing spaces

* clarify comments

* bridge: fix compilation in tests

* update more comments

* small fixes

* port collator protocol to new validator discovery api

* collator tests compile

* collator tests pass

* do not revoke a request when the stream receiver is closed

* make revoking opt-in

* fix is_fulfilled

* handle request revokation in collator

* tests

* wait for validator connections asyncronously

* fix compilation

* relabel my todos

* apply Fedor's patch

* resolve reconnection TODO

* resolve revoking TODO

* resolve channel capacity TODO

* resolve peer cloning TODO

* resolve peer disconnected TODO

* resolve PeerSet TODO

* wip tests

* more tests

* resolve Arc TODO

* rename pending to non_revoked

* one more test

* extract utility function into util crate

* fix compilation in tests

* Apply suggestions from code review

Co-authored-by: Fedor Sakharov <fedor.sakharov@gmail.com>

* revert pin_project removal

* fix while let loop

* Revert "revert pin_project removal"

This reverts commit ae7f529d8de982ef66c3007dd1ff74c6ddce80d2.

* fix compilation

* Update node/subsystem/src/messages.rs

* docs on pub items

* guide updates

* remove a TODO

* small guide update

* fix a typo

* link to the issue

* validator discovery: on_request docs

Co-authored-by: Peter Goodspeed-Niklaus <coriolinus@users.noreply.github.com>
Co-authored-by: Fedor Sakharov <fedor.sakharov@gmail.com>
Co-authored-by: Bernhard Schuster <bernhard@ahoi.io>
This commit is contained in:
Andronik Ordian
2020-10-06 13:34:57 +02:00
committed by GitHub
parent 96f6b5ae2d
commit ca89e3edbe
20 changed files with 1102 additions and 124 deletions
+2
View File
@@ -57,6 +57,8 @@ use std::{
};
use streamunordered::{StreamUnordered, StreamYield};
pub mod validator_discovery;
/// These reexports are required so that external crates can use the `delegated_subsystem` macro properly.
pub mod reexports {
pub use sp_core::traits::SpawnNamed;
@@ -0,0 +1,157 @@
// Copyright 2020 Parity Technologies (UK) Ltd.
// This file is part of Polkadot.
// Polkadot is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Polkadot is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
//! Utility function to make it easier to connect to validators.
use std::collections::HashMap;
use std::pin::Pin;
use futures::{
channel::{mpsc, oneshot},
task::{Poll, self},
stream,
};
use polkadot_node_subsystem::{
errors::RuntimeApiError, SubsystemError,
messages::{AllMessages, RuntimeApiMessage, RuntimeApiRequest, NetworkBridgeMessage},
SubsystemContext,
};
use polkadot_primitives::v1::{Hash, ValidatorId, AuthorityDiscoveryId};
use sc_network::PeerId;
/// Error when making a request to connect to validators.
#[derive(Debug, derive_more::From)]
pub enum Error {
/// Attempted to send or receive on a oneshot channel which had been canceled
#[from]
Oneshot(oneshot::Canceled),
/// A subsystem error.
#[from]
Subsystem(SubsystemError),
/// An error in the Runtime API.
#[from]
RuntimeApi(RuntimeApiError),
}
/// Utility function to make it easier to connect to validators.
pub async fn connect_to_validators<Context: SubsystemContext>(
ctx: &mut Context,
relay_parent: Hash,
validators: Vec<ValidatorId>,
) -> Result<ConnectionRequest, Error> {
// ValidatorId -> AuthorityDiscoveryId
let (tx, rx) = oneshot::channel();
ctx.send_message(AllMessages::RuntimeApi(
RuntimeApiMessage::Request(
relay_parent,
RuntimeApiRequest::ValidatorDiscovery(validators.clone(), tx),
)
)).await?;
let maybe_authorities = rx.await??;
let authorities: Vec<_> = maybe_authorities.iter()
.cloned()
.filter_map(|id| id)
.collect();
let validator_map = validators.into_iter()
.zip(maybe_authorities.into_iter())
.filter_map(|(k, v)| v.map(|v| (v, k)))
.collect::<HashMap<AuthorityDiscoveryId, ValidatorId>>();
let (connections, revoke) = connect_to_authorities(ctx, authorities).await?;
Ok(ConnectionRequest {
validator_map,
connections,
revoke,
})
}
async fn connect_to_authorities<Context: SubsystemContext>(
ctx: &mut Context,
validator_ids: Vec<AuthorityDiscoveryId>,
) -> Result<(mpsc::Receiver<(AuthorityDiscoveryId, PeerId)>, oneshot::Sender<()>), Error> {
const PEERS_CAPACITY: usize = 8;
let (revoke_tx, revoke) = oneshot::channel();
let (connected, connected_rx) = mpsc::channel(PEERS_CAPACITY);
ctx.send_message(AllMessages::NetworkBridge(
NetworkBridgeMessage::ConnectToValidators {
validator_ids,
connected,
revoke,
}
)).await?;
Ok((connected_rx, revoke_tx))
}
/// A pending connection request to validators.
/// This struct implements `Stream` to allow for asynchronous
/// discovery of validator addresses.
///
/// NOTE: you should call `revoke` on this struct
/// when you're no longer interested in the requested validators.
#[must_use = "dropping a request will result in its immediate revokation"]
pub struct ConnectionRequest {
validator_map: HashMap<AuthorityDiscoveryId, ValidatorId>,
#[must_use = "streams do nothing unless polled"]
connections: mpsc::Receiver<(AuthorityDiscoveryId, PeerId)>,
#[must_use = "a request should be revoked at some point"]
revoke: oneshot::Sender<()>,
}
impl stream::Stream for ConnectionRequest {
type Item = (ValidatorId, PeerId);
fn poll_next(mut self: Pin<&mut Self>, cx: &mut task::Context) -> Poll<Option<Self::Item>> {
if self.validator_map.is_empty() {
return Poll::Ready(None);
}
match Pin::new(&mut self.connections).poll_next(cx) {
Poll::Ready(Some((id, peer_id))) => {
if let Some(validator_id) = self.validator_map.remove(&id) {
return Poll::Ready(Some((validator_id, peer_id)));
} else {
// unknown authority_id
// should be unreachable
}
}
_ => {},
}
Poll::Pending
}
}
impl ConnectionRequest {
/// By revoking the request the caller allows the network to
/// free some peer slots thus freeing the resources.
/// It doesn't necessarily lead to peers disconnection though.
/// The revokation is enacted on in the next connection request.
///
/// This can be done either by calling this function or dropping the request.
pub fn revoke(self) {
if let Err(_) = self.revoke.send(()) {
log::warn!(
"Failed to revoke a validator connection request",
);
}
}
}