validator-discovery: basic retrying logic (#3059)

* validator_discovery: less flexible, but simpler design

* fix test

* remove unused struct

* smol optimization

* validator_discovery: basic retrying logic

* add a test

* add more tests

* update the guide

* more test logic

* Require at least 2/3 connectivity.

* Fix test.

* Update node/network/gossip-support/src/lib.rs

Co-authored-by: André Silva <123550+andresilva@users.noreply.github.com>

* Update node/network/gossip-support/src/lib.rs

Co-authored-by: André Silva <123550+andresilva@users.noreply.github.com>

Co-authored-by: Robert Klotzner <robert.klotzner@gmx.at>
Co-authored-by: Robert Klotzner <eskimor@users.noreply.github.com>
Co-authored-by: André Silva <123550+andresilva@users.noreply.github.com>
This commit is contained in:
Andronik Ordian
2021-05-20 12:05:44 +02:00
committed by GitHub
parent 933c9ac2bf
commit 2e70f4ea08
9 changed files with 432 additions and 27 deletions
@@ -18,7 +18,10 @@
//! and issuing a connection request to the validators relevant to
//! the gossiping subsystems on every new session.
use futures::FutureExt as _;
#[cfg(test)]
mod tests;
use futures::{channel::oneshot, FutureExt as _};
use polkadot_node_subsystem::{
messages::{
AllMessages, GossipSupportMessage, NetworkBridgeMessage,
@@ -44,6 +47,7 @@ pub struct GossipSupport {
/// Bookkeeping carried across leaf updates to decide when a connection request is issued.
#[derive(Default)]
struct State {
/// Session index recorded after the most recent connection request; `None` until one is issued.
last_session_index: Option<SessionIndex>,
/// When `true`, a request is issued even if the session index has not advanced.
/// Set after a request whose reported failure count satisfied `failures >= num / 3`.
force_request: bool,
}
impl GossipSupport {
@@ -54,12 +58,18 @@ impl GossipSupport {
}
}
#[tracing::instrument(skip(self, ctx), fields(subsystem = LOG_TARGET))]
async fn run<Context>(self, mut ctx: Context)
async fn run<Context>(self, ctx: Context)
where
Context: SubsystemContext<Message = GossipSupportMessage>,
{
let mut state = State::default();
self.run_inner(ctx, &mut state).await;
}
async fn run_inner<Context>(self, mut ctx: Context, state: &mut State)
where
Context: SubsystemContext<Message = GossipSupportMessage>,
{
let Self { keystore } = self;
loop {
let message = match ctx.recv().await {
@@ -128,13 +138,16 @@ pub async fn connect_to_authorities(
ctx: &mut impl SubsystemContext,
validator_ids: Vec<AuthorityDiscoveryId>,
peer_set: PeerSet,
) {
) -> oneshot::Receiver<usize> {
let (failed, failed_rx) = oneshot::channel();
ctx.send_message(AllMessages::NetworkBridge(
NetworkBridgeMessage::ConnectToValidators {
validator_ids,
peer_set,
failed,
}
)).await;
failed_rx
}
impl State {
@@ -150,7 +163,7 @@ impl State {
for leaf in leaves {
let current_index = util::request_session_index_for_child(leaf, ctx.sender()).await.await??;
let maybe_new_session = match self.last_session_index {
Some(i) if i >= current_index => None,
Some(i) if current_index <= i && !self.force_request => None,
_ => Some((current_index, leaf)),
};
@@ -158,15 +171,22 @@ impl State {
tracing::debug!(target: LOG_TARGET, %new_session, "New session detected");
let authorities = determine_relevant_authorities(ctx, relay_parent).await?;
ensure_i_am_an_authority(keystore, &authorities).await?;
tracing::debug!(target: LOG_TARGET, num = ?authorities.len(), "Issuing a connection request");
let num = authorities.len();
tracing::debug!(target: LOG_TARGET, %num, "Issuing a connection request");
connect_to_authorities(
let failures = connect_to_authorities(
ctx,
authorities,
PeerSet::Validation,
).await;
// We wait for the request to be processed before continuing;
// this is fine, as it should take much less time than one session.
let failures = failures.await.unwrap_or(num);
self.last_session_index = Some(new_session);
// issue another request if at least a third of the authorities were not resolved
self.force_request = failures >= num / 3;
}
}