improved gossip topology (#3270)

* gossip-support: gossip topology

* some fixes

* handle view update for newly added gossip peers

* fix neighbors calculation

* fix test

* resolve TODOs

* typo

* guide updates

* spaces in the guide

* sneaky spaces

* hash randomness

* address some review nits

* use unbounded in bridge for subsystem msg
This commit is contained in:
Andronik Ordian
2021-06-18 21:30:35 +02:00
committed by GitHub
parent ae5b355754
commit ad9c02886d
21 changed files with 720 additions and 287 deletions
+151 -41
View File
@@ -15,14 +15,23 @@
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
//! This subsystem is responsible for keeping track of session changes
//! and issuing a connection request to the validators relevant to
//! the gossiping subsystems on every new session.
//! and issuing a connection request to the relevant validators
//! on every new session.
//!
//! In addition to that, it creates a gossip overlay topology
//! which limits the amount of messages sent and received
//! to be an order of sqrt of the validators. Our neighbors
//! in this graph will be forwarded to the network bridge with
//! the `NetworkBridgeMessage::NewGossipTopology` message.
use std::time::{Duration, Instant};
use futures::{channel::oneshot, FutureExt as _};
use rand::{SeedableRng, seq::SliceRandom as _};
use rand_chacha::ChaCha20Rng;
use polkadot_node_subsystem::{
messages::{
AllMessages, GossipSupportMessage, NetworkBridgeMessage,
RuntimeApiMessage, RuntimeApiRequest,
},
ActiveLeavesUpdate, FromOverseer, OverseerSignal,
Subsystem, SpawnedSubsystem, SubsystemContext,
@@ -39,8 +48,8 @@ use sp_application_crypto::{Public, AppKey};
mod tests;
const LOG_TARGET: &str = "parachain::gossip-support";
// How much time should we wait since the last
// authority discovery resolution failure.
// How much time should we wait to reissue a connection request
// since the last authority discovery resolution failure.
const BACKOFF_DURATION: Duration = Duration::from_secs(5);
/// The Gossip Support subsystem.
@@ -85,7 +94,7 @@ impl GossipSupport {
tracing::debug!(
target: LOG_TARGET,
err = ?e,
"Failed to receive a message from Overseer, exiting"
"Failed to receive a message from Overseer, exiting",
);
return;
},
@@ -120,28 +129,30 @@ async fn determine_relevant_authorities(
tracing::debug!(
target: LOG_TARGET,
authority_count = ?authorities.len(),
"Determined relevant authorities"
"Determined relevant authorities",
);
Ok(authorities)
}
/// Return an error if we're not a validator in the given set (do not have keys).
/// Otherwise, returns the index of our keys in `authorities`.
async fn ensure_i_am_an_authority(
keystore: &SyncCryptoStorePtr,
authorities: &[AuthorityDiscoveryId],
) -> Result<(), util::Error> {
for v in authorities {
if CryptoStore::has_keys(&**keystore, &[(v.to_raw_vec(), AuthorityDiscoveryId::ID)])
.await
{
return Ok(());
) -> Result<usize, util::Error> {
for (i, v) in authorities.iter().enumerate() {
if CryptoStore::has_keys(
&**keystore,
&[(v.to_raw_vec(), AuthorityDiscoveryId::ID)]
).await {
return Ok(i);
}
}
Err(util::Error::NotAValidator)
}
/// A helper function for making a `ConnectToValidators` request.
pub async fn connect_to_authorities(
async fn connect_to_authorities(
ctx: &mut impl SubsystemContext,
validator_ids: Vec<AuthorityDiscoveryId>,
peer_set: PeerSet,
@@ -157,6 +168,79 @@ pub async fn connect_to_authorities(
failed_rx
}
/// We partition the list of all sorted `authorities` into sqrt(len) groups of sqrt(len) size
/// and form a matrix where each validator is connected to all validators in its row and column.
/// This is similar to [web3] research proposed topology, except for the groups are not parachain
/// groups (because not all validators are parachain validators and the group size is small),
/// but formed randomly via BABE randomness from two epochs ago.
/// This limits the amount of gossip peers to 2 * sqrt(len) and ensures the diameter of 2.
///
/// [web3]: https://research.web3.foundation/en/latest/polkadot/networking/3-avail-valid.html#topology
async fn update_gossip_topology(
ctx: &mut impl SubsystemContext,
our_index: usize,
authorities: Vec<AuthorityDiscoveryId>,
relay_parent: Hash,
) -> Result<(), util::Error> {
// retrieve BABE randomness
let random_seed = {
let (tx, rx) = oneshot::channel();
ctx.send_message(RuntimeApiMessage::Request(
relay_parent,
RuntimeApiRequest::CurrentBabeEpoch(tx),
).into()).await;
let randomness = rx.await??.randomness;
let mut subject = [0u8; 40];
subject[..8].copy_from_slice(b"gossipsu");
subject[8..].copy_from_slice(&randomness);
sp_core::blake2_256(&subject)
};
// shuffle the indices
let mut rng: ChaCha20Rng = SeedableRng::from_seed(random_seed);
let len = authorities.len();
let mut indices: Vec<usize> = (0..len).collect();
indices.shuffle(&mut rng);
let our_shuffled_position = indices.iter()
.position(|i| *i == our_index)
.expect("our_index < len; indices contains it; qed");
let neighbors = matrix_neighbors(our_shuffled_position, len);
let our_neighbors = neighbors.map(|i| authorities[indices[i]].clone()).collect();
ctx.send_message(AllMessages::NetworkBridge(
NetworkBridgeMessage::NewGossipTopology {
our_neighbors,
}
)).await;
Ok(())
}
/// Compute our row and column neighbors in a matrix
fn matrix_neighbors(our_index: usize, len: usize) -> impl Iterator<Item=usize> {
assert!(our_index < len, "our_index is computed using `enumerate`; qed");
// e.g. for size 11 the matrix would be
//
// 0 1 2
// 3 4 5
// 6 7 8
// 9 10
//
// and for index 10, the neighbors would be 1, 4, 7, 9
let sqrt = (len as f64).sqrt() as usize;
let our_row = our_index / sqrt;
let our_column = our_index % sqrt;
let row_neighbors = our_row * sqrt..std::cmp::min(our_row * sqrt + sqrt, len);
let column_neighbors = (our_column..len).step_by(sqrt);
row_neighbors.chain(column_neighbors).filter(move |i| *i != our_index)
}
impl State {
/// 1. Determine if the current session index has changed.
/// 2. If it has, determine relevant validators
@@ -171,46 +255,72 @@ impl State {
let current_index = util::request_session_index_for_child(leaf, ctx.sender()).await.await??;
let since_failure = self.last_failure.map(|i| i.elapsed()).unwrap_or_default();
let force_request = since_failure >= BACKOFF_DURATION;
let leaf_session = Some((current_index, leaf));
let maybe_new_session = match self.last_session_index {
Some(i) if current_index <= i && !force_request => None,
_ => Some((current_index, leaf)),
Some(i) if current_index <= i => None,
_ => leaf_session,
};
if let Some((new_session, relay_parent)) = maybe_new_session {
tracing::debug!(
target: LOG_TARGET,
%new_session,
%force_request,
"New session detected",
);
let maybe_issue_connection = if force_request {
leaf_session
} else {
maybe_new_session
};
if let Some((session_index, relay_parent)) = maybe_issue_connection {
let is_new_session = maybe_new_session.is_some();
if is_new_session {
tracing::debug!(
target: LOG_TARGET,
%session_index,
"New session detected",
);
}
let authorities = determine_relevant_authorities(ctx, relay_parent).await?;
ensure_i_am_an_authority(keystore, &authorities).await?;
let num = authorities.len();
tracing::debug!(target: LOG_TARGET, %num, "Issuing a connection request");
let our_index = ensure_i_am_an_authority(keystore, &authorities).await?;
let failures = connect_to_authorities(
ctx,
authorities,
PeerSet::Validation,
).await;
self.issue_connection_request(ctx, authorities.clone()).await?;
// we await for the request to be processed
// this is fine, it should take much less time than one session
let failures = failures.await.unwrap_or(num);
self.last_session_index = Some(new_session);
// issue another request for the same session
// if at least a third of the authorities were not resolved
self.last_failure = if failures >= num / 3 {
Some(Instant::now())
} else {
None
if is_new_session {
self.last_session_index = Some(session_index);
update_gossip_topology(ctx, our_index, authorities, relay_parent).await?;
}
}
}
Ok(())
}
async fn issue_connection_request(
&mut self,
ctx: &mut impl SubsystemContext,
authorities: Vec<AuthorityDiscoveryId>,
) -> Result<(), util::Error> {
let num = authorities.len();
tracing::debug!(target: LOG_TARGET, %num, "Issuing a connection request");
let failures = connect_to_authorities(
ctx,
authorities,
PeerSet::Validation,
).await;
// we await for the request to be processed
// this is fine, it should take much less time than one session
let failures = failures.await.unwrap_or(num);
// issue another request for the same session
// if at least a third of the authorities were not resolved
self.last_failure = if failures >= num / 3 {
Some(Instant::now())
} else {
None
};
Ok(())
}
}
impl<Context> Subsystem<Context> for GossipSupport
@@ -26,6 +26,9 @@ use polkadot_node_subsystem_util::TimeoutExt as _;
use sc_keystore::LocalKeystore;
use sp_keyring::Sr25519Keyring;
use sp_keystore::SyncCryptoStore;
use sp_consensus_babe::{
Epoch as BabeEpoch, BabeEpochConfiguration, AllowedSlots,
};
use std::sync::Arc;
use std::time::Duration;
@@ -117,6 +120,47 @@ fn authorities() -> Vec<AuthorityDiscoveryId> {
]
}
fn neighbors() -> Vec<AuthorityDiscoveryId> {
vec![
Sr25519Keyring::One.public().into(),
Sr25519Keyring::Alice.public().into(),
Sr25519Keyring::Eve.public().into(),
]
}
async fn test_neighbors(overseer: &mut VirtualOverseer) {
assert_matches!(
overseer_recv(overseer).await,
AllMessages::RuntimeApi(RuntimeApiMessage::Request(
_,
RuntimeApiRequest::CurrentBabeEpoch(tx),
)) => {
let _ = tx.send(Ok(BabeEpoch {
epoch_index: 2 as _,
start_slot: 0.into(),
duration: 200,
authorities: vec![(Sr25519Keyring::Alice.public().into(), 1)],
randomness: [0u8; 32],
config: BabeEpochConfiguration {
c: (1, 4),
allowed_slots: AllowedSlots::PrimarySlots,
},
})).unwrap();
}
);
assert_matches!(
overseer_recv(overseer).await,
AllMessages::NetworkBridge(NetworkBridgeMessage::NewGossipTopology {
our_neighbors,
}) => {
let mut got: Vec<_> = our_neighbors.into_iter().collect();
got.sort();
assert_eq!(got, neighbors());
}
);
}
#[test]
fn issues_a_connection_request_on_new_session() {
let hash = Hash::repeat_byte(0xAA);
@@ -157,6 +201,8 @@ fn issues_a_connection_request_on_new_session() {
}
);
test_neighbors(overseer).await;
virtual_overseer
});
@@ -223,6 +269,8 @@ fn issues_a_connection_request_on_new_session() {
}
);
test_neighbors(overseer).await;
virtual_overseer
});
assert_eq!(state.last_session_index, Some(2));
@@ -268,6 +316,9 @@ fn issues_a_connection_request_when_last_request_was_mostly_unresolved() {
failed.send(2).unwrap();
}
);
test_neighbors(overseer).await;
virtual_overseer
});
@@ -312,6 +363,7 @@ fn issues_a_connection_request_when_last_request_was_mostly_unresolved() {
failed.send(1).unwrap();
}
);
virtual_overseer
});
@@ -319,3 +371,18 @@ fn issues_a_connection_request_when_last_request_was_mostly_unresolved() {
assert!(state.last_failure.is_none());
}
#[test]
fn test_matrix_neighbors() {
for (our_index, len, expected) in vec![
(0usize, 1usize, vec![]),
(1, 2, vec![0usize]),
(0, 9, vec![1, 2, 3, 6]),
(9, 10, vec![0, 3, 6]),
(10, 11, vec![1, 4, 7, 9]),
(7, 11, vec![1, 4, 6, 8, 10]),
].into_iter() {
let mut result: Vec<_> = matrix_neighbors(our_index, len).collect();
result.sort();
assert_eq!(result, expected);
}
}