mirror of
https://github.com/pezkuwichain/pezkuwi-subxt.git
synced 2026-04-26 07:37:57 +00:00
Reduce network bandwidth, improve parablock times: optimize approval-distribution (#5164)
* gossip-support: be explicit about dimensions * some guide updates * update network-bridge to distinguish x and y dimensions * get everything to compile * beginnings * some TODOs * polkadot runtime: use relevant_authorities * make gossip topologies per-session * better formatting * gossip support: use current session validators * expand in comment * adjust tests and fix index bug * add past/present/future connection test and clean up code * fmt * network bridge: updated types * update protocols to new gossip topology message * guide updates * add session to BlockApprovalMeta * add session to block info * refactor knowledge and remove most unify logic * start replacing gossip_peers with new SessionTopologies * add routing information to message state * add some utilities to SessionTopology * implement new gossip topology logic * re-implement unify_with_peer * distribute assignments according to topology * finish grid topology implementation * refactor network bridge slightly * issue connection requests on all past/present/future * fmt * address grumbles * tighten invariants in unify_with_peer * implement random propagation * refactor: extract required routing adjustment logic * some block-age logic * aggressively propagate messages when finality is slow * overhaul aggression system to have 3 levels * add aggression metrics * remove aggression L3 * reduce random circulation * remove PeerData * get approval tests compiling * use btree_map in known_by to make deterministic * Revert "use btree_map in known_by to make deterministic" This reverts commit 330d65343a7bb6fe4dd0f24bd8dbc15c0cbdbd9d. 
* test XY grid propagation * remove stray println * test unshared dimension propagation * add random gossip check * test unify_with_peer better * test sending after getting gossip topology * test L1 aggression on originator * test L1 aggression for non-originators * test non-originator aggression L2 * fmt * ~spellcheck * fix statement-distribution tests * fix flaky test * fix metrics typo * re-send periodically * test resending * typo Co-authored-by: Bernhard Schuster <bernhard@ahoi.io> * add more metrics about apd messages * add back unify_with_peer logs * make Resend an enum * be more explicit when resending * fmt * fix error * add a TODO for refactoring * remove debug metrics * add some guide stuff * fmt * update runtime API in test-runtime Co-authored-by: Bernhard Schuster <bernhard@ahoi.io>
This commit is contained in:
@@ -49,10 +49,12 @@ use polkadot_node_subsystem::{
|
||||
RuntimeApiRequest,
|
||||
},
|
||||
overseer, ActiveLeavesUpdate, FromOverseer, OverseerSignal, SpawnedSubsystem, SubsystemContext,
|
||||
SubsystemError, SubsystemSender,
|
||||
SubsystemError,
|
||||
};
|
||||
use polkadot_node_subsystem_util as util;
|
||||
use polkadot_primitives::v2::{AuthorityDiscoveryId, Hash, SessionIndex};
|
||||
use polkadot_primitives::v2::{
|
||||
AuthorityDiscoveryId, Hash, SessionIndex, SessionInfo, ValidatorIndex,
|
||||
};
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
@@ -213,6 +215,24 @@ where
|
||||
if force_request { leaf_session } else { maybe_new_session };
|
||||
|
||||
if let Some((session_index, relay_parent)) = maybe_issue_connection {
|
||||
let session_info =
|
||||
util::request_session_info(leaf, session_index, ctx.sender()).await.await??;
|
||||
|
||||
let session_info = match session_info {
|
||||
Some(s) => s,
|
||||
None => {
|
||||
gum::warn!(
|
||||
relay_parent = ?leaf,
|
||||
session_index = self.last_session_index,
|
||||
"Failed to get session info.",
|
||||
);
|
||||
|
||||
continue
|
||||
},
|
||||
};
|
||||
|
||||
// Note: we only update `last_session_index` once we've
|
||||
// successfully gotten the `SessionInfo`.
|
||||
let is_new_session = maybe_new_session.is_some();
|
||||
if is_new_session {
|
||||
gum::debug!(
|
||||
@@ -223,45 +243,52 @@ where
|
||||
self.last_session_index = Some(session_index);
|
||||
}
|
||||
|
||||
let all_authorities = determine_relevant_authorities(ctx, relay_parent).await?;
|
||||
let our_index = ensure_i_am_an_authority(&self.keystore, &all_authorities).await?;
|
||||
let other_authorities = {
|
||||
let mut authorities = all_authorities.clone();
|
||||
authorities.swap_remove(our_index);
|
||||
authorities
|
||||
};
|
||||
// Connect to authorities from the past/present/future.
|
||||
//
|
||||
// This is maybe not the right place for this logic to live,
|
||||
// but at the moment we're limited by the network bridge's ability
|
||||
// to handle connection requests (it only allows one, globally).
|
||||
//
|
||||
// Certain network protocols - mostly req/res, but some gossip,
|
||||
// will require being connected to past/future validators as well
|
||||
// as current. That is, the old authority sets are not made obsolete
|
||||
// by virtue of a new session being entered. Therefore we maintain
|
||||
// connections to a much broader set of validators.
|
||||
{
|
||||
let mut connections = authorities_past_present_future(ctx, leaf).await?;
|
||||
|
||||
self.issue_connection_request(ctx, other_authorities).await;
|
||||
// Remove all of our locally controlled validator indices so we don't connect to ourself.
|
||||
// If we control none of them, don't issue connection requests - we're outside
|
||||
// of the 'clique' of recent validators.
|
||||
if remove_all_controlled(&self.keystore, &mut connections).await != 0 {
|
||||
self.issue_connection_request(ctx, connections).await;
|
||||
}
|
||||
}
|
||||
|
||||
// Gossip topology is only relevant for authorities in the current session.
|
||||
let our_index =
|
||||
ensure_i_am_an_authority(&self.keystore, &session_info.discovery_keys).await?;
|
||||
|
||||
if is_new_session {
|
||||
update_gossip_topology(ctx, our_index, all_authorities, relay_parent).await?;
|
||||
self.update_authority_status_metrics(leaf, ctx.sender()).await?;
|
||||
self.update_authority_status_metrics(&session_info).await;
|
||||
|
||||
update_gossip_topology(
|
||||
ctx,
|
||||
our_index,
|
||||
session_info.discovery_keys,
|
||||
relay_parent,
|
||||
session_index,
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn update_authority_status_metrics(
|
||||
&mut self,
|
||||
leaf: Hash,
|
||||
sender: &mut impl SubsystemSender,
|
||||
) -> Result<(), util::Error> {
|
||||
if let Some(session_info) = util::request_session_info(
|
||||
leaf,
|
||||
self.last_session_index
|
||||
.expect("Last session index is always set on every session index change"),
|
||||
sender,
|
||||
)
|
||||
.await
|
||||
.await??
|
||||
{
|
||||
let maybe_index = match ensure_i_am_an_authority(
|
||||
&self.keystore,
|
||||
&session_info.discovery_keys,
|
||||
)
|
||||
.await
|
||||
{
|
||||
async fn update_authority_status_metrics(&mut self, session_info: &SessionInfo) {
|
||||
let maybe_index =
|
||||
match ensure_i_am_an_authority(&self.keystore, &session_info.discovery_keys).await {
|
||||
Ok(index) => {
|
||||
self.metrics.on_is_authority();
|
||||
Some(index)
|
||||
@@ -275,21 +302,19 @@ where
|
||||
Err(_) => None,
|
||||
};
|
||||
|
||||
if let Some(validator_index) = maybe_index {
|
||||
// The subset of authorities participating in parachain consensus.
|
||||
let parachain_validators_this_session = session_info.validators;
|
||||
if let Some(validator_index) = maybe_index {
|
||||
// The subset of authorities participating in parachain consensus.
|
||||
let parachain_validators_this_session = session_info.validators.len();
|
||||
|
||||
// First `maxValidators` entries are the parachain validators. We'll check
|
||||
// if our index is in this set to avoid searching for the keys.
|
||||
// https://github.com/paritytech/polkadot/blob/a52dca2be7840b23c19c153cf7e110b1e3e475f8/runtime/parachains/src/configuration.rs#L148
|
||||
if validator_index < parachain_validators_this_session.len() {
|
||||
self.metrics.on_is_parachain_validator();
|
||||
} else {
|
||||
self.metrics.on_is_not_parachain_validator();
|
||||
}
|
||||
// First `maxValidators` entries are the parachain validators. We'll check
|
||||
// if our index is in this set to avoid searching for the keys.
|
||||
// https://github.com/paritytech/polkadot/blob/a52dca2be7840b23c19c153cf7e110b1e3e475f8/runtime/parachains/src/configuration.rs#L148
|
||||
if validator_index < parachain_validators_this_session {
|
||||
self.metrics.on_is_parachain_validator();
|
||||
} else {
|
||||
self.metrics.on_is_not_parachain_validator();
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn issue_connection_request<Context>(
|
||||
@@ -378,7 +403,7 @@ where
|
||||
},
|
||||
NetworkBridgeEvent::OurViewChange(_) => {},
|
||||
NetworkBridgeEvent::PeerViewChange(_, _) => {},
|
||||
NetworkBridgeEvent::NewGossipTopology(_) => {},
|
||||
NetworkBridgeEvent::NewGossipTopology { .. } => {},
|
||||
NetworkBridgeEvent::PeerMessage(_, v) => {
|
||||
match v {};
|
||||
},
|
||||
@@ -416,7 +441,8 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
async fn determine_relevant_authorities<Context>(
|
||||
// Get the authorities of the past, present, and future.
|
||||
async fn authorities_past_present_future<Context>(
|
||||
ctx: &mut Context,
|
||||
relay_parent: Hash,
|
||||
) -> Result<Vec<AuthorityDiscoveryId>, util::Error>
|
||||
@@ -428,7 +454,7 @@ where
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
authority_count = ?authorities.len(),
|
||||
"Determined relevant authorities",
|
||||
"Determined past/present/future authorities",
|
||||
);
|
||||
Ok(authorities)
|
||||
}
|
||||
@@ -447,6 +473,25 @@ async fn ensure_i_am_an_authority(
|
||||
Err(util::Error::NotAValidator)
|
||||
}
|
||||
|
||||
/// Filter out all controlled keys in the given set. Returns the number of keys removed.
|
||||
async fn remove_all_controlled(
|
||||
keystore: &SyncCryptoStorePtr,
|
||||
authorities: &mut Vec<AuthorityDiscoveryId>,
|
||||
) -> usize {
|
||||
let mut to_remove = Vec::new();
|
||||
for (i, v) in authorities.iter().enumerate() {
|
||||
if CryptoStore::has_keys(&**keystore, &[(v.to_raw_vec(), AuthorityDiscoveryId::ID)]).await {
|
||||
to_remove.push(i);
|
||||
}
|
||||
}
|
||||
|
||||
for i in to_remove.iter().rev().copied() {
|
||||
authorities.remove(i);
|
||||
}
|
||||
|
||||
to_remove.len()
|
||||
}
|
||||
|
||||
/// We partition the list of all sorted `authorities` into `sqrt(len)` groups of `sqrt(len)` size
|
||||
/// and form a matrix where each validator is connected to all validators in its row and column.
|
||||
/// This is similar to `[web3]` research proposed topology, except for the groups are not parachain
|
||||
@@ -460,6 +505,7 @@ async fn update_gossip_topology<Context>(
|
||||
our_index: usize,
|
||||
authorities: Vec<AuthorityDiscoveryId>,
|
||||
relay_parent: Hash,
|
||||
session_index: SessionIndex,
|
||||
) -> Result<(), util::Error>
|
||||
where
|
||||
Context: SubsystemContext<Message = GossipSupportMessage>,
|
||||
@@ -469,6 +515,8 @@ where
|
||||
let random_seed = {
|
||||
let (tx, rx) = oneshot::channel();
|
||||
|
||||
// TODO https://github.com/paritytech/polkadot/issues/5316:
|
||||
// get the random seed from the `SessionInfo` instead.
|
||||
ctx.send_message(RuntimeApiMessage::Request(
|
||||
relay_parent,
|
||||
RuntimeApiRequest::CurrentBabeEpoch(tx),
|
||||
@@ -493,16 +541,38 @@ where
|
||||
.expect("our_index < len; indices contains it; qed");
|
||||
|
||||
let neighbors = matrix_neighbors(our_shuffled_position, len);
|
||||
let our_neighbors = neighbors.map(|i| authorities[indices[i]].clone()).collect();
|
||||
let row_neighbors = neighbors
|
||||
.row_neighbors
|
||||
.map(|i| indices[i])
|
||||
.map(|i| (authorities[i].clone(), ValidatorIndex::from(i as u32)))
|
||||
.collect();
|
||||
|
||||
ctx.send_message(NetworkBridgeMessage::NewGossipTopology { our_neighbors })
|
||||
.await;
|
||||
let column_neighbors = neighbors
|
||||
.column_neighbors
|
||||
.map(|i| indices[i])
|
||||
.map(|i| (authorities[i].clone(), ValidatorIndex::from(i as u32)))
|
||||
.collect();
|
||||
|
||||
ctx.send_message(NetworkBridgeMessage::NewGossipTopology {
|
||||
session: session_index,
|
||||
our_neighbors_x: row_neighbors,
|
||||
our_neighbors_y: column_neighbors,
|
||||
})
|
||||
.await;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Neighbors of a single position in the gossip matrix, split by dimension.
///
/// Both fields are left generic so that lazily evaluated iterators over matrix
/// positions can be stored without boxing or collecting them first.
struct MatrixNeighbors<Rows, Cols> {
	/// The other positions sharing our row.
	row_neighbors: Rows,
	/// The other positions sharing our column.
	column_neighbors: Cols,
}
|
||||
|
||||
/// Compute our row and column neighbors in a matrix
|
||||
fn matrix_neighbors(our_index: usize, len: usize) -> impl Iterator<Item = usize> {
|
||||
fn matrix_neighbors(
|
||||
our_index: usize,
|
||||
len: usize,
|
||||
) -> MatrixNeighbors<impl Iterator<Item = usize>, impl Iterator<Item = usize>> {
|
||||
assert!(our_index < len, "our_index is computed using `enumerate`; qed");
|
||||
|
||||
// e.g. for size 11 the matrix would be
|
||||
@@ -520,7 +590,10 @@ fn matrix_neighbors(our_index: usize, len: usize) -> impl Iterator<Item = usize>
|
||||
let row_neighbors = our_row * sqrt..std::cmp::min(our_row * sqrt + sqrt, len);
|
||||
let column_neighbors = (our_column..len).step_by(sqrt);
|
||||
|
||||
row_neighbors.chain(column_neighbors).filter(move |i| *i != our_index)
|
||||
MatrixNeighbors {
|
||||
row_neighbors: row_neighbors.filter(move |i| *i != our_index),
|
||||
column_neighbors: column_neighbors.filter(move |i| *i != our_index),
|
||||
}
|
||||
}
|
||||
|
||||
impl<Context, AD> overseer::Subsystem<Context, SubsystemError> for GossipSupport<AD>
|
||||
|
||||
Reference in New Issue
Block a user