Reduce network bandwidth, improve parablock times: optimize approval-distribution (#5164)

* gossip-support: be explicit about dimensions

* some guide updates

* update network-bridge to distinguish x and y dimensions

* get everything to compile

* beginnings

* some TODOs

* polkadot runtime: use relevant_authorities

* make gossip topologies per-session

* better formatting

* gossip support: use current session validators

* expand in comment

* adjust tests and fix index bug

* add past/present/future connection test and clean up code

* fmt

* network bridge: updated types

* update protocols to new gossip topology message

* guide updates

* add session to BlockApprovalMeta

* add session to block info

* refactor knowledge and remove most unify logic

* start replacing gossip_peers with new SessionTopologies

* add routing information to message state

* add some utilities to SessionTopology

* implement new gossip topology logic

* re-implement unify_with_peer

* distribute assignments according to topology

* finish grid topology implementation

* refactor network bridge slightly

* issue connection requests on all past/present/future

* fmt

* address grumbles

* tighten invariants in unify_with_peer

* implement random propagation

* refactor: extract required routing adjustment logic

* some block-age logic

* aggressively propagate messages when finality is slow

* overhaul aggression system to have 3 levels

* add aggression metrics

* remove aggression L3

* reduce random circulation

* remove PeerData

* get approval tests compiling

* use btree_map in known_by to make deterministic

* Revert "use btree_map in known_by to make deterministic"

This reverts commit 330d65343a7bb6fe4dd0f24bd8dbc15c0cbdbd9d.

* test XY grid propagation

* remove stray println

* test unshared dimension propagation

* add random gossip check

* test unify_with_peer better

* test sending after getting gossip topology

* test L1 aggression on originator

* test L1 aggression for non-originators

* test non-originator aggression L2

* fmt

* ~spellcheck

* fix statement-distribution tests

* fix flaky test

* fix metrics typo

* re-send periodically

* test resending

* typo

Co-authored-by: Bernhard Schuster <bernhard@ahoi.io>

* add more metrics about apd messages

* add back unify_with_peer logs

* make Resend an enum

* be more explicit when resending

* fmt

* fix error

* add a TODO for refactoring

* remove debug metrics

* add some guide stuff

* fmt

* update runtime API in test-runtime

Co-authored-by: Bernhard Schuster <bernhard@ahoi.io>
This commit is contained in:
asynchronous rob
2022-04-19 13:26:55 -05:00
committed by GitHub
parent edfa24bbc5
commit 79ecc53801
25 changed files with 2563 additions and 499 deletions
+49 -17
View File
@@ -31,10 +31,13 @@ use polkadot_node_network_protocol::{
};
use polkadot_node_subsystem_util::metrics::{self, prometheus};
use polkadot_overseer::gen::{OverseerError, Subsystem};
use polkadot_primitives::v2::{BlockNumber, Hash};
use polkadot_primitives::v2::{AuthorityDiscoveryId, BlockNumber, Hash, ValidatorIndex};
use polkadot_subsystem::{
errors::{SubsystemError, SubsystemResult},
messages::{AllMessages, CollatorProtocolMessage, NetworkBridgeEvent, NetworkBridgeMessage},
messages::{
network_bridge_event::{NewGossipTopology, TopologyPeerInfo},
AllMessages, CollatorProtocolMessage, NetworkBridgeEvent, NetworkBridgeMessage,
},
overseer, ActivatedLeaf, ActiveLeavesUpdate, FromOverseer, OverseerSignal, SpawnedSubsystem,
SubsystemContext, SubsystemSender,
};
@@ -45,7 +48,8 @@ use polkadot_subsystem::{
pub use polkadot_node_network_protocol::peer_set::{peer_sets_info, IsAuthority};
use std::{
collections::{hash_map, HashMap, HashSet},
collections::{hash_map, HashMap},
iter::ExactSizeIterator,
sync::Arc,
};
@@ -590,30 +594,36 @@ where
).await;
}
NetworkBridgeMessage::NewGossipTopology {
our_neighbors,
session,
our_neighbors_x,
our_neighbors_y,
} => {
gum::debug!(
target: LOG_TARGET,
action = "NewGossipTopology",
neighbors = our_neighbors.len(),
neighbors_x = our_neighbors_x.len(),
neighbors_y = our_neighbors_y.len(),
"Gossip topology has changed",
);
let ads = &mut authority_discovery_service;
let mut gossip_peers = HashSet::with_capacity(our_neighbors.len());
for authority in our_neighbors {
let addr = get_peer_id_by_authority_id(
ads,
authority.clone(),
).await;
let gossip_peers_x = update_gossip_peers_1d(
&mut authority_discovery_service,
our_neighbors_x,
).await;
if let Some(peer_id) = addr {
gossip_peers.insert(peer_id);
}
}
let gossip_peers_y = update_gossip_peers_1d(
&mut authority_discovery_service,
our_neighbors_y,
).await;
dispatch_validation_event_to_all_unbounded(
NetworkBridgeEvent::NewGossipTopology(gossip_peers),
NetworkBridgeEvent::NewGossipTopology(
NewGossipTopology {
session,
our_neighbors_x: gossip_peers_x,
our_neighbors_y: gossip_peers_y,
}
),
ctx.sender(),
);
}
@@ -624,6 +634,28 @@ where
}
}
/// Resolves one dimension of gossip-topology neighbors into per-authority peer info.
///
/// Each `(authority, validator_index)` pair yielded by `neighbors` is looked up through
/// `get_peer_id_by_authority_id`. When a peer ID is returned, the authority is inserted
/// into the result map with a `TopologyPeerInfo` holding that single peer ID and the
/// paired validator index. Authorities for which no peer ID is returned are left out.
async fn update_gossip_peers_1d<AD, N>(
	ads: &mut AD,
	neighbors: N,
) -> HashMap<AuthorityDiscoveryId, TopologyPeerInfo>
where
	AD: validator_discovery::AuthorityDiscovery,
	N: IntoIterator<Item = (AuthorityDiscoveryId, ValidatorIndex)>,
	N::IntoIter: std::iter::ExactSizeIterator,
{
	let pairs = neighbors.into_iter();

	// The iterator reports its exact length, so reserve room for the case where every
	// lookup succeeds.
	let mut resolved = HashMap::with_capacity(pairs.len());

	for (authority, validator_index) in pairs {
		// The lookup takes the authority by value; keep the original to use as the map key.
		if let Some(peer_id) = get_peer_id_by_authority_id(ads, authority.clone()).await {
			let info = TopologyPeerInfo { peer_ids: vec![peer_id], validator_index };
			resolved.insert(authority, info);
		}
	}

	resolved
}
async fn handle_network_messages<AD: validator_discovery::AuthorityDiscovery>(
mut sender: impl SubsystemSender,
mut network_service: impl Network,