Reduce network bandwidth, improve parablock times: optimize approval-distribution (#5164)

* gossip-support: be explicit about dimensions

* some guide updates

* update network-bridge to distinguish x and y dimensions

* get everything to compile

* beginnings

* some TODOs

* polkadot runtime: use relevant_authorities

* make gossip topologies per-session

* better formatting

* gossip support: use current session validators

* expand in comment

* adjust tests and fix index bug

* add past/present/future connection test and clean up code

* fmt

* network bridge: updated types

* update protocols to new gossip topology message

* guide updates

* add session to BlockApprovalMeta

* add session to block info

* refactor knowledge and remove most unify logic

* start replacing gossip_peers with new SessionTopologies

* add routing information to message state

* add some utilities to SessionTopology

* implement new gossip topology logic

* re-implement unify_with_peer

* distribute assignments according to topology

* finish grid topology implementation

* refactor network bridge slightly

* issue connection requests on all past/present/future

* fmt

* address grumbles

* tighten invariants in unify_with_peer

* implement random propagation

* refactor: extract required routing adjustment logic

* some block-age logic

* aggressively propagate messages when finality is slow

* overhaul aggression system to have 3 levels

* add aggression metrics

* remove aggression L3

* reduce random circulation

* remove PeerData

* get approval tests compiling

* use btree_map in known_by to make deterministic

* Revert "use btree_map in known_by to make deterministic"

This reverts commit 330d65343a7bb6fe4dd0f24bd8dbc15c0cbdbd9d.

* test XY grid propagation

* remove stray println

* test unshared dimension propagation

* add random gossip check

* test unify_with_peer better

* test sending after getting gossip topology

* test L1 aggression on originator

* test L1 aggression for non-originators

* test non-originator aggression L2

* fmt

* ~spellcheck

* fix statement-distribution tests

* fix flaky test

* fix metrics typo

* re-send periodically

* test resending

* typo

Co-authored-by: Bernhard Schuster <bernhard@ahoi.io>

* add more metrics about apd messages

* add back unify_with_peer logs

* make Resend an enum

* be more explicit when resending

* fmt

* fix error

* add a TODO for refactoring

* remove debug metrics

* add some guide stuff

* fmt

* update runtime API in test-runtime

Co-authored-by: Bernhard Schuster <bernhard@ahoi.io>
This commit is contained in:
asynchronous rob
2022-04-19 13:26:55 -05:00
committed by GitHub
parent edfa24bbc5
commit 79ecc53801
25 changed files with 2563 additions and 499 deletions
@@ -1632,7 +1632,14 @@ async fn handle_network_update(
});
}
},
NetworkBridgeEvent::NewGossipTopology(new_peers) => {
NetworkBridgeEvent::NewGossipTopology(topology) => {
// Combine all peers in the x & y direction as we don't make any distinction.
let new_peers: HashSet<PeerId> = topology
.our_neighbors_x
.values()
.chain(topology.our_neighbors_y.values())
.flat_map(|peer_info| peer_info.peer_ids.iter().cloned())
.collect();
let _ = metrics.time_network_bridge_update_v1("new_gossip_topology");
let newly_added: Vec<PeerId> = new_peers.difference(gossip_peers).cloned().collect();
*gossip_peers = new_peers;
@@ -34,11 +34,12 @@ use polkadot_primitives_test_helpers::{
};
use polkadot_subsystem::{
jaeger,
messages::{RuntimeApiMessage, RuntimeApiRequest},
messages::{network_bridge_event, RuntimeApiMessage, RuntimeApiRequest},
ActivatedLeaf, LeafStatus,
};
use sc_keystore::LocalKeystore;
use sp_application_crypto::{sr25519::Pair, AppKey, Pair as TraitPair};
use sp_authority_discovery::AuthorityPair;
use sp_keyring::Sr25519Keyring;
use sp_keystore::{CryptoStore, SyncCryptoStore, SyncCryptoStorePtr};
use std::{iter::FromIterator as _, sync::Arc, time::Duration};
@@ -1964,12 +1965,34 @@ fn handle_multiple_seconded_statements() {
// Explicitly add all `lucky` peers to the gossip peers to ensure that neither `peerA` nor `peerB`
// receive statements
let gossip_topology = {
let mut t = network_bridge_event::NewGossipTopology {
session: 1,
our_neighbors_x: HashMap::new(),
our_neighbors_y: HashMap::new(),
};
// This is relying on the fact that statement distribution
// just extracts the peer IDs from this struct and does nothing else
// with it.
for (i, peer) in lucky_peers.iter().enumerate() {
let authority_id = AuthorityPair::generate().0.public();
t.our_neighbors_x.insert(
authority_id,
network_bridge_event::TopologyPeerInfo {
peer_ids: vec![peer.clone()],
validator_index: (i as u32).into(),
},
);
}
t
};
handle
.send(FromOverseer::Communication {
msg: StatementDistributionMessage::NetworkBridgeUpdateV1(
NetworkBridgeEvent::NewGossipTopology(
lucky_peers.iter().cloned().collect::<HashSet<_>>(),
),
NetworkBridgeEvent::NewGossipTopology(gossip_topology),
),
})
.await;