Reduce network bandwidth, improve parablock times: optimize approval-distribution (#5164)

* gossip-support: be explicit about dimensions

* some guide updates

* update network-bridge to distinguish x and y dimensions

* get everything to compile

* beginnings

* some TODOs

* polkadot runtime: use relevant_authorities

* make gossip topologies per-session

* better formatting

* gossip support: use current session validators

* expand in comment

* adjust tests and fix index bug

* add past/present/future connection test and clean up code

* fmt

* network bridge: updated types

* update protocols to new gossip topology message

* guide updates

* add session to BlockApprovalMeta

* add session to block info

* refactor knowledge and remove most unify logic

* start replacing gossip_peers with new SessionTopologies

* add routing information to message state

* add some utilities to SessionTopology

* implement new gossip topology logic

* re-implement unify_with_peer

* distribute assignments according to topology

* finish grid topology implementation

* refactor network bridge slightly

* issue connection requests on all past/present/future

* fmt

* address grumbles

* tighten invariants in unify_with_peer

* implement random propagation

* refactor: extract required routing adjustment logic

* some block-age logic

* aggressively propagate messages when finality is slow

* overhaul aggression system to have 3 levels

* add aggression metrics

* remove aggression L3

* reduce random circulation

* remove PeerData

* get approval tests compiling

* use btree_map in known_by to make deterministic

* Revert "use btree_map in known_by to make deterministic"

This reverts commit 330d65343a7bb6fe4dd0f24bd8dbc15c0cbdbd9d.

* test XY grid propagation

* remove stray println

* test unshared dimension propagation

* add random gossip check

* test unify_with_peer better

* test sending after getting gossip topology

* test L1 aggression on originator

* test L1 aggression for non-originators

* test non-originator aggression L2

* fmt

* ~spellcheck

* fix statement-distribution tests

* fix flaky test

* fix metrics typo

* re-send periodically

* test resending

* typo

Co-authored-by: Bernhard Schuster <bernhard@ahoi.io>

* add more metrics about apd messages

* add back unify_with_peer logs

* make Resend an enum

* be more explicit when resending

* fmt

* fix error

* add a TODO for refactoring

* remove debug metrics

* add some guide stuff

* fmt

* update runtime API in test-runtime

Co-authored-by: Bernhard Schuster <bernhard@ahoi.io>
This commit is contained in:
asynchronous rob
2022-04-19 13:26:55 -05:00
committed by GitHub
parent edfa24bbc5
commit 79ecc53801
25 changed files with 2563 additions and 499 deletions
+15 -4
View File
@@ -49,7 +49,7 @@ use polkadot_primitives::v2::{
};
use polkadot_statement_table::v2::Misbehavior;
use std::{
collections::{BTreeMap, HashSet},
collections::{BTreeMap, HashMap, HashSet},
sync::Arc,
time::Duration,
};
@@ -378,9 +378,20 @@ pub enum NetworkBridgeMessage {
/// Inform the distribution subsystems about the new
/// gossip network topology formed.
NewGossipTopology {
/// Ids of our neighbors in the new gossip topology.
/// We're not necessarily connected to all of them, but we should.
our_neighbors: HashSet<AuthorityDiscoveryId>,
/// The session info this gossip topology is concerned with.
session: SessionIndex,
/// Ids of our neighbors in the X dimension of the new gossip topology,
/// along with their validator indices within the session.
///
/// We're not necessarily connected to all of them, but we should
/// try to be.
our_neighbors_x: HashMap<AuthorityDiscoveryId, ValidatorIndex>,
/// Ids of our neighbors in the Y dimension of the new gossip topology,
/// along with their validator indices within the session.
///
/// We're not necessarily connected to all of them, but we should
/// try to be.
our_neighbors_y: HashMap<AuthorityDiscoveryId, ValidatorIndex>,
},
}
@@ -14,12 +14,36 @@
// You should have received a copy of the GNU General Public License
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
use std::collections::HashSet;
use std::{
collections::{HashMap, HashSet},
convert::TryFrom,
};
pub use sc_network::{PeerId, ReputationChange};
use polkadot_node_network_protocol::{ObservedRole, OurView, View, WrongVariant};
use polkadot_primitives::v2::AuthorityDiscoveryId;
use polkadot_primitives::v2::{AuthorityDiscoveryId, SessionIndex, ValidatorIndex};
/// Information about a peer in the gossip topology for a session.
///
/// Used as the value type of the per-dimension neighbor maps in
/// `NewGossipTopology`, which are keyed by `AuthorityDiscoveryId`.
#[derive(Debug, Clone, PartialEq)]
pub struct TopologyPeerInfo {
/// The validator's known peer IDs.
///
/// NOTE(review): presumably this can be empty when no peer ID has been
/// resolved for the authority yet — confirm against the network bridge.
pub peer_ids: Vec<PeerId>,
/// The index of the validator in the discovery keys of the corresponding
/// `SessionInfo`. This can extend _beyond_ the set of active parachain validators.
pub validator_index: ValidatorIndex,
}
/// A struct indicating new gossip topology.
///
/// Carried as the payload of `NetworkBridgeEvent::NewGossipTopology`. The
/// topology is a grid, and our neighbors are listed separately for each of
/// its two dimensions ('X' and 'Y').
#[derive(Debug, Clone, PartialEq)]
pub struct NewGossipTopology {
/// The session index this topology corresponds to.
pub session: SessionIndex,
/// Neighbors in the 'X' dimension of the grid, keyed by authority discovery ID.
pub our_neighbors_x: HashMap<AuthorityDiscoveryId, TopologyPeerInfo>,
/// Neighbors in the 'Y' dimension of the grid, keyed by authority discovery ID.
pub our_neighbors_y: HashMap<AuthorityDiscoveryId, TopologyPeerInfo>,
}
/// Events from network.
#[derive(Debug, Clone, PartialEq)]
@@ -30,14 +54,14 @@ pub enum NetworkBridgeEvent<M> {
/// A peer has disconnected.
PeerDisconnected(PeerId),
/// Our neighbors in the new gossip topology.
/// Our neighbors in the new gossip topology for the session.
/// We're not necessarily connected to all of them.
///
/// This message is issued only on the validation peer set.
///
/// Note, that the distribution subsystems need to handle the last
/// view update of the newly added gossip peers manually.
NewGossipTopology(HashSet<PeerId>),
NewGossipTopology(NewGossipTopology),
/// Peer has sent a message.
PeerMessage(PeerId, M),
@@ -77,8 +101,8 @@ impl<M> NetworkBridgeEvent<M> {
NetworkBridgeEvent::PeerConnected(peer.clone(), role.clone(), authority_id.clone()),
NetworkBridgeEvent::PeerDisconnected(ref peer) =>
NetworkBridgeEvent::PeerDisconnected(peer.clone()),
NetworkBridgeEvent::NewGossipTopology(ref peers) =>
NetworkBridgeEvent::NewGossipTopology(peers.clone()),
NetworkBridgeEvent::NewGossipTopology(ref topology) =>
NetworkBridgeEvent::NewGossipTopology(topology.clone()),
NetworkBridgeEvent::PeerViewChange(ref peer, ref view) =>
NetworkBridgeEvent::PeerViewChange(peer.clone(), view.clone()),
NetworkBridgeEvent::OurViewChange(ref view) =>