Rework priority groups, take 2 (#7700)

* Rework priority groups

* Broken tests fix

* Fix warning causing CI to fail

* [Hack] Try restore backwards-compatibility

* Fix peerset bug

* Doc fixes and clean up

* Error on state mismatch

* Try debug CI

* CI debugging

* [CI debug] Can I please see this line

* Revert "[CI debug] Can I please see this line"

This reverts commit 4b7cf7c1511f579cd818b21d46bd11642dfac5cb.

* Revert "CI debugging"

This reverts commit 9011f1f564b860386dc7dd6ffa9fc34ea7107623.

* Fix error! which isn't actually an error

* Fix Ok() returned when actually Err()

* Tweaks and fixes

* Fix build

* Peerset bugfix

* [Debug] Try outbound GrandPa slots

* Another bugfix

* Revert "[Debug] Try outbound GrandPa slots"

This reverts commit d175b9208c088faad77d9f0ce36ff6f48bd92dd3.

* [Debug] Try outbound GrandPa slots

* Apply suggestions from code review

Co-authored-by: Max Inden <mail@max-inden.de>

* Use consts for hardcoded peersets

* Revert "Try debug CI"

This reverts commit 62c4ad5e79c03d561c714a008022ecac463a597e.

* Renames

* Line widths

* Add doc

Co-authored-by: Max Inden <mail@max-inden.de>
This commit is contained in:
Pierre Krieger
2021-01-07 14:52:39 +01:00
committed by GitHub
parent 94bb119ef9
commit 779c4f8616
30 changed files with 2742 additions and 2293 deletions
@@ -38,8 +38,6 @@ pub enum Error {
CallingRuntime(sp_blockchain::Error),
/// Received a dht record with a key that does not match any in-flight awaited keys.
ReceivingUnexpectedRecord,
/// Failed to set the authority discovery peerset priority group in the peerset module.
SettingPeersetPriorityGroup(String),
/// Failed to encode a protobuf payload.
EncodingProto(prost::EncodeError),
/// Failed to decode a protobuf payload.
@@ -57,10 +57,6 @@ pub mod tests;
const LOG_TARGET: &'static str = "sub-authority-discovery";
/// Name of the Substrate peerset priority group for authorities discovered through the authority
/// discovery module.
const AUTHORITIES_PRIORITY_GROUP_NAME: &'static str = "authorities";
/// Maximum number of addresses cached per authority. Additional addresses are discarded.
const MAX_ADDRESSES_PER_AUTHORITY: usize = 10;
@@ -115,9 +111,6 @@ pub struct Worker<Client, Network, Block, DhtEventStream> {
publish_interval: ExpIncInterval,
/// Interval at which to request addresses of authorities, refilling the pending lookups queue.
query_interval: ExpIncInterval,
/// Interval on which to set the peerset priority group to a new random
/// set of addresses.
priority_group_set_interval: ExpIncInterval,
/// Queue of throttled lookups pending to be passed to the network.
pending_lookups: Vec<AuthorityId>,
@@ -166,13 +159,6 @@ where
Duration::from_secs(2),
config.max_query_interval,
);
let priority_group_set_interval = ExpIncInterval::new(
Duration::from_secs(2),
// Trade-off between node connection churn and connectivity. Using half of
// [`crate::WorkerConfig::max_query_interval`] to update priority group once at the
// beginning and once in the middle of each query interval.
config.max_query_interval / 2,
);
let addr_cache = AddrCache::new();
@@ -196,7 +182,6 @@ where
dht_event_rx,
publish_interval,
query_interval,
priority_group_set_interval,
pending_lookups: Vec::new(),
in_flight_lookups: HashMap::new(),
addr_cache,
@@ -226,15 +211,6 @@ where
msg = self.from_service.select_next_some() => {
self.process_message_from_service(msg);
},
// Set peerset priority group to a new random set of addresses.
_ = self.priority_group_set_interval.next().fuse() => {
if let Err(e) = self.set_priority_group().await {
error!(
target: LOG_TARGET,
"Failed to set priority group: {:?}", e,
);
}
},
// Publish own addresses.
_ = self.publish_interval.next().fuse() => {
if let Err(e) = self.publish_ext_addresses().await {
@@ -582,38 +558,6 @@ where
Ok(intersection)
}
/// Set the peer set 'authority' priority group to a new random set of
/// [`Multiaddr`]s.
async fn set_priority_group(&self) -> Result<()> {
let addresses = self.addr_cache.get_random_subset();
if addresses.is_empty() {
debug!(
target: LOG_TARGET,
"Got no addresses in cache for peerset priority group.",
);
return Ok(());
}
if let Some(metrics) = &self.metrics {
metrics.priority_group_size.set(addresses.len().try_into().unwrap_or(std::u64::MAX));
}
debug!(
target: LOG_TARGET,
"Applying priority group {:?} to peerset.", addresses,
);
self.network
.set_priority_group(
AUTHORITIES_PRIORITY_GROUP_NAME.to_string(),
addresses.into_iter().collect(),
).await
.map_err(Error::SettingPeersetPriorityGroup)?;
Ok(())
}
}
/// NetworkProvider provides [`Worker`] with all necessary hooks into the
@@ -621,13 +565,6 @@ where
/// [`sc_network::NetworkService`] directly is necessary to unit test [`Worker`].
#[async_trait]
pub trait NetworkProvider: NetworkStateInfo {
/// Modify a peerset priority group.
async fn set_priority_group(
&self,
group_id: String,
peers: HashSet<libp2p::Multiaddr>,
) -> std::result::Result<(), String>;
/// Start putting a value in the Dht.
fn put_value(&self, key: libp2p::kad::record::Key, value: Vec<u8>);
@@ -641,13 +578,6 @@ where
B: BlockT + 'static,
H: ExHashT,
{
async fn set_priority_group(
&self,
group_id: String,
peers: HashSet<libp2p::Multiaddr>,
) -> std::result::Result<(), String> {
self.set_priority_group(group_id, peers).await
}
fn put_value(&self, key: libp2p::kad::record::Key, value: Vec<u8>) {
self.put_value(key, value)
}
@@ -670,7 +600,6 @@ pub(crate) struct Metrics {
dht_event_received: CounterVec<U64>,
handle_value_found_event_failure: Counter<U64>,
known_authorities_count: Gauge<U64>,
priority_group_size: Gauge<U64>,
}
impl Metrics {
@@ -730,13 +659,6 @@ impl Metrics {
)?,
registry,
)?,
priority_group_size: register(
Gauge::new(
"authority_discovery_priority_group_size",
"Number of addresses passed to the peer set as a priority group."
)?,
registry,
)?,
})
}
}
@@ -17,17 +17,11 @@
// along with this program. If not, see <https://www.gnu.org/licenses/>.
use libp2p::core::multiaddr::{Multiaddr, Protocol};
use rand::seq::SliceRandom;
use std::collections::HashMap;
use sp_authority_discovery::AuthorityId;
use sc_network::PeerId;
/// The maximum number of authority connections initialized through the authority discovery module.
///
/// In other words the maximum size of the `authority` peerset priority group.
const MAX_NUM_AUTHORITY_CONN: usize = 10;
/// Cache for [`AuthorityId`] -> [`Vec<Multiaddr>`] and [`PeerId`] -> [`AuthorityId`] mappings.
pub(super) struct AddrCache {
authority_id_to_addresses: HashMap<AuthorityId, Vec<Multiaddr>>,
@@ -77,30 +71,6 @@ impl AddrCache {
self.peer_id_to_authority_id.get(peer_id)
}
/// Returns a single address for a random subset (maximum of [`MAX_NUM_AUTHORITY_CONN`]) of all
/// known authorities.
pub fn get_random_subset(&self) -> Vec<Multiaddr> {
let mut rng = rand::thread_rng();
let mut addresses = self
.authority_id_to_addresses
.iter()
.filter_map(|(_authority_id, addresses)| {
debug_assert!(!addresses.is_empty());
addresses
.choose(&mut rng)
})
.collect::<Vec<&Multiaddr>>();
addresses.sort_unstable_by(|a, b| a.as_ref().cmp(b.as_ref()));
addresses.dedup();
addresses
.choose_multiple(&mut rng, MAX_NUM_AUTHORITY_CONN)
.map(|a| (**a).clone())
.collect()
}
/// Removes all [`PeerId`]s and [`Multiaddr`]s from the cache that are not related to the given
/// [`AuthorityId`]s.
pub fn retain_ids(&mut self, authority_ids: &Vec<AuthorityId>) {
@@ -192,11 +162,6 @@ mod tests {
cache.insert(second.0.clone(), vec![second.1.clone()]);
cache.insert(third.0.clone(), vec![third.1.clone()]);
let subset = cache.get_random_subset();
assert!(
subset.contains(&first.1) && subset.contains(&second.1) && subset.contains(&third.1),
"Expect initial subset to contain all authorities.",
);
assert_eq!(
Some(&vec![third.1.clone()]),
cache.get_addresses_by_authority_id(&third.0),
@@ -210,12 +175,6 @@ mod tests {
cache.retain_ids(&vec![first.0, second.0]);
let subset = cache.get_random_subset();
assert!(
subset.contains(&first.1) || subset.contains(&second.1),
"Expected both first and second authority."
);
assert!(!subset.contains(&third.1), "Did not expect address from third authority");
assert_eq!(
None, cache.get_addresses_by_authority_id(&third.0),
"Expect `get_addresses_by_authority_id` to not return `None` for third authority."
@@ -18,7 +18,7 @@
use crate::worker::schema;
use std::{iter::FromIterator, sync::{Arc, Mutex}, task::Poll};
use std::{sync::{Arc, Mutex}, task::Poll};
use async_trait::async_trait;
use futures::channel::mpsc::{self, channel};
@@ -112,10 +112,6 @@ sp_api::mock_impl_runtime_apis! {
pub enum TestNetworkEvent {
GetCalled(kad::record::Key),
PutCalled(kad::record::Key, Vec<u8>),
SetPriorityGroupCalled {
group_id: String,
peers: HashSet<Multiaddr>
},
}
pub struct TestNetwork {
@@ -125,7 +121,6 @@ pub struct TestNetwork {
// vectors below.
pub put_value_call: Arc<Mutex<Vec<(kad::record::Key, Vec<u8>)>>>,
pub get_value_call: Arc<Mutex<Vec<kad::record::Key>>>,
pub set_priority_group_call: Arc<Mutex<Vec<(String, HashSet<Multiaddr>)>>>,
event_sender: mpsc::UnboundedSender<TestNetworkEvent>,
event_receiver: Option<mpsc::UnboundedReceiver<TestNetworkEvent>>,
}
@@ -147,7 +142,6 @@ impl Default for TestNetwork {
],
put_value_call: Default::default(),
get_value_call: Default::default(),
set_priority_group_call: Default::default(),
event_sender: tx,
event_receiver: Some(rx),
}
@@ -156,21 +150,6 @@ impl Default for TestNetwork {
#[async_trait]
impl NetworkProvider for TestNetwork {
async fn set_priority_group(
&self,
group_id: String,
peers: HashSet<Multiaddr>,
) -> std::result::Result<(), String> {
self.set_priority_group_call
.lock()
.unwrap()
.push((group_id.clone(), peers.clone()));
self.event_sender.clone().unbounded_send(TestNetworkEvent::SetPriorityGroupCalled {
group_id,
peers,
}).unwrap();
Ok(())
}
fn put_value(&self, key: kad::record::Key, value: Vec<u8>) {
self.put_value_call.lock().unwrap().push((key.clone(), value.clone()));
self.event_sender.clone().unbounded_send(TestNetworkEvent::PutCalled(key, value)).unwrap();
@@ -296,14 +275,6 @@ fn publish_discover_cycle() {
let (_dht_event_tx, dht_event_rx) = channel(1000);
let network: Arc<TestNetwork> = Arc::new(Default::default());
let node_a_multiaddr = {
let peer_id = network.local_peer_id();
let address = network.external_addresses().pop().unwrap();
address.with(multiaddr::Protocol::P2p(
peer_id.into(),
))
};
let key_store = KeyStore::new();
@@ -365,19 +336,6 @@ fn publish_discover_cycle() {
// Make authority discovery handle the event.
worker.handle_dht_event(dht_event).await;
worker.set_priority_group().await.unwrap();
// Expect authority discovery to set the priority set.
assert_eq!(network.set_priority_group_call.lock().unwrap().len(), 1);
assert_eq!(
network.set_priority_group_call.lock().unwrap()[0],
(
"authorities".to_string(),
HashSet::from_iter(vec![node_a_multiaddr.clone()].into_iter())
)
);
}.boxed_local().into());
pool.run();