[litep2p](https://github.com/altonen/litep2p) is a libp2p-compatible P2P networking library. It supports all of the features of `rust-libp2p` that are currently used by Polkadot SDK. Compared to `rust-libp2p`, `litep2p` has a quite different architecture, which is why the new `litep2p` network backend can reuse only a little of the existing code in `sc-network`. The design has mainly been influenced by how we'd wish to structure our networking-related code in Polkadot SDK: independent higher-level protocols directly communicating with the network over links that support bidirectional backpressure. A good example is the `NotificationHandle`/`RequestResponseHandle` abstractions, which allow, e.g., `SyncingEngine` to communicate directly with peers to announce/request blocks.

I've tried running `polkadot --network-backend litep2p` with a few different peer configurations and there is a noticeable reduction in networking CPU usage. For high load (`--out-peers 200`), networking CPU usage goes down from ~110% to ~30% (80 pp) and for normal load (`--out-peers 40`), the usage goes down from ~55% to ~18% (37 pp).

These should not be taken as final numbers because:

a) there is still some low-hanging optimization fruit, such as enabling [receive window auto-tuning](https://github.com/libp2p/rust-yamux/pull/176), integrating `Peerset` more closely with `litep2p`, or improving memory usage of the WebSocket transport
b) fixing bugs/instabilities that incorrectly cause `litep2p` to do less work will increase the networking CPU usage
c) verification in a more diverse set of tests/conditions is needed

Nevertheless, these numbers should give an early estimate for the CPU usage of the new networking backend.

This PR consists of three separate changes:
* introduce a generic `PeerId` (wrapper around `Multihash`) so that we don't have to use `NetworkService::PeerId` in every part of the code that uses a `PeerId`
* introduce a `NetworkBackend` trait, implement it for the libp2p network stack, and make Polkadot SDK generic over `NetworkBackend`
* implement `NetworkBackend` for litep2p

The new library should be considered experimental, which is why `rust-libp2p` will remain the default option for the time being. This PR currently depends on the master branch of `litep2p`, but I'll cut a new release for the library once all review comments have been addressed.

---------

Signed-off-by: Alexandru Vasile <alexandru.vasile@parity.io>
Co-authored-by: Dmitry Markin <dmitry@markin.tech>
Co-authored-by: Alexandru Vasile <60601340+lexnv@users.noreply.github.com>
Co-authored-by: Alexandru Vasile <alexandru.vasile@parity.io>
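To make the `NetworkBackend` change concrete, here is a minimal sketch of what "generic over `NetworkBackend`" means in practice. It is illustrative only: apart from the names `NetworkBackend` and `PeerId`, which this PR introduces, the trait shape and method below are assumptions, not the actual `sc-network` API.

```rust
use std::fmt::Debug;

/// Illustrative stand-in for the real (much richer) `NetworkBackend` trait,
/// which also covers notification protocols, request-response protocols,
/// configuration, etc.
pub trait NetworkBackend: Send + Sync + 'static {
    /// Backend-agnostic peer identity; per this PR, a wrapper around a
    /// `Multihash` rather than the libp2p-specific `NetworkService::PeerId`.
    type PeerId: Debug + Clone;

    fn local_peer_id(&self) -> Self::PeerId;
}

/// Higher-level code is written once over any backend; the concrete
/// implementation (libp2p or litep2p) is then selected at startup,
/// e.g. via `--network-backend litep2p`.
pub fn log_identity<N: NetworkBackend>(network: &N) {
    println!("running with peer id {:?}", network.local_peer_id());
}
```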
// This file is part of Substrate.

// Copyright (C) Parity Technologies (UK) Ltd.
// SPDX-License-Identifier: GPL-3.0-or-later WITH Classpath-exception-2.0

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.

use crate::{
    error::{Error, Result},
    interval::ExpIncInterval,
    ServicetoWorkerMsg, WorkerConfig,
};

use std::{
    collections::{HashMap, HashSet},
    marker::PhantomData,
    sync::Arc,
    time::Duration,
};

use futures::{channel::mpsc, future, stream::Fuse, FutureExt, Stream, StreamExt};

use addr_cache::AddrCache;
use codec::{Decode, Encode};
use ip_network::IpNetwork;
use linked_hash_set::LinkedHashSet;
use multihash::{Code, Multihash, MultihashDigest};

use log::{debug, error, log_enabled};
use prometheus_endpoint::{register, Counter, CounterVec, Gauge, Opts, U64};
use prost::Message;
use rand::{seq::SliceRandom, thread_rng};

use sc_network::{
    event::DhtEvent, multiaddr, KademliaKey, Multiaddr, NetworkDHTProvider, NetworkSigner,
    NetworkStateInfo,
};
use sc_network_types::PeerId;
use sp_api::{ApiError, ProvideRuntimeApi};
use sp_authority_discovery::{
    AuthorityDiscoveryApi, AuthorityId, AuthorityPair, AuthoritySignature,
};
use sp_blockchain::HeaderBackend;
use sp_core::crypto::{key_types, ByteArray, Pair};
use sp_keystore::{Keystore, KeystorePtr};
use sp_runtime::traits::Block as BlockT;

mod addr_cache;
/// Dht payload schemas generated from Protobuf definitions via Prost crate in build.rs.
mod schema {
    #[cfg(test)]
    mod tests;

    include!(concat!(env!("OUT_DIR"), "/authority_discovery_v2.rs"));
}
#[cfg(test)]
pub mod tests;

const LOG_TARGET: &str = "sub-authority-discovery";

/// Maximum number of addresses cached per authority. Additional addresses are discarded.
const MAX_ADDRESSES_PER_AUTHORITY: usize = 10;

/// Maximum number of in-flight DHT lookups at any given point in time.
const MAX_IN_FLIGHT_LOOKUPS: usize = 8;

/// Role an authority discovery [`Worker`] can run as.
pub enum Role {
    /// Publish own addresses and discover addresses of others.
    PublishAndDiscover(KeystorePtr),
    /// Discover addresses of others.
    Discover,
}

/// An authority discovery [`Worker`] can publish the local node's addresses as well as discover
/// those of other nodes via a Kademlia DHT.
///
/// When constructed with [`Role::PublishAndDiscover`] a [`Worker`] will
///
/// 1. Retrieve its external addresses (including peer id).
///
/// 2. Get the list of keys owned by the local node participating in the current authority set.
///
/// 3. Sign the addresses with the keys.
///
/// 4. Put addresses and signature as a record with the authority id as a key on a Kademlia DHT.
///
/// When constructed with either [`Role::PublishAndDiscover`] or [`Role::Discover`] a [`Worker`]
/// will
///
/// 1. Retrieve the current and next set of authorities.
///
/// 2. Start DHT queries for the ids of the authorities.
///
/// 3. Validate the signatures of the retrieved key value pairs.
///
/// 4. Add the retrieved external addresses as priority nodes to the
/// network peerset.
///
/// 5. Allow querying of the collected addresses via the [`crate::Service`].
pub struct Worker<Client, Block, DhtEventStream> {
    /// Channel receiver for messages sent by a [`crate::Service`].
    from_service: Fuse<mpsc::Receiver<ServicetoWorkerMsg>>,

    client: Arc<Client>,

    network: Arc<dyn NetworkProvider>,

    /// Channel we receive Dht events on.
    dht_event_rx: DhtEventStream,

    /// Interval to be proactive, publishing own addresses.
    publish_interval: ExpIncInterval,

    /// Pro-actively publish our own addresses at this interval, if the keys in the keystore
    /// have changed.
    publish_if_changed_interval: ExpIncInterval,

    /// List of keys onto which addresses have been published at the latest publication.
    /// Used to check whether they have changed.
    latest_published_keys: HashSet<AuthorityId>,
    /// List of the kademlia keys that have been published at the latest publication.
    /// Used to associate DHT events with our published records.
    latest_published_kad_keys: HashSet<KademliaKey>,

    /// Same value as in the configuration.
    publish_non_global_ips: bool,

    /// Public addresses set by the node operator to always publish first in the authority
    /// discovery DHT record.
    public_addresses: LinkedHashSet<Multiaddr>,

    /// Same value as in the configuration.
    strict_record_validation: bool,

    /// Interval at which to request addresses of authorities, refilling the pending lookups queue.
    query_interval: ExpIncInterval,

    /// Queue of throttled lookups pending to be passed to the network.
    pending_lookups: Vec<AuthorityId>,

    /// Set of in-flight lookups.
    in_flight_lookups: HashMap<KademliaKey, AuthorityId>,

    addr_cache: addr_cache::AddrCache,

    metrics: Option<Metrics>,

    role: Role,

    phantom: PhantomData<Block>,
}

/// Wrapper for [`AuthorityDiscoveryApi`](sp_authority_discovery::AuthorityDiscoveryApi). Can be
/// implemented by any struct without dependency on the runtime.
#[async_trait::async_trait]
pub trait AuthorityDiscovery<Block: BlockT> {
    /// Retrieve authority identifiers of the current and next authority set.
    async fn authorities(&self, at: Block::Hash)
        -> std::result::Result<Vec<AuthorityId>, ApiError>;

    /// Retrieve the best block hash.
    async fn best_hash(&self) -> std::result::Result<Block::Hash, Error>;
}

#[async_trait::async_trait]
impl<Block, T> AuthorityDiscovery<Block> for T
where
    T: ProvideRuntimeApi<Block> + HeaderBackend<Block> + Send + Sync,
    T::Api: AuthorityDiscoveryApi<Block>,
    Block: BlockT,
{
    async fn authorities(
        &self,
        at: Block::Hash,
    ) -> std::result::Result<Vec<AuthorityId>, ApiError> {
        self.runtime_api().authorities(at)
    }

    async fn best_hash(&self) -> std::result::Result<Block::Hash, Error> {
        Ok(self.info().best_hash)
    }
}

impl<Client, Block, DhtEventStream> Worker<Client, Block, DhtEventStream>
where
    Block: BlockT + Unpin + 'static,
    Client: AuthorityDiscovery<Block> + 'static,
    DhtEventStream: Stream<Item = DhtEvent> + Unpin,
{
    /// Construct a [`Worker`].
    pub(crate) fn new(
        from_service: mpsc::Receiver<ServicetoWorkerMsg>,
        client: Arc<Client>,
        network: Arc<dyn NetworkProvider>,
        dht_event_rx: DhtEventStream,
        role: Role,
        prometheus_registry: Option<prometheus_endpoint::Registry>,
        config: WorkerConfig,
    ) -> Self {
        // When a node starts up, publishing and querying might fail for various reasons, for
        // example due to being not yet fully bootstrapped on the DHT. Thus one should retry rather
        // sooner than later. On the other hand, a long-running node is likely well connected and
        // thus timely retries are not needed. For this reason, use an exponentially increasing
        // interval for `publish_interval`, `query_interval` and `priority_group_set_interval`
        // instead of a constant interval.
        let publish_interval =
            ExpIncInterval::new(Duration::from_secs(2), config.max_publish_interval);
        let query_interval = ExpIncInterval::new(Duration::from_secs(2), config.max_query_interval);

        // An `ExpIncInterval` is overkill here because the interval is constant, but it keeps
        // the code consistent.
        let publish_if_changed_interval =
            ExpIncInterval::new(config.keystore_refresh_interval, config.keystore_refresh_interval);

        let addr_cache = AddrCache::new();

        let metrics = match prometheus_registry {
            Some(registry) => match Metrics::register(&registry) {
                Ok(metrics) => Some(metrics),
                Err(e) => {
                    error!(target: LOG_TARGET, "Failed to register metrics: {}", e);
                    None
                },
            },
            None => None,
        };

        let public_addresses = {
            let local_peer_id: Multihash = network.local_peer_id().into();

            config
                .public_addresses
                .into_iter()
                .map(|mut address| {
                    if let Some(multiaddr::Protocol::P2p(peer_id)) = address.iter().last() {
                        if peer_id != local_peer_id {
                            error!(
                                target: LOG_TARGET,
                                "Discarding invalid local peer ID in public address {address}.",
                            );
                        }
                        // Always discard `/p2p/...` protocol for proper address comparison (local
                        // peer id will be added before publishing).
                        address.pop();
                    }
                    address
                })
                .collect()
        };

        Worker {
            from_service: from_service.fuse(),
            client,
            network,
            dht_event_rx,
            publish_interval,
            publish_if_changed_interval,
            latest_published_keys: HashSet::new(),
            latest_published_kad_keys: HashSet::new(),
            publish_non_global_ips: config.publish_non_global_ips,
            public_addresses,
            strict_record_validation: config.strict_record_validation,
            query_interval,
            pending_lookups: Vec::new(),
            in_flight_lookups: HashMap::new(),
            addr_cache,
            role,
            metrics,
            phantom: PhantomData,
        }
    }

    /// Start the worker.
    pub async fn run(mut self) {
        loop {
            self.start_new_lookups();

            futures::select! {
                // Process incoming events.
                event = self.dht_event_rx.next().fuse() => {
                    if let Some(event) = event {
                        self.handle_dht_event(event).await;
                    } else {
                        // This point is reached if the network has shut down, at which point there is not
                        // much else to do than to shut down the authority discovery as well.
                        return;
                    }
                },
                // Handle messages from [`Service`]. Ignore if sender side is closed.
                msg = self.from_service.select_next_some() => {
                    self.process_message_from_service(msg);
                },
                // Publish own addresses.
                only_if_changed = future::select(
                    self.publish_interval.next().map(|_| false),
                    self.publish_if_changed_interval.next().map(|_| true)
                ).map(|e| e.factor_first().0).fuse() => {
                    if let Err(e) = self.publish_ext_addresses(only_if_changed).await {
                        error!(
                            target: LOG_TARGET,
                            "Failed to publish external addresses: {}", e,
                        );
                    }
                },
                // Request addresses of authorities.
                _ = self.query_interval.next().fuse() => {
                    if let Err(e) = self.refill_pending_lookups_queue().await {
                        error!(
                            target: LOG_TARGET,
                            "Failed to request addresses of authorities: {}", e,
                        );
                    }
                },
            }
        }
    }

    fn process_message_from_service(&self, msg: ServicetoWorkerMsg) {
        match msg {
            ServicetoWorkerMsg::GetAddressesByAuthorityId(authority, sender) => {
                let _ = sender.send(
                    self.addr_cache.get_addresses_by_authority_id(&authority).map(Clone::clone),
                );
            },
            ServicetoWorkerMsg::GetAuthorityIdsByPeerId(peer_id, sender) => {
                let _ = sender
                    .send(self.addr_cache.get_authority_ids_by_peer_id(&peer_id).map(Clone::clone));
            },
        }
    }

    fn addresses_to_publish(&self) -> impl Iterator<Item = Multiaddr> {
        let local_peer_id = self.network.local_peer_id();
        let publish_non_global_ips = self.publish_non_global_ips;
        let addresses = self
            .public_addresses
            .clone()
            .into_iter()
            .chain(self.network.external_addresses().into_iter().filter_map(|mut address| {
                // Make sure the reported external address does not contain `/p2p/...` protocol.
                if let Some(multiaddr::Protocol::P2p(peer_id)) = address.iter().last() {
                    if peer_id != *local_peer_id.as_ref() {
                        error!(
                            target: LOG_TARGET,
                            "Network returned external address '{address}' with peer id \
                             not matching the local peer id '{local_peer_id}'.",
                        );
                        debug_assert!(false);
                    }
                    address.pop();
                }

                if self.public_addresses.contains(&address) {
                    // Already added above.
                    None
                } else {
                    Some(address)
                }
            }))
            .filter(move |address| {
                if publish_non_global_ips {
                    return true
                }

                address.iter().all(|protocol| match protocol {
                    // The `ip_network` library is used because its `is_global()` method is stable,
                    // while `is_global()` in the standard library currently isn't.
                    multiaddr::Protocol::Ip4(ip) if !IpNetwork::from(ip).is_global() => false,
                    multiaddr::Protocol::Ip6(ip) if !IpNetwork::from(ip).is_global() => false,
                    _ => true,
                })
            })
            .collect::<Vec<_>>();

        debug!(
            target: LOG_TARGET,
            "Authority DHT record peer_id='{local_peer_id}' addresses='{addresses:?}'",
        );

        // The address must include the local peer id.
        let local_peer_id: Multihash = local_peer_id.into();
        addresses
            .into_iter()
            .map(move |a| a.with(multiaddr::Protocol::P2p(local_peer_id)))
    }

    /// Publish own public addresses.
    ///
    /// If `only_if_changed` is true, the function has no effect if the list of keys to publish
    /// is equal to `self.latest_published_keys`.
    async fn publish_ext_addresses(&mut self, only_if_changed: bool) -> Result<()> {
        let key_store = match &self.role {
            Role::PublishAndDiscover(key_store) => key_store,
            Role::Discover => return Ok(()),
        };

        let keys =
            Worker::<Client, Block, DhtEventStream>::get_own_public_keys_within_authority_set(
                key_store.clone(),
                self.client.as_ref(),
            )
            .await?
            .into_iter()
            .collect::<HashSet<_>>();

        if only_if_changed {
            // If the authority keys did not change and the `publish_if_changed_interval` was
            // triggered then do nothing.
            if keys == self.latest_published_keys {
                return Ok(())
            }

            // We have detected a change in the authority keys, reset the timers to
            // publish and gather data faster.
            self.publish_interval.set_to_start();
            self.query_interval.set_to_start();
        }

        let addresses = serialize_addresses(self.addresses_to_publish());

        if let Some(metrics) = &self.metrics {
            metrics.publish.inc();
            metrics
                .amount_addresses_last_published
                .set(addresses.len().try_into().unwrap_or(std::u64::MAX));
        }

        let serialized_record = serialize_authority_record(addresses)?;
        let peer_signature = sign_record_with_peer_id(&serialized_record, &self.network)?;

        let keys_vec = keys.iter().cloned().collect::<Vec<_>>();

        let kv_pairs = sign_record_with_authority_ids(
            serialized_record,
            Some(peer_signature),
            key_store.as_ref(),
            keys_vec,
        )?;

        self.latest_published_kad_keys = kv_pairs.iter().map(|(k, _)| k.clone()).collect();

        for (key, value) in kv_pairs.into_iter() {
            self.network.put_value(key, value);
        }

        self.latest_published_keys = keys;

        Ok(())
    }

    async fn refill_pending_lookups_queue(&mut self) -> Result<()> {
        let best_hash = self.client.best_hash().await?;

        let local_keys = match &self.role {
            Role::PublishAndDiscover(key_store) => key_store
                .sr25519_public_keys(key_types::AUTHORITY_DISCOVERY)
                .into_iter()
                .collect::<HashSet<_>>(),
            Role::Discover => HashSet::new(),
        };

        let mut authorities = self
            .client
            .authorities(best_hash)
            .await
            .map_err(|e| Error::CallingRuntime(e.into()))?
            .into_iter()
            .filter(|id| !local_keys.contains(id.as_ref()))
            .collect::<Vec<_>>();

        self.addr_cache.retain_ids(&authorities);

        authorities.shuffle(&mut thread_rng());
        self.pending_lookups = authorities;
        // Ignore all still in-flight lookups. Those that are still in-flight are likely stalled as
        // query interval ticks are far enough apart for all lookups to succeed.
        self.in_flight_lookups.clear();

        if let Some(metrics) = &self.metrics {
            metrics
                .requests_pending
                .set(self.pending_lookups.len().try_into().unwrap_or(std::u64::MAX));
        }

        Ok(())
    }

    fn start_new_lookups(&mut self) {
        while self.in_flight_lookups.len() < MAX_IN_FLIGHT_LOOKUPS {
            let authority_id = match self.pending_lookups.pop() {
                Some(authority) => authority,
                None => return,
            };
            let hash = hash_authority_id(authority_id.as_ref());
            self.network.get_value(&hash);
            self.in_flight_lookups.insert(hash, authority_id);

            if let Some(metrics) = &self.metrics {
                metrics.requests.inc();
                metrics
                    .requests_pending
                    .set(self.pending_lookups.len().try_into().unwrap_or(std::u64::MAX));
            }
        }
    }

    /// Handle incoming Dht events.
    async fn handle_dht_event(&mut self, event: DhtEvent) {
        match event {
            DhtEvent::ValueFound(v) => {
                if let Some(metrics) = &self.metrics {
                    metrics.dht_event_received.with_label_values(&["value_found"]).inc();
                }

                if log_enabled!(log::Level::Debug) {
                    let hashes: Vec<_> = v.iter().map(|(hash, _value)| hash.clone()).collect();
                    debug!(target: LOG_TARGET, "Value for hash '{:?}' found on Dht.", hashes);
                }

                if let Err(e) = self.handle_dht_value_found_event(v) {
                    if let Some(metrics) = &self.metrics {
                        metrics.handle_value_found_event_failure.inc();
                    }

                    debug!(target: LOG_TARGET, "Failed to handle Dht value found event: {}", e);
                }
            },
            DhtEvent::ValueNotFound(hash) => {
                if let Some(metrics) = &self.metrics {
                    metrics.dht_event_received.with_label_values(&["value_not_found"]).inc();
                }

                if self.in_flight_lookups.remove(&hash).is_some() {
                    debug!(target: LOG_TARGET, "Value for hash '{:?}' not found on Dht.", hash)
                } else {
                    debug!(
                        target: LOG_TARGET,
                        "Received 'ValueNotFound' for unexpected hash '{:?}'.", hash
                    )
                }
            },
            DhtEvent::ValuePut(hash) => {
                if !self.latest_published_kad_keys.contains(&hash) {
                    return;
                }

                // Fast forward the exponentially increasing interval to the configured maximum. In
                // case this was the first successful address publishing there is no need for a
                // timely retry.
                self.publish_interval.set_to_max();

                if let Some(metrics) = &self.metrics {
                    metrics.dht_event_received.with_label_values(&["value_put"]).inc();
                }

                debug!(target: LOG_TARGET, "Successfully put hash '{:?}' on Dht.", hash)
            },
            DhtEvent::ValuePutFailed(hash) => {
                if !self.latest_published_kad_keys.contains(&hash) {
                    // Not a value we have published or received multiple times.
                    return;
                }

                if let Some(metrics) = &self.metrics {
                    metrics.dht_event_received.with_label_values(&["value_put_failed"]).inc();
                }

                debug!(target: LOG_TARGET, "Failed to put hash '{:?}' on Dht.", hash)
            },
        }
    }

    fn handle_dht_value_found_event(&mut self, values: Vec<(KademliaKey, Vec<u8>)>) -> Result<()> {
        // Ensure `values` is not empty and all its keys equal.
        let remote_key = single(values.iter().map(|(key, _)| key.clone()))
            .map_err(|_| Error::ReceivingDhtValueFoundEventWithDifferentKeys)?
            .ok_or(Error::ReceivingDhtValueFoundEventWithNoRecords)?;

        let authority_id: AuthorityId = self
            .in_flight_lookups
            .remove(&remote_key)
            .ok_or(Error::ReceivingUnexpectedRecord)?;

        let local_peer_id = self.network.local_peer_id();

        let remote_addresses: Vec<Multiaddr> = values
            .into_iter()
            .map(|(_k, v)| {
                let schema::SignedAuthorityRecord { record, auth_signature, peer_signature } =
                    schema::SignedAuthorityRecord::decode(v.as_slice())
                        .map_err(Error::DecodingProto)?;

                let auth_signature = AuthoritySignature::decode(&mut &auth_signature[..])
                    .map_err(Error::EncodingDecodingScale)?;

                if !AuthorityPair::verify(&auth_signature, &record, &authority_id) {
                    return Err(Error::VerifyingDhtPayload)
                }

                let addresses: Vec<Multiaddr> = schema::AuthorityRecord::decode(record.as_slice())
                    .map(|a| a.addresses)
                    .map_err(Error::DecodingProto)?
                    .into_iter()
                    .map(|a| a.try_into())
                    .collect::<std::result::Result<_, _>>()
                    .map_err(Error::ParsingMultiaddress)?;

                let get_peer_id = |a: &Multiaddr| match a.iter().last() {
                    Some(multiaddr::Protocol::P2p(key)) => PeerId::from_multihash(key).ok(),
                    _ => None,
                };

                // Ignore [`Multiaddr`]s without a [`PeerId`] as well as our own addresses.
                let addresses: Vec<Multiaddr> = addresses
                    .into_iter()
                    .filter(|a| get_peer_id(a).filter(|p| *p != local_peer_id).is_some())
                    .collect();

                let remote_peer_id = single(addresses.iter().map(get_peer_id))
                    .map_err(|_| Error::ReceivingDhtValueFoundEventWithDifferentPeerIds)? // different peer_id in records
                    .flatten()
                    .ok_or(Error::ReceivingDhtValueFoundEventWithNoPeerIds)?; // no records with peer_id in them

                // At this point we know all the valid multiaddresses from the record, and that
                // each of them belongs to the same `PeerId`; we just need to check that the
                // record is properly signed by the owner of that `PeerId`.

                if let Some(peer_signature) = peer_signature {
                    match self.network.verify(
                        remote_peer_id.into(),
                        &peer_signature.public_key,
                        &peer_signature.signature,
                        &record,
                    ) {
                        Ok(true) => {},
                        Ok(false) => return Err(Error::VerifyingDhtPayload),
                        Err(error) => return Err(Error::ParsingLibp2pIdentity(error)),
                    }
                } else if self.strict_record_validation {
                    return Err(Error::MissingPeerIdSignature)
                } else {
                    debug!(
                        target: LOG_TARGET,
                        "Received unsigned authority discovery record from {}", authority_id
                    );
                }
                Ok(addresses)
            })
            .collect::<Result<Vec<Vec<Multiaddr>>>>()?
            .into_iter()
            .flatten()
            .take(MAX_ADDRESSES_PER_AUTHORITY)
            .collect();

        if !remote_addresses.is_empty() {
            self.addr_cache.insert(authority_id, remote_addresses);
            if let Some(metrics) = &self.metrics {
                metrics
                    .known_authorities_count
                    .set(self.addr_cache.num_authority_ids().try_into().unwrap_or(std::u64::MAX));
            }
        }
        Ok(())
    }

    /// Retrieve our public keys within the current and next authority set.
    // A node might have multiple authority discovery keys within its keystore, e.g. an old one
    // and one for the upcoming session. In addition, it could be participating in the current
    // and/or next authority set with two keys. The function does not return all of the local
    // authority discovery public keys, but only the ones intersecting with the current or next
    // authority set.
    async fn get_own_public_keys_within_authority_set(
        key_store: KeystorePtr,
        client: &Client,
    ) -> Result<HashSet<AuthorityId>> {
        let local_pub_keys = key_store
            .sr25519_public_keys(key_types::AUTHORITY_DISCOVERY)
            .into_iter()
            .collect::<HashSet<_>>();

        let best_hash = client.best_hash().await?;
        let authorities = client
            .authorities(best_hash)
            .await
            .map_err(|e| Error::CallingRuntime(e.into()))?
            .into_iter()
            .map(Into::into)
            .collect::<HashSet<_>>();

        let intersection =
            local_pub_keys.intersection(&authorities).cloned().map(Into::into).collect();

        Ok(intersection)
    }
}

/// NetworkProvider provides [`Worker`] with all necessary hooks into the
/// underlying Substrate networking. Using this trait abstraction instead of
/// `sc_network::NetworkService` directly is necessary to unit test [`Worker`].
pub trait NetworkProvider:
    NetworkDHTProvider + NetworkStateInfo + NetworkSigner + Send + Sync
{
}

impl<T> NetworkProvider for T where
    T: NetworkDHTProvider + NetworkStateInfo + NetworkSigner + Send + Sync
{
}
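// Illustrative note: because `NetworkProvider` is just the sum of its three
// super-traits, unit tests can substitute the real `NetworkService` with any
// test double implementing them. A sketch (the names below are hypothetical,
// not taken from the actual test module):
//
//     #[derive(Default)]
//     struct TestNetwork {
//         put_value_calls: Mutex<Vec<(KademliaKey, Vec<u8>)>>,
//     }
//
//     impl NetworkDHTProvider for TestNetwork { /* record puts/gets */ }
//     impl NetworkStateInfo for TestNetwork { /* return a fixed peer id */ }
//     impl NetworkSigner for TestNetwork { /* sign with a test keypair */ }
//
// An `Arc<TestNetwork>` can then be passed to `Worker::new` as the
// `Arc<dyn NetworkProvider>` network handle.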
fn hash_authority_id(id: &[u8]) -> KademliaKey {
    KademliaKey::new(&Code::Sha2_256.digest(id).digest())
}

// Makes sure all values are equal and returns the value.
//
// Returns Err(_) if not all values are equal. Returns Ok(None) if there are
// no values.
fn single<T>(values: impl IntoIterator<Item = T>) -> std::result::Result<Option<T>, ()>
where
    T: PartialEq<T>,
{
    values.into_iter().try_fold(None, |acc, item| match acc {
        None => Ok(Some(item)),
        Some(ref prev) if *prev != item => Err(()),
        Some(x) => Ok(Some(x)),
    })
}
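// Expected behaviour of `single`, shown on hypothetical inputs:
//
//     assert_eq!(single(vec![1, 1, 1]), Ok(Some(1)));  // all values equal
//     assert_eq!(single(vec![1, 2, 1]), Err(()));      // values differ
//     assert_eq!(single(Vec::<u32>::new()), Ok(None)); // no values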
fn serialize_addresses(addresses: impl Iterator<Item = Multiaddr>) -> Vec<Vec<u8>> {
    addresses.map(|a| a.to_vec()).collect()
}

fn serialize_authority_record(addresses: Vec<Vec<u8>>) -> Result<Vec<u8>> {
    let mut serialized_record = vec![];
    schema::AuthorityRecord { addresses }
        .encode(&mut serialized_record)
        .map_err(Error::EncodingProto)?;
    Ok(serialized_record)
}

fn sign_record_with_peer_id(
    serialized_record: &[u8],
    network: &impl NetworkSigner,
) -> Result<schema::PeerSignature> {
    let signature = network
        .sign_with_local_identity(serialized_record.to_vec())
        .map_err(|e| Error::CannotSign(format!("{} (network packet)", e)))?;
    let public_key = signature.public_key.encode_protobuf();
    let signature = signature.bytes;
    Ok(schema::PeerSignature { signature, public_key })
}

fn sign_record_with_authority_ids(
    serialized_record: Vec<u8>,
    peer_signature: Option<schema::PeerSignature>,
    key_store: &dyn Keystore,
    keys: Vec<AuthorityId>,
) -> Result<Vec<(KademliaKey, Vec<u8>)>> {
    let mut result = Vec::with_capacity(keys.len());

    for key in keys.iter() {
        let auth_signature = key_store
            .sr25519_sign(key_types::AUTHORITY_DISCOVERY, key.as_ref(), &serialized_record)
            .map_err(|e| Error::CannotSign(format!("{}. Key: {:?}", e, key)))?
            .ok_or_else(|| {
                Error::CannotSign(format!("Could not find key in keystore. Key: {:?}", key))
            })?;

        // Scale encode
        let auth_signature = auth_signature.encode();

        let signed_record = schema::SignedAuthorityRecord {
            record: serialized_record.clone(),
            auth_signature,
            peer_signature: peer_signature.clone(),
        }
        .encode_to_vec();

        result.push((hash_authority_id(key.as_slice()), signed_record));
    }

    Ok(result)
}
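// For reference, the DHT key/value pair produced above has this shape (a sketch
// derived from the code in this file; `SignedAuthorityRecord` and
// `AuthorityRecord` come from the protobuf `schema` module):
//
//     key   = Sha2-256(authority id bytes)
//     value = SignedAuthorityRecord {
//                 record:         protobuf-encoded AuthorityRecord { addresses },
//                 auth_signature: SCALE-encoded sr25519 signature over `record`,
//                 peer_signature: optional signature over `record` by the node's
//                                 network identity, plus its public key,
//             }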
/// Prometheus metrics for a [`Worker`].
#[derive(Clone)]
pub(crate) struct Metrics {
    publish: Counter<U64>,
    amount_addresses_last_published: Gauge<U64>,
    requests: Counter<U64>,
    requests_pending: Gauge<U64>,
    dht_event_received: CounterVec<U64>,
    handle_value_found_event_failure: Counter<U64>,
    known_authorities_count: Gauge<U64>,
}

impl Metrics {
    pub(crate) fn register(registry: &prometheus_endpoint::Registry) -> Result<Self> {
        Ok(Self {
            publish: register(
                Counter::new(
                    "substrate_authority_discovery_times_published_total",
                    "Number of times authority discovery has published external addresses.",
                )?,
                registry,
            )?,
            amount_addresses_last_published: register(
                Gauge::new(
                    "substrate_authority_discovery_amount_external_addresses_last_published",
                    "Number of external addresses published when authority discovery last \
                     published addresses.",
                )?,
                registry,
            )?,
            requests: register(
                Counter::new(
                    "substrate_authority_discovery_authority_addresses_requested_total",
                    "Number of times authority discovery has requested external addresses of a \
                     single authority.",
                )?,
                registry,
            )?,
            requests_pending: register(
                Gauge::new(
                    "substrate_authority_discovery_authority_address_requests_pending",
                    "Number of pending authority address requests.",
                )?,
                registry,
            )?,
            dht_event_received: register(
                CounterVec::new(
                    Opts::new(
                        "substrate_authority_discovery_dht_event_received",
                        "Number of dht events received by authority discovery.",
                    ),
                    &["name"],
                )?,
                registry,
            )?,
            handle_value_found_event_failure: register(
                Counter::new(
                    "substrate_authority_discovery_handle_value_found_event_failure",
                    "Number of times handling a dht value found event failed.",
                )?,
                registry,
            )?,
            known_authorities_count: register(
                Gauge::new(
                    "substrate_authority_discovery_known_authorities_count",
                    "Number of authorities known by authority discovery.",
                )?,
                registry,
            )?,
        })
    }
}

// Helper functions for unit testing.
#[cfg(test)]
impl<Block, Client, DhtEventStream> Worker<Client, Block, DhtEventStream> {
    pub(crate) fn inject_addresses(&mut self, authority: AuthorityId, addresses: Vec<Multiaddr>) {
        self.addr_cache.insert(authority, addresses);
    }
}