mirror of
https://github.com/pezkuwichain/pezkuwi-subxt.git
synced 2026-06-12 19:21:13 +00:00
Rewrite telemetry using libp2p (#2812)
* Rewrite telemetry using libp2p * Update the Cargo.lock files * Apply suggestion
This commit is contained in:
committed by
Gavin Wood
parent
6130ff3f46
commit
57afa9b440
+190
-220
@@ -14,38 +14,95 @@
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Substrate. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! Telemetry utils.
|
||||
//! Telemetry utilities.
|
||||
//!
|
||||
//! Calling `init_telemetry` registers a global `slog` logger using `slog_scope::set_global_logger`.
|
||||
//! After that, calling `slog_scope::with_logger` will return a logger that sends information to
|
||||
//! the telemetry endpoints. The `telemetry!` macro is a short-cut for calling
|
||||
//! `slog_scope::with_logger` followed with `slog_log!`.
|
||||
//!
|
||||
//! Note that you are supposed to only ever use `telemetry!` and not `slog_scope::with_logger` at
|
||||
//! the moment. Substate may eventually be reworked to get proper `slog` support, including sending
|
||||
//! information to the telemetry.
|
||||
//!
|
||||
//! The `Telemetry` struct implements `Future` and must be polled regularly (or sent to a
|
||||
//! background thread/task) in order for the telemetry to properly function. Dropping the object
|
||||
//! will also deregister the global logger and replace it with a logger that discards messages.
|
||||
//!
|
||||
//! # Example
|
||||
//!
|
||||
//! ```no_run
|
||||
//! let telemetry = substrate_telemetry::init_telemetry(substrate_telemetry::TelemetryConfig {
|
||||
//! endpoints: substrate_telemetry::TelemetryEndpoints::new(vec![
|
||||
//! // The `0` is the maximum verbosity level of messages to send to this endpoint.
|
||||
//! ("wss://example.com".into(), 0)
|
||||
//! ]),
|
||||
//! on_connect: Box::new(|| {}),
|
||||
//! // Can be used to pass an external implementation of WebSockets.
|
||||
//! wasm_external_transport: None,
|
||||
//! });
|
||||
//!
|
||||
//! // The `telemetry` object implements `Future` and must be processed.
|
||||
//! std::thread::spawn(move || {
|
||||
//! tokio::run(telemetry);
|
||||
//! });
|
||||
//!
|
||||
//! // Sends a message on the telemetry.
|
||||
//! substrate_telemetry::telemetry!(substrate_telemetry::SUBSTRATE_INFO; "test";
|
||||
//! "foo" => "bar",
|
||||
//! )
|
||||
//! ```
|
||||
//!
|
||||
//! `telemetry` macro may be used anywhere in the Substrate codebase
|
||||
//! in order to send real-time logging information to the telemetry
|
||||
//! server (if there is one). We use the async drain adapter of `slog`
|
||||
//! so that the logging thread doesn't get held up at all.
|
||||
|
||||
use std::{io, time, thread};
|
||||
use std::sync::Arc;
|
||||
use futures::{prelude::*, task::AtomicTask};
|
||||
use libp2p::{Multiaddr, wasm_ext};
|
||||
use log::{trace, warn};
|
||||
use parking_lot::Mutex;
|
||||
use slog::{Drain, o, OwnedKVList, Record};
|
||||
use log::trace;
|
||||
use rand::{thread_rng, Rng};
|
||||
use serde::{Serialize, Deserialize};
|
||||
use std::sync::{Arc, Weak};
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
pub use slog_scope::with_logger;
|
||||
pub use slog;
|
||||
use serde::{Serialize, Deserialize};
|
||||
use core::result;
|
||||
|
||||
mod worker;
|
||||
|
||||
/// Configuration for telemetry.
|
||||
pub struct TelemetryConfig {
|
||||
/// Collection of telemetry WebSocket servers with a corresponding verbosity level.
|
||||
pub endpoints: TelemetryEndpoints,
|
||||
/// What do do when we connect to the servers.
|
||||
/// Note that this closure is executed each time we connect to a telemetry endpoint.
|
||||
|
||||
/// What to do when we connect to a server.
|
||||
///
|
||||
/// This closure is executed each time we connect to a telemetry endpoint, either for the first
|
||||
/// time or after being disconnected.
|
||||
pub on_connect: Box<dyn Fn() + Send + Sync + 'static>,
|
||||
|
||||
/// Optional external implementation of a libp2p transport. Used in WASM contexts where we need
|
||||
/// some binding between the networking provided by the operating system or environment and
|
||||
/// libp2p.
|
||||
///
|
||||
/// This parameter exists whatever the target platform is, but it is expected to be set to
|
||||
/// `Some` only when compiling for WASM.
|
||||
///
|
||||
/// > **Important**: Each individual call to `write` corresponds to one message. There is no
|
||||
/// > internal buffering going on. In the context of WebSockets, each `write`
|
||||
/// > must be one individual WebSockets frame.
|
||||
pub wasm_external_transport: Option<wasm_ext::ExtTransport>,
|
||||
}
|
||||
|
||||
/// Telemetry service guard.
|
||||
pub type Telemetry = slog_scope::GlobalLoggerGuard;
|
||||
/// List of telemetry servers we want to talk to. Contains the URL of the server, and the
|
||||
/// maximum verbosity level.
|
||||
///
|
||||
/// The URL string can be either a URL or a multiaddress.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct TelemetryEndpoints(Vec<(String, u8)>);
|
||||
|
||||
/// Size of the channel for passing messages to telemetry thread.
|
||||
const CHANNEL_SIZE: usize = 262_144;
|
||||
impl TelemetryEndpoints {
|
||||
pub fn new(endpoints: Vec<(String, u8)>) -> Self {
|
||||
TelemetryEndpoints(endpoints)
|
||||
}
|
||||
}
|
||||
|
||||
/// Log levels.
|
||||
pub const SUBSTRATE_DEBUG: &str = "9";
|
||||
@@ -56,105 +113,135 @@ pub const CONSENSUS_DEBUG: &str = "5";
|
||||
pub const CONSENSUS_WARN: &str = "4";
|
||||
pub const CONSENSUS_INFO: &str = "0";
|
||||
|
||||
/// Multiply logging to all drains. This is similar to `slog::Duplicate`, which is
|
||||
/// limited to two drains though and doesn't support dynamic nesting at runtime.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Multiply<D: Drain> (pub Vec<D>);
|
||||
/// Telemetry object. Implements `Future` and must be polled regularly.
|
||||
/// Contains an `Arc` and can be cloned and pass around. Only one clone needs to be polled
|
||||
/// regularly.
|
||||
/// Dropping all the clones unregisters the telemetry.
|
||||
#[derive(Clone)]
|
||||
pub struct Telemetry {
|
||||
inner: Arc<TelemetryInner>,
|
||||
/// Slog guard so that we don't get deregistered.
|
||||
_guard: Arc<slog_scope::GlobalLoggerGuard>,
|
||||
}
|
||||
|
||||
impl<D: Drain> Multiply<D> {
|
||||
pub fn new(v: Vec<D>) -> Self {
|
||||
Multiply(v)
|
||||
// Implementation notes: considering that logging can happen at any moment, we only have two
|
||||
// options: locking a mutex (which we currently do), or using a channel (which we should do).
|
||||
// At the moment, `slog` doesn't provide any easy way to serialize records in order to send them
|
||||
// over a channel, but ideally that's what should be done.
|
||||
|
||||
/// Shared between `Telemetry` and `TelemetryDrain`.
|
||||
struct TelemetryInner {
|
||||
/// Worker for the telemetry.
|
||||
worker: Mutex<worker::TelemetryWorker>,
|
||||
/// Same field as in the configuration. Called when we connected to an endpoint.
|
||||
on_connect: Box<dyn Fn() + Send + Sync + 'static>,
|
||||
/// Task to wake up when we add a log entry to the worker.
|
||||
polling_task: AtomicTask,
|
||||
}
|
||||
|
||||
/// Implements `slog::Drain`.
|
||||
struct TelemetryDrain {
|
||||
inner: std::panic::AssertUnwindSafe<Weak<TelemetryInner>>,
|
||||
}
|
||||
|
||||
/// Initializes the telemetry. See the crate root documentation for more information.
|
||||
///
|
||||
/// Please be careful to not call this function twice in the same program. The `slog` crate
|
||||
/// doesn't provide any way of knowing whether a global logger has already been registered.
|
||||
pub fn init_telemetry(config: TelemetryConfig) -> Telemetry {
|
||||
// Build the list of telemetry endpoints.
|
||||
let mut endpoints = Vec::new();
|
||||
for &(ref url, verbosity) in &config.endpoints.0 {
|
||||
match url_to_multiaddr(url) {
|
||||
Ok(addr) => endpoints.push((addr, verbosity)),
|
||||
Err(err) => warn!(target: "telemetry", "Invalid telemetry URL {}: {}", url, err),
|
||||
}
|
||||
}
|
||||
|
||||
let inner = Arc::new(TelemetryInner {
|
||||
worker: Mutex::new(worker::TelemetryWorker::new(endpoints, config.wasm_external_transport)),
|
||||
on_connect: config.on_connect,
|
||||
polling_task: AtomicTask::new(),
|
||||
});
|
||||
|
||||
let guard = {
|
||||
let logger = TelemetryDrain { inner: std::panic::AssertUnwindSafe(Arc::downgrade(&inner)) };
|
||||
let root = slog::Logger::root(slog::Drain::fuse(logger), slog::o!());
|
||||
slog_scope::set_global_logger(root)
|
||||
};
|
||||
|
||||
Telemetry {
|
||||
inner,
|
||||
_guard: Arc::new(guard),
|
||||
}
|
||||
}
|
||||
|
||||
impl<D: Drain> Drain for Multiply<D> {
|
||||
type Ok = Vec<D::Ok>;
|
||||
type Err = Vec<D::Err>;
|
||||
impl Future for Telemetry {
|
||||
type Item = ();
|
||||
type Error = ();
|
||||
|
||||
fn log(&self, record: &Record, logger_values: &OwnedKVList) -> result::Result<Self::Ok, Self::Err> {
|
||||
let mut oks = Vec::new();
|
||||
let mut errs = Vec::new();
|
||||
fn poll(&mut self) -> Poll<(), ()> {
|
||||
let before = Instant::now();
|
||||
|
||||
self.0.iter().for_each(|l| {
|
||||
let res: Result<<D as Drain>::Ok, <D as Drain>::Err> = (*l).log(record, logger_values);
|
||||
match res {
|
||||
Ok(o) => oks.push(o),
|
||||
Err(e) => errs.push(e),
|
||||
let mut has_connected = false;
|
||||
while let Async::Ready(event) = self.inner.worker.lock().poll() {
|
||||
// Right now we only have one possible event. This line is here in order to not
|
||||
// forget to handle any possible new event type.
|
||||
let worker::TelemetryWorkerEvent::Connected = event;
|
||||
has_connected = true;
|
||||
}
|
||||
|
||||
if before.elapsed() > Duration::from_millis(200) {
|
||||
warn!(target: "telemetry", "Polling the telemetry took more than 200ms");
|
||||
}
|
||||
|
||||
// We use an intermediary variable `has_connected` so that the lock is released when we
|
||||
// call `on_connect`.
|
||||
if has_connected {
|
||||
trace!(target: "telemetry", "Running on_connect handlers");
|
||||
(self.inner.on_connect)();
|
||||
}
|
||||
|
||||
self.inner.polling_task.register();
|
||||
Ok(Async::NotReady)
|
||||
}
|
||||
}
|
||||
|
||||
impl slog::Drain for TelemetryDrain {
|
||||
type Ok = ();
|
||||
type Err = ();
|
||||
|
||||
fn log(&self, record: &slog::Record, values: &slog::OwnedKVList) -> Result<Self::Ok, Self::Err> {
|
||||
if let Some(inner) = self.inner.0.upgrade() {
|
||||
let before = Instant::now();
|
||||
let result = inner.worker.lock().log(record, values);
|
||||
inner.polling_task.notify();
|
||||
if before.elapsed() > Duration::from_millis(50) {
|
||||
warn!(target: "telemetry", "Writing a telemetry log took more than 50ms");
|
||||
}
|
||||
});
|
||||
|
||||
if !errs.is_empty() {
|
||||
result::Result::Err(errs)
|
||||
result
|
||||
} else {
|
||||
result::Result::Ok(oks)
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Initialize telemetry.
|
||||
pub fn init_telemetry(config: TelemetryConfig) -> slog_scope::GlobalLoggerGuard {
|
||||
let mut endpoint_drains: Vec<Box<slog::Filter<_, _>>> = Vec::new();
|
||||
let mut out_syncs = Vec::new();
|
||||
/// Parses a WebSocket URL into a libp2p `Multiaddr`.
|
||||
fn url_to_multiaddr(url: &str) -> Result<Multiaddr, libp2p::multiaddr::Error> {
|
||||
// First, assume that we have a `Multiaddr`.
|
||||
let parse_error = match url.parse() {
|
||||
Ok(ma) => return Ok(ma),
|
||||
Err(err) => err,
|
||||
};
|
||||
|
||||
// Set up a filter/drain for each endpoint
|
||||
config.endpoints.0.iter().for_each(|(url, verbosity)| {
|
||||
let writer = TelemetryWriter::new(Arc::new(url.to_owned()));
|
||||
let out_sync = writer.out.clone();
|
||||
out_syncs.push(out_sync);
|
||||
// If not, try the `ws://path/url` format.
|
||||
if let Ok(ma) = libp2p::multiaddr::from_url(url) {
|
||||
return Ok(ma)
|
||||
}
|
||||
|
||||
let until_verbosity = *verbosity;
|
||||
let filter = slog::Filter(
|
||||
slog_json::Json::default(writer).fuse(),
|
||||
move |rec| {
|
||||
let tag = rec.tag().parse::<u8>()
|
||||
.expect("`telemetry!` macro requires tag.");
|
||||
tag <= until_verbosity
|
||||
});
|
||||
|
||||
let filter = Box::new(filter) as Box<slog::Filter<_, _>>;
|
||||
endpoint_drains.push(filter);
|
||||
});
|
||||
|
||||
// Set up logging to all endpoints
|
||||
let drain = slog_async::Async::new(Multiply::new(endpoint_drains).fuse());
|
||||
let root = slog::Logger::root(drain.chan_size(CHANNEL_SIZE)
|
||||
.overflow_strategy(slog_async::OverflowStrategy::DropAndReport)
|
||||
.build().fuse(), o!()
|
||||
);
|
||||
let logger_guard = slog_scope::set_global_logger(root);
|
||||
|
||||
// Spawn a thread for each endpoint
|
||||
let on_connect = Arc::new(config.on_connect);
|
||||
config.endpoints.0.into_iter().for_each(|(url, verbosity)| {
|
||||
let inner_verbosity = Arc::new(verbosity.to_owned());
|
||||
let inner_on_connect = Arc::clone(&on_connect);
|
||||
|
||||
let out_sync = out_syncs.remove(0);
|
||||
let out_sync = Arc::clone(&out_sync);
|
||||
|
||||
thread::spawn(move || {
|
||||
loop {
|
||||
let on_connect = Arc::clone(&inner_on_connect);
|
||||
let out_sync = Arc::clone(&out_sync);
|
||||
let verbosity = Arc::clone(&inner_verbosity);
|
||||
|
||||
trace!(target: "telemetry",
|
||||
"Connecting to Telemetry at {} with verbosity {}", url, Arc::clone(&verbosity));
|
||||
|
||||
let _ = ws::connect(url.to_owned(),
|
||||
|out| {
|
||||
Connection::new(out, Arc::clone(&out_sync), Arc::clone(&on_connect), url.clone())
|
||||
});
|
||||
|
||||
// Sleep for a random time between 5-10 secs. If there are general connection
|
||||
// issues not all threads should be synchronized in their re-connection time.
|
||||
let random_sleep = thread_rng().gen_range(0, 5);
|
||||
thread::sleep(time::Duration::from_secs(5) + time::Duration::from_secs(random_sleep));
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
logger_guard
|
||||
// If we have no clue about the format of that string, assume that we were expecting a
|
||||
// `Multiaddr`.
|
||||
Err(parse_error)
|
||||
}
|
||||
|
||||
/// Translates to `slog_scope::info`, but contains an additional verbosity
|
||||
@@ -166,122 +253,5 @@ macro_rules! telemetry {
|
||||
$crate::with_logger(|l| {
|
||||
$crate::slog::slog_info!(l, #$a, $b; $($t)* )
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
struct Connection {
|
||||
out: ws::Sender,
|
||||
out_sync: Arc<Mutex<Option<ws::Sender>>>,
|
||||
on_connect: Arc<Box<dyn Fn() + Send + Sync + 'static>>,
|
||||
url: String,
|
||||
}
|
||||
|
||||
impl Connection {
|
||||
fn new(
|
||||
out: ws::Sender,
|
||||
out_sync: Arc<Mutex<Option<ws::Sender>>>,
|
||||
on_connect: Arc<Box<dyn Fn() + Send + Sync + 'static>>,
|
||||
url: String
|
||||
) -> Self {
|
||||
Connection {
|
||||
out,
|
||||
out_sync,
|
||||
on_connect,
|
||||
url,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ws::Handler for Connection {
|
||||
fn on_open(&mut self, _: ws::Handshake) -> ws::Result<()> {
|
||||
trace!(target: "telemetry", "Connected to {}!", self.url);
|
||||
|
||||
*self.out_sync.lock() = Some(self.out.clone());
|
||||
(self.on_connect)();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn on_close(&mut self, code: ws::CloseCode, reason: &str) {
|
||||
*self.out_sync.lock() = None;
|
||||
|
||||
trace!(target: "telemetry", "Connection to {} closing due to ({:?}) {}",
|
||||
self.url, code, reason);
|
||||
}
|
||||
|
||||
fn on_error(&mut self, _: ws::Error) {
|
||||
*self.out_sync.lock() = None;
|
||||
|
||||
// Sleep to ensure that reconnecting isn't spamming logs.
|
||||
// This happens in it's own thread so it won't block anything.
|
||||
thread::sleep(time::Duration::from_millis(1000));
|
||||
}
|
||||
}
|
||||
|
||||
struct TelemetryWriter {
|
||||
buffer: Vec<u8>,
|
||||
out: Arc<Mutex<Option<ws::Sender>>>,
|
||||
url: Arc<String>,
|
||||
}
|
||||
|
||||
impl TelemetryWriter {
|
||||
fn new(url: Arc<String>) -> Self {
|
||||
let out = Arc::new(Mutex::new(None));
|
||||
|
||||
TelemetryWriter {
|
||||
buffer: Vec::new(),
|
||||
out,
|
||||
url,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl io::Write for TelemetryWriter {
|
||||
fn write(&mut self, msg: &[u8]) -> io::Result<usize> {
|
||||
let mut iter = msg.split(|x| *x == b'\n');
|
||||
let first = iter.next().expect("Split iterator always has at least one element; qed");
|
||||
|
||||
self.buffer.extend_from_slice(first);
|
||||
|
||||
// Flush for each occurrence of new line character
|
||||
for continued in iter {
|
||||
let _ = self.flush();
|
||||
self.buffer.extend_from_slice(continued);
|
||||
}
|
||||
|
||||
Ok(msg.len())
|
||||
}
|
||||
|
||||
fn flush(&mut self) -> io::Result<()> {
|
||||
if self.buffer.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
if let Ok(s) = ::std::str::from_utf8(&self.buffer[..]) {
|
||||
let mut out = self.out.lock();
|
||||
|
||||
let error = if let Some(ref mut o) = *out {
|
||||
let r = o.send(s);
|
||||
trace!(target: "telemetry", "Sent to telemetry {}: {} -> {:?}", self.url, s, r);
|
||||
|
||||
r.is_err()
|
||||
} else {
|
||||
trace!(target: "telemetry", "Telemetry socket closed to {}, failed to send: {}", self.url, s);
|
||||
false
|
||||
};
|
||||
|
||||
if error {
|
||||
*out = None;
|
||||
}
|
||||
}
|
||||
self.buffer.clear();
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct TelemetryEndpoints (Vec<(String, u8)>);
|
||||
|
||||
impl TelemetryEndpoints {
|
||||
pub fn new(endpoints: Vec<(String, u8)>) -> Self {
|
||||
TelemetryEndpoints(endpoints)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,203 @@
|
||||
// Copyright 2017-2019 Parity Technologies (UK) Ltd.
|
||||
// This file is part of Substrate.
|
||||
|
||||
// Substrate is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Substrate is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Substrate. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! Contains the object that makes the telemetry work.
|
||||
//!
|
||||
//! # Usage
|
||||
//!
|
||||
//! - Create a `TelemetryWorker` with `TelemetryWorker::new`.
|
||||
//! - Send messages to the telemetry with `TelemetryWorker::send_message`. Messages will only be
|
||||
//! sent to the appropriate targets. Messages may be ignored if the target happens to be
|
||||
//! temporarily unreachable.
|
||||
//! - You must appropriately poll the worker with `TelemetryWorker::poll`. Polling will/may produce
|
||||
//! events indicating what happened since the latest polling.
|
||||
//!
|
||||
|
||||
use bytes::BytesMut;
|
||||
use futures::prelude::*;
|
||||
use libp2p::{core::transport::OptionalTransport, core::ConnectedPoint, Multiaddr, Transport, wasm_ext};
|
||||
use log::{trace, warn, error};
|
||||
use slog::Drain;
|
||||
use std::{io, time};
|
||||
use tokio_io::AsyncWrite;
|
||||
|
||||
mod node;
|
||||
|
||||
/// Timeout after which a connection attempt is considered failed. Includes the WebSocket HTTP
|
||||
/// upgrading.
|
||||
const CONNECT_TIMEOUT: time::Duration = time::Duration::from_secs(20);
|
||||
|
||||
/// Event generated when polling the worker.
|
||||
#[derive(Debug)]
|
||||
pub enum TelemetryWorkerEvent {
|
||||
/// We have established a connection to one of the telemetry endpoint, either for the first
|
||||
/// time or after having been disconnected earlier.
|
||||
Connected,
|
||||
}
|
||||
|
||||
/// Telemetry processing machine.
|
||||
#[derive(Debug)]
|
||||
pub struct TelemetryWorker {
|
||||
/// List of nodes with their maximum verbosity level.
|
||||
nodes: Vec<(node::Node<WsTrans>, u8)>,
|
||||
}
|
||||
|
||||
/// The pile of libp2p transports.
|
||||
#[cfg(not(target_os = "unknown"))]
|
||||
type WsTrans = libp2p::core::transport::timeout::TransportTimeout<
|
||||
libp2p::core::transport::OrTransport<
|
||||
libp2p::core::transport::map::Map<
|
||||
OptionalTransport<wasm_ext::ExtTransport>,
|
||||
fn(wasm_ext::Connection, ConnectedPoint) -> StreamSink<wasm_ext::Connection>
|
||||
>,
|
||||
libp2p::websocket::framed::WsConfig<libp2p::dns::DnsConfig<libp2p::tcp::TcpConfig>>
|
||||
>
|
||||
>;
|
||||
#[cfg(target_os = "unknown")]
|
||||
type WsTrans = libp2p::core::transport::timeout::TransportTimeout<
|
||||
libp2p::core::transport::map::Map<
|
||||
OptionalTransport<wasm_ext::ExtTransport>,
|
||||
fn(wasm_ext::Connection, ConnectedPoint) -> StreamSink<wasm_ext::Connection>
|
||||
>
|
||||
>;
|
||||
|
||||
impl TelemetryWorker {
|
||||
/// Builds a new `TelemetryWorker`.
|
||||
///
|
||||
/// The endpoints must be a list of targets, plus a verbosity level. When you send a message
|
||||
/// to the telemetry, only the targets whose verbosity is higher than the verbosity of the
|
||||
/// message will receive it.
|
||||
pub fn new(
|
||||
endpoints: impl IntoIterator<Item = (Multiaddr, u8)>,
|
||||
wasm_external_transport: impl Into<Option<wasm_ext::ExtTransport>>
|
||||
) -> Self {
|
||||
let transport = match wasm_external_transport.into() {
|
||||
Some(t) => OptionalTransport::some(t),
|
||||
None => OptionalTransport::none()
|
||||
}.map((|inner, _| StreamSink(inner)) as fn(_, _) -> _);
|
||||
|
||||
// The main transport is the `wasm_external_transport`, but if we're on desktop we add
|
||||
// support for TCP+WebSocket+DNS as a fallback. In practice, you're not expected to pass
|
||||
// an external transport on desktop and the fallback is used all the time.
|
||||
#[cfg(not(target_os = "unknown"))]
|
||||
let transport = transport.or_transport({
|
||||
let inner = libp2p::dns::DnsConfig::new(libp2p::tcp::TcpConfig::new());
|
||||
libp2p::websocket::framed::WsConfig::new(inner)
|
||||
});
|
||||
|
||||
let transport = transport.with_timeout(CONNECT_TIMEOUT);
|
||||
|
||||
TelemetryWorker {
|
||||
nodes: endpoints.into_iter().map(|(addr, verbosity)| {
|
||||
let node = node::Node::new(transport.clone(), addr);
|
||||
(node, verbosity)
|
||||
}).collect()
|
||||
}
|
||||
}
|
||||
|
||||
/// Polls the worker for events that happened.
|
||||
pub fn poll(&mut self) -> Async<TelemetryWorkerEvent> {
|
||||
for (node, _) in &mut self.nodes {
|
||||
loop {
|
||||
match node.poll() {
|
||||
Async::Ready(node::NodeEvent::Connected) =>
|
||||
return Async::Ready(TelemetryWorkerEvent::Connected),
|
||||
Async::Ready(node::NodeEvent::Disconnected(_)) => continue,
|
||||
Async::NotReady => break,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Async::NotReady
|
||||
}
|
||||
|
||||
/// Equivalent to `slog::Drain::log`, but takes `self` by `&mut` instead, which is more convenient.
|
||||
///
|
||||
/// Keep in mind that you should call `TelemetryWorker::poll` in order to process the messages.
|
||||
/// You should call this function right after calling `slog::Drain::log`.
|
||||
pub fn log(&mut self, record: &slog::Record, values: &slog::OwnedKVList) -> Result<(), ()> {
|
||||
let msg_verbosity = match record.tag().parse::<u8>() {
|
||||
Ok(v) => v,
|
||||
Err(err) => {
|
||||
warn!(target: "telemetry", "Failed to parse telemetry tag {:?}: {:?}",
|
||||
record.tag(), err);
|
||||
return Err(())
|
||||
}
|
||||
};
|
||||
|
||||
// None of the nodes want that verbosity, so just return without doing any serialization.
|
||||
if self.nodes.iter().all(|(_, node_max_verbosity)| msg_verbosity > *node_max_verbosity) {
|
||||
trace!(
|
||||
target: "telemetry",
|
||||
"Skipping log entry because verbosity {:?} is too high for all endpoints",
|
||||
msg_verbosity
|
||||
);
|
||||
return Ok(())
|
||||
}
|
||||
|
||||
// Turn the message into JSON.
|
||||
let serialized = {
|
||||
let mut out = Vec::new();
|
||||
slog_json::Json::default(&mut out).log(record, values).map_err(|_| ())?;
|
||||
out
|
||||
};
|
||||
|
||||
for (node, node_max_verbosity) in &mut self.nodes {
|
||||
if msg_verbosity > *node_max_verbosity {
|
||||
trace!(target: "telemetry", "Skipping {:?} for log entry with verbosity {:?}",
|
||||
node.addr(), msg_verbosity);
|
||||
continue;
|
||||
}
|
||||
|
||||
// `send_message` returns an error if we're not connected, which we silently ignore.
|
||||
let _ = node.send_message(serialized.clone());
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Wraps around an `AsyncWrite` and implements `Sink`. Guarantees that each item being sent maps
|
||||
/// to one call of `write`.
|
||||
///
|
||||
/// For some context, we put this object around the `wasm_ext::ExtTransport` in order to make sure
|
||||
/// that each telemetry message maps to one single call to `write` in the WASM FFI.
|
||||
struct StreamSink<T>(T);
|
||||
impl<T: AsyncWrite> Sink for StreamSink<T> {
|
||||
type SinkItem = BytesMut;
|
||||
type SinkError = io::Error;
|
||||
|
||||
fn start_send(&mut self, item: Self::SinkItem) -> Result<AsyncSink<Self::SinkItem>, io::Error> {
|
||||
match self.0.write(&item[..]) {
|
||||
Ok(n) if n == item.len() => Ok(AsyncSink::Ready),
|
||||
Ok(_) => {
|
||||
error!(target: "telemetry",
|
||||
"Detected some internal buffering happening in the telemetry");
|
||||
Err(io::Error::new(io::ErrorKind::Other, "Internal buffering detected"))
|
||||
},
|
||||
Err(ref err) if err.kind() == io::ErrorKind::WouldBlock => Ok(AsyncSink::NotReady(item)),
|
||||
Err(err) => Err(err),
|
||||
}
|
||||
}
|
||||
|
||||
fn poll_complete(&mut self) -> Poll<(), io::Error> {
|
||||
match self.0.flush() {
|
||||
Ok(()) => Ok(Async::Ready(())),
|
||||
Err(ref err) if err.kind() == io::ErrorKind::WouldBlock => Ok(Async::NotReady),
|
||||
Err(err) => Err(err),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,220 @@
|
||||
// Copyright 2017-2019 Parity Technologies (UK) Ltd.
|
||||
// This file is part of Substrate.
|
||||
|
||||
// Substrate is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Substrate is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Substrate. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! Contains the `Node` struct, which handles communications with a single telemetry endpoint.
|
||||
|
||||
use bytes::BytesMut;
|
||||
use futures::prelude::*;
|
||||
use libp2p::Multiaddr;
|
||||
use libp2p::core::transport::Transport;
|
||||
use log::{trace, debug, warn, error};
|
||||
use rand::Rng as _;
|
||||
use std::{collections::VecDeque, fmt, mem, time::Duration, time::Instant};
|
||||
use tokio_timer::Delay;
|
||||
|
||||
/// Maximum number of pending telemetry messages.
|
||||
const MAX_PENDING: usize = 10;
|
||||
|
||||
/// Handler for a single telemetry node.
|
||||
pub struct Node<TTrans: Transport> {
|
||||
/// Address of the node.
|
||||
addr: Multiaddr,
|
||||
/// State of the connection.
|
||||
socket: NodeSocket<TTrans>,
|
||||
/// Transport used to establish new connections.
|
||||
transport: TTrans,
|
||||
}
|
||||
|
||||
enum NodeSocket<TTrans: Transport> {
|
||||
/// We're connected to the node. This is the normal state.
|
||||
Connected(NodeSocketConnected<TTrans>),
|
||||
/// We are currently dialing the node.
|
||||
Dialing(TTrans::Dial),
|
||||
/// A new connection should be started as soon as possible.
|
||||
ReconnectNow,
|
||||
/// Waiting before attempting to dial again.
|
||||
WaitingReconnect(Delay),
|
||||
/// Temporary transition state.
|
||||
Poisoned,
|
||||
}
|
||||
|
||||
struct NodeSocketConnected<TTrans: Transport> {
|
||||
/// Where to send data.
|
||||
sink: TTrans::Output,
|
||||
/// Queue of packets to send.
|
||||
pending: VecDeque<BytesMut>,
|
||||
/// If true, we need to flush the sink.
|
||||
need_flush: bool,
|
||||
}
|
||||
|
||||
/// Event that can happen with this node.
|
||||
#[derive(Debug)]
|
||||
pub enum NodeEvent<TSinkErr> {
|
||||
/// We are now connected to this node.
|
||||
Connected,
|
||||
/// We are now disconnected from this node.
|
||||
Disconnected(TSinkErr),
|
||||
}
|
||||
|
||||
impl<TTrans: Transport> Node<TTrans> {
|
||||
/// Builds a new node handler.
|
||||
pub fn new(transport: TTrans, addr: Multiaddr) -> Self {
|
||||
Node {
|
||||
addr,
|
||||
socket: NodeSocket::ReconnectNow,
|
||||
transport,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the address that was passed to `new`.
|
||||
pub fn addr(&self) -> &Multiaddr {
|
||||
&self.addr
|
||||
}
|
||||
}
|
||||
|
||||
impl<TTrans: Transport, TSinkErr> Node<TTrans>
|
||||
where TTrans: Clone, TTrans::Output: Sink<SinkItem = BytesMut, SinkError = TSinkErr>,
|
||||
TSinkErr: fmt::Debug {
|
||||
/// Sends a WebSocket frame to the node. Returns an error if we are not connected to the node.
|
||||
///
|
||||
/// After calling this method, you should call `poll` in order for it to be properly processed.
|
||||
pub fn send_message(&mut self, payload: Vec<u8>) -> Result<(), ()> {
|
||||
if let NodeSocket::Connected(NodeSocketConnected { pending, .. }) = &mut self.socket {
|
||||
if pending.len() <= MAX_PENDING {
|
||||
trace!(target: "telemetry", "Adding log entry to queue for {:?}", self.addr);
|
||||
pending.push_back(payload.into());
|
||||
Ok(())
|
||||
} else {
|
||||
warn!(target: "telemetry", "Rejected log entry because queue is full for {:?}",
|
||||
self.addr);
|
||||
Err(())
|
||||
}
|
||||
} else {
|
||||
Err(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Polls the node for updates. Must be performed regularly.
|
||||
pub fn poll(&mut self) -> Async<NodeEvent<TSinkErr>> {
|
||||
let mut socket = mem::replace(&mut self.socket, NodeSocket::Poisoned);
|
||||
self.socket = loop {
|
||||
match socket {
|
||||
NodeSocket::Connected(mut conn) => match conn.poll(&self.addr) {
|
||||
Ok(Async::Ready(v)) => void::unreachable(v),
|
||||
Ok(Async::NotReady) => break NodeSocket::Connected(conn),
|
||||
Err(err) => {
|
||||
debug!(target: "telemetry", "Disconnected from {}: {:?}", self.addr, err);
|
||||
let timeout = gen_rand_reconnect_delay();
|
||||
self.socket = NodeSocket::WaitingReconnect(timeout);
|
||||
return Async::Ready(NodeEvent::Disconnected(err))
|
||||
}
|
||||
}
|
||||
NodeSocket::Dialing(mut s) => match s.poll() {
|
||||
Ok(Async::Ready(sink)) => {
|
||||
debug!(target: "telemetry", "Connected to {}", self.addr);
|
||||
let conn = NodeSocketConnected { sink, pending: VecDeque::new(), need_flush: false };
|
||||
self.socket = NodeSocket::Connected(conn);
|
||||
return Async::Ready(NodeEvent::Connected)
|
||||
},
|
||||
Ok(Async::NotReady) => break NodeSocket::Dialing(s),
|
||||
Err(err) => {
|
||||
debug!(target: "telemetry", "Error while dialing {}: {:?}", self.addr, err);
|
||||
let timeout = gen_rand_reconnect_delay();
|
||||
socket = NodeSocket::WaitingReconnect(timeout);
|
||||
}
|
||||
}
|
||||
NodeSocket::ReconnectNow => match self.transport.clone().dial(self.addr.clone()) {
|
||||
Ok(d) => {
|
||||
debug!(target: "telemetry", "Started dialing {}", self.addr);
|
||||
socket = NodeSocket::Dialing(d);
|
||||
}
|
||||
Err(err) => {
|
||||
debug!(target: "telemetry", "Error while dialing {}: {:?}", self.addr, err);
|
||||
let timeout = gen_rand_reconnect_delay();
|
||||
socket = NodeSocket::WaitingReconnect(timeout);
|
||||
}
|
||||
}
|
||||
NodeSocket::WaitingReconnect(mut s) => if let Ok(Async::Ready(_)) = s.poll() {
|
||||
socket = NodeSocket::ReconnectNow;
|
||||
} else {
|
||||
break NodeSocket::WaitingReconnect(s)
|
||||
}
|
||||
NodeSocket::Poisoned => {
|
||||
error!(target: "telemetry", "Poisoned connection with {}", self.addr);
|
||||
break NodeSocket::Poisoned
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
Async::NotReady
|
||||
}
|
||||
}
|
||||
|
||||
/// Generates a `Delay` object with a random timeout.
|
||||
///
|
||||
/// If there are general connection issues, not all endpoints should be synchronized in their
|
||||
/// re-connection time.
|
||||
fn gen_rand_reconnect_delay() -> Delay {
|
||||
let random_delay = rand::thread_rng().gen_range(5, 10);
|
||||
Delay::new(Instant::now() + Duration::from_secs(random_delay))
|
||||
}
|
||||
|
||||
impl<TTrans: Transport, TSinkErr> NodeSocketConnected<TTrans>
|
||||
where TTrans::Output: Sink<SinkItem = BytesMut, SinkError = TSinkErr> {
|
||||
/// Processes the queue of messages for the connected socket.
|
||||
///
|
||||
/// The address is passed for logging purposes only.
|
||||
fn poll(&mut self, my_addr: &Multiaddr) -> Poll<void::Void, TSinkErr> {
|
||||
loop {
|
||||
if let Some(item) = self.pending.pop_front() {
|
||||
let item_len = item.len();
|
||||
if let AsyncSink::NotReady(item) = self.sink.start_send(item)? {
|
||||
self.pending.push_front(item);
|
||||
break
|
||||
} else {
|
||||
trace!(target: "telemetry", "Successfully sent {:?} bytes message to {}",
|
||||
item_len, my_addr);
|
||||
self.need_flush = true;
|
||||
}
|
||||
|
||||
} else if self.need_flush && self.sink.poll_complete()?.is_ready() {
|
||||
self.need_flush = false;
|
||||
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Async::NotReady)
|
||||
}
|
||||
}
|
||||
|
||||
impl<TTrans: Transport> fmt::Debug for Node<TTrans> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
let state = match self.socket {
|
||||
NodeSocket::Connected(_) => "Connected",
|
||||
NodeSocket::Dialing(_) => "Dialing",
|
||||
NodeSocket::ReconnectNow => "Pending reconnect",
|
||||
NodeSocket::WaitingReconnect(_) => "Pending reconnect",
|
||||
NodeSocket::Poisoned => "Poisoned",
|
||||
};
|
||||
|
||||
f.debug_struct("Node")
|
||||
.field("addr", &self.addr)
|
||||
.field("state", &state)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user