Extract common part of relay loops (#660)

* extract common parts of relay loops: begin

* merge client impls

* backoff in exchange loop

* reconnect without clone
This commit is contained in:
Svyatoslav Nikolsky
2021-01-26 17:14:33 +03:00
committed by Bastian Köcher
parent 926520292e
commit 44bf84269a
23 changed files with 1016 additions and 776 deletions
+1
View File
@@ -29,6 +29,7 @@ pub const CONNECTION_ERROR_DELAY: Duration = Duration::from_secs(10);
pub mod initialize;
pub mod metrics;
pub mod relay_loop;
/// Block number traits shared by all chains that relay is able to serve.
pub trait BlockNumberBase:
+9 -7
View File
@@ -16,6 +16,7 @@
pub use substrate_prometheus_endpoint::{register, Counter, CounterVec, Gauge, GaugeVec, Opts, Registry, F64, U64};
use async_std::sync::{Arc, Mutex};
use std::net::SocketAddr;
use substrate_prometheus_endpoint::init_prometheus;
use sysinfo::{ProcessExt, RefreshKind, System, SystemExt};
@@ -36,9 +37,9 @@ pub trait Metrics {
}
/// Global Prometheus metrics.
#[derive(Debug)]
#[derive(Debug, Clone)]
pub struct GlobalMetrics {
system: System,
system: Arc<Mutex<System>>,
system_average_load: GaugeVec<F64>,
process_cpu_usage_percentage: Gauge<F64>,
process_memory_usage_bytes: Gauge<U64>,
@@ -110,7 +111,7 @@ impl Metrics for GlobalMetrics {
impl Default for GlobalMetrics {
fn default() -> Self {
GlobalMetrics {
system: System::new_with_specifics(RefreshKind::everything()),
system: Arc::new(Mutex::new(System::new_with_specifics(RefreshKind::everything()))),
system_average_load: GaugeVec::new(Opts::new("system_average_load", "System load average"), &["over"])
.expect("metric is static and thus valid; qed"),
process_cpu_usage_percentage: Gauge::new("process_cpu_usage_percentage", "Process CPU usage")
@@ -126,9 +127,10 @@ impl Default for GlobalMetrics {
impl GlobalMetrics {
/// Update metrics.
pub fn update(&mut self) {
pub async fn update(&self) {
// update system-wide metrics
let load = self.system.get_load_average();
let mut system = self.system.lock().await;
let load = system.get_load_average();
self.system_average_load.with_label_values(&["1min"]).set(load.one);
self.system_average_load.with_label_values(&["5min"]).set(load.five);
self.system_average_load.with_label_values(&["15min"]).set(load.fifteen);
@@ -139,8 +141,8 @@ impl GlobalMetrics {
relay is not supposed to run in such MetricsParamss;\
qed",
);
let is_process_refreshed = self.system.refresh_process(pid);
match (is_process_refreshed, self.system.get_process(pid)) {
let is_process_refreshed = system.refresh_process(pid);
match (is_process_refreshed, system.get_process(pid)) {
(true, Some(process_info)) => {
let cpu_usage = process_info.cpu_usage() as f64;
let memory_usage = process_info.memory() * 1024;
+95
View File
@@ -0,0 +1,95 @@
// Copyright 2019-2020 Parity Technologies (UK) Ltd.
// This file is part of Parity Bridges Common.
// Parity Bridges Common is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Parity Bridges Common is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Parity Bridges Common. If not, see <http://www.gnu.org/licenses/>.
use crate::{FailedClient, MaybeConnectionError};
use async_trait::async_trait;
use std::{fmt::Debug, future::Future, time::Duration};
/// Default pause between reconnect attempts.
pub const RECONNECT_DELAY: Duration = Duration::from_secs(10);
/// Basic blockchain client from relay perspective.
#[async_trait]
pub trait Client: Clone + Send + Sync {
/// Type of error this clients returns.
type Error: Debug + MaybeConnectionError;
/// Try to reconnect to source node.
async fn reconnect(&mut self) -> Result<(), Self::Error>;
}
/// Run relay loop.
///
/// This function represents an outer loop, which in turn calls provided `loop_run` function to do
/// actual job. When `loop_run` returns, this outer loop reconnects to failed client (source,
/// target or both) and calls `loop_run` again.
pub fn run<SC: Client, TC: Client, R, F>(
reconnect_delay: Duration,
mut source_client: SC,
mut target_client: TC,
loop_run: R,
) where
R: Fn(SC, TC) -> F,
F: Future<Output = Result<(), FailedClient>>,
{
let mut local_pool = futures::executor::LocalPool::new();
local_pool.run_until(async move {
loop {
let result = loop_run(source_client.clone(), target_client.clone()).await;
match result {
Ok(()) => break,
Err(failed_client) => loop {
async_std::task::sleep(reconnect_delay).await;
if failed_client == FailedClient::Both || failed_client == FailedClient::Source {
match source_client.reconnect().await {
Ok(()) => (),
Err(error) => {
log::warn!(
target: "bridge",
"Failed to reconnect to source client. Going to retry in {}s: {:?}",
reconnect_delay.as_secs(),
error,
);
continue;
}
}
}
if failed_client == FailedClient::Both || failed_client == FailedClient::Target {
match target_client.reconnect().await {
Ok(()) => (),
Err(error) => {
log::warn!(
target: "bridge",
"Failed to reconnect to target client. Going to retry in {}s: {:?}",
reconnect_delay.as_secs(),
error,
);
continue;
}
}
}
break;
},
}
log::debug!(target: "bridge", "Restarting relay loop");
}
});
}