rpc: bump jsonrpsee v0.22 and fix race in rpc v2 chain_head (#3230)

Close #2992 

Breaking changes:
- rpc server grafana metric `substrate_rpc_requests_started` is removed
(not possible to implement anymore)
- rpc server grafana metric `substrate_rpc_requests_finished` is removed
(not possible to implement anymore)
- rpc server ws ping/pong not ACK:ed within 30 seconds more than three
times then the connection will be closed

Added
- rpc server grafana metric `substrate_rpc_sessions_time` is added to
get the duration for each websocket session
This commit is contained in:
Niklas Adolfsson
2024-02-14 23:18:22 +01:00
committed by GitHub
parent 7e7c488ba8
commit c7c4fe0184
53 changed files with 777 additions and 468 deletions
+85 -16
View File
@@ -22,21 +22,32 @@
pub mod middleware;
use std::{error::Error as StdError, net::SocketAddr, time::Duration};
use std::{convert::Infallible, error::Error as StdError, net::SocketAddr, time::Duration};
use http::header::HeaderValue;
use hyper::{
server::conn::AddrStream,
service::{make_service_fn, service_fn},
};
use jsonrpsee::{
server::middleware::{HostFilterLayer, ProxyGetRequestLayer},
RpcModule,
server::{
middleware::{
http::{HostFilterLayer, ProxyGetRequestLayer},
rpc::RpcServiceBuilder,
},
stop_channel, ws, PingConfig, StopHandle, TowerServiceBuilder,
},
Methods, RpcModule,
};
use tokio::net::TcpListener;
use tower::Service;
use tower_http::cors::{AllowOrigin, CorsLayer};
pub use crate::middleware::RpcMetrics;
pub use jsonrpsee::core::{
id_providers::{RandomIntegerIdProvider, RandomStringIdProvider},
traits::IdProvider,
};
pub use middleware::{MetricsLayer, RpcMetrics};
const MEGABYTE: u32 = 1024 * 1024;
@@ -92,7 +103,7 @@ pub async fn start_server<M: Send + Sync + 'static>(
let local_addr = std_listener.local_addr().ok();
let host_filter = hosts_filtering(cors.is_some(), local_addr);
let middleware = tower::ServiceBuilder::new()
let http_middleware = tower::ServiceBuilder::new()
.option_layer(host_filter)
// Proxy `GET /health` requests to internal `system_health` method.
.layer(ProxyGetRequestLayer::new("/health", "system_health")?)
@@ -103,10 +114,15 @@ pub async fn start_server<M: Send + Sync + 'static>(
.max_response_body_size(max_payload_out_mb.saturating_mul(MEGABYTE))
.max_connections(max_connections)
.max_subscriptions_per_connection(max_subs_per_conn)
.ping_interval(Duration::from_secs(30))
.set_middleware(middleware)
.enable_ws_ping(
PingConfig::new()
.ping_interval(Duration::from_secs(30))
.inactive_limit(Duration::from_secs(60))
.max_failures(3),
)
.set_http_middleware(http_middleware)
.set_message_buffer_capacity(message_buffer_capacity)
.custom_tokio_runtime(tokio_handle);
.custom_tokio_runtime(tokio_handle.clone());
if let Some(provider) = id_provider {
builder = builder.set_id_provider(provider);
@@ -114,22 +130,66 @@ pub async fn start_server<M: Send + Sync + 'static>(
builder = builder.set_id_provider(RandomStringIdProvider::new(16));
};
let rpc_api = build_rpc_api(rpc_api);
let handle = if let Some(metrics) = metrics {
let server = builder.set_logger(metrics).build_from_tcp(std_listener)?;
server.start(rpc_api)
} else {
let server = builder.build_from_tcp(std_listener)?;
server.start(rpc_api)
let (stop_handle, server_handle) = stop_channel();
let cfg = PerConnection {
methods: build_rpc_api(rpc_api).into(),
service_builder: builder.to_service_builder(),
metrics,
tokio_handle,
stop_handle: stop_handle.clone(),
};
let make_service = make_service_fn(move |_conn: &AddrStream| {
let cfg = cfg.clone();
async move {
let cfg = cfg.clone();
Ok::<_, Infallible>(service_fn(move |req| {
let PerConnection { service_builder, metrics, tokio_handle, stop_handle, methods } =
cfg.clone();
let is_websocket = ws::is_upgrade_request(&req);
let transport_label = if is_websocket { "ws" } else { "http" };
let metrics = metrics.map(|m| MetricsLayer::new(m, transport_label));
let rpc_middleware = RpcServiceBuilder::new().option_layer(metrics.clone());
let mut svc =
service_builder.set_rpc_middleware(rpc_middleware).build(methods, stop_handle);
async move {
if is_websocket {
let on_disconnect = svc.on_session_closed();
// Spawn a task to handle when the connection is closed.
tokio_handle.spawn(async move {
let now = std::time::Instant::now();
metrics.as_ref().map(|m| m.ws_connect());
on_disconnect.await;
metrics.as_ref().map(|m| m.ws_disconnect(now));
});
}
svc.call(req).await
}
}))
}
});
let server = hyper::Server::from_tcp(std_listener)?.serve(make_service);
tokio::spawn(async move {
let graceful = server.with_graceful_shutdown(async move { stop_handle.shutdown().await });
let _ = graceful.await;
});
log::info!(
"Running JSON-RPC server: addr={}, allowed origins={}",
local_addr.map_or_else(|| "unknown".to_string(), |a| a.to_string()),
format_cors(cors)
);
Ok(handle)
Ok(server_handle)
}
fn hosts_filtering(enabled: bool, addr: Option<SocketAddr>) -> Option<HostFilterLayer> {
@@ -185,3 +245,12 @@ fn format_cors(maybe_cors: Option<&Vec<String>>) -> String {
format!("{:?}", ["*"])
}
}
#[derive(Clone)]
struct PerConnection<RpcMiddleware, HttpMiddleware> {
methods: Methods,
stop_handle: StopHandle,
metrics: Option<RpcMetrics>,
tokio_handle: tokio::runtime::Handle,
service_builder: TowerServiceBuilder<RpcMiddleware, HttpMiddleware>,
}
@@ -1,226 +0,0 @@
// This file is part of Substrate.
// Copyright (C) Parity Technologies (UK) Ltd.
// SPDX-License-Identifier: GPL-3.0-or-later WITH Classpath-exception-2.0
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
//! RPC middleware to collect prometheus metrics on RPC calls.
use jsonrpsee::server::logger::{
HttpRequest, Logger, MethodKind, Params, SuccessOrError, TransportProtocol,
};
use prometheus_endpoint::{
register, Counter, CounterVec, HistogramOpts, HistogramVec, Opts, PrometheusError, Registry,
U64,
};
use std::net::SocketAddr;
/// Histogram time buckets in microseconds.
const HISTOGRAM_BUCKETS: [f64; 11] = [
5.0,
25.0,
100.0,
500.0,
1_000.0,
2_500.0,
10_000.0,
25_000.0,
100_000.0,
1_000_000.0,
10_000_000.0,
];
/// Metrics for RPC middleware storing information about the number of requests started/completed,
/// calls started/completed and their timings.
#[derive(Debug, Clone)]
pub struct RpcMetrics {
/// Number of RPC requests received since the server started.
requests_started: CounterVec<U64>,
/// Number of RPC requests completed since the server started.
requests_finished: CounterVec<U64>,
/// Histogram over RPC execution times.
calls_time: HistogramVec,
/// Number of calls started.
calls_started: CounterVec<U64>,
/// Number of calls completed.
calls_finished: CounterVec<U64>,
/// Number of Websocket sessions opened.
ws_sessions_opened: Option<Counter<U64>>,
/// Number of Websocket sessions closed.
ws_sessions_closed: Option<Counter<U64>>,
}
impl RpcMetrics {
/// Create an instance of metrics
pub fn new(metrics_registry: Option<&Registry>) -> Result<Option<Self>, PrometheusError> {
if let Some(metrics_registry) = metrics_registry {
Ok(Some(Self {
requests_started: register(
CounterVec::new(
Opts::new(
"substrate_rpc_requests_started",
"Number of RPC requests (not calls) received by the server.",
),
&["protocol"],
)?,
metrics_registry,
)?,
requests_finished: register(
CounterVec::new(
Opts::new(
"substrate_rpc_requests_finished",
"Number of RPC requests (not calls) processed by the server.",
),
&["protocol"],
)?,
metrics_registry,
)?,
calls_time: register(
HistogramVec::new(
HistogramOpts::new(
"substrate_rpc_calls_time",
"Total time [μs] of processed RPC calls",
)
.buckets(HISTOGRAM_BUCKETS.to_vec()),
&["protocol", "method"],
)?,
metrics_registry,
)?,
calls_started: register(
CounterVec::new(
Opts::new(
"substrate_rpc_calls_started",
"Number of received RPC calls (unique un-batched requests)",
),
&["protocol", "method"],
)?,
metrics_registry,
)?,
calls_finished: register(
CounterVec::new(
Opts::new(
"substrate_rpc_calls_finished",
"Number of processed RPC calls (unique un-batched requests)",
),
&["protocol", "method", "is_error"],
)?,
metrics_registry,
)?,
ws_sessions_opened: register(
Counter::new(
"substrate_rpc_sessions_opened",
"Number of persistent RPC sessions opened",
)?,
metrics_registry,
)?
.into(),
ws_sessions_closed: register(
Counter::new(
"substrate_rpc_sessions_closed",
"Number of persistent RPC sessions closed",
)?,
metrics_registry,
)?
.into(),
}))
} else {
Ok(None)
}
}
}
impl Logger for RpcMetrics {
type Instant = std::time::Instant;
fn on_connect(
&self,
_remote_addr: SocketAddr,
_request: &HttpRequest,
transport: TransportProtocol,
) {
if let TransportProtocol::WebSocket = transport {
self.ws_sessions_opened.as_ref().map(|counter| counter.inc());
}
}
fn on_request(&self, transport: TransportProtocol) -> Self::Instant {
let transport_label = transport_label_str(transport);
let now = std::time::Instant::now();
self.requests_started.with_label_values(&[transport_label]).inc();
now
}
fn on_call(&self, name: &str, params: Params, kind: MethodKind, transport: TransportProtocol) {
let transport_label = transport_label_str(transport);
log::trace!(
target: "rpc_metrics",
"[{}] on_call name={} params={:?} kind={}",
transport_label,
name,
params,
kind,
);
self.calls_started.with_label_values(&[transport_label, name]).inc();
}
fn on_result(
&self,
name: &str,
success_or_error: SuccessOrError,
started_at: Self::Instant,
transport: TransportProtocol,
) {
let transport_label = transport_label_str(transport);
let micros = started_at.elapsed().as_micros();
log::debug!(
target: "rpc_metrics",
"[{}] {} call took {} μs",
transport_label,
name,
micros,
);
self.calls_time.with_label_values(&[transport_label, name]).observe(micros as _);
self.calls_finished
.with_label_values(&[
transport_label,
name,
// the label "is_error", so `success` should be regarded as false
// and vice-versa to be registrered correctly.
if success_or_error.is_success() { "false" } else { "true" },
])
.inc();
}
fn on_response(&self, result: &str, started_at: Self::Instant, transport: TransportProtocol) {
let transport_label = transport_label_str(transport);
log::trace!(target: "rpc_metrics", "[{}] on_response started_at={:?}", transport_label, started_at);
log::trace!(target: "rpc_metrics::extra", "[{}] result={:?}", transport_label, result);
self.requests_finished.with_label_values(&[transport_label]).inc();
}
fn on_disconnect(&self, _remote_addr: SocketAddr, transport: TransportProtocol) {
if let TransportProtocol::WebSocket = transport {
self.ws_sessions_closed.as_ref().map(|counter| counter.inc());
}
}
}
fn transport_label_str(t: TransportProtocol) -> &'static str {
match t {
TransportProtocol::Http => "http",
TransportProtocol::WebSocket => "ws",
}
}
@@ -0,0 +1,281 @@
// This file is part of Substrate.
// Copyright (C) Parity Technologies (UK) Ltd.
// SPDX-License-Identifier: GPL-3.0-or-later WITH Classpath-exception-2.0
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
//! RPC middleware to collect prometheus metrics on RPC calls.
use std::{
future::Future,
pin::Pin,
task::{Context, Poll},
time::Instant,
};
use jsonrpsee::{server::middleware::rpc::RpcServiceT, types::Request, MethodResponse};
use pin_project::pin_project;
use prometheus_endpoint::{
register, Counter, CounterVec, HistogramOpts, HistogramVec, Opts, PrometheusError, Registry,
U64,
};
/// Histogram time buckets in microseconds.
const HISTOGRAM_BUCKETS: [f64; 11] = [
5.0,
25.0,
100.0,
500.0,
1_000.0,
2_500.0,
10_000.0,
25_000.0,
100_000.0,
1_000_000.0,
10_000_000.0,
];
/// Metrics for RPC middleware storing information about the number of requests started/completed,
/// calls started/completed and their timings.
#[derive(Debug, Clone)]
pub struct RpcMetrics {
/// Histogram over RPC execution times.
calls_time: HistogramVec,
/// Number of calls started.
calls_started: CounterVec<U64>,
/// Number of calls completed.
calls_finished: CounterVec<U64>,
/// Number of Websocket sessions opened.
ws_sessions_opened: Option<Counter<U64>>,
/// Number of Websocket sessions closed.
ws_sessions_closed: Option<Counter<U64>>,
/// Histogram over RPC websocket sessions.
ws_sessions_time: HistogramVec,
}
impl RpcMetrics {
/// Create an instance of metrics
pub fn new(metrics_registry: Option<&Registry>) -> Result<Option<Self>, PrometheusError> {
if let Some(metrics_registry) = metrics_registry {
Ok(Some(Self {
calls_time: register(
HistogramVec::new(
HistogramOpts::new(
"substrate_rpc_calls_time",
"Total time [μs] of processed RPC calls",
)
.buckets(HISTOGRAM_BUCKETS.to_vec()),
&["protocol", "method"],
)?,
metrics_registry,
)?,
calls_started: register(
CounterVec::new(
Opts::new(
"substrate_rpc_calls_started",
"Number of received RPC calls (unique un-batched requests)",
),
&["protocol", "method"],
)?,
metrics_registry,
)?,
calls_finished: register(
CounterVec::new(
Opts::new(
"substrate_rpc_calls_finished",
"Number of processed RPC calls (unique un-batched requests)",
),
&["protocol", "method", "is_error"],
)?,
metrics_registry,
)?,
ws_sessions_opened: register(
Counter::new(
"substrate_rpc_sessions_opened",
"Number of persistent RPC sessions opened",
)?,
metrics_registry,
)?
.into(),
ws_sessions_closed: register(
Counter::new(
"substrate_rpc_sessions_closed",
"Number of persistent RPC sessions closed",
)?,
metrics_registry,
)?
.into(),
ws_sessions_time: register(
HistogramVec::new(
HistogramOpts::new(
"substrate_rpc_sessions_time",
"Total time [s] for each websocket session",
)
.buckets(HISTOGRAM_BUCKETS.to_vec()),
&["protocol"],
)?,
metrics_registry,
)?,
}))
} else {
Ok(None)
}
}
pub(crate) fn ws_connect(&self) {
self.ws_sessions_opened.as_ref().map(|counter| counter.inc());
}
pub(crate) fn ws_disconnect(&self, now: Instant) {
let micros = now.elapsed().as_secs();
self.ws_sessions_closed.as_ref().map(|counter| counter.inc());
self.ws_sessions_time.with_label_values(&["ws"]).observe(micros as _);
}
}
/// Metrics layer.
#[derive(Clone)]
pub struct MetricsLayer {
inner: RpcMetrics,
transport_label: &'static str,
}
impl MetricsLayer {
/// Create a new [`MetricsLayer`].
pub fn new(metrics: RpcMetrics, transport_label: &'static str) -> Self {
Self { inner: metrics, transport_label }
}
pub(crate) fn ws_connect(&self) {
self.inner.ws_connect();
}
pub(crate) fn ws_disconnect(&self, now: Instant) {
self.inner.ws_disconnect(now)
}
}
impl<S> tower::Layer<S> for MetricsLayer {
type Service = Metrics<S>;
fn layer(&self, inner: S) -> Self::Service {
Metrics::new(inner, self.inner.clone(), self.transport_label)
}
}
/// Metrics middleware.
#[derive(Clone)]
pub struct Metrics<S> {
service: S,
metrics: RpcMetrics,
transport_label: &'static str,
}
impl<S> Metrics<S> {
/// Create a new metrics middleware.
pub fn new(service: S, metrics: RpcMetrics, transport_label: &'static str) -> Metrics<S> {
Metrics { service, metrics, transport_label }
}
}
impl<'a, S> RpcServiceT<'a> for Metrics<S>
where
S: Send + Sync + RpcServiceT<'a>,
{
type Future = ResponseFuture<'a, S::Future>;
fn call(&self, req: Request<'a>) -> Self::Future {
let now = Instant::now();
log::trace!(
target: "rpc_metrics",
"[{}] on_call name={} params={:?}",
self.transport_label,
req.method_name(),
req.params(),
);
self.metrics
.calls_started
.with_label_values(&[self.transport_label, req.method_name()])
.inc();
ResponseFuture {
fut: self.service.call(req.clone()),
metrics: self.metrics.clone(),
req,
now,
transport_label: self.transport_label,
}
}
}
/// Response future for metrics.
#[pin_project]
pub struct ResponseFuture<'a, F> {
#[pin]
fut: F,
metrics: RpcMetrics,
req: Request<'a>,
now: Instant,
transport_label: &'static str,
}
impl<'a, F> std::fmt::Debug for ResponseFuture<'a, F> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str("ResponseFuture")
}
}
impl<'a, F: Future<Output = MethodResponse>> Future for ResponseFuture<'a, F> {
type Output = F::Output;
fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
let this = self.project();
let res = this.fut.poll(cx);
if let Poll::Ready(rp) = &res {
let method_name = this.req.method_name();
let transport_label = &this.transport_label;
let now = this.now;
let metrics = &this.metrics;
log::trace!(target: "rpc_metrics", "[{transport_label}] on_response started_at={:?}", now);
log::trace!(target: "rpc_metrics::extra", "[{transport_label}] result={:?}", rp);
let micros = now.elapsed().as_micros();
log::debug!(
target: "rpc_metrics",
"[{transport_label}] {method_name} call took {} μs",
micros,
);
metrics
.calls_time
.with_label_values(&[transport_label, method_name])
.observe(micros as _);
metrics
.calls_finished
.with_label_values(&[
transport_label,
method_name,
// the label "is_error", so `success` should be regarded as false
// and vice-versa to be registrered correctly.
if rp.is_success() { "false" } else { "true" },
])
.inc();
}
res
}
}
@@ -0,0 +1,23 @@
// This file is part of Substrate.
// Copyright (C) Parity Technologies (UK) Ltd.
// SPDX-License-Identifier: GPL-3.0-or-later WITH Classpath-exception-2.0
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
//! JSON-RPC specific middleware.
pub mod metrics;
pub use metrics::*;