rpc server: add prometheus label is_rate_limited (#3504)

After some discussion with @kogeler after the we added the rate-limit
middleware it may slow down
the rpc call timings metrics significantly because it works as follows:

1. The rate limit guard is checked when the call comes and if a slot is
available -> process the call
2. If no free spot is available then the call will be sleeping
`jitter_delay + min_time_rate_guard` then woken up and checked at most
ten times
3. If no spot is available after 10 iterations -> the call is rejected
(this may take tens of seconds)

Thus, this PR adds a label "is_rate_limited" to filter those out on the
metrics "substrate_rpc_calls_time" and "substrate_rpc_calls_finished".

I had to merge two middleware layers Metrics and RateLimit to avoid
shared state in a hacky way.

---------

Co-authored-by: James Wilson <james@jsdw.me>
This commit is contained in:
Niklas Adolfsson
2024-03-05 10:50:57 +01:00
committed by GitHub
parent 0eda0b3f17
commit efcea0edab
7 changed files with 240 additions and 213 deletions
+17 -8
View File
@@ -49,7 +49,7 @@ pub use jsonrpsee::{
},
server::{middleware::rpc::RpcServiceBuilder, BatchRequestConfig},
};
pub use middleware::{MetricsLayer, RateLimitLayer, RpcMetrics};
pub use middleware::{Metrics, MiddlewareLayer, RpcMetrics};
const MEGABYTE: u32 = 1024 * 1024;
@@ -173,13 +173,22 @@ where
let is_websocket = ws::is_upgrade_request(&req);
let transport_label = if is_websocket { "ws" } else { "http" };
let metrics = metrics.map(|m| MetricsLayer::new(m, transport_label));
let rate_limit = rate_limit.map(|r| RateLimitLayer::per_minute(r));
let middleware_layer = match (metrics, rate_limit) {
(None, None) => None,
(Some(metrics), None) => Some(
MiddlewareLayer::new().with_metrics(Metrics::new(metrics, transport_label)),
),
(None, Some(rate_limit)) =>
Some(MiddlewareLayer::new().with_rate_limit_per_minute(rate_limit)),
(Some(metrics), Some(rate_limit)) => Some(
MiddlewareLayer::new()
.with_metrics(Metrics::new(metrics, transport_label))
.with_rate_limit_per_minute(rate_limit),
),
};
// NOTE: The metrics needs to run first to include rate-limited calls in the
// metrics.
let rpc_middleware =
RpcServiceBuilder::new().option_layer(metrics.clone()).option_layer(rate_limit);
RpcServiceBuilder::new().option_layer(middleware_layer.clone());
let mut svc =
service_builder.set_rpc_middleware(rpc_middleware).build(methods, stop_handle);
@@ -191,9 +200,9 @@ where
// Spawn a task to handle when the connection is closed.
tokio_handle.spawn(async move {
let now = std::time::Instant::now();
metrics.as_ref().map(|m| m.ws_connect());
middleware_layer.as_ref().map(|m| m.ws_connect());
on_disconnect.await;
metrics.as_ref().map(|m| m.ws_disconnect(now));
middleware_layer.as_ref().map(|m| m.ws_disconnect(now));
});
}