mirror of
https://github.com/pezkuwichain/pezkuwi-subxt.git
synced 2026-06-26 07:41:15 +00:00
rpc server: add prometheus label is_rate_limited (#3504)
After some discussion with @kogeler: since we added the rate-limit middleware, it may slow down the RPC call timing metrics significantly, because it works as follows: 1. The rate limit guard is checked when the call comes in, and if a slot is available -> the call is processed. 2. If no free slot is available, the call sleeps for `jitter_delay + min_time_rate_guard`, then is woken up and checked again, at most ten times. 3. If no slot is available after 10 iterations -> the call is rejected (this may take tens of seconds). Thus, this PR adds a label "is_rate_limited" to filter those out on the metrics "substrate_rpc_calls_time" and "substrate_rpc_calls_finished". I had to merge the two middleware layers, Metrics and RateLimit, to avoid sharing state in a hacky way. --------- Co-authored-by: James Wilson <james@jsdw.me>
This commit is contained in:
@@ -18,10 +18,131 @@
|
||||
|
||||
//! JSON-RPC specific middleware.
|
||||
|
||||
/// Grafana metrics middleware.
|
||||
pub mod metrics;
|
||||
/// Rate limit middleware.
|
||||
pub mod rate_limit;
|
||||
use std::{
|
||||
num::NonZeroU32,
|
||||
time::{Duration, Instant},
|
||||
};
|
||||
|
||||
use futures::future::{BoxFuture, FutureExt};
|
||||
use governor::{clock::Clock, Jitter};
|
||||
use jsonrpsee::{
|
||||
server::middleware::rpc::RpcServiceT,
|
||||
types::{ErrorObject, Id, Request},
|
||||
MethodResponse,
|
||||
};
|
||||
|
||||
mod metrics;
|
||||
mod rate_limit;
|
||||
|
||||
pub use metrics::*;
|
||||
pub use rate_limit::*;
|
||||
|
||||
const MAX_JITTER: Duration = Duration::from_millis(50);
|
||||
const MAX_RETRIES: usize = 10;
|
||||
|
||||
/// JSON-RPC middleware layer.
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub struct MiddlewareLayer {
|
||||
rate_limit: Option<RateLimit>,
|
||||
metrics: Option<Metrics>,
|
||||
}
|
||||
|
||||
impl MiddlewareLayer {
|
||||
/// Create an empty MiddlewareLayer.
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
|
||||
/// Enable new rate limit middleware enforced per minute.
|
||||
pub fn with_rate_limit_per_minute(self, n: NonZeroU32) -> Self {
|
||||
Self { rate_limit: Some(RateLimit::per_minute(n)), metrics: self.metrics }
|
||||
}
|
||||
|
||||
/// Enable metrics middleware.
|
||||
pub fn with_metrics(self, metrics: Metrics) -> Self {
|
||||
Self { rate_limit: self.rate_limit, metrics: Some(metrics) }
|
||||
}
|
||||
|
||||
/// Register a new websocket connection.
|
||||
pub fn ws_connect(&self) {
|
||||
self.metrics.as_ref().map(|m| m.ws_connect());
|
||||
}
|
||||
|
||||
/// Register that a websocket connection was closed.
|
||||
pub fn ws_disconnect(&self, now: Instant) {
|
||||
self.metrics.as_ref().map(|m| m.ws_disconnect(now));
|
||||
}
|
||||
}
|
||||
|
||||
impl<S> tower::Layer<S> for MiddlewareLayer {
|
||||
type Service = Middleware<S>;
|
||||
|
||||
fn layer(&self, service: S) -> Self::Service {
|
||||
Middleware { service, rate_limit: self.rate_limit.clone(), metrics: self.metrics.clone() }
|
||||
}
|
||||
}
|
||||
|
||||
/// JSON-RPC middleware that handles metrics
|
||||
/// and rate-limiting.
|
||||
///
|
||||
/// These are part of the same middleware
|
||||
/// because the metrics needs to know whether
|
||||
/// a call was rate-limited or not because
|
||||
/// it will impact the roundtrip for a call.
|
||||
pub struct Middleware<S> {
|
||||
service: S,
|
||||
rate_limit: Option<RateLimit>,
|
||||
metrics: Option<Metrics>,
|
||||
}
|
||||
|
||||
impl<'a, S> RpcServiceT<'a> for Middleware<S>
|
||||
where
|
||||
S: Send + Sync + RpcServiceT<'a> + Clone + 'static,
|
||||
{
|
||||
type Future = BoxFuture<'a, MethodResponse>;
|
||||
|
||||
fn call(&self, req: Request<'a>) -> Self::Future {
|
||||
let now = Instant::now();
|
||||
|
||||
self.metrics.as_ref().map(|m| m.on_call(&req));
|
||||
|
||||
let service = self.service.clone();
|
||||
let rate_limit = self.rate_limit.clone();
|
||||
let metrics = self.metrics.clone();
|
||||
|
||||
async move {
|
||||
let mut is_rate_limited = false;
|
||||
|
||||
if let Some(limit) = rate_limit.as_ref() {
|
||||
let mut attempts = 0;
|
||||
let jitter = Jitter::up_to(MAX_JITTER);
|
||||
|
||||
loop {
|
||||
if attempts >= MAX_RETRIES {
|
||||
return reject_too_many_calls(req.id);
|
||||
}
|
||||
|
||||
if let Err(rejected) = limit.inner.check() {
|
||||
tokio::time::sleep(jitter + rejected.wait_time_from(limit.clock.now()))
|
||||
.await;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
|
||||
is_rate_limited = true;
|
||||
attempts += 1;
|
||||
}
|
||||
}
|
||||
|
||||
let rp = service.call(req.clone()).await;
|
||||
metrics.as_ref().map(|m| m.on_response(&req, &rp, is_rate_limited, now));
|
||||
|
||||
rp
|
||||
}
|
||||
.boxed()
|
||||
}
|
||||
}
|
||||
|
||||
fn reject_too_many_calls(id: Id) -> MethodResponse {
|
||||
MethodResponse::error(id, ErrorObject::owned(-32999, "RPC rate limit exceeded", None::<()>))
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user