mirror of
https://github.com/pezkuwichain/pezkuwi-subxt.git
synced 2026-06-12 13:31:10 +00:00
Alert on frequent network errors (#7410)
* Introduce is_frequent util
* Add dirty warn_if_frequent! implementation
* Add freq
* Fix order in condition
* Update
* Update docs
* Fix
* Remove old impl
* Fix errors
* Add wif to av-distr
* Add wif to col prot
* Rename
* Add wif to state-distr
* Address review comments
* Change Freq implementation
* Remove the zero division check
* Make rate explicit
* Fix typo
* Update rate constant
* Introduce explicit rates
* Update docs
* Split errors freq
* Downgrade coarsetime
This commit is contained in:
@@ -91,7 +91,11 @@ pub type Result<T> = std::result::Result<T, Error>;
|
||||
///
|
||||
/// We basically always want to try and continue on error. This utility function is meant to
|
||||
/// consume top-level errors by simply logging them
|
||||
pub fn log_error(result: Result<()>, ctx: &'static str) -> std::result::Result<(), FatalError> {
|
||||
pub fn log_error(
|
||||
result: Result<()>,
|
||||
ctx: &'static str,
|
||||
warn_freq: &mut gum::Freq,
|
||||
) -> std::result::Result<(), FatalError> {
|
||||
match result.into_nested()? {
|
||||
Ok(()) => Ok(()),
|
||||
Err(jfyi) => {
|
||||
@@ -104,7 +108,8 @@ pub fn log_error(result: Result<()>, ctx: &'static str) -> std::result::Result<(
|
||||
JfyiError::FetchPoV(_) |
|
||||
JfyiError::SendResponse |
|
||||
JfyiError::NoSuchPoV |
|
||||
JfyiError::Runtime(_) => gum::debug!(target: LOG_TARGET, error = ?jfyi, ctx),
|
||||
JfyiError::Runtime(_) =>
|
||||
gum::warn_if_frequent!(freq: warn_freq, max_rate: gum::Times::PerHour(100), target: LOG_TARGET, error = ?jfyi, ctx),
|
||||
}
|
||||
Ok(())
|
||||
},
|
||||
|
||||
@@ -97,6 +97,7 @@ impl AvailabilityDistributionSubsystem {
|
||||
|
||||
let IncomingRequestReceivers { pov_req_receiver, chunk_req_receiver } = recvs;
|
||||
let mut requester = Requester::new(metrics.clone()).fuse();
|
||||
let mut warn_freq = gum::Freq::new();
|
||||
|
||||
{
|
||||
let sender = ctx.sender().clone();
|
||||
@@ -147,6 +148,7 @@ impl AvailabilityDistributionSubsystem {
|
||||
.update_fetching_heads(&mut ctx, &mut runtime, update, &spans)
|
||||
.await,
|
||||
"Error in Requester::update_fetching_heads",
|
||||
&mut warn_freq,
|
||||
)?;
|
||||
},
|
||||
FromOrchestra::Signal(OverseerSignal::BlockFinalized(hash, _)) => {
|
||||
@@ -188,6 +190,7 @@ impl AvailabilityDistributionSubsystem {
|
||||
)
|
||||
.await,
|
||||
"pov_requester::fetch_pov",
|
||||
&mut warn_freq,
|
||||
)?;
|
||||
},
|
||||
}
|
||||
|
||||
@@ -260,6 +260,8 @@ impl RunningTask {
|
||||
let mut succeeded = false;
|
||||
let mut count: u32 = 0;
|
||||
let mut span = self.span.child("run-fetch-chunk-task").with_relay_parent(self.relay_parent);
|
||||
let mut network_error_freq = gum::Freq::new();
|
||||
let mut canceled_freq = gum::Freq::new();
|
||||
// Try validators in reverse order:
|
||||
while let Some(validator) = self.group.pop() {
|
||||
// Report retries:
|
||||
@@ -272,7 +274,10 @@ impl RunningTask {
|
||||
.with_chunk_index(self.request.index.0)
|
||||
.with_stage(jaeger::Stage::AvailabilityDistribution);
|
||||
// Send request:
|
||||
let resp = match self.do_request(&validator).await {
|
||||
let resp = match self
|
||||
.do_request(&validator, &mut network_error_freq, &mut canceled_freq)
|
||||
.await
|
||||
{
|
||||
Ok(resp) => resp,
|
||||
Err(TaskError::ShuttingDown) => {
|
||||
gum::info!(
|
||||
@@ -342,6 +347,8 @@ impl RunningTask {
|
||||
async fn do_request(
|
||||
&mut self,
|
||||
validator: &AuthorityDiscoveryId,
|
||||
nerwork_error_freq: &mut gum::Freq,
|
||||
canceled_freq: &mut gum::Freq,
|
||||
) -> std::result::Result<ChunkFetchingResponse, TaskError> {
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
@@ -386,7 +393,9 @@ impl RunningTask {
|
||||
Err(TaskError::PeerError)
|
||||
},
|
||||
Err(RequestError::NetworkError(err)) => {
|
||||
gum::debug!(
|
||||
gum::warn_if_frequent!(
|
||||
freq: nerwork_error_freq,
|
||||
max_rate: gum::Times::PerHour(100),
|
||||
target: LOG_TARGET,
|
||||
origin = ?validator,
|
||||
relay_parent = ?self.relay_parent,
|
||||
@@ -400,7 +409,9 @@ impl RunningTask {
|
||||
Err(TaskError::PeerError)
|
||||
},
|
||||
Err(RequestError::Canceled(oneshot::Canceled)) => {
|
||||
gum::debug!(
|
||||
gum::warn_if_frequent!(
|
||||
freq: canceled_freq,
|
||||
max_rate: gum::Times::PerHour(100),
|
||||
target: LOG_TARGET,
|
||||
origin = ?validator,
|
||||
relay_parent = ?self.relay_parent,
|
||||
|
||||
Reference in New Issue
Block a user