mirror of
https://github.com/pezkuwichain/pezkuwi-subxt.git
synced 2026-06-11 16:31:07 +00:00
observability: add two more timers (#5124)
* add two more timers * Update node/network/availability-recovery/src/metrics.rs * Try to improve comments spelling * Cargo fmt iteration Co-authored-by: Vsevolod Stakhov <vsevolod.stakhov@parity.io>
This commit is contained in:
committed by
GitHub
parent
2c8ea1e0e4
commit
d309a24e50
@@ -458,6 +458,8 @@ impl RequestChunksFromValidators {
|
||||
params: &RecoveryParams,
|
||||
sender: &mut impl SubsystemSender,
|
||||
) -> Result<AvailableData, RecoveryError> {
|
||||
let metrics = &params.metrics;
|
||||
|
||||
// First query the store for any chunks we've got.
|
||||
{
|
||||
let (tx, rx) = oneshot::channel();
|
||||
@@ -504,6 +506,7 @@ impl RequestChunksFromValidators {
|
||||
return Err(RecoveryError::Unavailable)
|
||||
}
|
||||
|
||||
let recovery_possible = metrics.time_erasure_recovery_becomes_possible();
|
||||
self.launch_parallel_requests(params, sender).await;
|
||||
self.wait_for_chunks(params).await;
|
||||
|
||||
@@ -511,6 +514,9 @@ impl RequestChunksFromValidators {
|
||||
// If that fails, or a re-encoding of it doesn't match the expected erasure root,
|
||||
// return Err(RecoveryError::Invalid)
|
||||
if self.received_chunks.len() >= params.threshold {
|
||||
drop(recovery_possible);
|
||||
let recovery_duration = metrics.time_erasure_recovery();
|
||||
|
||||
return match polkadot_erasure_coding::reconstruct_v1(
|
||||
params.validators.len(),
|
||||
self.received_chunks.values().map(|c| (&c.chunk[..], c.index.0 as usize)),
|
||||
@@ -530,6 +536,7 @@ impl RequestChunksFromValidators {
|
||||
|
||||
Ok(data)
|
||||
} else {
|
||||
recovery_duration.map(|rd| rd.stop_and_discard());
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
candidate_hash = ?params.candidate_hash,
|
||||
@@ -541,6 +548,7 @@ impl RequestChunksFromValidators {
|
||||
}
|
||||
},
|
||||
Err(err) => {
|
||||
recovery_duration.map(|rd| rd.stop_and_discard());
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
candidate_hash = ?params.candidate_hash,
|
||||
@@ -552,6 +560,8 @@ impl RequestChunksFromValidators {
|
||||
Err(RecoveryError::Invalid)
|
||||
},
|
||||
}
|
||||
} else {
|
||||
recovery_possible.map(|rp| rp.stop_and_discard());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -14,12 +14,9 @@
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use polkadot_node_subsystem_util::{
|
||||
metrics,
|
||||
metrics::{
|
||||
prometheus,
|
||||
prometheus::{Counter, CounterVec, Opts, PrometheusError, Registry, U64},
|
||||
},
|
||||
use polkadot_node_subsystem_util::metrics::{
|
||||
self,
|
||||
prometheus::{self, Counter, CounterVec, Histogram, Opts, PrometheusError, Registry, U64},
|
||||
};
|
||||
|
||||
/// Availability Distribution metrics.
|
||||
@@ -42,8 +39,15 @@ struct MetricsInner {
|
||||
/// - `invalid` ... Chunk was received, but not valid.
|
||||
/// - `success`
|
||||
chunk_requests_finished: CounterVec<U64>,
|
||||
|
||||
/// The duration of request to response.
|
||||
time_chunk_request: prometheus::Histogram,
|
||||
time_chunk_request: Histogram,
|
||||
|
||||
/// The duration of the pure erasure recovery together with its verification.
|
||||
time_erasure_recovery: Histogram,
|
||||
|
||||
/// The duration between the first request and the time when we have a sufficient number of chunks to recover.
|
||||
time_erasure_recovery_becomes_possible: Histogram,
|
||||
}
|
||||
|
||||
impl Metrics {
|
||||
@@ -93,10 +97,25 @@ impl Metrics {
|
||||
metrics.chunk_requests_finished.with_label_values(&["success"]).inc()
|
||||
}
|
||||
}
|
||||
|
||||
/// Get a timer to time request/response duration.
|
||||
pub fn time_chunk_request(&self) -> Option<metrics::prometheus::prometheus::HistogramTimer> {
|
||||
self.0.as_ref().map(|metrics| metrics.time_chunk_request.start_timer())
|
||||
}
|
||||
|
||||
/// Get a timer to time erasure code recovery.
|
||||
pub fn time_erasure_recovery(&self) -> Option<metrics::prometheus::prometheus::HistogramTimer> {
|
||||
self.0.as_ref().map(|metrics| metrics.time_erasure_recovery.start_timer())
|
||||
}
|
||||
|
||||
/// Get a timer to measure the duration until a sufficient number of chunks is available to attempt recovery.
|
||||
pub fn time_erasure_recovery_becomes_possible(
|
||||
&self,
|
||||
) -> Option<metrics::prometheus::prometheus::HistogramTimer> {
|
||||
self.0
|
||||
.as_ref()
|
||||
.map(|metrics| metrics.time_erasure_recovery_becomes_possible.start_timer())
|
||||
}
|
||||
}
|
||||
|
||||
impl metrics::Metrics for Metrics {
|
||||
@@ -126,6 +145,20 @@ impl metrics::Metrics for Metrics {
|
||||
))?,
|
||||
registry,
|
||||
)?,
|
||||
time_erasure_recovery: prometheus::register(
|
||||
prometheus::Histogram::with_opts(prometheus::HistogramOpts::new(
|
||||
"polkadot_parachain_availability_recovery_time_erasure_recovery",
|
||||
"Time spent to recover the erasure code and verify the merkle root by re-encoding as erasure chunks",
|
||||
))?,
|
||||
registry,
|
||||
)?,
|
||||
time_erasure_recovery_becomes_possible: prometheus::register(
|
||||
prometheus::Histogram::with_opts(prometheus::HistogramOpts::new(
|
||||
"polkadot_parachain_availability_recovery_time_erasure_recovery_becomes_possible",
|
||||
"Time spent launching the first request until a sufficient amount of chunks was recovered",
|
||||
))?,
|
||||
registry,
|
||||
)?,
|
||||
};
|
||||
Ok(Metrics(Some(metrics)))
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user