Add one Jaeger span per relay parent (#2196)

* Add one Jaeger span per relay parent

This adds one Jaeger span per relay parent, instead of always creating
new spans per relay parent. This should improve the UI view, because
subsystems are now grouped below one common span.

* Fix doc tests

* Replace `PerLeaveSpan` to `PerLeafSpan`

* More renaming

* Moare

* Update node/subsystem/src/lib.rs

Co-authored-by: Andronik Ordian <write@reusable.software>

* Skip the spans

* Increase `spec_version`

Co-authored-by: Andronik Ordian <write@reusable.software>
This commit is contained in:
Bastian Köcher
2021-01-05 15:09:25 +01:00
committed by GitHub
parent ceb9e2161c
commit 5be092894e
32 changed files with 535 additions and 322 deletions
@@ -24,16 +24,11 @@ use polkadot_primitives::v1::{
CollatorId, CoreIndex, CoreState, Hash, Id as ParaId, CandidateReceipt, PoV, ValidatorId,
};
use polkadot_subsystem::{
jaeger,
jaeger, PerLeafSpan,
FromOverseer, OverseerSignal, SubsystemContext,
messages::{
AllMessages, CollatorProtocolMessage,
NetworkBridgeMessage,
},
};
use polkadot_node_network_protocol::{
v1 as protocol_v1, View, PeerId, NetworkBridgeEvent, RequestId,
messages::{AllMessages, CollatorProtocolMessage, NetworkBridgeMessage},
};
use polkadot_node_network_protocol::{v1 as protocol_v1, View, PeerId, NetworkBridgeEvent, RequestId, OurView};
use polkadot_node_subsystem_util::{
validator_discovery,
request_validators_ctx,
@@ -188,7 +183,10 @@ struct State {
peer_views: HashMap<PeerId, View>,
/// Our own view.
view: View,
view: OurView,
/// Span per relay parent.
span_per_relay_parent: HashMap<Hash, PerLeafSpan>,
/// Possessed collations.
///
@@ -431,7 +429,8 @@ async fn process_msg(
state.collating_on = Some(id);
}
DistributeCollation(receipt, pov) => {
let _span1 = jaeger::hash_span(&receipt.descriptor.relay_parent, "distributing-collation");
let _span1 = state.span_per_relay_parent
.get(&receipt.descriptor.relay_parent).map(|s| s.child("distributing-collation"));
let _span2 = jaeger::pov_span(&pov, "distributing-collation");
match state.collating_on {
Some(id) if receipt.descriptor.para_id != id => {
@@ -542,12 +541,12 @@ async fn handle_incoming_peer_message(
);
}
RequestCollation(request_id, relay_parent, para_id) => {
let _span = jaeger::hash_span(&relay_parent, "rx-collation-request");
let _span = state.span_per_relay_parent.get(&relay_parent).map(|s| s.child("request-collation"));
match state.collating_on {
Some(our_para_id) => {
if our_para_id == para_id {
if let Some(collation) = state.collations.get(&relay_parent).cloned() {
let _span = _span.child("sending");
let _span = _span.as_ref().map(|s| s.child("sending"));
send_collation(ctx, state, request_id, origin, collation.0, collation.1).await;
}
} else {
@@ -665,12 +664,13 @@ async fn handle_network_msg(
#[tracing::instrument(level = "trace", skip(state), fields(subsystem = LOG_TARGET))]
async fn handle_our_view_change(
state: &mut State,
view: View,
view: OurView,
) -> Result<()> {
for removed in state.view.difference(&view) {
state.collations.remove(removed);
state.our_validators_groups.remove(removed);
state.connection_requests.remove(removed);
state.span_per_relay_parent.remove(removed);
}
state.view = view;
@@ -725,11 +725,10 @@ pub(crate) async fn run(
mod tests {
use super::*;
use std::time::Duration;
use std::{time::Duration, sync::Arc};
use assert_matches::assert_matches;
use futures::{executor, future, Future, channel::mpsc};
use smallvec::smallvec;
use sp_core::crypto::Pair;
use sp_keyring::Sr25519Keyring;
@@ -739,10 +738,10 @@ mod tests {
ValidatorIndex, GroupRotationInfo, AuthorityDiscoveryId,
SessionIndex, SessionInfo,
};
use polkadot_subsystem::{ActiveLeavesUpdate, messages::{RuntimeApiMessage, RuntimeApiRequest}};
use polkadot_subsystem::{ActiveLeavesUpdate, messages::{RuntimeApiMessage, RuntimeApiRequest}, JaegerSpan};
use polkadot_node_subsystem_util::TimeoutExt;
use polkadot_subsystem_testhelpers as test_helpers;
use polkadot_node_network_protocol::view;
use polkadot_node_network_protocol::{view, our_view};
#[derive(Default)]
struct TestCandidateBuilder {
@@ -888,17 +887,15 @@ mod tests {
self.relay_parent.randomize();
}
let hashes = if merge_views {
vec![old_relay_parent, self.relay_parent]
let our_view = if merge_views {
our_view![old_relay_parent, self.relay_parent]
} else {
vec![self.relay_parent]
our_view![self.relay_parent]
};
overseer_send(
virtual_overseer,
CollatorProtocolMessage::NetworkBridgeUpdateV1(
NetworkBridgeEvent::OurViewChange(View { heads: hashes, finalized_number: 0 }),
),
CollatorProtocolMessage::NetworkBridgeUpdateV1(NetworkBridgeEvent::OurViewChange(our_view)),
).await;
}
}
@@ -997,15 +994,15 @@ mod tests {
overseer_signal(
virtual_overseer,
OverseerSignal::ActiveLeaves(ActiveLeavesUpdate {
activated: smallvec![test_state.relay_parent],
deactivated: smallvec![],
activated: [(test_state.relay_parent, Arc::new(JaegerSpan::Disabled))][..].into(),
deactivated: [][..].into(),
}),
).await;
overseer_send(
virtual_overseer,
CollatorProtocolMessage::NetworkBridgeUpdateV1(
NetworkBridgeEvent::OurViewChange(view![test_state.relay_parent]),
NetworkBridgeEvent::OurViewChange(our_view![test_state.relay_parent]),
),
).await;
}
@@ -14,9 +14,7 @@
// You should have received a copy of the GNU General Public License
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
use std::collections::{HashMap, HashSet};
use std::time::Duration;
use std::task::Poll;
use std::{collections::{HashMap, HashSet}, time::Duration, task::Poll, sync::Arc};
use futures::{
StreamExt,
@@ -30,20 +28,16 @@ use polkadot_primitives::v1::{
Id as ParaId, CandidateReceipt, CollatorId, Hash, PoV,
};
use polkadot_subsystem::{
jaeger,
jaeger, PerLeafSpan, JaegerSpan,
FromOverseer, OverseerSignal, SubsystemContext,
messages::{
AllMessages, CandidateSelectionMessage, CollatorProtocolMessage, NetworkBridgeMessage,
},
};
use polkadot_node_network_protocol::{
v1 as protocol_v1, View, PeerId, ReputationChange as Rep, RequestId,
NetworkBridgeEvent,
};
use polkadot_node_subsystem_util::{
TimeoutExt as _,
metrics::{self, prometheus},
v1 as protocol_v1, View, OurView, PeerId, ReputationChange as Rep, RequestId, NetworkBridgeEvent,
};
use polkadot_node_subsystem_util::{TimeoutExt as _, metrics::{self, prometheus}};
use super::{modify_reputation, LOG_TARGET, Result};
@@ -172,7 +166,7 @@ struct PerRequest {
#[derive(Default)]
struct State {
/// Our own view.
view: View,
view: OurView,
/// Track all active collators and their views.
peer_views: HashMap<PeerId, View>,
@@ -215,6 +209,9 @@ struct State {
/// Metrics.
metrics: Metrics,
/// Span per relay parent.
span_per_relay_parent: HashMap<Hash, PerLeafSpan>,
}
/// Another subsystem has requested to fetch collations on a particular leaf for some para.
@@ -505,7 +502,7 @@ where
state.peer_views.entry(origin).or_default();
}
AdvertiseCollation(relay_parent, para_id) => {
let _span = jaeger::hash_span(&relay_parent, "advertising-collation");
let _span = state.span_per_relay_parent.get(&relay_parent).map(|s| s.child("advertise-collation"));
state.advertisements.entry(origin.clone()).or_default().insert((para_id, relay_parent));
if let Some(collator) = state.known_collators.get(&origin) {
@@ -517,7 +514,8 @@ where
modify_reputation(ctx, origin, COST_UNEXPECTED_MESSAGE).await;
}
Collation(request_id, receipt, pov) => {
let _span1 = jaeger::hash_span(&receipt.descriptor.relay_parent, "received-collation");
let _span1 = state.span_per_relay_parent.get(&receipt.descriptor.relay_parent)
.map(|s| s.child("received-collation"));
let _span2 = jaeger::pov_span(&pov, "received-collation");
received_collation(ctx, state, origin, request_id, receipt, pov).await;
}
@@ -556,9 +554,19 @@ async fn remove_relay_parent(
#[tracing::instrument(level = "trace", skip(state), fields(subsystem = LOG_TARGET))]
async fn handle_our_view_change(
state: &mut State,
view: View,
view: OurView,
) -> Result<()> {
let old_view = std::mem::replace(&mut (state.view), view);
let old_view = std::mem::replace(&mut state.view, view);
let added: HashMap<Hash, Arc<JaegerSpan>> = state.view
.span_per_head()
.iter()
.filter(|v| !old_view.contains(&v.0))
.map(|v| (v.0.clone(), v.1.clone()))
.collect();
added.into_iter().for_each(|(h, s)| {
state.span_per_relay_parent.insert(h, PerLeafSpan::new(s, "validator-side"));
});
let removed = old_view
.difference(&state.view)
@@ -571,6 +579,7 @@ async fn handle_our_view_change(
for removed in removed.into_iter() {
state.recently_removed_heads.insert(removed.clone());
remove_relay_parent(state, removed).await?;
state.span_per_relay_parent.remove(&removed);
}
Ok(())
@@ -663,7 +672,7 @@ where
);
}
FetchCollation(relay_parent, collator_id, para_id, tx) => {
let _span = jaeger::hash_span(&relay_parent, "fetching-collation");
let _span = state.span_per_relay_parent.get(&relay_parent).map(|s| s.child("fetch-collation"));
fetch_collation(ctx, state, relay_parent, collator_id, para_id, tx).await;
}
ReportCollator(id) => {
@@ -760,7 +769,7 @@ mod tests {
use polkadot_primitives::v1::{BlockData, CollatorPair};
use polkadot_subsystem_testhelpers as test_helpers;
use polkadot_node_network_protocol::view;
use polkadot_node_network_protocol::our_view;
#[derive(Clone)]
struct TestState {
@@ -873,7 +882,7 @@ mod tests {
overseer_send(
&mut virtual_overseer,
CollatorProtocolMessage::NetworkBridgeUpdateV1(
NetworkBridgeEvent::OurViewChange(view![test_state.relay_parent])
NetworkBridgeEvent::OurViewChange(our_view![test_state.relay_parent])
)
).await;
@@ -931,7 +940,7 @@ mod tests {
overseer_send(
&mut virtual_overseer,
CollatorProtocolMessage::NetworkBridgeUpdateV1(
NetworkBridgeEvent::OurViewChange(view![test_state.relay_parent])
NetworkBridgeEvent::OurViewChange(our_view![test_state.relay_parent])
)
).await;
@@ -1022,7 +1031,7 @@ mod tests {
overseer_send(
&mut virtual_overseer,
CollatorProtocolMessage::NetworkBridgeUpdateV1(
NetworkBridgeEvent::OurViewChange(view![Hash::repeat_byte(0x42)])
NetworkBridgeEvent::OurViewChange(our_view![Hash::repeat_byte(0x42)])
)
).await;
@@ -1050,7 +1059,7 @@ mod tests {
overseer_send(
&mut virtual_overseer,
CollatorProtocolMessage::NetworkBridgeUpdateV1(
NetworkBridgeEvent::OurViewChange(view![test_state.relay_parent])
NetworkBridgeEvent::OurViewChange(our_view![test_state.relay_parent])
)
).await;
@@ -1134,8 +1143,8 @@ mod tests {
overseer_send(
&mut virtual_overseer,
CollatorProtocolMessage::NetworkBridgeUpdateV1(
NetworkBridgeEvent::OurViewChange(view![test_state.relay_parent])
)
NetworkBridgeEvent::OurViewChange(our_view![test_state.relay_parent])
),
).await;
let peer_b = PeerId::random();