mirror of
https://github.com/pezkuwichain/pezkuwi-telemetry.git
synced 2026-06-14 09:31:01 +00:00
Allow multiple SystemConnects to be handled from a single node in the shard
This commit is contained in:
@@ -29,22 +29,27 @@ enum ToAggregator {
|
||||
/// messages from it will be ignored.
|
||||
#[derive(Clone,Debug)]
|
||||
pub enum FromWebsocket {
|
||||
/// Tell the aggregator about a new node.
|
||||
Add {
|
||||
message_id: node::NodeMessageId,
|
||||
ip: Option<std::net::IpAddr>,
|
||||
node: common::types::NodeDetails,
|
||||
/// Fire this when the connection is established.
|
||||
Initialize {
|
||||
/// When a message is sent back up this channel, we terminate
|
||||
/// the websocket connection and force the node to reconnect
|
||||
/// so that it sends its system info again in case the telemetry
|
||||
/// core has restarted.
|
||||
close_connection: mpsc::Sender<()>
|
||||
},
|
||||
/// Tell the aggregator about a new node.
|
||||
Add {
|
||||
message_id: node::NodeMessageId,
|
||||
ip: Option<std::net::IpAddr>,
|
||||
node: common::types::NodeDetails,
|
||||
},
|
||||
/// Update/pass through details about a node.
|
||||
Update {
|
||||
message_id: node::NodeMessageId,
|
||||
payload: node::Payload
|
||||
}
|
||||
},
|
||||
/// Make a note when the node disconnects.
|
||||
Disconnected
|
||||
}
|
||||
|
||||
pub type FromAggregator = internal_messages::FromShardAggregator;
|
||||
@@ -139,10 +144,13 @@ impl Aggregator {
|
||||
connected_to_telemetry_core = false;
|
||||
log::info!("Disconnected from telemetry core");
|
||||
},
|
||||
ToAggregator::FromWebsocket(conn_id, FromWebsocket::Add { message_id, ip, node, close_connection }) => {
|
||||
// Keep the close_connection channel in case we need it:
|
||||
ToAggregator::FromWebsocket(_conn_id, FromWebsocket::Initialize { close_connection }) => {
|
||||
// We boot all connections on a reconnect-to-core to force new systemconnected
|
||||
// messages to be sent. We could boot on muting, but need to be careful not to boot
|
||||
// connections where we mute one set of messages it sends and not others.
|
||||
close_connections.push(close_connection);
|
||||
|
||||
},
|
||||
ToAggregator::FromWebsocket(conn_id, FromWebsocket::Add { message_id, ip, node }) => {
|
||||
// Don't bother doing anything else if we're disconnected, since we'll force the
|
||||
// node to reconnect anyway when the backend does:
|
||||
if !connected_to_telemetry_core { continue }
|
||||
@@ -178,6 +186,20 @@ impl Aggregator {
|
||||
payload
|
||||
}).await;
|
||||
},
|
||||
ToAggregator::FromWebsocket(disconnected_conn_id, FromWebsocket::Disconnected) => {
|
||||
// Find all of the local IDs corresponding to the disconnected connection ID and
|
||||
// remove them, telling Telemetry Core about them too. This could be more efficient,
|
||||
// but the mapping isn't currently cached and it's not a super frequent op.
|
||||
let local_ids_disconnected: Vec<_> = to_local_id.iter()
|
||||
.filter(|(_, &(conn_id, _))| disconnected_conn_id == conn_id)
|
||||
.map(|(local_id, _)| local_id)
|
||||
.collect();
|
||||
|
||||
for local_id in local_ids_disconnected {
|
||||
to_local_id.remove_by_id(local_id);
|
||||
let _ = tx_to_telemetry_core.send(FromShardAggregator::RemoveNode { local_id }).await;
|
||||
}
|
||||
},
|
||||
ToAggregator::FromTelemetryCore(FromTelemetryCore::Mute { local_id }) => {
|
||||
// Ignore incoming messages if we're not connected to the backend:
|
||||
if !connected_to_telemetry_core { continue }
|
||||
|
||||
+36
-36
@@ -89,8 +89,13 @@ async fn start_server(opts: Opts) -> anyhow::Result<()> {
|
||||
let tx_to_aggregator = aggregator.subscribe_node();
|
||||
log::info!("Opening /submit connection from {:?}", addr);
|
||||
ws.on_upgrade(move |websocket| async move {
|
||||
handle_websocket_connection(websocket, tx_to_aggregator, addr).await;
|
||||
let (mut tx_to_aggregator, websocket) = handle_websocket_connection(websocket, tx_to_aggregator, addr).await;
|
||||
log::info!("Closing /submit connection from {:?}", addr);
|
||||
// Tell the aggregator that this connection has closed, so it can tidy up.
|
||||
let _ = tx_to_aggregator.send(FromWebsocket::Disconnected).await;
|
||||
// Note: IF we want to close with a status code and reason, we need to construct
|
||||
// a ws::Message using `ws::Message::close_with`, rather than using this method:
|
||||
let _ = websocket.close().await;
|
||||
})
|
||||
});
|
||||
|
||||
@@ -101,53 +106,38 @@ async fn start_server(opts: Opts) -> anyhow::Result<()> {
|
||||
}
|
||||
|
||||
/// This takes care of handling messages from an established socket connection.
|
||||
async fn handle_websocket_connection<S>(websocket: ws::WebSocket, mut tx_to_aggregator: S, addr: Option<SocketAddr>)
|
||||
async fn handle_websocket_connection<S>(mut websocket: ws::WebSocket, mut tx_to_aggregator: S, addr: Option<SocketAddr>) -> (S, ws::WebSocket)
|
||||
where S: futures::Sink<FromWebsocket, Error = anyhow::Error> + Unpin
|
||||
{
|
||||
let mut websocket = websocket.fuse();
|
||||
|
||||
// This could be a oneshot channel, but it's useful to be able to clone
|
||||
// messages, and we can't clone oneshot channel senders.
|
||||
let (close_connection_tx, mut close_connection_rx) = mpsc::channel(0);
|
||||
|
||||
// First, we wait until we receive a SystemConnected message.
|
||||
// Until this turns up, we ignore other messages. We could buffer
|
||||
// a few quite easily if we liked.
|
||||
while let Some(msg) = websocket.next().await {
|
||||
let node_message = match deserialize_ws_message(msg) {
|
||||
Ok(Some(msg)) => msg,
|
||||
Ok(None) => continue,
|
||||
Err(e) => { log::error!("{}", e); break }
|
||||
};
|
||||
|
||||
let message_id = node_message.id();
|
||||
let payload = node_message.into_payload();
|
||||
|
||||
if let node::Payload::SystemConnected(info) = payload {
|
||||
let _ = tx_to_aggregator.send(FromWebsocket::Add {
|
||||
message_id,
|
||||
ip: addr.map(|a| a.ip()),
|
||||
node: info.node,
|
||||
close_connection: close_connection_tx,
|
||||
}).await;
|
||||
break;
|
||||
}
|
||||
// Tell the aggregator about this new connection, and give it a way to close this connection:
|
||||
let init_msg = FromWebsocket::Initialize {
|
||||
close_connection: close_connection_tx
|
||||
};
|
||||
if let Err(e) = tx_to_aggregator.send(init_msg).await {
|
||||
log::error!("Error sending message to aggregator: {}", e);
|
||||
return (tx_to_aggregator, websocket);
|
||||
}
|
||||
|
||||
// Now, the node has been added, so we forward messages along as updates.
|
||||
// We keep an eye on the close_connection channel; if that resolves, then
|
||||
// end this loop and let the connection close gracefully.
|
||||
// Now we've "initialized", wait for messages from the node. Messages will
|
||||
// either be `SystemConnected` type messages that inform us that a new set
|
||||
// of messages with some message ID will be sent (a node could have more
|
||||
// than one of these), or updates linked to a specific message_id.
|
||||
loop {
|
||||
futures::select_biased! {
|
||||
tokio::select! {
|
||||
// The close channel has fired, so end the loop:
|
||||
_ = close_connection_rx.next() => {
|
||||
log::info!("connection to {:?} being closed by aggregator", addr);
|
||||
break
|
||||
},
|
||||
// A message was received; handle it:
|
||||
msg = websocket.next() => {
|
||||
let msg = match msg {
|
||||
Some(msg) => msg,
|
||||
None => break
|
||||
None => { log::warn!("Websocket connection from {:?} closed", addr); break }
|
||||
};
|
||||
|
||||
let node_message = match deserialize_ws_message(msg) {
|
||||
@@ -159,7 +149,19 @@ async fn handle_websocket_connection<S>(websocket: ws::WebSocket, mut tx_to_aggr
|
||||
let message_id = node_message.id();
|
||||
let payload = node_message.into_payload();
|
||||
|
||||
if let Err(e) = tx_to_aggregator.send(FromWebsocket::Update { message_id, payload } ).await {
|
||||
// Until the aggregator receives an `Add` message, which we can create once
|
||||
// we see one of these SystemConnected ones, it will ignore messages with
|
||||
// the corresponding message_id.
|
||||
if let node::Payload::SystemConnected(info) = payload {
|
||||
let _ = tx_to_aggregator.send(FromWebsocket::Add {
|
||||
message_id,
|
||||
ip: addr.map(|a| a.ip()),
|
||||
node: info.node,
|
||||
}).await;
|
||||
}
|
||||
// Anything that's not an "Add" is an Update. The aggregator will ignore
|
||||
// updates against a message_id that hasn't first been Added, above.
|
||||
else if let Err(e) = tx_to_aggregator.send(FromWebsocket::Update { message_id, payload } ).await {
|
||||
log::error!("Failed to send node message to aggregator: {}", e);
|
||||
continue;
|
||||
}
|
||||
@@ -167,10 +169,8 @@ async fn handle_websocket_connection<S>(websocket: ws::WebSocket, mut tx_to_aggr
|
||||
}
|
||||
}
|
||||
|
||||
// loops ended; attempt to close the connection gracefully.
|
||||
// Note: IF we want to close with a status code and reason, we need to construct
|
||||
// a ws::Message using `ws::Message::close_with`, rather than using this method:
|
||||
let _ = websocket.close().await;
|
||||
// Return what we need to close the connection gracefully:
|
||||
(tx_to_aggregator, websocket)
|
||||
}
|
||||
|
||||
/// Deserialize an incoming websocket message, returning an error if something
|
||||
|
||||
Reference in New Issue
Block a user