make possible to test, test, and fix feed cutoff

This commit is contained in:
James Wilson
2021-07-26 16:38:24 +01:00
parent 50935b29fc
commit ecf5fccaab
5 changed files with 129 additions and 23 deletions
+2 -2
View File
@@ -67,8 +67,8 @@ impl Connection {
};
let msg = match message_data {
soketto::Data::Text(_) => Ok(RecvMessage::Binary(data)),
soketto::Data::Binary(_) => String::from_utf8(data)
soketto::Data::Binary(_) => Ok(RecvMessage::Binary(data)),
soketto::Data::Text(_) => String::from_utf8(data)
.map(|s| RecvMessage::Text(s))
.map_err(|e| e.into()),
};
+35 -14
View File
@@ -33,12 +33,16 @@ struct Opts {
socket: std::net::SocketAddr,
/// The desired log level; one of 'error', 'warn', 'info', 'debug' or 'trace', where
/// 'error' only logs errors and 'trace' logs everything.
#[structopt(required = false, long = "log", default_value = "info")]
#[structopt(long = "log", default_value = "info")]
log_level: log::LevelFilter,
/// Space delimited list of the names of chains that are not allowed to connect to
/// telemetry. Case sensitive.
#[structopt(required = false, long = "denylist")]
#[structopt(long, required = false)]
denylist: Vec<String>,
/// If it takes longer than this number of seconds to send the current batch of messages
/// to a feed, the feed connection will be closed.
#[structopt(long, default_value = "10")]
feed_timeout: u64
}
#[tokio::main]
@@ -60,7 +64,10 @@ async fn main() {
/// Declare our routes and start the server.
async fn start_server(opts: Opts) -> anyhow::Result<()> {
let aggregator = Aggregator::spawn(opts.denylist).await?;
let server = http_utils::start_server(opts.socket, move |addr, req| {
let socket_addr = opts.socket;
let feed_timeout = opts.feed_timeout;
let server = http_utils::start_server(socket_addr, move |addr, req| {
let aggregator = aggregator.clone();
async move {
match (req.method(), req.uri().path().trim_end_matches('/')) {
@@ -73,7 +80,7 @@ async fn start_server(opts: Opts) -> anyhow::Result<()> {
Ok(http_utils::upgrade_to_websocket(req, move |ws_send, ws_recv| async move {
let tx_to_aggregator = aggregator.subscribe_feed();
let (mut tx_to_aggregator, mut ws_send)
= handle_feed_websocket_connection(ws_send, ws_recv, tx_to_aggregator).await;
= handle_feed_websocket_connection(ws_send, ws_recv, tx_to_aggregator, feed_timeout).await;
log::info!("Closing /feed connection from {:?}", addr);
// Tell the aggregator that this connection has closed, so it can tidy up.
let _ = tx_to_aggregator.send(FromFeedWebsocket::Disconnected).await;
@@ -234,6 +241,7 @@ async fn handle_feed_websocket_connection<S>(
mut ws_send: http_utils::WsSender,
mut ws_recv: http_utils::WsReceiver,
mut tx_to_aggregator: S,
feed_timeout: u64
) -> (S, http_utils::WsSender)
where
S: futures::Sink<FromFeedWebsocket, Error = anyhow::Error> + Unpin + Send + 'static,
@@ -304,7 +312,7 @@ where
// Send messages to the feed:
let send_handle = tokio::spawn(async move {
loop {
'outer: loop {
let debounce = tokio::time::sleep_until(Instant::now() + Duration::from_millis(75));
let msgs = tokio::select! {
@@ -326,21 +334,34 @@ where
}
});
// We have 10 seconds to send and flush messages. If the client isn't keeping up with our
// We have a deadline to send and flush messages. If the client isn't keeping up with our
// messages, the number we obtain from `ReadyChunksAll` will gradually increase and eventually
// we'll hit this deadline and the client will be booted.
let message_send_deadline = Instant::now() + Duration::from_secs(10);
let message_send_deadline = Instant::now() + Duration::from_secs(feed_timeout);
for bytes in all_msg_bytes {
if let Err(e) = ws_send.send_binary(&bytes).await {
log::warn!("Closing feed websocket due to error sending data: {}", e);
break;
match tokio::time::timeout_at(message_send_deadline, ws_send.send_binary(&bytes)).await {
Err(_) => {
log::warn!("Closing feed websocket that was too slow to keep up (1)");
break 'outer;
}
Ok(Err(e)) => {
log::warn!("Closing feed websocket due to error sending data: {}", e);
break 'outer;
}
Ok(_) => {}
}
}
if let Err(e) = tokio::time::timeout_at(message_send_deadline, ws_send.flush()).await {
log::warn!("Closing feed websocket due to error flushing data: {}", e);
break;
match tokio::time::timeout_at(message_send_deadline, ws_send.flush()).await {
Err(_) => {
log::warn!("Closing feed websocket that was too slow to keep up (2)");
break
}
Ok(Err(e)) => {
log::warn!("Closing feed websocket due to error flushing data: {}", e);
break;
}
Ok(_) => {}
}
debounce.await;
+62 -1
View File
@@ -6,7 +6,7 @@ use std::time::Duration;
use test_utils::{
assert_contains_matches,
feed_message_de::{FeedMessage, NodeDetails},
workspace::start_server_debug
workspace::{ start_server, CoreOpts, start_server_debug }
};
/// The simplest test we can run; the main benefit of this test (since we check similar)
@@ -476,3 +476,64 @@ async fn feed_can_subscribe_and_unsubscribe_from_chain() {
// Tidy up:
server.shutdown().await;
}
/// Feeds will be disconnected if they can't receive messages quickly enough.
#[tokio::test]
async fn slow_feeds_are_disconnected() {
// Start server in release mode with a 1s feed timeout (to make the test run faster):
let mut server = start_server(
true,
CoreOpts { feed_timeout: Some(1) }
).await;
// Give us a shard to talk to:
let shard_id = server.add_shard().await.unwrap();
let (mut node_tx, _node_rx) = server.get_shard(shard_id).unwrap().connect_node().await.unwrap();
// Add a load of nodes from this shard so there's plenty of data to give to a feed.
// We want to exhaust any buffers between core and feed (eg BufWriters).
for n in 1..50_000 {
node_tx.send_json_text(json!({
"id":n,
"ts":"2021-07-12T10:37:47.714666+01:00",
"payload": {
"authority":true,
"chain":"Polkadot",
"config":"",
"genesis_hash": BlockHash::from_low_u64_ne(1),
"implementation":"Substrate Node",
"msg":"system.connected",
"name": format!("Alice {}", n),
"network_id":"12D3KooWEyoppNCUx8Yx66oV9fJnriXwCcXwDDUA2kj6vnc6iDEp",
"startup_time":"1625565542717",
"version":"2.0.0-07a1af348-aarch64-macos"
}
})).unwrap();
}
// Connect a raw feed so that we can control how fast we consume data from the websocket
let (mut raw_feed_tx, mut raw_feed_rx) = server.get_core().connect_feed_raw().await.unwrap();
// Subscribe the feed:
raw_feed_tx.send_text("subscribe:Polkadot").await.unwrap();
// Wait a little.. the feed hasn't been receiving messages so it should
// be booted after ~a second.
tokio::time::sleep(Duration::from_secs(2)).await;
let mut v = Vec::new();
// Drain anything out and expect to hit a "closed" error.
let res = loop {
if let Err(e) = raw_feed_rx.receive_data(&mut v).await {
break e
}
};
assert!(
matches!(res, soketto::connection::Error::Closed),
"Should be Closed error, but is {:?}", res
);
// Tidy up:
server.shutdown().await;
}
+1 -1
View File
@@ -1,4 +1,4 @@
mod commands;
mod start_server;
pub use start_server::{ start_server_debug, start_server_release };
pub use start_server::*;
@@ -1,6 +1,19 @@
use super::commands;
use crate::server::{self, Server, Command};
/// Additional options to pass to the telemetry core command.
///
/// Each field is optional; when a field is `None`, the corresponding command line
/// flag is simply not appended to the core command.
#[derive(Debug, Clone, Default)]
pub struct CoreOpts {
    /// Value (in seconds) to pass via the `--feed-timeout` flag, if any.
    pub feed_timeout: Option<u64>
}
/// Start a telemetry server. We'll use `cargo run` by default, but you can also provide
/// env vars to configure the binary that runs for the shard and core process. Either:
///
@@ -18,7 +31,7 @@ use crate::server::{self, Server, Command};
/// - `TELEMETRY_SUBMIT_HOSTS` - hosts (comma separated) to connect to for telemetry `/submit`s.
/// - `TELEMETRY_FEED_HOST` - host to connect to for feeds (eg 127.0.0.1:3000)
///
pub async fn start_server(release_mode: bool) -> Server {
pub async fn start_server(release_mode: bool, core_opts: CoreOpts) -> Server {
// Start a single process:
if let Ok(bin) = std::env::var("TELEMETRY_BIN") {
return Server::start(server::StartOpts::SingleProcess {
@@ -38,13 +51,24 @@ pub async fn start_server(release_mode: bool) -> Server {
}).await.unwrap();
}
// Start a shard and core process:
// Build the shard command
let shard_command = std::env::var("TELEMETRY_SHARD_BIN")
.map(|val| Command::new(val))
.unwrap_or_else(|_| commands::cargo_run_telemetry_shard(release_mode).expect("must be in rust workspace to run shard command"));
let core_command = std::env::var("TELEMETRY_CORE_BIN")
// Build the core command
let mut core_command = std::env::var("TELEMETRY_CORE_BIN")
.map(|val| Command::new(val))
.unwrap_or_else(|_| commands::cargo_run_telemetry_core(release_mode).expect("must be in rust workspace to run core command"));
// Append additional opts to the core command
if let Some(feed_timeout) = core_opts.feed_timeout {
core_command = core_command
.arg("--feed-timeout")
.arg(feed_timeout.to_string());
}
// Start the server
Server::start(server::StartOpts::ShardAndCore {
shard_command,
core_command
@@ -53,10 +77,10 @@ pub async fn start_server(release_mode: bool) -> Server {
/// Start a telemetry core server in debug mode. see [`start_server`] for details.
pub async fn start_server_debug() -> Server {
start_server(false).await
start_server(false, CoreOpts::default()).await
}
/// Start a telemetry core server in release mode. see [`start_server`] for details.
pub async fn start_server_release() -> Server {
start_server(true).await
start_server(true, CoreOpts::default()).await
}