mirror of
https://github.com/pezkuwichain/pezkuwi-telemetry.git
synced 2026-06-17 13:51:02 +00:00
add --num-cpus option
This commit is contained in:
Generated
+2
@@ -1271,6 +1271,7 @@ dependencies = [
|
|||||||
"http",
|
"http",
|
||||||
"hyper",
|
"hyper",
|
||||||
"log",
|
"log",
|
||||||
|
"num_cpus",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
"parking_lot",
|
"parking_lot",
|
||||||
"primitive-types",
|
"primitive-types",
|
||||||
@@ -1301,6 +1302,7 @@ dependencies = [
|
|||||||
"http",
|
"http",
|
||||||
"hyper",
|
"hyper",
|
||||||
"log",
|
"log",
|
||||||
|
"num_cpus",
|
||||||
"primitive-types",
|
"primitive-types",
|
||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ hex = "0.4.3"
|
|||||||
http = "0.2.4"
|
http = "0.2.4"
|
||||||
hyper = "0.14.11"
|
hyper = "0.14.11"
|
||||||
log = "0.4.14"
|
log = "0.4.14"
|
||||||
|
num_cpus = "1.13.0"
|
||||||
once_cell = "1.8.0"
|
once_cell = "1.8.0"
|
||||||
parking_lot = "0.11.1"
|
parking_lot = "0.11.1"
|
||||||
primitive-types = { version = "0.9.0", features = ["serde"] }
|
primitive-types = { version = "0.9.0", features = ["serde"] }
|
||||||
|
|||||||
@@ -113,7 +113,7 @@ impl Aggregator {
|
|||||||
/// Return a sink that a feed can send messages into to be handled by the aggregator.
|
/// Return a sink that a feed can send messages into to be handled by the aggregator.
|
||||||
pub fn subscribe_feed(
|
pub fn subscribe_feed(
|
||||||
&self,
|
&self,
|
||||||
) -> impl Sink<inner_loop::FromFeedWebsocket, Error = anyhow::Error> + Send + Sync + Unpin + 'static
|
) -> (u64, impl Sink<inner_loop::FromFeedWebsocket, Error = anyhow::Error> + Send + Sync + Unpin + 'static)
|
||||||
{
|
{
|
||||||
// Assign a unique aggregator-local ID to each connection that subscribes, and pass
|
// Assign a unique aggregator-local ID to each connection that subscribes, and pass
|
||||||
// that along with every message to the aggregator loop:
|
// that along with every message to the aggregator loop:
|
||||||
@@ -125,11 +125,11 @@ impl Aggregator {
|
|||||||
|
|
||||||
// Calling `send` on this Sink requires Unpin. There may be a nicer way than this,
|
// Calling `send` on this Sink requires Unpin. There may be a nicer way than this,
|
||||||
// but pinning by boxing is the easy solution for now:
|
// but pinning by boxing is the easy solution for now:
|
||||||
Box::pin(tx_to_aggregator.with(move |msg| async move {
|
(feed_conn_id, Box::pin(tx_to_aggregator.with(move |msg| async move {
|
||||||
Ok(inner_loop::ToAggregator::FromFeedWebsocket(
|
Ok(inner_loop::ToAggregator::FromFeedWebsocket(
|
||||||
feed_conn_id.into(),
|
feed_conn_id.into(),
|
||||||
msg,
|
msg,
|
||||||
))
|
))
|
||||||
}))
|
})))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -19,7 +19,6 @@ mod feed_message;
|
|||||||
mod find_location;
|
mod find_location;
|
||||||
mod state;
|
mod state;
|
||||||
use std::str::FromStr;
|
use std::str::FromStr;
|
||||||
use std::sync::atomic::AtomicUsize;
|
|
||||||
use tokio::time::{Duration, Instant};
|
use tokio::time::{Duration, Instant};
|
||||||
|
|
||||||
use aggregator::{
|
use aggregator::{
|
||||||
@@ -60,10 +59,13 @@ struct Opts {
|
|||||||
/// to a feed, the feed connection will be closed.
|
/// to a feed, the feed connection will be closed.
|
||||||
#[structopt(long, default_value = "10")]
|
#[structopt(long, default_value = "10")]
|
||||||
feed_timeout: u64,
|
feed_timeout: u64,
|
||||||
|
/// Number of worker threads to spawn. Defaults to the number of CPUs on the machine.
|
||||||
|
/// If "0" is given, use the number of CPUs available on the machine.
|
||||||
|
#[structopt(long)]
|
||||||
|
num_cpus: Option<usize>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[tokio::main]
|
fn main() {
|
||||||
async fn main() {
|
|
||||||
let opts = Opts::from_args();
|
let opts = Opts::from_args();
|
||||||
|
|
||||||
SimpleLogger::new()
|
SimpleLogger::new()
|
||||||
@@ -73,9 +75,20 @@ async fn main() {
|
|||||||
|
|
||||||
log::info!("Starting Telemetry Core version: {}", VERSION);
|
log::info!("Starting Telemetry Core version: {}", VERSION);
|
||||||
|
|
||||||
if let Err(e) = start_server(opts).await {
|
let num_cpus_to_use = opts.num_cpus
|
||||||
log::error!("Error starting server: {}", e);
|
.and_then(|n| if n == 0 { None } else { Some(n) })
|
||||||
}
|
.unwrap_or_else(|| num_cpus::get());
|
||||||
|
|
||||||
|
tokio::runtime::Builder::new_multi_thread()
|
||||||
|
.enable_all()
|
||||||
|
.worker_threads(num_cpus_to_use)
|
||||||
|
.build()
|
||||||
|
.unwrap()
|
||||||
|
.block_on(async {
|
||||||
|
if let Err(e) = start_server(opts).await {
|
||||||
|
log::error!("Error starting server: {}", e);
|
||||||
|
}
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Declare our routes and start the server.
|
/// Declare our routes and start the server.
|
||||||
@@ -95,13 +108,14 @@ async fn start_server(opts: Opts) -> anyhow::Result<()> {
|
|||||||
Ok(http_utils::upgrade_to_websocket(
|
Ok(http_utils::upgrade_to_websocket(
|
||||||
req,
|
req,
|
||||||
move |ws_send, ws_recv| async move {
|
move |ws_send, ws_recv| async move {
|
||||||
let tx_to_aggregator = aggregator.subscribe_feed();
|
let (feed_id, tx_to_aggregator) = aggregator.subscribe_feed();
|
||||||
let (mut tx_to_aggregator, mut ws_send) =
|
let (mut tx_to_aggregator, mut ws_send) =
|
||||||
handle_feed_websocket_connection(
|
handle_feed_websocket_connection(
|
||||||
ws_send,
|
ws_send,
|
||||||
ws_recv,
|
ws_recv,
|
||||||
tx_to_aggregator,
|
tx_to_aggregator,
|
||||||
feed_timeout,
|
feed_timeout,
|
||||||
|
feed_id,
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
log::info!("Closing /feed connection from {:?}", addr);
|
log::info!("Closing /feed connection from {:?}", addr);
|
||||||
@@ -291,6 +305,7 @@ async fn handle_feed_websocket_connection<S>(
|
|||||||
mut ws_recv: http_utils::WsReceiver,
|
mut ws_recv: http_utils::WsReceiver,
|
||||||
mut tx_to_aggregator: S,
|
mut tx_to_aggregator: S,
|
||||||
feed_timeout: u64,
|
feed_timeout: u64,
|
||||||
|
feed_id: u64
|
||||||
) -> (S, http_utils::WsSender)
|
) -> (S, http_utils::WsSender)
|
||||||
where
|
where
|
||||||
S: futures::Sink<FromFeedWebsocket, Error = anyhow::Error> + Unpin + Send + 'static,
|
S: futures::Sink<FromFeedWebsocket, Error = anyhow::Error> + Unpin + Send + 'static,
|
||||||
@@ -364,34 +379,48 @@ where
|
|||||||
drop(send_closer_tx); // Kill the send task if this recv task ends
|
drop(send_closer_tx); // Kill the send task if this recv task ends
|
||||||
tx_to_aggregator
|
tx_to_aggregator
|
||||||
});
|
});
|
||||||
|
let mut i: u64 = 0;
|
||||||
// Send messages to the feed:
|
// Send messages to the feed:
|
||||||
let send_handle = tokio::spawn(async move {
|
let send_handle = tokio::spawn(async move {
|
||||||
'outer: loop {
|
'outer: loop {
|
||||||
|
let debounce = tokio::time::sleep_until(Instant::now() + Duration::from_millis(75));
|
||||||
|
|
||||||
let msgs = tokio::select! {
|
let msgs = tokio::select! {
|
||||||
msgs = rx_from_aggregator_chunks.next() => msgs,
|
msgs = rx_from_aggregator_chunks.next() => msgs,
|
||||||
_ = &mut send_closer_rx => { break }
|
_ = &mut send_closer_rx => { break }
|
||||||
};
|
};
|
||||||
|
|
||||||
// End the loop when connection from aggregator ends:
|
// End the loop when connection from aggregator ends:
|
||||||
let msgs = match msgs {
|
let msgs = match msgs {
|
||||||
Some(msgs) => msgs,
|
Some(msgs) => msgs,
|
||||||
None => break,
|
None => break,
|
||||||
};
|
};
|
||||||
|
|
||||||
let total_val = unsafe { total.load(std::sync::atomic::Ordering::Relaxed) };
|
if feed_id == 1 {
|
||||||
if msgs.len() > total_val {
|
i += 1;
|
||||||
unsafe { total.compare_exchange(total_val, msgs.len(), std::sync::atomic::Ordering::Relaxed, std::sync::atomic::Ordering::Relaxed); };
|
println!("FEED #{}, msgs: {}", i, msgs.len());
|
||||||
println!("Max msgs: {}", msgs.len());
|
if i > 1000 {
|
||||||
|
log::error!("TESTING: close feed");
|
||||||
|
break
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
// End the loop when there are more than 100k messages queued up.
|
||||||
|
// This number is just picked as a fairly high limit that should account
|
||||||
|
// for many thousands of nodes on a chain. The higher this number is, the
|
||||||
|
// larger our channel storage and memory usage is liable to grow before the feed
|
||||||
|
// is dropped.
|
||||||
|
if msgs.len() > 100_000 {
|
||||||
|
log::warn!("Closing feed websocket that was too slow to keep up (too many messages buffered)");
|
||||||
|
break 'outer;
|
||||||
|
}
|
||||||
|
|
||||||
// There is only one message type at the mo; bytes to send
|
// There is only one message type at the mo; bytes to send
|
||||||
// to the websocket. collect them all up to dispatch in one shot.
|
// to the websocket. collect them all up to dispatch in one shot.
|
||||||
let all_msg_bytes = msgs.into_iter().map(|msg| match msg {
|
let all_msg_bytes = msgs.into_iter().map(|msg| match msg {
|
||||||
ToFeedWebsocket::Bytes(bytes) => bytes,
|
ToFeedWebsocket::Bytes(bytes) => bytes,
|
||||||
});
|
});
|
||||||
|
|
||||||
// We have a deadline to send and flush messages. If the client isn't keeping up with our
|
// If the feed is too slow to receive the current batch of messages, we'll drop it.
|
||||||
// messages, the number we obtain from `ReadyChunksAll` will gradually increase and eventually
|
|
||||||
// we'll hit this deadline and the client will be booted.
|
|
||||||
let message_send_deadline = Instant::now() + Duration::from_secs(feed_timeout);
|
let message_send_deadline = Instant::now() + Duration::from_secs(feed_timeout);
|
||||||
|
|
||||||
for bytes in all_msg_bytes {
|
for bytes in all_msg_bytes {
|
||||||
@@ -399,7 +428,7 @@ if msgs.len() > total_val {
|
|||||||
.await
|
.await
|
||||||
{
|
{
|
||||||
Err(_) => {
|
Err(_) => {
|
||||||
log::warn!("Closing feed websocket that was too slow to keep up (1)");
|
log::warn!("Closing feed websocket that was too slow to keep up (too slow to send messages)");
|
||||||
break 'outer;
|
break 'outer;
|
||||||
}
|
}
|
||||||
Ok(Err(e)) => {
|
Ok(Err(e)) => {
|
||||||
@@ -411,7 +440,7 @@ if msgs.len() > total_val {
|
|||||||
}
|
}
|
||||||
match tokio::time::timeout_at(message_send_deadline, ws_send.flush()).await {
|
match tokio::time::timeout_at(message_send_deadline, ws_send.flush()).await {
|
||||||
Err(_) => {
|
Err(_) => {
|
||||||
log::warn!("Closing feed websocket that was too slow to keep up (2)");
|
log::warn!("Closing feed websocket that was too slow to keep up (too slow to flush messages)");
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
Ok(Err(e)) => {
|
Ok(Err(e)) => {
|
||||||
@@ -420,6 +449,8 @@ if msgs.len() > total_val {
|
|||||||
}
|
}
|
||||||
Ok(_) => {}
|
Ok(_) => {}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
debounce.await;
|
||||||
}
|
}
|
||||||
|
|
||||||
drop(recv_closer_tx); // Kill the recv task if this send task ends
|
drop(recv_closer_tx); // Kill the recv task if this send task ends
|
||||||
@@ -434,5 +465,3 @@ if msgs.len() > total_val {
|
|||||||
// loop ended; give socket back to parent:
|
// loop ended; give socket back to parent:
|
||||||
(tx_to_aggregator, ws_send)
|
(tx_to_aggregator, ws_send)
|
||||||
}
|
}
|
||||||
|
|
||||||
static mut total: std::sync::atomic::AtomicUsize = AtomicUsize::new(0);
|
|
||||||
|
|||||||
@@ -22,10 +22,10 @@ able to open a large number of connections and run some of the tests.
|
|||||||
Try running these:
|
Try running these:
|
||||||
|
|
||||||
```sh
|
```sh
|
||||||
sudo sysctl -w kern.maxfiles=50000
|
sudo sysctl -w kern.maxfiles=100000
|
||||||
sudo sysctl -w kern.maxfilesperproc=50000
|
sudo sysctl -w kern.maxfilesperproc=100000
|
||||||
ulimit -n 50000
|
ulimit -n 100000
|
||||||
sudo sysctl -w kern.ipc.somaxconn=50000
|
sudo sysctl -w kern.ipc.somaxconn=100000
|
||||||
sudo sysctl -w kern.ipc.maxsockbuf=16777216
|
sudo sysctl -w kern.ipc.maxsockbuf=16777216
|
||||||
```
|
```
|
||||||
*/
|
*/
|
||||||
@@ -580,6 +580,7 @@ async fn slow_feeds_are_disconnected() {
|
|||||||
// Timeout faster so the test can be quicker:
|
// Timeout faster so the test can be quicker:
|
||||||
CoreOpts {
|
CoreOpts {
|
||||||
feed_timeout: Some(1),
|
feed_timeout: Some(1),
|
||||||
|
..Default::default()
|
||||||
},
|
},
|
||||||
// Allow us to send more messages in more easily:
|
// Allow us to send more messages in more easily:
|
||||||
ShardOpts {
|
ShardOpts {
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ hex = "0.4.3"
|
|||||||
http = "0.2.4"
|
http = "0.2.4"
|
||||||
hyper = "0.14.11"
|
hyper = "0.14.11"
|
||||||
log = "0.4.14"
|
log = "0.4.14"
|
||||||
|
num_cpus = "1.13.0"
|
||||||
primitive-types = { version = "0.9.0", features = ["serde"] }
|
primitive-types = { version = "0.9.0", features = ["serde"] }
|
||||||
serde = { version = "1.0.126", features = ["derive"] }
|
serde = { version = "1.0.126", features = ["derive"] }
|
||||||
serde_json = "1.0.64"
|
serde_json = "1.0.64"
|
||||||
|
|||||||
@@ -80,10 +80,13 @@ struct Opts {
|
|||||||
/// value prevented from reconnecting to this shard for, in seconds.
|
/// value prevented from reconnecting to this shard for, in seconds.
|
||||||
#[structopt(long, default_value = "600")]
|
#[structopt(long, default_value = "600")]
|
||||||
node_block_seconds: u64,
|
node_block_seconds: u64,
|
||||||
|
/// Number of worker threads to spawn. Defaults to the number of CPUs on the machine.
|
||||||
|
/// If "0" is given, use the number of CPUs available on the machine.
|
||||||
|
#[structopt(long)]
|
||||||
|
num_cpus: Option<usize>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[tokio::main]
|
fn main() {
|
||||||
async fn main() {
|
|
||||||
let opts = Opts::from_args();
|
let opts = Opts::from_args();
|
||||||
|
|
||||||
SimpleLogger::new()
|
SimpleLogger::new()
|
||||||
@@ -93,9 +96,20 @@ async fn main() {
|
|||||||
|
|
||||||
log::info!("Starting Telemetry Shard version: {}", VERSION);
|
log::info!("Starting Telemetry Shard version: {}", VERSION);
|
||||||
|
|
||||||
if let Err(e) = start_server(opts).await {
|
let num_cpus_to_use = opts.num_cpus
|
||||||
log::error!("Error starting server: {}", e);
|
.and_then(|n| if n == 0 { None } else { Some(n) })
|
||||||
}
|
.unwrap_or_else(|| num_cpus::get());
|
||||||
|
|
||||||
|
tokio::runtime::Builder::new_multi_thread()
|
||||||
|
.enable_all()
|
||||||
|
.worker_threads(num_cpus_to_use)
|
||||||
|
.build()
|
||||||
|
.unwrap()
|
||||||
|
.block_on(async {
|
||||||
|
if let Err(e) = start_server(opts).await {
|
||||||
|
log::error!("Error starting server: {}", e);
|
||||||
|
}
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Declare our routes and start the server.
|
/// Declare our routes and start the server.
|
||||||
|
|||||||
@@ -20,11 +20,15 @@ use crate::server::{self, Command, Server};
|
|||||||
/// Additional options to pass to the core command.
|
/// Additional options to pass to the core command.
|
||||||
pub struct CoreOpts {
|
pub struct CoreOpts {
|
||||||
pub feed_timeout: Option<u64>,
|
pub feed_timeout: Option<u64>,
|
||||||
|
pub num_cpus: Option<usize>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for CoreOpts {
|
impl Default for CoreOpts {
|
||||||
fn default() -> Self {
|
fn default() -> Self {
|
||||||
Self { feed_timeout: None }
|
Self {
|
||||||
|
feed_timeout: None,
|
||||||
|
num_cpus: None
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -33,6 +37,7 @@ pub struct ShardOpts {
|
|||||||
pub max_nodes_per_connection: Option<usize>,
|
pub max_nodes_per_connection: Option<usize>,
|
||||||
pub max_node_data_per_second: Option<usize>,
|
pub max_node_data_per_second: Option<usize>,
|
||||||
pub node_block_seconds: Option<u64>,
|
pub node_block_seconds: Option<u64>,
|
||||||
|
pub num_cpus: Option<usize>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for ShardOpts {
|
impl Default for ShardOpts {
|
||||||
@@ -41,6 +46,7 @@ impl Default for ShardOpts {
|
|||||||
max_nodes_per_connection: None,
|
max_nodes_per_connection: None,
|
||||||
max_node_data_per_second: None,
|
max_node_data_per_second: None,
|
||||||
node_block_seconds: None,
|
node_block_seconds: None,
|
||||||
|
num_cpus: None
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -114,6 +120,11 @@ pub async fn start_server(
|
|||||||
.arg("--node-block-seconds")
|
.arg("--node-block-seconds")
|
||||||
.arg(val.to_string());
|
.arg(val.to_string());
|
||||||
}
|
}
|
||||||
|
if let Some(val) = shard_opts.num_cpus {
|
||||||
|
shard_command = shard_command
|
||||||
|
.arg("--num-cpus")
|
||||||
|
.arg(val.to_string());
|
||||||
|
}
|
||||||
|
|
||||||
// Build the core command
|
// Build the core command
|
||||||
let mut core_command = std::env::var("TELEMETRY_CORE_BIN")
|
let mut core_command = std::env::var("TELEMETRY_CORE_BIN")
|
||||||
@@ -127,6 +138,9 @@ pub async fn start_server(
|
|||||||
if let Some(val) = core_opts.feed_timeout {
|
if let Some(val) = core_opts.feed_timeout {
|
||||||
core_command = core_command.arg("--feed-timeout").arg(val.to_string());
|
core_command = core_command.arg("--feed-timeout").arg(val.to_string());
|
||||||
}
|
}
|
||||||
|
if let Some(val) = core_opts.num_cpus {
|
||||||
|
core_command = core_command.arg("--num-cpus").arg(val.to_string());
|
||||||
|
}
|
||||||
|
|
||||||
// Start the server
|
// Start the server
|
||||||
Server::start(server::StartOpts::ShardAndCore {
|
Server::start(server::StartOpts::ShardAndCore {
|
||||||
|
|||||||
Reference in New Issue
Block a user