Handle removing a node, and a shard disconnecting (bulk remove)

This commit is contained in:
James Wilson
2021-06-25 17:21:24 +01:00
parent 4f60453689
commit 89dfad5bbe
6 changed files with 350 additions and 81 deletions
+134 -18
View File
@@ -238,21 +238,41 @@ impl InnerLoop {
},
state::AddNodeResult::NodeAddedToChain(details) => {
let node_id = details.id;
// Note the ID so that we know what node other messages are referring to:
// Record ID <-> (shardId,localId) for future messages:
self.node_ids.insert(node_id, (shard_conn_id, local_id));
let mut feed_serializer = FeedMessageSerializer::new();
feed_serializer.push(feed_message::AddedNode(node_id, details.node));
let chain_label = details.chain.label().to_owned();
// Don't hold onto details too long because we want &mut self later:
let old_chain_label = details.old_chain_label.to_owned();
let new_chain_label = details.new_chain_label.to_owned();
let chain_node_count = details.chain_node_count;
let has_chain_label_changed = details.has_chain_label_changed;
if let Some(bytes) = feed_serializer.into_finalized() {
// Tell chain subscribers about the node we've just added:
let mut feed_messages_for_chain = FeedMessageSerializer::new();
feed_messages_for_chain.push(feed_message::AddedNode(node_id, &details.node));
if let Some(bytes) = feed_messages_for_chain.into_finalized() {
self.broadcast_to_chain_feeds(
&chain_label,
&new_chain_label,
ToFeedWebsocket::Bytes(bytes)
).await
).await;
}
// Currently we only geographically locate IPV4 addresses so ignore IPV6;
// Tell everybody about the new node count and potential rename:
let mut feed_messages_for_all = FeedMessageSerializer::new();
if has_chain_label_changed {
feed_messages_for_all.push(feed_message::RemovedChain(&old_chain_label));
}
feed_messages_for_all.push(feed_message::AddedChain(&new_chain_label, chain_node_count));
if let Some(bytes) = feed_messages_for_all.into_finalized() {
let msg = ToFeedWebsocket::Bytes(bytes);
self.broadcast_to_all_feeds(msg).await;
}
// Ask for the geographical location of the node.
// Currently we only geographically locate IPV4 addresses so ignore IPV6.
if let Some(IpAddr::V4(ip_v4)) = ip {
let _ = self.tx_to_locator.send((node_id, ip_v4)).await;
}
@@ -260,9 +280,14 @@ impl InnerLoop {
}
},
FromShardWebsocket::Remove { local_id } => {
if let Some(node_id) = self.node_ids.remove_by_right(&(shard_conn_id, local_id)) {
// TODO: node_state.remove_node, Every feed should know about node count changes.
}
let node_id = match self.node_ids.remove_by_right(&(shard_conn_id, local_id)) {
Some((node_id, _)) => node_id,
None => {
log::error!("Cannot find ID for node with shard/connectionId of {}/{}", shard_conn_id, local_id);
return
}
};
self.remove_nodes_and_broadcast_result(Some(node_id)).await;
},
FromShardWebsocket::Update { local_id, payload } => {
// TODO: Fill this all in...
@@ -309,8 +334,15 @@ impl InnerLoop {
// TODO: node_state.update_node, then handle returned diffs
},
FromShardWebsocket::Disconnected => {
// The shard has disconnected; remove the shard channel, but also
// remove any nodes associated with the shard, firing the relevant feed messages.
// Find all nodes associated with this shard connection ID:
let node_ids_to_remove: Vec<NodeId> = self.node_ids
.iter()
.filter(|(_, &(this_shard_conn_id, _))| shard_conn_id == this_shard_conn_id)
.map(|(&node_id,_)| node_id)
.collect();
// ... and remove them:
self.remove_nodes_and_broadcast_result(node_ids_to_remove).await;
}
}
}
@@ -388,7 +420,7 @@ impl InnerLoop {
chain.finalized_block().height,
chain.finalized_block().hash
));
for (idx, (gid, node)) in chain.nodes().enumerate() {
for (idx, (node_id, node)) in chain.iter_nodes().enumerate() {
// Send subscription confirmation and chain head before doing all the nodes,
// and continue sending batches of 32 nodes a time over the wire subsequently
if idx % 32 == 0 {
@@ -396,14 +428,14 @@ impl InnerLoop {
let _ = feed_channel.send(ToFeedWebsocket::Bytes(bytes)).await;
}
}
feed_serializer.push(feed_message::AddedNode(gid, node));
feed_serializer.push(feed_message::AddedNode(node_id, node));
feed_serializer.push(feed_message::FinalizedBlock(
gid,
node_id,
node.finalized().height,
node.finalized().hash,
));
if node.stale() {
feed_serializer.push(feed_message::StaleNode(gid));
feed_serializer.push(feed_message::StaleNode(node_id));
}
}
if let Some(bytes) = feed_serializer.into_finalized() {
@@ -431,6 +463,81 @@ impl InnerLoop {
}
}
/// Remove every node ID handed in, then broadcast the resulting feed messages.
///
/// Node IDs are first bucketed by the label of the chain they currently belong
/// to; IDs with no known chain are skipped. Each bucket is then removed in turn,
/// with per-chain messages sent to that chain's feeds and the messages meant for
/// everybody sent once at the very end.
async fn remove_nodes_and_broadcast_result(&mut self, node_ids: impl IntoIterator<Item=NodeId>) {
    // Bucket the IDs by chain label so that feed messages can be batched per chain:
    let mut ids_by_chain_label: HashMap<String,Vec<NodeId>> = HashMap::new();
    for node_id in node_ids {
        let label = match self.node_state.get_node_chain(node_id) {
            Some(chain) => chain.label().to_owned(),
            // No chain is known for this node, so there is nothing to group it under.
            None => continue
        };
        ids_by_chain_label.entry(label).or_default().push(node_id);
    }

    // Messages for everybody accumulate across all chains and go out once at the end.
    let mut feed_messages_for_all = FeedMessageSerializer::new();

    for (chain_label, chain_node_ids) in ids_by_chain_label {
        // Messages for this chain's subscribers are collected and sent per chain.
        let mut feed_messages_for_chain = FeedMessageSerializer::new();

        for node_id in chain_node_ids {
            self.remove_node(node_id, &mut feed_messages_for_chain, &mut feed_messages_for_all);
        }

        if let Some(bytes) = feed_messages_for_chain.into_finalized() {
            self.broadcast_to_chain_feeds(&chain_label, ToFeedWebsocket::Bytes(bytes)).await;
        }
    }

    if let Some(bytes) = feed_messages_for_all.into_finalized() {
        self.broadcast_to_all_feeds(ToFeedWebsocket::Bytes(bytes)).await;
    }
}
/// Remove one node by ID and queue up the feed messages that describe the change.
///
/// Nothing is sent from here: messages for subscribers of the node's chain are
/// pushed onto `feed_for_chain`, and messages for every feed are pushed onto
/// `feed_for_all`. If the node state reports an error, the error is logged and
/// no messages are queued.
fn remove_node(
    &mut self,
    node_id: NodeId,
    feed_for_chain: &mut FeedMessageSerializer,
    feed_for_all: &mut FeedMessageSerializer
) {
    // Drop the top level ID association first (a caller may already have done so).
    self.node_ids.remove_by_left(&node_id);

    let details = match self.node_state.remove_node(node_id) {
        Ok(details) => details,
        Err(err) => {
            log::error!("Error removing node {}: {}", node_id, err);
            return
        }
    };

    let chain_is_gone = details.chain_node_count == 0;

    // The old label is no longer valid if the chain emptied out or was renamed:
    if chain_is_gone || details.has_chain_label_changed {
        feed_for_all.push(feed_message::RemovedChain(&details.old_chain_label));
    }

    // Nothing more to announce for a chain that no longer exists.
    if chain_is_gone {
        return
    }

    // The chain lives on: tell everybody its (possibly new) label and node count...
    feed_for_all.push(
        feed_message::AddedChain(&details.new_chain_label, details.chain_node_count)
    );
    // ...and tell the chain's own subscribers which node went away.
    feed_for_chain.push(feed_message::RemovedNode(node_id));
}
/// Send a message to all chain feeds.
async fn broadcast_to_chain_feeds(&mut self, chain: &str, message: ToFeedWebsocket) {
if let Some(feeds) = self.chain_to_feed_conn_ids.get(chain) {
@@ -438,9 +545,18 @@ impl InnerLoop {
// How much faster would it be if we processed these in parallel?
// Is it practical to do so given lifetimes and such?
if let Some(chan) = self.feed_channels.get_mut(&feed_id) {
chan.send(message.clone()).await;
let _ = chan.send(message.clone()).await;
}
}
}
}
/// Send a copy of `message` to every feed channel we currently hold.
///
/// The result of each send is deliberately discarded.
async fn broadcast_to_all_feeds(&mut self, message: ToFeedWebsocket) {
    // How much faster would it be if we processed these in parallel?
    // Is it practical to do so given lifetimes and such?
    for feed_channel in self.feed_channels.values_mut() {
        let _ = feed_channel.send(message.clone()).await;
    }
}
}
+37 -20
View File
@@ -19,7 +19,7 @@ pub struct Chain {
/// the most commonly used label as nodes are added/removed.
labels: MostSeen<Label>,
/// Set of nodes that are in this chain
nodes: HashMap<NodeId, Node>,
node_ids: HashSet<NodeId>,
/// Best block
best: Block,
/// Finalized block
@@ -29,7 +29,9 @@ pub struct Chain {
/// Calculated average block time
average_block_time: Option<u64>,
/// When the best block first arrived
timestamp: Option<Timestamp>
timestamp: Option<Timestamp>,
/// Genesis hash of this chain
genesis_hash: BlockHash
}
pub enum AddNodeResult {
@@ -39,6 +41,10 @@ pub enum AddNodeResult {
}
}
pub struct RemoveNodeResult {
pub chain_renamed: bool
}
/// Labels of chains we consider "first party". These chains allow any
/// number of nodes to connect.
static FIRST_PARTY_NETWORKS: Lazy<HashSet<&'static str>> = Lazy::new(|| {
@@ -55,52 +61,60 @@ const THIRD_PARTY_NETWORKS_MAX_NODES: usize = 500;
impl Chain {
/// Create a new chain with an initial label.
pub fn new(label: Label) -> Self {
pub fn new(genesis_hash: BlockHash) -> Self {
Chain {
labels: MostSeen::new(label),
nodes: HashMap::new(),
labels: MostSeen::default(),
node_ids: HashSet::new(),
best: Block::zero(),
finalized: Block::zero(),
block_times: NumStats::new(50),
average_block_time: None,
timestamp: None
timestamp: None,
genesis_hash
}
}
/// Can we add a node? If not, it's because the chain is at its quota.
pub fn can_add_node(&self) -> bool {
// Dynamically determine the max nodes based on the most common
// label so far, in case it changes to something with a different limit.
self.nodes.len() < max_nodes(self.labels.best())
self.node_ids.len() < max_nodes(self.labels.best())
}
/// Assign a node to this chain. If the function returns false, it
/// means that the node could not be added as we're at quota.
pub fn add_node(&mut self, node_id: NodeId, node_details: NodeDetails) -> AddNodeResult {
pub fn add_node(&mut self, node_id: NodeId, chain_label: &Box<str>) -> AddNodeResult {
if !self.can_add_node() {
return AddNodeResult::Overquota
}
let label_result = self.labels.insert(&node_details.chain);
let new_node = Node::new(node_details);
self.nodes.insert(node_id, new_node);
let label_result = self.labels.insert(chain_label);
self.node_ids.insert(node_id);
AddNodeResult::Added {
chain_renamed: label_result.has_changed()
}
}
pub fn get_node(&self, node_id: NodeId) -> Option<&Node> {
self.nodes.get(&node_id)
}
pub fn get_node_mut(&mut self, node_id: NodeId) -> Option<&mut Node> {
self.nodes.get_mut(&node_id)
/// Remove a node from this chain. We expect the label it used for the chain so
/// that we can keep track of which label is most popular.
pub fn remove_node(&mut self, node_id: NodeId, chain_label: &Box<str>) -> RemoveNodeResult {
let label_result = self.labels.remove(&chain_label);
self.node_ids.remove(&node_id);
RemoveNodeResult {
chain_renamed: label_result.has_changed()
}
}
pub fn label(&self) -> &str {
&self.labels.best()
}
pub fn node_count(&self) -> usize {
self.nodes.len()
pub fn node_ids(&self) -> impl Iterator<Item=NodeId> + '_ {
self.node_ids.iter().copied()
}
pub fn nodes(&self) -> impl Iterator<Item=(NodeId, &Node)> + '_ {
self.nodes.iter().map(|(id, node)| (*id, node))
pub fn node_count(&self) -> usize {
self.node_ids.len()
}
pub fn best_block(&self) -> &Block {
&self.best
@@ -114,6 +128,9 @@ impl Chain {
pub fn finalized_block(&self) -> &Block {
&self.finalized
}
/// The genesis hash of this chain, as handed to [`Chain::new`].
pub fn genesis_hash(&self) -> &BlockHash {
&self.genesis_hash
}
}
/// First party networks (Polkadot, Kusama etc) are allowed any number of nodes.
+154 -41
View File
@@ -11,19 +11,24 @@ use crate::find_location;
use super::chain::{ self, Chain };
pub type NodeId = usize;
pub type Label = Arc<str>;
pub type ChainId = usize;
/// Our state contains node and chain information
pub struct State {
next_id: NodeId,
chains: HashMap<BlockHash, Chain>,
chains_by_label: HashMap<String, BlockHash>,
chains_by_node: HashMap<NodeId, BlockHash>,
/// Denylist for networks we do not want to allow connecting.
// Store nodes and chains in a fairly compact format.
nodes: DenseMap<Node>,
chains: DenseMap<Chain>,
// Find the right chain given various details.
chains_by_genesis_hash: HashMap<BlockHash, ChainId>,
chains_by_label: HashMap<Box<str>, ChainId>,
chains_by_node: HashMap<NodeId, ChainId>,
/// Chain labels that we do not want to allow connecting.
denylist: HashSet<String>,
}
/// Adding a node to a chain leads to this result:
/// Adding a node to a chain leads to this result
pub enum AddNodeResult<'a> {
/// The chain is on the "deny list", so we can't add the node
ChainOnDenyList,
@@ -36,42 +41,64 @@ pub enum AddNodeResult<'a> {
pub struct NodeAddedToChain<'a> {
/// The ID assigned to this node.
pub id: NodeId,
/// The chain the node was added to.
pub chain: &'a Chain,
/// The old label of the chain.
pub old_chain_label: Box<str>,
/// The new label of the chain.
pub new_chain_label: &'a str,
/// The node that was added.
pub node: &'a Node,
/// Is this chain newly added?
pub chain_just_added: bool,
/// Number of nodes in the chain. If 1, the chain was just added.
pub chain_node_count: usize,
/// Has the chain label been updated?
pub has_chain_label_changed: bool
}
pub struct RemoveNodeResult {
/// How many nodes remain on the chain (0 if the chain was removed):
chain_node_count: usize
// if removing a node is successful, we get this information back.
pub struct RemovedNode {
/// How many nodes remain on the chain (0 if the chain was removed)
pub chain_node_count: usize,
/// Has the chain label been updated?
pub has_chain_label_changed: bool,
/// The old label of the chain.
pub old_chain_label: Box<str>,
/// The new label of the chain.
pub new_chain_label: Box<str>,
}
/// The ways in which removing a node from the state can fail.
#[derive(Debug, thiserror::Error)]
pub enum RemoveNodeError {
/// No node is stored under the ID that was asked to be removed.
#[error("Node not found")]
NodeNotFound,
/// The node existed, but either no chain was associated with its ID or the
/// associated chain could not be found.
#[error("Node chain not found")]
NodeChainNotFound
}
impl State {
pub fn new<T: IntoIterator<Item=String>>(denylist: T) -> State {
State {
next_id: 0,
chains: HashMap::new(),
nodes: DenseMap::new(),
chains: DenseMap::new(),
chains_by_genesis_hash: HashMap::new(),
chains_by_label: HashMap::new(),
chains_by_node: HashMap::new(),
denylist: denylist.into_iter().collect()
denylist: denylist.into_iter().collect(),
}
}
pub fn iter_chains(&self) -> impl Iterator<Item=&Chain> {
pub fn iter_chains(&self) -> impl Iterator<Item=StateChain<'_>> {
self.chains
.iter()
.map(|(_,chain)| chain)
.map(move |(_,chain)| StateChain { state: self, chain })
}
pub fn get_chain_by_label(&self, label: &str) -> Option<&Chain> {
pub fn get_chain_by_label(&self, label: &str) -> Option<StateChain<'_>> {
self.chains_by_label
.get(label)
.and_then(|chain_id| self.chains.get(chain_id))
.and_then(|&chain_id| self.chains.get(chain_id))
.map(|chain| StateChain { state: self, chain })
}
pub fn add_node(&mut self, genesis_hash: BlockHash, node_details: NodeDetails) -> AddNodeResult<'_> {
@@ -79,42 +106,94 @@ impl State {
return AddNodeResult::ChainOnDenyList;
}
let chain = self.chains
.entry(genesis_hash)
.or_insert_with(|| Chain::new(node_details.chain.clone()));
// Get the chain ID, creating a new empty chain if one doesn't exist.
let chain_id = match self.chains_by_genesis_hash.get(&genesis_hash) {
Some(id) => *id,
None => {
let chain_id = self.chains.add(Chain::new(genesis_hash));
self.chains_by_genesis_hash.insert(genesis_hash, chain_id);
chain_id
}
};
if !chain.can_add_node() {
return AddNodeResult::ChainOverQuota;
}
// Get the chain.
let chain = self.chains.get_mut(chain_id)
.expect("should be known to exist after the above (unless chains_by_genesis_hash out of sync)");
let node_id = self.next_id;
self.next_id += 1;
// What ID will the node have when it's added? We don't actually want
// to add it until we know whether the chain will accept it, but we want
// an ID to give to the chain.
let node_id = self.nodes.next_id();
let chain_label = node_details.chain.clone();
match chain.add_node(node_id, node_details) {
match chain.add_node(node_id, &chain_label) {
chain::AddNodeResult::Overquota => {
AddNodeResult::ChainOverQuota
},
chain::AddNodeResult::Added { chain_renamed } => {
let chain = &*chain;
// Actually add the node if the chain accepts it:
self.nodes.add(Node::new(node_details));
// Update the label we use to reference the chain if
// it changes (it'll always change first time a node's added):
if chain_renamed {
let label = chain.label().to_owned();
self.chains_by_label.remove(&label);
self.chains_by_label.insert(label, genesis_hash);
self.chains_by_label.remove(&chain_label);
self.chains_by_label.insert(chain.label().to_string().into_boxed_str(), chain_id);
}
let node = chain.get_node(node_id).unwrap();
let node = self.nodes.get(node_id).expect("node added above");
AddNodeResult::NodeAddedToChain(NodeAddedToChain {
id: node_id,
chain: chain,
node: node,
chain_just_added: chain.node_count() == 1,
old_chain_label: chain_label,
new_chain_label: chain.label(),
chain_node_count: chain.node_count(),
has_chain_label_changed: chain_renamed
})
}
}
}
/// Remove a node from the state, deleting its chain too if that leaves the chain empty.
///
/// On success, the returned [`RemovedNode`] reports how many nodes remain on the
/// chain (0 if the chain was removed), the chain's label before and after the
/// removal, and whether the label changed.
///
/// # Errors
///
/// - [`RemoveNodeError::NodeNotFound`] if no node is stored under `node_id`.
/// - [`RemoveNodeError::NodeChainNotFound`] if the node has no chain association,
///   or the associated chain does not exist.
pub fn remove_node(&mut self, node_id: NodeId) -> Result<RemovedNode,RemoveNodeError> {
    self.nodes.remove(node_id)
        .ok_or(RemoveNodeError::NodeNotFound)?;

    let chain_id = self.chains_by_node.remove(&node_id)
        .ok_or(RemoveNodeError::NodeChainNotFound)?;
    let chain = self.chains.get_mut(chain_id)
        .ok_or(RemoveNodeError::NodeChainNotFound)?;

    // NOTE(review): this passes the chain's current best label, whereas
    // `Chain::remove_node` documents that it expects the label the removed node
    // itself used. Confirm which is intended.
    let old_chain_label = chain.label().to_string().into_boxed_str();
    let remove_result = chain.remove_node(node_id, &old_chain_label);
    let new_chain_label = chain.label().to_string().into_boxed_str();
    let chain_node_count = chain.node_count();
    let genesis_hash = *chain.genesis_hash();

    if chain_node_count == 0 {
        // The chain is empty: remove it and clean up every index pointing at it.
        self.chains_by_label.remove(&old_chain_label);
        self.chains_by_genesis_hash.remove(&genesis_hash);
        self.chains.remove(chain_id);
    } else if remove_result.chain_renamed {
        // The chain still exists but its most common label changed, so re-index it.
        // This must not run for a chain that was just removed, or the label index
        // would keep an entry for a chain that no longer exists.
        self.chains_by_label.remove(&old_chain_label);
        self.chains_by_label.insert(new_chain_label.clone(), chain_id);
    }

    Ok(RemovedNode {
        old_chain_label,
        new_chain_label,
        chain_node_count,
        has_chain_label_changed: remove_result.chain_renamed
    })
}
/// Update the location for a node. Return `false` if the node was not found.
pub fn update_node_location(&mut self, node_id: NodeId, location: find_location::Location) -> bool {
if let Some(node) = self.get_node_mut(node_id) {
@@ -126,17 +205,16 @@ impl State {
}
/// Get the chain that a node belongs to.
pub fn get_node_chain(&self, node_id: NodeId) -> Option<&Chain> {
pub fn get_node_chain(&self, node_id: NodeId) -> Option<StateChain<'_>> {
self.chains_by_node
.get(&node_id)
.and_then(|chain_id| self.chains.get(chain_id))
.and_then(|&chain_id| self.chains.get(chain_id))
.map(|chain| StateChain { state: self, chain })
}
/// Obtain mutable access to a node, if it's found.
fn get_node_mut(&mut self, node_id: NodeId) -> Option<&mut Node> {
let chain_id = *self.chains_by_node.get(&node_id)?;
let chain = self.chains.get_mut(&chain_id)?;
chain.get_node_mut(node_id)
self.nodes.get_mut(node_id)
}
// /// Add a new node to our state.
@@ -166,3 +244,38 @@ impl State {
// }
}
/// A read-only view of a chain, handed out whenever a chain is requested from
/// [`State`]. Wrapping the chain keeps the public interface consistent and avoids
/// exposing [`Chain`] methods that are only meant for use inside [`State`].
pub struct StateChain<'a> {
    state: &'a State,
    chain: &'a Chain
}

impl<'a> StateChain<'a> {
    /// The label of the underlying chain.
    pub fn label(&self) -> &'a str {
        self.chain.label()
    }

    /// How many nodes the underlying chain has.
    pub fn node_count(&self) -> usize {
        self.chain.node_count()
    }

    /// The best block of the underlying chain.
    pub fn best_block(&self) -> &'a Block {
        self.chain.best_block()
    }

    /// The timestamp reported by the underlying chain.
    pub fn timestamp(&self) -> Timestamp {
        self.chain.timestamp()
    }

    /// The average block time reported by the underlying chain, if it has one.
    pub fn average_block_time(&self) -> Option<u64> {
        self.chain.average_block_time()
    }

    /// The finalized block of the underlying chain.
    pub fn finalized_block(&self) -> &'a Block {
        self.chain.finalized_block()
    }

    /// Iterate over `(id, node)` pairs for the nodes on this chain. IDs the chain
    /// lists but which have no node in the state are skipped.
    pub fn iter_nodes(&self) -> impl Iterator<Item=(NodeId, &'a Node)> + 'a {
        // Copy the `'a` reference out so the closure does not borrow `self`.
        let state = self.state;
        self.chain
            .node_ids()
            .filter_map(move |id| state.nodes.get(id).map(|node| (id, node)))
    }
}