approval-distribution: Update topology if authorities are discovered later (#2981)

Fixes: https://github.com/paritytech/polkadot-sdk/issues/2138.

Especially on restart, the AuthorityDiscovery cache is not populated, so we
create an invalid topology and messages won't be routed correctly for
the entire session. This PR proposes to fix this by updating the
topology as soon as we know the Authority/PeerId mapping, which should
improve the situation dramatically.


[This issue was hit
yesterday](https://grafana.teleport.parity.io/goto/o9q2625Sg?orgId=1)
on Westend and resulted in finality stalling.


# TODO

- [x] Unit tests
- [x] Test impact on versi

---------

Signed-off-by: Alexandru Gheorghe <alexandru.gheorghe@parity.io>
This commit is contained in:
Alexandru Gheorghe
2024-01-25 12:58:37 +02:00
committed by GitHub
parent b57e53dc13
commit a6952c7469
6 changed files with 473 additions and 42 deletions
@@ -89,6 +89,26 @@ impl SessionGridTopology {
SessionGridTopology { shuffled_indices, canonical_shuffling, peer_ids }
}
/// Records `peer_id` as a known peer id for every validator in the canonical shuffling
/// whose authority discovery id is contained in `ids`.
///
/// Nothing is changed when `peer_id` is already known to this topology.
///
/// Returns `true` if at least one validator entry gained the peer id.
pub fn update_authority_ids(
    &mut self,
    peer_id: PeerId,
    ids: &HashSet<AuthorityDiscoveryId>,
) -> bool {
    // A peer id we already track cannot add any new information.
    if self.peer_ids.contains(&peer_id) {
        return false;
    }

    let mut matched = false;
    for validator in self.canonical_shuffling.iter_mut() {
        if ids.contains(&validator.discovery_id) {
            validator.peer_ids.push(peer_id);
            matched = true;
        }
    }

    // Only remember the peer id once it has been attached to some validator.
    if matched {
        self.peer_ids.insert(peer_id);
    }
    matched
}
/// Produces the outgoing routing logic for a particular peer.
///
/// Returns `None` if the validator index is out of bounds.
@@ -269,6 +289,7 @@ impl GridNeighbors {
pub struct SessionGridTopologyEntry {
    /// The grid topology of the session this entry belongs to.
    topology: SessionGridTopology,
    /// Grid neighbors derived from `topology` for `local_index`; replaced whenever newly
    /// discovered peer ids change the topology.
    local_neighbors: GridNeighbors,
    /// Validator index used to (re)compute `local_neighbors`.
    /// NOTE(review): presumably `None` when the local node is not a validator in this
    /// session — confirm against the code that constructs the entry.
    local_index: Option<ValidatorIndex>,
}
impl SessionGridTopologyEntry {
@@ -291,6 +312,25 @@ impl SessionGridTopologyEntry {
pub fn is_validator(&self, peer: &PeerId) -> bool {
    // The entry keeps no validator set of its own; the wrapped topology answers the query.
    let topology = &self.topology;
    topology.is_validator(peer)
}
/// Forwards the newly discovered `peer_id` / authority ids mapping to the wrapped topology
/// and refreshes the cached local grid neighbors if the topology changed.
///
/// Returns `true` if the topology recorded the peer id for at least one validator.
pub fn update_authority_ids(
    &mut self,
    peer_id: PeerId,
    ids: &HashSet<AuthorityDiscoveryId>,
) -> bool {
    if !self.topology.update_authority_ids(peer_id, ids) {
        return false;
    }

    // A peer id was added, so neighbors_x and neighbors_y may be stale: recompute the grid
    // neighbors for the local index. The previous neighbors are kept when there is no local
    // index or the computation yields nothing.
    let recomputed = self
        .local_index
        .and_then(|index| self.topology.compute_grid_neighbors_for(index));
    if let Some(neighbors) = recomputed {
        self.local_neighbors = neighbors;
    }
    true
}
}
/// A set of topologies indexed by session
@@ -305,6 +345,20 @@ impl SessionGridTopologies {
self.inner.get(&session).and_then(|val| val.0.as_ref())
}
/// Updates the known peer ids for the passed authority ids in every stored session
/// topology.
///
/// Sessions that are tracked but have no topology yet are skipped.
///
/// Returns `true` if at least one session topology was updated.
pub fn update_authority_ids(
    &mut self,
    peer_id: PeerId,
    ids: &HashSet<AuthorityDiscoveryId>,
) -> bool {
    // Deliberately not `Iterator::any`: it short-circuits on the first `true`, which would
    // leave the topologies of all remaining sessions without the new peer id.
    let mut any_updated = false;
    for (_, slot) in self.inner.iter_mut() {
        if let Some(entry) = slot.0.as_mut() {
            any_updated |= entry.update_authority_ids(peer_id, ids);
        }
    }
    any_updated
}
/// Increase references counter for a specific topology
pub fn inc_session_refs(&mut self, session: SessionIndex) {
self.inner.entry(session).or_insert((None, 0)).1 += 1;
@@ -333,7 +387,7 @@ impl SessionGridTopologies {
.and_then(|l| topology.compute_grid_neighbors_for(l))
.unwrap_or_else(GridNeighbors::empty);
entry.0 = Some(SessionGridTopologyEntry { topology, local_neighbors });
entry.0 = Some(SessionGridTopologyEntry { topology, local_neighbors, local_index });
}
}
}
@@ -368,6 +422,7 @@ impl Default for SessionBoundGridTopologyStorage {
peer_ids: Default::default(),
},
local_neighbors: GridNeighbors::empty(),
local_index: None,
},
},
prev_topology: None,
@@ -412,7 +467,7 @@ impl SessionBoundGridTopologyStorage {
let old_current = std::mem::replace(
&mut self.current_topology,
GridTopologySessionBound {
entry: SessionGridTopologyEntry { topology, local_neighbors },
entry: SessionGridTopologyEntry { topology, local_neighbors, local_index },
session_index,
},
);