Vote out offline authorities (#524)

* notify when an authority appears to have missed their block

* Runtime API

* offline tracker

* Move to consensus

* generating reports of offline indices

* stubbed-out evaluation logic

* Slashing data pathwat

* usize -> u32

* Slash bad validators.

* update to rhododendron 0.3

* fix compilation of polkadot-consensus

* Support offline noting in checked_block

* include offline reports in block authorship voting

* do not vote validators offline after some time

* add test for offline-tracker

* fix test build

* bump spec version

* update wasm

* Only allow validators that are possible to slash

* Fix grumble

* More idiomatic

* New Wasm.

* update rhododendron

* improve logging and reduce round time exponent

* format offline validators in ss58
This commit is contained in:
Robert Habermeier
2018-08-11 11:29:30 +02:00
committed by Gav Wood
parent c2b20fe5b0
commit e8f21cf0c9
33 changed files with 500 additions and 136 deletions
@@ -74,6 +74,9 @@ impl DynamicInclusion {
Some(now + until)
}
}
/// Get the start instant.
pub fn started_at(&self) -> Instant { self.start }
}
#[cfg(test)]
+98 -11
View File
@@ -69,7 +69,7 @@ use std::time::{Duration, Instant};
use codec::{Decode, Encode};
use extrinsic_store::Store as ExtrinsicStore;
use polkadot_api::PolkadotApi;
use polkadot_primitives::{Hash, Block, BlockId, BlockNumber, Header, Timestamp, SessionKey};
use polkadot_primitives::{AccountId, Hash, Block, BlockId, BlockNumber, Header, Timestamp, SessionKey};
use polkadot_primitives::parachain::{Id as ParaId, Chain, DutyRoster, BlockData, Extrinsic as ParachainExtrinsic, CandidateReceipt, CandidateSignature};
use primitives::AuthorityId;
use transaction_pool::TransactionPool;
@@ -80,20 +80,26 @@ use futures::prelude::*;
use futures::future;
use collation::CollationFetch;
use dynamic_inclusion::DynamicInclusion;
use parking_lot::RwLock;
pub use self::collation::{validate_collation, Collators};
pub use self::error::{ErrorKind, Error};
pub use self::offline_tracker::OfflineTracker;
pub use self::shared_table::{SharedTable, StatementProducer, ProducedStatements, Statement, SignedStatement, GenericStatement};
pub use service::Service;
mod dynamic_inclusion;
mod evaluation;
mod error;
mod offline_tracker;
mod service;
mod shared_table;
pub mod collation;
/// Shared offline validator tracker.
pub type SharedOfflineTracker = Arc<RwLock<OfflineTracker>>;
// block size limit.
const MAX_TRANSACTIONS_SIZE: usize = 4 * 1024 * 1024;
@@ -240,6 +246,8 @@ pub struct ProposerFactory<C, N, P> {
pub parachain_empty_duration: Duration,
/// Store for extrinsic data.
pub extrinsic_store: ExtrinsicStore,
/// Offline-tracker.
pub offline: SharedOfflineTracker,
}
impl<C, N, P> bft::Environment<Block> for ProposerFactory<C, N, P>
@@ -255,10 +263,11 @@ impl<C, N, P> bft::Environment<Block> for ProposerFactory<C, N, P>
type Output = N::Output;
type Error = Error;
fn init(&self,
fn init(
&self,
parent_header: &Header,
authorities: &[AuthorityId],
sign_with: Arc<ed25519::Pair>
sign_with: Arc<ed25519::Pair>,
) -> Result<(Self::Proposer, Self::Input, Self::Output), Error> {
use runtime_primitives::traits::{Hash as HashT, BlakeTwo256};
@@ -269,6 +278,9 @@ impl<C, N, P> bft::Environment<Block> for ProposerFactory<C, N, P>
let random_seed = self.client.random_seed(&id)?;
let random_seed = BlakeTwo256::hash(&*random_seed);
let validators = self.client.validators(&id)?;
self.offline.write().note_new_block(&validators[..]);
let (group_info, local_duty) = make_group_info(
duty_roster,
authorities,
@@ -326,6 +338,8 @@ impl<C, N, P> bft::Environment<Block> for ProposerFactory<C, N, P>
random_seed,
table,
transaction_pool: self.transaction_pool.clone(),
offline: self.offline.clone(),
validators,
_drop_signal: drop_signal,
};
@@ -403,9 +417,22 @@ pub struct Proposer<C: PolkadotApi> {
random_seed: Hash,
table: Arc<SharedTable>,
transaction_pool: Arc<TransactionPool<C>>,
offline: SharedOfflineTracker,
validators: Vec<AccountId>,
_drop_signal: exit_future::Signal,
}
impl<C: PolkadotApi + Send + Sync> Proposer<C> {
fn primary_index(&self, round_number: usize, len: usize) -> usize {
use primitives::uint::U256;
let big_len = U256::from(len);
let offset = U256::from_big_endian(&self.random_seed.0) % big_len;
let offset = offset.low_u64() as usize + round_number;
offset % len
}
}
impl<C> bft::Proposer<Block> for Proposer<C>
where
C: PolkadotApi + Send + Sync,
@@ -441,6 +468,8 @@ impl<C> bft::Proposer<Block> for Proposer<C>
client: self.client.clone(),
transaction_pool: self.transaction_pool.clone(),
table: self.table.clone(),
offline: self.offline.clone(),
validators: self.validators.clone(),
timing,
})
}
@@ -515,6 +544,13 @@ impl<C> bft::Proposer<Block> for Proposer<C>
includability_tracker.join(temporary_delay)
};
// refuse to vote if this block says a validator is offline that we
// think isn't.
let offline = proposal.noted_offline();
if !self.offline.read().check_consistency(&self.validators[..], offline) {
return Box::new(futures::empty());
}
// evaluate whether the block is actually valid.
// TODO: is it better to delay this until the delays are finished?
let evaluated = self.client
@@ -536,13 +572,8 @@ impl<C> bft::Proposer<Block> for Proposer<C>
}
fn round_proposer(&self, round_number: usize, authorities: &[AuthorityId]) -> AuthorityId {
use primitives::uint::U256;
let len: U256 = authorities.len().into();
let offset = U256::from_big_endian(&self.random_seed.0) % len;
let offset = offset.low_u64() as usize + round_number;
let proposer = authorities[offset % authorities.len()].clone();
let offset = self.primary_index(round_number, authorities.len());
let proposer = authorities[offset].clone();
trace!(target: "bft", "proposer for round {} is {}", round_number, proposer);
proposer
@@ -611,6 +642,36 @@ impl<C> bft::Proposer<Block> for Proposer<C>
.expect("locally signed extrinsic is valid; qed");
}
}
fn on_round_end(&self, round_number: usize, was_proposed: bool) {
let primary_validator = self.validators[
self.primary_index(round_number, self.validators.len())
];
// alter the message based on whether we think the empty proposer was forced to skip the round.
// this is determined by checking if our local validator would have been forced to skip the round.
let consider_online = was_proposed || {
let forced_delay = self.dynamic_inclusion.acceptable_in(Instant::now(), self.table.includable_count());
let public = ::ed25519::Public::from_raw(primary_validator.0);
match forced_delay {
None => info!(
"Potential Offline Validator: {} failed to propose during assigned slot: {}",
public,
round_number,
),
Some(_) => info!(
"Potential Offline Validator {} potentially forced to skip assigned slot: {}",
public,
round_number,
),
}
forced_delay.is_some()
};
self.offline.write().note_round_end(primary_validator, consider_online);
}
}
fn current_timestamp() -> Timestamp {
@@ -667,16 +728,42 @@ pub struct CreateProposal<C: PolkadotApi> {
transaction_pool: Arc<TransactionPool<C>>,
table: Arc<SharedTable>,
timing: ProposalTiming,
validators: Vec<AccountId>,
offline: SharedOfflineTracker,
}
impl<C> CreateProposal<C> where C: PolkadotApi {
fn propose_with(&self, candidates: Vec<CandidateReceipt>) -> Result<Block, Error> {
use polkadot_api::BlockBuilder;
use runtime_primitives::traits::{Hash as HashT, BlakeTwo256};
use polkadot_primitives::InherentData;
const MAX_VOTE_OFFLINE_SECONDS: Duration = Duration::from_secs(60);
// TODO: handle case when current timestamp behind that in state.
let timestamp = current_timestamp();
let mut block_builder = self.client.build_block(&self.parent_id, timestamp, candidates)?;
let elapsed_since_start = self.timing.dynamic_inclusion.started_at().elapsed();
let offline_indices = if elapsed_since_start > MAX_VOTE_OFFLINE_SECONDS {
Vec::new()
} else {
self.offline.read().reports(&self.validators[..])
};
if !offline_indices.is_empty() {
info!(
"Submitting offline validators {:?} for slash-vote",
offline_indices.iter().map(|&i| self.validators[i as usize]).collect::<Vec<_>>(),
)
}
let inherent_data = InherentData {
timestamp,
parachain_heads: candidates,
offline_indices,
};
let mut block_builder = self.client.build_block(&self.parent_id, inherent_data)?;
{
let mut unqueue_invalid = Vec::new();
@@ -0,0 +1,137 @@
// Copyright 2018 Parity Technologies (UK) Ltd.
// This file is part of Polkadot.
// Polkadot is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Polkadot is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
//! Tracks offline validators.
use polkadot_primitives::AccountId;
use std::collections::HashMap;
use std::time::{Instant, Duration};
// time before we report a validator.
const REPORT_TIME: Duration = Duration::from_secs(60 * 5);
struct Observed {
last_round_end: Instant,
offline_since: Instant,
}
impl Observed {
fn new() -> Observed {
let now = Instant::now();
Observed {
last_round_end: now,
offline_since: now,
}
}
fn note_round_end(&mut self, was_online: bool) {
let now = Instant::now();
self.last_round_end = now;
if was_online {
self.offline_since = now;
}
}
fn is_active(&self) -> bool {
// can happen if clocks are not monotonic
if self.offline_since > self.last_round_end { return true }
self.last_round_end.duration_since(self.offline_since) < REPORT_TIME
}
}
/// Tracks offline validators and can issue a report for those offline.
pub struct OfflineTracker {
observed: HashMap<AccountId, Observed>,
}
impl OfflineTracker {
/// Create a new tracker.
pub fn new() -> Self {
OfflineTracker { observed: HashMap::new() }
}
/// Note new consensus is starting with the given set of validators.
pub fn note_new_block(&mut self, validators: &[AccountId]) {
use std::collections::HashSet;
let set: HashSet<_> = validators.iter().cloned().collect();
self.observed.retain(|k, _| set.contains(k));
}
/// Note that a round has ended.
pub fn note_round_end(&mut self, validator: AccountId, was_online: bool) {
self.observed.entry(validator)
.or_insert_with(Observed::new)
.note_round_end(was_online);
}
/// Generate a vector of indices for offline account IDs.
pub fn reports(&self, validators: &[AccountId]) -> Vec<u32> {
validators.iter()
.enumerate()
.filter_map(|(i, v)| if self.is_online(v) {
None
} else {
Some(i as u32)
})
.collect()
}
/// Whether reports on a validator set are consistent with our view of things.
pub fn check_consistency(&self, validators: &[AccountId], reports: &[u32]) -> bool {
reports.iter().cloned().all(|r| {
let v = match validators.get(r as usize) {
Some(v) => v,
None => return false,
};
// we must think all validators reported externally are offline.
let thinks_online = self.is_online(v);
!thinks_online
})
}
fn is_online(&self, v: &AccountId) -> bool {
self.observed.get(v).map(Observed::is_active).unwrap_or(true)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn validator_offline() {
let mut tracker = OfflineTracker::new();
let v = [0; 32].into();
let v2 = [1; 32].into();
let v3 = [2; 32].into();
tracker.note_round_end(v, true);
tracker.note_round_end(v2, true);
tracker.note_round_end(v3, true);
let slash_time = REPORT_TIME + Duration::from_secs(5);
tracker.observed.get_mut(&v).unwrap().offline_since -= slash_time;
tracker.observed.get_mut(&v2).unwrap().offline_since -= slash_time;
assert_eq!(tracker.reports(&[v, v2, v3]), vec![0, 1]);
tracker.note_new_block(&[v, v3]);
assert_eq!(tracker.reports(&[v, v2, v3]), vec![0]);
}
}
@@ -172,6 +172,9 @@ impl Service {
N::TableRouter: Send + 'static,
<N::Collation as IntoFuture>::Future: Send + 'static,
{
use parking_lot::RwLock;
use super::OfflineTracker;
let (signal, exit) = ::exit_future::signal();
let thread = thread::spawn(move || {
let mut runtime = LocalRuntime::new().expect("Could not create local runtime");
@@ -185,6 +188,7 @@ impl Service {
parachain_empty_duration,
handle: thread_pool.clone(),
extrinsic_store: extrinsic_store.clone(),
offline: Arc::new(RwLock::new(OfflineTracker::new())),
};
let bft_service = Arc::new(BftService::new(client.clone(), key, factory));