From 781f908760a3314dfec591e83f79defada0a5a7f Mon Sep 17 00:00:00 2001 From: Peter Goodspeed-Niklaus Date: Thu, 11 Mar 2021 10:06:53 +0100 Subject: [PATCH] Implement PJR checker (#8160) * Apply. * get rid of glob import * use meaningful generic type name * pjr_check operates on `Supports` struct used elsewhere * improve algorithmic complexity of `prepare_pjr_input` * fix rustdoc warnings * improve module docs * typo * simplify debug assertion * add test finding the phase-change threshold value for a constructed scenario * add more threshold scenarios to disambiguate plausible interpretations * add link to npos paper reference * docs: staked_assignment -> supports Co-authored-by: Kian Paimani <5588131+kianenigma@users.noreply.github.com> * add utility method for generating npos inputs * add a fuzzer which asserts that all unbalanced seq_phragmen are PJR Note that this currently fails. I hope that this can be rectified by calculating the threshold instead of choosing some arbitrary number. * assert in all cases, not just debug * leverage a native solution to choose candidates * use existing helper methods * add pjr-check and incorporate into the fuzzer We should probably have one of the W3F people look at this to ensure we're not misconstruing any definitions, but this seems like a fairly straightforward implementation. * fix compilation errors * Enable manually setting iteration parameters in single run. This gives us the ability to reproducably extract cases where honggfuzz has discovered a panic. 
For example: $ cargo run --release --bin phragmen_pjr -- --candidates 569 --voters 100 Tue 23 Feb 2021 11:23:39 AM CET Compiling bitflags v1.2.1 Compiling unicode-width v0.1.8 Compiling unicode-segmentation v1.7.1 Compiling ansi_term v0.11.0 Compiling strsim v0.8.0 Compiling vec_map v0.8.2 Compiling proc-macro-error-attr v1.0.4 Compiling proc-macro-error v1.0.4 Compiling textwrap v0.11.0 Compiling atty v0.2.14 Compiling heck v0.3.2 Compiling clap v2.33.3 Compiling structopt-derive v0.4.14 Compiling structopt v0.3.21 Compiling sp-npos-elections-fuzzer v2.0.0-alpha.5 (/home/coriolinus/Documents/Projects/paritytech/substrate/primitives/npos-elections/fuzzer) Finished release [optimized] target(s) in 6.15s Running `/home/coriolinus/Documents/Projects/paritytech/substrate/target/release/phragmen_pjr -c 569 -v 100` thread 'main' panicked at 'unbalanced sequential phragmen must satisfy PJR', primitives/npos-elections/fuzzer/src/phragmen_pjr.rs:133:5 note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace This is still not adequate proof that seq_phragmen is broken; it could very well be that our PJR checker is doing the wrong thing, or we've somehow missed a parameter of interest. Still, it's concerning. * update comment verbiage for accuracy * it is valid in PJR for an elected candidate to have 0 support * Fix phragmen_pjr fuzzer It turns out that the fundamental problem causing previous implementations of the fuzzer to fail wasn't in `seq_phragmen` _or_ in `pjr_check`: it was in the rounding errors introduced in the various conversions between the internal data representation and the external one. Fixing the fuzzer is then simply an issue of using the internal representation and staying in that representation. However, that leaves the issue that `seq_phragmen` occasionally produces an output which is technically not PJR due to rounding errors. In the future we will need to add some kind of "close-enough" threshold. 
However, that is explicitly out of scope of this PR. * restart ci; it appears to be stalled * use necessary import for no-std * use a more realistic distribution of voters and candidates This isn't ideal; more realistic numbers would be about twice these. However, either case generation or voting has nonlinear execution time, and doubling these values brings iteration time from ~20s to ~180s. Fuzzing 6x as fast should make up for fuzzing cases half the size. * identify specifically which PJR check may fail * move candidate collection comment into correct place * standard_threshold: use a calculation method which cannot overflow * Apply suggestions from code review (update comments) Co-authored-by: Kian Paimani <5588131+kianenigma@users.noreply.github.com> * clarify the effectiveness bounds for t-pjr check * how to spell "committee" * reorganize: high -> low abstraction * ensure standard threshold calc cannot panic Co-authored-by: Kian Paimani <5588131+kianenigma@users.noreply.github.com> * Apply suggestions from code review Co-authored-by: Shawn Tabrizi Co-authored-by: kianenigma Co-authored-by: Kian Paimani <5588131+kianenigma@users.noreply.github.com> Co-authored-by: Shawn Tabrizi --- substrate/Cargo.lock | 2 + substrate/frame/staking/src/lib.rs | 2 +- .../npos-elections/fuzzer/Cargo.toml | 14 +- .../npos-elections/fuzzer/src/common.rs | 123 ++++- .../npos-elections/fuzzer/src/phragmen_pjr.rs | 118 ++++ .../primitives/npos-elections/src/lib.rs | 53 +- .../primitives/npos-elections/src/phragmen.rs | 4 + .../primitives/npos-elections/src/pjr.rs | 519 ++++++++++++++++++ 8 files changed, 791 insertions(+), 44 deletions(-) create mode 100644 substrate/primitives/npos-elections/fuzzer/src/phragmen_pjr.rs create mode 100644 substrate/primitives/npos-elections/src/pjr.rs diff --git a/substrate/Cargo.lock b/substrate/Cargo.lock index 7194b5820f..911d1541f6 100644 --- a/substrate/Cargo.lock +++ b/substrate/Cargo.lock @@ -8803,9 +8803,11 @@ dependencies = [ "honggfuzz", 
"parity-scale-codec", "rand 0.7.3", + "sp-arithmetic", "sp-npos-elections", "sp-runtime", "sp-std", + "structopt", ] [[package]] diff --git a/substrate/frame/staking/src/lib.rs b/substrate/frame/staking/src/lib.rs index ed8a2efbd4..7f51d246c6 100644 --- a/substrate/frame/staking/src/lib.rs +++ b/substrate/frame/staking/src/lib.rs @@ -2740,8 +2740,8 @@ impl Module { // write new results. >::put(ElectionResult { elected_stashes: winners, - compute, exposures, + compute, }); QueuedScore::put(submitted_score); diff --git a/substrate/primitives/npos-elections/fuzzer/Cargo.toml b/substrate/primitives/npos-elections/fuzzer/Cargo.toml index bac8a165f3..3154a7861d 100644 --- a/substrate/primitives/npos-elections/fuzzer/Cargo.toml +++ b/substrate/primitives/npos-elections/fuzzer/Cargo.toml @@ -14,12 +14,14 @@ publish = false targets = ["x86_64-unknown-linux-gnu"] [dependencies] -sp-npos-elections = { version = "3.0.0", path = ".." } -sp-std = { version = "3.0.0", path = "../../std" } -sp-runtime = { version = "3.0.0", path = "../../runtime" } +codec = { package = "parity-scale-codec", version = "2.0.0", default-features = false, features = ["derive"] } honggfuzz = "0.5" rand = { version = "0.7.3", features = ["std", "small_rng"] } -codec = { package = "parity-scale-codec", version = "2.0.0", default-features = false, features = ["derive"] } +sp-arithmetic = { version = "3.0.0", path = "../../arithmetic" } +sp-npos-elections = { version = "3.0.0", path = ".." 
} +sp-runtime = { version = "3.0.0", path = "../../runtime" } +sp-std = { version = "3.0.0", path = "../../std" } +structopt = "0.3.21" [[bin]] name = "reduce" @@ -36,3 +38,7 @@ path = "src/phragmms_balancing.rs" [[bin]] name = "compact" path = "src/compact.rs" + +[[bin]] +name = "phragmen_pjr" +path = "src/phragmen_pjr.rs" diff --git a/substrate/primitives/npos-elections/fuzzer/src/common.rs b/substrate/primitives/npos-elections/fuzzer/src/common.rs index 29f0247f84..fe237c930d 100644 --- a/substrate/primitives/npos-elections/fuzzer/src/common.rs +++ b/substrate/primitives/npos-elections/fuzzer/src/common.rs @@ -20,10 +20,10 @@ // Each function will be used based on which fuzzer binary is being used. #![allow(dead_code)] -use sp_npos_elections::{ElectionResult, VoteWeight, phragmms, seq_phragmen}; -use sp_std::collections::btree_map::BTreeMap; +use rand::{self, seq::SliceRandom, Rng, RngCore}; +use sp_npos_elections::{phragmms, seq_phragmen, ElectionResult, VoteWeight}; use sp_runtime::Perbill; -use rand::{self, Rng, RngCore}; +use std::collections::{BTreeMap, HashSet}; /// converts x into the range [a, b] in a pseudo-fair way. pub fn to_range(x: usize, a: usize, b: usize) -> usize { @@ -39,11 +39,81 @@ pub fn to_range(x: usize, a: usize, b: usize) -> usize { pub enum ElectionType { Phragmen(Option<(usize, u128)>), - Phragmms(Option<(usize, u128)>) + Phragmms(Option<(usize, u128)>), } pub type AccountId = u64; +/// Generate a set of inputs suitable for fuzzing an election algorithm +/// +/// Given parameters governing how many candidates and voters should exist, generates a voting +/// scenario suitable for fuzz-testing an election algorithm. +/// +/// The returned candidate list is sorted. This sorting property should not affect the result of the +/// calculation. +/// +/// The returned voters list is sorted. This enables binary searching for a particular voter by +/// account id. This sorting property should not affect the results of the calculation. 
+/// +/// Each voter's selection of candidates to vote for is sorted. +/// +/// Note that this does not generate balancing parameters. +pub fn generate_random_npos_inputs( + candidate_count: usize, + voter_count: usize, + mut rng: impl Rng, +) -> ( + usize, + Vec, + Vec<(AccountId, VoteWeight, Vec)>, +) { + // cache for fast generation of unique candidate and voter ids + let mut used_ids = HashSet::with_capacity(candidate_count + voter_count); + + // always generate a sensible desired number of candidates: elections are uninteresting if we + // desire 0 candidates, or a number of candidates >= the actual number of candidates present + let rounds = rng.gen_range(1, candidate_count); + + // candidates are easy: just a completely random set of IDs + let mut candidates: Vec = Vec::with_capacity(candidate_count); + for _ in 0..candidate_count { + let mut id = rng.gen(); + // insert returns `false` when the value was already present + while !used_ids.insert(id) { + id = rng.gen(); + } + candidates.push(id); + } + candidates.sort_unstable(); + candidates.dedup(); + assert_eq!(candidates.len(), candidate_count); + + let mut voters = Vec::with_capacity(voter_count); + for _ in 0..voter_count { + let mut id = rng.gen(); + // insert returns `false` when the value was already present + while !used_ids.insert(id) { + id = rng.gen(); + } + + let vote_weight = rng.gen(); + + // it's not interesting if a voter chooses 0 or all candidates, so rule those cases out. 
+ let n_candidates_chosen = rng.gen_range(1, candidates.len()); + + let mut chosen_candidates = Vec::with_capacity(n_candidates_chosen); + chosen_candidates.extend(candidates.choose_multiple(&mut rng, n_candidates_chosen)); + chosen_candidates.sort(); + voters.push((id, vote_weight, chosen_candidates)); + } + + voters.sort_unstable(); + voters.dedup_by_key(|(id, _weight, _chosen_candidates)| *id); + assert_eq!(voters.len(), voter_count); + + (rounds, candidates, voters) +} + pub fn generate_random_npos_result( voter_count: u64, target_count: u64, @@ -71,19 +141,20 @@ pub fn generate_random_npos_result( }); let mut voters = Vec::with_capacity(voter_count as usize); - (prefix ..= (prefix + voter_count)).for_each(|acc| { + (prefix..=(prefix + voter_count)).for_each(|acc| { let edge_per_this_voter = rng.gen_range(1, candidates.len()); // all possible targets let mut all_targets = candidates.clone(); // we remove and pop into `targets` `edge_per_this_voter` times. - let targets = (0..edge_per_this_voter).map(|_| { - let upper = all_targets.len() - 1; - let idx = rng.gen_range(0, upper); - all_targets.remove(idx) - }) - .collect::>(); + let targets = (0..edge_per_this_voter) + .map(|_| { + let upper = all_targets.len() - 1; + let idx = rng.gen_range(0, upper); + all_targets.remove(idx) + }) + .collect::>(); - let stake_var = rng.gen_range(ed, 100 * ed) ; + let stake_var = rng.gen_range(ed, 100 * ed); let stake = base_stake + stake_var; stake_of.insert(acc, stake); voters.push((acc, stake, targets)); @@ -91,20 +162,20 @@ pub fn generate_random_npos_result( ( match election_type { - ElectionType::Phragmen(conf) => - seq_phragmen::( - to_elect, - candidates.clone(), - voters.clone(), - conf, - ).unwrap(), - ElectionType::Phragmms(conf) => - phragmms::( - to_elect, - candidates.clone(), - voters.clone(), - conf, - ).unwrap(), + ElectionType::Phragmen(conf) => seq_phragmen::( + to_elect, + candidates.clone(), + voters.clone(), + conf, + ) + .unwrap(), + 
ElectionType::Phragmms(conf) => phragmms::( + to_elect, + candidates.clone(), + voters.clone(), + conf, + ) + .unwrap(), }, candidates, voters, diff --git a/substrate/primitives/npos-elections/fuzzer/src/phragmen_pjr.rs b/substrate/primitives/npos-elections/fuzzer/src/phragmen_pjr.rs new file mode 100644 index 0000000000..9727d1406a --- /dev/null +++ b/substrate/primitives/npos-elections/fuzzer/src/phragmen_pjr.rs @@ -0,0 +1,118 @@ +// This file is part of Substrate. + +// Copyright (C) 2020-2021 Parity Technologies (UK) Ltd. +// SPDX-License-Identifier: Apache-2.0 + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Fuzzing which ensures that running unbalanced sequential phragmen always produces a result +//! which satisfies our PJR checker. +//! +//! ## Running a single iteration +//! +//! Honggfuzz shuts down each individual loop iteration after a configurable time limit. +//! It can be helpful to run a single iteration on your hardware to help benchmark how long that time +//! limit should reasonably be. Simply run the program without the `fuzzing` configuration to run a +//! single iteration: `cargo run --bin phragmen_pjr`. +//! +//! ## Running +//! +//! Run with `HFUZZ_RUN_ARGS="-t 10" cargo hfuzz run phragmen_pjr`. +//! +//! Note the environment variable: by default, `cargo hfuzz` shuts down each iteration after 1 second +//! of runtime. We significantly increase that to ensure that the fuzzing gets a chance to complete. +//! 
Running a single iteration can help determine an appropriate value for this parameter. +//! +//! ## Debugging a panic +//! +//! Once a panic is found, it can be debugged with +//! `HFUZZ_RUN_ARGS="-t 10" cargo hfuzz run-debug phragmen_pjr hfuzz_workspace/phragmen_pjr/*.fuzz`. +//! + +#[cfg(fuzzing)] +use honggfuzz::fuzz; + +#[cfg(not(fuzzing))] +use structopt::StructOpt; + +mod common; +use common::{generate_random_npos_inputs, to_range}; +use rand::{self, SeedableRng}; +use sp_npos_elections::{pjr_check_core, seq_phragmen_core, setup_inputs, standard_threshold}; + +type AccountId = u64; + +const MIN_CANDIDATES: usize = 250; +const MAX_CANDIDATES: usize = 1000; +const MIN_VOTERS: usize = 500; +const MAX_VOTERS: usize = 2500; + +#[cfg(fuzzing)] +fn main() { + loop { + fuzz!(|data: (usize, usize, u64)| { + let (candidate_count, voter_count, seed) = data; + iteration(candidate_count, voter_count, seed); + }); + } +} + +#[cfg(not(fuzzing))] +#[derive(Debug, StructOpt)] +struct Opt { + /// How many candidates participate in this election + #[structopt(short, long)] + candidates: Option, + + /// How many voters participate in this election + #[structopt(short, long)] + voters: Option, + + /// Random seed to use in this election + #[structopt(long)] + seed: Option, +} + +#[cfg(not(fuzzing))] +fn main() { + let opt = Opt::from_args(); + // candidates and voters by default use the maxima, which turn out to be one less than + // the constant. 
+ iteration( + opt.candidates.unwrap_or(MAX_CANDIDATES - 1), + opt.voters.unwrap_or(MAX_VOTERS - 1), + opt.seed.unwrap_or_default(), + ); +} + +fn iteration(mut candidate_count: usize, mut voter_count: usize, seed: u64) { + let rng = rand::rngs::SmallRng::seed_from_u64(seed); + candidate_count = to_range(candidate_count, MIN_CANDIDATES, MAX_CANDIDATES); + voter_count = to_range(voter_count, MIN_VOTERS, MAX_VOTERS); + + let (rounds, candidates, voters) = + generate_random_npos_inputs(candidate_count, voter_count, rng); + + let (candidates, voters) = setup_inputs(candidates, voters); + + // Run seq-phragmen + let (candidates, voters) = seq_phragmen_core::(rounds, candidates, voters) + .expect("seq_phragmen must succeed"); + + let threshold = standard_threshold(rounds, voters.iter().map(|voter| voter.budget())); + + assert!( + pjr_check_core(&candidates, &voters, threshold), + "unbalanced sequential phragmen must satisfy PJR", + ); +} diff --git a/substrate/primitives/npos-elections/src/lib.rs b/substrate/primitives/npos-elections/src/lib.rs index d45698e174..c87085ef9f 100644 --- a/substrate/primitives/npos-elections/src/lib.rs +++ b/substrate/primitives/npos-elections/src/lib.rs @@ -18,11 +18,12 @@ //! - [`seq_phragmen`]: Implements the Phragmén Sequential Method. An un-ranked, relatively fast //! election method that ensures PJR, but does not provide a constant factor approximation of the //! maximin problem. -//! - [`phragmms()`]: Implements a hybrid approach inspired by Phragmén which is executed faster but -//! it can achieve a constant factor approximation of the maximin problem, similar to that of the -//! MMS algorithm. -//! - [`balance`]: Implements the star balancing algorithm. This iterative process can push a -//! solution toward being more `balances`, which in turn can increase its score. +//! - [`phragmms`](phragmms::phragmms): Implements a hybrid approach inspired by Phragmén which is +//! 
executed faster but it can achieve a constant factor approximation of the maximin problem, +//! similar to that of the MMS algorithm. +//! - [`balance`](balancing::balance): Implements the star balancing algorithm. This iterative +//! process can push a solution toward being more "balanced", which in turn can increase its +//! score. //! //! ### Terminology //! @@ -98,18 +99,20 @@ mod mock; #[cfg(test)] mod tests; -mod phragmen; -mod balancing; -mod phragmms; -mod node; -mod reduce; -mod helpers; +pub mod phragmen; +pub mod balancing; +pub mod phragmms; +pub mod node; +pub mod reduce; +pub mod helpers; +pub mod pjr; pub use reduce::reduce; pub use helpers::*; pub use phragmen::*; pub use phragmms::*; pub use balancing::*; +pub use pjr::*; // re-export the compact macro, with the dependencies of the macro. #[doc(hidden)] @@ -282,6 +285,12 @@ pub struct Candidate { round: usize, } +impl Candidate { + pub fn to_ptr(self) -> CandidatePtr { + Rc::new(RefCell::new(self)) + } +} + /// A vote being casted by a [`Voter`] to a [`Candidate`] is an `Edge`. #[derive(Clone, Default)] pub struct Edge { @@ -326,6 +335,18 @@ impl std::fmt::Debug for Voter { } impl Voter { + /// Create a new `Voter`. + pub fn new(who: AccountId) -> Self { + Self { who, ..Default::default() } + } + + /// Returns `true` if `self` votes for `target`. + /// + /// Note that this does not take into account if `target` is elected (i.e. is *active*) or not. + pub fn votes_for(&self, target: &AccountId) -> bool { + self.edges.iter().any(|e| &e.who == target) + } + /// Returns none if this voter does not have any non-zero distributions. /// /// Note that this might create _un-normalized_ assignments, due to accuracy loss of `P`. Call @@ -401,6 +422,12 @@ impl Voter { } }) } + + /// This voter's budget + #[inline] + pub fn budget(&self) -> ExtendedBalance { + self.budget + } } /// Final result of the election. 
@@ -734,7 +761,7 @@ pub fn is_score_better(this: ElectionScore, that: ElectionScore, ep /// This will perform some cleanup that are most often important: /// - It drops any votes that are pointing to non-candidates. /// - It drops duplicate targets within a voter. -pub(crate) fn setup_inputs( +pub fn setup_inputs( initial_candidates: Vec, initial_voters: Vec<(AccountId, VoteWeight, Vec)>, ) -> (Vec>, Vec>) { @@ -746,7 +773,7 @@ pub(crate) fn setup_inputs( .enumerate() .map(|(idx, who)| { c_idx_cache.insert(who.clone(), idx); - Rc::new(RefCell::new(Candidate { who, ..Default::default() })) + Candidate { who, ..Default::default() }.to_ptr() }) .collect::>>(); diff --git a/substrate/primitives/npos-elections/src/phragmen.rs b/substrate/primitives/npos-elections/src/phragmen.rs index dad6566673..a1e632acf5 100644 --- a/substrate/primitives/npos-elections/src/phragmen.rs +++ b/substrate/primitives/npos-elections/src/phragmen.rs @@ -63,6 +63,10 @@ const DEN: ExtendedBalance = ExtendedBalance::max_value(); /// `expect` this to return `Ok`. /// /// This can only fail if the normalization fails. +/// +/// Note that rounding errors can potentially cause the output of this function to fail a t-PJR +/// check where t is the standard threshold. The underlying algorithm is sound, but the conversions +/// between numeric types can be lossy. pub fn seq_phragmen( rounds: usize, initial_candidates: Vec, diff --git a/substrate/primitives/npos-elections/src/pjr.rs b/substrate/primitives/npos-elections/src/pjr.rs new file mode 100644 index 0000000000..61e0b2deb7 --- /dev/null +++ b/substrate/primitives/npos-elections/src/pjr.rs @@ -0,0 +1,519 @@ + // This file is part of Substrate. + +// Copyright (C) 2021 Parity Technologies (UK) Ltd. +// SPDX-License-Identifier: Apache-2.0 + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Implements functions and interfaces to check solutions for being t-PJR. +//! +//! PJR stands for proportional justified representation. PJR is an absolute measure to make +//! sure an NPoS solution adheres to a minimum standard. +//! +//! See [`pjr_check`] which is the main entry point of the module. + +use crate::{ + Candidate, + CandidatePtr, + Edge, + ExtendedBalance, + IdentifierT, + Support, + SupportMap, + Supports, + Voter, + VoteWeight, +}; +use sp_std::{rc::Rc, vec::Vec}; +use sp_std::collections::btree_map::BTreeMap; +use sp_arithmetic::{traits::Zero, Perbill}; + +/// The type used as the threshold. +/// +/// Just some reading sugar; Must always be same as [`ExtendedBalance`]; +type Threshold = ExtendedBalance; + +/// Compute the threshold corresponding to the standard PJR property +/// +/// `t-PJR` checks can check PJR according to an arbitrary threshold. The threshold can be any value, +/// but the property gets stronger as the threshold gets smaller. The strongest possible `t-PJR` property +/// corresponds to `t == 0`. +/// +/// However, standard PJR is less stringent than that. This function returns the threshold whose +/// strength corresponds to the standard PJR property. +/// +/// - `committee_size` is the number of winners of the election. +/// - `weights` is an iterator of voter stakes. If the sum of stakes is already known, +/// `std::iter::once(sum_of_stakes)` is appropriate here. 
+pub fn standard_threshold( + committee_size: usize, + weights: impl IntoIterator, +) -> Threshold { + weights + .into_iter() + .fold(Threshold::zero(), |acc, elem| { + acc.saturating_add(elem) + }) + / committee_size.max(1) as Threshold +} + +/// Check a solution to be PJR. +/// +/// The PJR property is true if `t-PJR` is true when `t == sum(stake) / committee_size`. +pub fn pjr_check( + supports: &Supports, + all_candidates: Vec, + all_voters: Vec<(AccountId, VoteWeight, Vec)>, +) -> bool { + let t = standard_threshold(supports.len(), all_voters.iter().map(|voter| voter.1 as ExtendedBalance)); + t_pjr_check(supports, all_candidates, all_voters, t) +} + +/// Check a solution to be t-PJR. +/// +/// ### Semantics +/// +/// The t-PJR property is defined in the paper ["Validator Election in Nominated Proof-of-Stake"][NPoS], +/// section 5, definition 1. +/// +/// In plain language, the t-PJR condition is: if there is a group of `N` voters +/// who have `r` common candidates and can afford to support each of them with backing stake `t` +/// (i.e `sum(stake(v) for v in voters) == r * t`), then this committee needs to be represented by at +/// least `r` elected candidates. +/// +/// Section 5 of the NPoS paper shows that this property can be tested by: for a feasible solution, +/// if `Max {score(c)} < t` where c is every unelected candidate, then this solution is t-PJR. There +/// may exist edge cases which satisfy the formal definition of t-PJR but do not pass this test, but +/// those should be rare enough that we can discount them. +/// +/// ### Interface +/// +/// In addition to data that can be computed from the [`Supports`] struct, a PJR check also +/// needs to inspect un-elected candidates and edges, thus `all_candidates` and `all_voters`. +/// +/// [NPoS]: https://arxiv.org/pdf/2004.12990v1.pdf +// +// ### Implementation Notes +// +// The paper uses mathematical notation, which prioritizes single-symbol names.
For programmer ease, +// we map these to more descriptive names as follows: +// +// C => all_candidates +// N => all_voters +// (A, w) => (candidates, voters) +// +// Note that while the names don't explicitly say so, `candidates` are the winning candidates, and +// `voters` is the set of weighted edges from nominators to winning validators. +pub fn t_pjr_check( + supports: &Supports, + all_candidates: Vec, + all_voters: Vec<(AccountId, VoteWeight, Vec)>, + t: Threshold, +) -> bool { + // First order of business: derive `(candidates, voters)` from `supports`. + let (candidates, voters) = prepare_pjr_input( + supports, + all_candidates, + all_voters, + ); + // compute with threshold t. + pjr_check_core(candidates.as_ref(), voters.as_ref(), t) +} + +/// The internal implementation of the PJR check after having the data converted. +/// +/// [`pjr_check`] or [`t_pjr_check`] are typically easier to work with. +pub fn pjr_check_core( + candidates: &[CandidatePtr], + voters: &[Voter], + t: Threshold, +) -> bool { + let unelected = candidates.iter().filter(|c| !c.borrow().elected); + let maybe_max_pre_score = unelected.map(|c| (pre_score(Rc::clone(c), voters, t), c.borrow().who.clone())).max(); + // if unelected is empty then the solution is indeed PJR. + maybe_max_pre_score.map_or(true, |(max_pre_score, _)| max_pre_score < t) +} + + + +/// Convert the data types that the user runtime has into ones that can be used by this module. +/// +/// It is expected that this function's interface might change over time, or multiple variants of it +/// can be provided for different use cases. +/// +/// The ultimate goal, in any case, is to convert the election data into [`Candidate`] and [`Voter`] +/// types defined by this crate, whilst setting correct value for some of their fields, namely: +/// 1. Candidate [`backing_stake`](Candidate::backing_stake) and [`elected`](Candidate::elected) if they are a winner. +/// 2. Voter edge [`weight`](Edge::weight) if they are backing a winner. 
+/// 3. Voter [`budget`](Voter::budget). +/// +/// None of the `load` or `score` values are used and can be ignored. This is similar to +/// [`setup_inputs`] function of this crate. +/// +/// ### Performance (Weight) Notes +/// +/// Note that the current function is rather unfortunately inefficient. The most significant +/// slowdown is the fact that a typical solution that needs to be checked for PJR only contains a +/// subset of the entire NPoS edge graph, encoded as `supports`. This only encodes the +/// edges that actually contribute to a winner's backing stake and ignores the rest to save space. +/// To check PJR, we need the entire voter set, including those edges that point to non-winners. +/// This could cause the caller runtime to have to read the entire list of voters, which is assumed +/// to be expensive. +/// +/// A sensible user of this module should make sure that the PJR check is executed and checked as +/// little as possible, and take sufficient economical measures to ensure that this function cannot +/// be abused. +fn prepare_pjr_input( + supports: &Supports, + all_candidates: Vec, + all_voters: Vec<(AccountId, VoteWeight, Vec)>, +) -> (Vec>, Vec>) { + let mut candidates_index: BTreeMap = BTreeMap::new(); + + // dump the staked assignments in a voter-major map for faster access down the road. + let mut assignment_map: BTreeMap> = BTreeMap::new(); + for (winner_id, Support { voters, .. }) in supports.iter() { + for (voter_id, support) in voters.iter() { + assignment_map.entry(voter_id.clone()).or_default().push((winner_id.clone(), *support)); + } + } + + // Convert Supports into a SupportMap + // + // As a flat list, we're limited to linear search. That gives the production of `candidates`, + // below, a complexity of `O(s*c)`, where `s == supports.len()` and `c == all_candidates.len()`. + // For large lists, that's pretty bad. + // + // A `SupportMap`, as a `BTreeMap`, has access timing of `O(lg n)`.
This means that constructing + // the map and then indexing from it gives us timing of `O((s + c) * lg(s))`. If in the future + // we get access to a deterministic `HashMap`, we can further improve that to `O(s+c)`. + // + // However, it does mean allocating sufficient space to store all the data again. + let supports: SupportMap = supports.iter().cloned().collect(); + + // collect all candidates and winners into a unified `Vec`. + let candidates = all_candidates.into_iter().enumerate().map(|(i, c)| { + candidates_index.insert(c.clone(), i); + + // set the backing value and elected flag if the candidate is among the winners. + let who = c; + let maybe_support = supports.get(&who); + let elected = maybe_support.is_some(); + let backed_stake = maybe_support.map(|support| support.total).unwrap_or_default(); + + Candidate { who, elected, backed_stake, ..Default::default() }.to_ptr() + }).collect::>(); + + // collect all voters into a unified Vec. + let voters = all_voters.into_iter().map(|(v, w, ts)| { + let mut edges: Vec> = Vec::with_capacity(ts.len()); + for t in ts { + if edges.iter().any(|e| e.who == t) { + // duplicate edge. + continue; + } + + if let Some(idx) = candidates_index.get(&t) { + // if this edge is among the assignments, set the weight as well. + let weight = assignment_map + .get(&v) + .and_then(|d| d.iter().find_map(|(x, y)| if x == &t { Some(y) } else { None })) + .cloned() + .unwrap_or_default(); + edges.push(Edge { + who: t, + candidate: Rc::clone(&candidates[*idx]), + weight, + ..Default::default() + }); + } + } + + let who = v; + let budget: ExtendedBalance = w.into(); + Voter { who, budget, edges, ..Default::default() } + }).collect::>(); + + (candidates, voters) +} + +/// The pre-score of an unelected candidate. +/// +/// This is the amount of stake that *all voters* can spare to devote to this candidate without +/// allowing the backing stake of any elected candidate to fall below `t`.
+/// +/// In essence, it is the sum(slack(n, t)) for all `n` who vote for `unelected`. +fn pre_score( + unelected: CandidatePtr, + voters: &[Voter], + t: Threshold, +) -> ExtendedBalance { + debug_assert!(!unelected.borrow().elected); + voters + .iter() + .filter(|ref v| v.votes_for(&unelected.borrow().who)) + .fold(Zero::zero(), |acc: ExtendedBalance, voter| acc.saturating_add(slack(voter, t))) +} + + +/// The slack of a voter at a given state. +/// +/// The slack of each voter, with threshold `t` is the total amount of stake that this voter can +/// spare to a new potential member, whilst not dropping the backing stake of any of its currently +/// active members below `t`. In essence, for each of the current active candidates `c`, we assume +/// that we reduce the edge weight of `voter` to `c` from `w` to `w * min(1, t / support(c))`. +/// +/// More accurately: +/// +/// 1. If `c` exactly has `t` backing or less, then we don't generate any slack. +/// 2. If `c` has more than `t`, then we reduce it to `t`. +fn slack(voter: &Voter, t: Threshold) -> ExtendedBalance { + let budget = voter.budget; + let leftover = voter.edges.iter().fold(Zero::zero(), |acc: ExtendedBalance, edge| { + let candidate = edge.candidate.borrow(); + if candidate.elected { + let extra = + Perbill::one().min(Perbill::from_rational_approximation(t, candidate.backed_stake)) + * edge.weight; + acc.saturating_add(extra) + } else { + // No slack generated here. + acc + } + }); + + // NOTE: candidate for saturating_log_sub(). Defensive-only.
+ budget.saturating_sub(leftover) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn setup_voter(who: u32, votes: Vec<(u32, u128, bool)>) -> Voter { + let mut voter = Voter::new(who); + let mut budget = 0u128; + let candidates = votes.into_iter().map(|(t, w, e)| { + budget += w; + Candidate { who: t, elected: e, backed_stake: w, ..Default::default() } + }).collect::>(); + let edges = candidates.into_iter().map(|c| + Edge { who: c.who, weight: c.backed_stake, candidate: c.to_ptr(), ..Default::default() } + ).collect::>(); + voter.edges = edges; + voter.budget = budget; + voter + } + + #[test] + fn slack_works() { + let voter = setup_voter(10, vec![(1, 10, true), (2, 20, true)]); + + assert_eq!(slack(&voter, 15), 5); + assert_eq!(slack(&voter, 17), 3); + assert_eq!(slack(&voter, 10), 10); + assert_eq!(slack(&voter, 5), 20); + + } + + #[test] + fn pre_score_works() { + // will give 5 slack + let v1 = setup_voter(10, vec![(1, 10, true), (2, 20, true), (3, 0, false)]); + // will give no slack + let v2 = setup_voter(20, vec![(1, 5, true), (2, 5, true)]); + // will give 10 slack. 
+ let v3 = setup_voter(30, vec![(1, 20, true), (2, 20, true), (3, 0, false)]); + + let unelected = Candidate { who: 3u32, elected: false, ..Default::default() }.to_ptr(); + let score = pre_score(unelected, &vec![v1, v2, v3], 15); + + assert_eq!(score, 15); + } + + #[test] + fn can_convert_data_from_external_api() { + let all_candidates = vec![10, 20, 30, 40]; + let all_voters = vec![ + (1, 10, vec![10, 20, 30, 40]), + (2, 20, vec![10, 20, 30, 40]), + (3, 30, vec![10, 30]), + ]; + // tuples in voters vector are (AccountId, Balance) + let supports: Supports = vec![ + (20, Support { total: 15, voters: vec![(1, 5), (2, 10)]}), + (40, Support { total: 15, voters: vec![(1, 5), (2, 10)]}), + ]; + + let (candidates, voters) = prepare_pjr_input( + &supports, + all_candidates, + all_voters, + ); + + // elected flag and backing must be set correctly + assert_eq!( + candidates + .iter() + .map(|c| (c.borrow().who.clone(), c.borrow().elected, c.borrow().backed_stake)) + .collect::>(), + vec![(10, false, 0), (20, true, 15), (30, false, 0), (40, true, 15)], + ); + + // edge weight must be set correctly + assert_eq!( + voters + .iter() + .map(|v| ( + v.who, + v.budget, + v.edges.iter().map(|e| (e.who, e.weight)).collect::>(), + )).collect::>(), + vec![ + (1, 10, vec![(10, 0), (20, 5), (30, 0), (40, 5)]), + (2, 20, vec![(10, 0), (20, 10), (30, 0), (40, 10)]), + (3, 30, vec![(10, 0), (30, 0)]), + ], + ); + + // fyi. this is not PJR, obviously because the votes of 3 can bump the stake a lot but they + // are being ignored. + assert!(!pjr_check_core(&candidates, &voters, 1)); + assert!(!pjr_check_core(&candidates, &voters, 10)); + assert!(!pjr_check_core(&candidates, &voters, 20)); + } + + // These next tests ensure that the threshold phase change property holds for us, but that's not their real purpose. + // They were written to help develop an intuition about what the threshold value actually means + // in layman's terms. 
+ // + // The results tend to support the intuition that the threshold is the voting power at and below + // which a voter's preferences can simply be ignored. + #[test] + fn find_upper_bound_for_threshold_scenario_1() { + let all_candidates = vec![10, 20, 30, 40]; + let all_voters = vec![ + (1, 10, vec![10, 20, 30, 40]), + (2, 20, vec![10, 20, 30, 40]), + (3, 30, vec![10, 30]), + ]; + // tuples in voters vector are (AccountId, Balance) + let supports: Supports = vec![ + (20, Support { total: 15, voters: vec![(1, 5), (2, 10)]}), + (40, Support { total: 15, voters: vec![(1, 5), (2, 10)]}), + ]; + + let (candidates, voters) = prepare_pjr_input( + &supports, + all_candidates, + all_voters, + ); + + find_threshold_phase_change_for_scenario(candidates, voters); + } + + #[test] + fn find_upper_bound_for_threshold_scenario_2() { + let all_candidates = vec![10, 20, 30, 40]; + let all_voters = vec![ + (1, 10, vec![10, 20, 30, 40]), + (2, 20, vec![10, 20, 30, 40]), + (3, 25, vec![10, 30]), + ]; + // tuples in voters vector are (AccountId, Balance) + let supports: Supports = vec![ + (20, Support { total: 15, voters: vec![(1, 5), (2, 10)]}), + (40, Support { total: 15, voters: vec![(1, 5), (2, 10)]}), + ]; + + let (candidates, voters) = prepare_pjr_input( + &supports, + all_candidates, + all_voters, + ); + + find_threshold_phase_change_for_scenario(candidates, voters); + } + + #[test] + fn find_upper_bound_for_threshold_scenario_3() { + let all_candidates = vec![10, 20, 30, 40]; + let all_voters = vec![ + (1, 10, vec![10, 20, 30, 40]), + (2, 20, vec![10, 20, 30, 40]), + (3, 35, vec![10, 30]), + ]; + // tuples in voters vector are (AccountId, Balance) + let supports: Supports = vec![ + (20, Support { total: 15, voters: vec![(1, 5), (2, 10)]}), + (40, Support { total: 15, voters: vec![(1, 5), (2, 10)]}), + ]; + + let (candidates, voters) = prepare_pjr_input( + &supports, + all_candidates, + all_voters, + ); + + find_threshold_phase_change_for_scenario(candidates, voters); + } + 
+ fn find_threshold_phase_change_for_scenario( + candidates: Vec>, + voters: Vec> + ) -> Threshold { + let mut threshold = 1; + let mut prev_threshold = 0; + + // find the binary range containing the threshold beyond which the PJR check succeeds + while !pjr_check_core(&candidates, &voters, threshold) { + prev_threshold = threshold; + threshold = threshold.checked_mul(2).expect("pjr check must fail before we run out of capacity in u128"); + } + + // now binary search within that range to find the phase threshold + let mut high_bound = threshold; + let mut low_bound = prev_threshold; + + while high_bound - low_bound > 1 { + // maintain the invariant that low_bound fails and high_bound passes + let test = low_bound + ((high_bound - low_bound) / 2); + if pjr_check_core(&candidates, &voters, test) { + high_bound = test; + } else { + low_bound = test; + } + } + + println!("highest failing check: {}", low_bound); + println!("lowest succeeding check: {}", high_bound); + + // for a value to be a threshold, it must be the boundary between two conditions + let mut unexpected_failures = Vec::new(); + let mut unexpected_successes = Vec::new(); + for t in 0..=low_bound { + if pjr_check_core(&candidates, &voters, t) { + unexpected_successes.push(t); + } + } + for t in high_bound..(high_bound*2) { + if !pjr_check_core(&candidates, &voters, t) { + unexpected_failures.push(t); + } + } + dbg!(&unexpected_successes, &unexpected_failures); + assert!(unexpected_failures.is_empty() && unexpected_successes.is_empty()); + + high_bound + } +}