// Copyright (C) Parity Technologies (UK) Ltd. and Dijital Kurdistan Tech Institute // This file is part of Pezkuwi. // Pezkuwi is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // Pezkuwi is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // You should have received a copy of the GNU General Public License // along with Pezkuwi. If not, see . //! As part of Pezkuwi's availability system, certain pieces of data //! for each block are required to be kept available. //! //! The way we accomplish this is by erasure coding the data into n pieces //! and constructing a merkle root of the data. //! //! Each of n validators stores their piece of data. We assume `n = 3f + k`, `0 < k ≤ 3`. //! f is the maximum number of faulty validators in the system. //! The data is coded so any f+1 chunks can be used to reconstruct the full data. use codec::{Decode, Encode}; use pezkuwi_pez_node_primitives::{AvailableData, Proof}; use pezkuwi_primitives::{BlakeTwo256, Hash as H256, HashT}; use pezsp_core::Blake2Hasher; use pezsp_trie::{ trie_types::{TrieDBBuilder, TrieDBMutBuilderV0 as TrieDBMutBuilder}, LayoutV0, MemoryDB, Trie, TrieMut, EMPTY_PREFIX, }; use thiserror::Error; use novelpoly::{CodeParams, WrappedShard}; // we are limited to the field order of GF(2^16), which is 65536 const MAX_VALIDATORS: usize = novelpoly::f2e16::FIELD_SIZE; /// Errors in erasure coding. #[derive(Debug, Clone, PartialEq, Error)] pub enum Error { /// Returned when there are too many validators. 
#[error("There are too many validators")] TooManyValidators, /// Cannot encode something for zero or one validator #[error("Expected at least 2 validators")] NotEnoughValidators, /// Cannot reconstruct: wrong number of validators. #[error("Validator count mismatches between encoding and decoding")] WrongValidatorCount, /// Not enough chunks present. #[error("Not enough chunks to reconstruct message")] NotEnoughChunks, /// Too many chunks present. #[error("Too many chunks present")] TooManyChunks, /// Chunks not of uniform length or the chunks are empty. #[error("Chunks are not uniform, mismatch in length or are zero sized")] NonUniformChunks, /// An uneven byte-length of a shard is not valid for `GF(2^16)` encoding. #[error("Uneven length is not valid for field GF(2^16)")] UnevenLength, /// Chunk index out of bounds. #[error("Chunk is out of bounds: {chunk_index} not included in 0..{n_validators}")] ChunkIndexOutOfBounds { chunk_index: usize, n_validators: usize }, /// Bad payload in reconstructed bytes. #[error("Reconstructed payload invalid")] BadPayload, /// Unable to decode reconstructed bytes. #[error("Unable to decode reconstructed payload: {0}")] Decode(#[source] codec::Error), /// Invalid branch proof. #[error("Invalid branch proof")] InvalidBranchProof, /// Branch out of bounds. #[error("Branch is out of bounds")] BranchOutOfBounds, /// Unknown error #[error("An unknown error has appeared when reconstructing erasure code chunks")] UnknownReconstruction, /// Unknown error #[error("An unknown error has appeared when deriving code parameters from validator count")] UnknownCodeParam, } impl From for Error { fn from(error: novelpoly::Error) -> Self { match error { novelpoly::Error::NeedMoreShards { .. } => Self::NotEnoughChunks, novelpoly::Error::ParamterMustBePowerOf2 { .. 
} => Self::UnevenLength, novelpoly::Error::WantedShardCountTooHigh(_) => Self::TooManyValidators, novelpoly::Error::WantedShardCountTooLow(_) => Self::NotEnoughValidators, novelpoly::Error::PayloadSizeIsZero { .. } => Self::BadPayload, novelpoly::Error::InconsistentShardLengths { .. } => Self::NonUniformChunks, _ => Self::UnknownReconstruction, } } } /// Obtain a threshold of chunks that should be enough to recover the data. pub const fn recovery_threshold(n_validators: usize) -> Result { if n_validators > MAX_VALIDATORS { return Err(Error::TooManyValidators); } if n_validators <= 1 { return Err(Error::NotEnoughValidators); } let needed = n_validators.saturating_sub(1) / 3; Ok(needed + 1) } /// Obtain the threshold of systematic chunks that should be enough to recover the data. /// /// If the regular `recovery_threshold` is a power of two, then it returns the same value. /// Otherwise, it returns the next lower power of two. pub fn systematic_recovery_threshold(n_validators: usize) -> Result { code_params(n_validators).map(|params| params.k()) } fn code_params(n_validators: usize) -> Result { // we need to be able to reconstruct from 1/3 - eps let n_wanted = n_validators; let k_wanted = recovery_threshold(n_wanted)?; if n_wanted > MAX_VALIDATORS as usize { return Err(Error::TooManyValidators); } CodeParams::derive_parameters(n_wanted, k_wanted).map_err(|e| match e { novelpoly::Error::WantedShardCountTooHigh(_) => Error::TooManyValidators, novelpoly::Error::WantedShardCountTooLow(_) => Error::NotEnoughValidators, _ => Error::UnknownCodeParam, }) } /// Reconstruct the v1 available data from the set of systematic chunks. /// /// Provide a vector containing chunk data. If too few chunks are provided, recovery is not /// possible. pub fn reconstruct_from_systematic_v1( n_validators: usize, chunks: Vec>, ) -> Result { reconstruct_from_systematic(n_validators, chunks) } /// Reconstruct the available data from the set of systematic chunks. 
///
/// Provide a vector containing the first k chunks in order. If too few chunks are provided,
/// recovery is not possible.
///
/// Only the first `k` entries of `chunks` are used, where `k` is the systematic recovery
/// threshold derived from `n_validators`.
///
/// # Errors
///
/// * [`Error::UnevenLength`] if one of the first `k` chunks has an odd byte length.
/// * [`Error::Decode`] if the reconstructed bytes do not decode into `T`.
/// * Any error from deriving the code parameters or from the underlying reconstruction.
pub fn reconstruct_from_systematic<T: Decode>(
    n_validators: usize,
    chunks: Vec<Vec<u8>>,
) -> Result<T, Error> {
    let code_params = code_params(n_validators)?;
    let k = code_params.k();

    // Shards are processed as 2-byte field elements, so odd lengths are rejected up front.
    if chunks.iter().take(k).any(|chunk_data| !chunk_data.len().is_multiple_of(2)) {
        return Err(Error::UnevenLength);
    }

    let bytes = code_params
        .make_encoder()
        .reconstruct_from_systematic(chunks.into_iter().take(k).map(WrappedShard::new).collect())?;

    Decode::decode(&mut &bytes[..]).map_err(Error::Decode)
}

/// Obtain erasure-coded chunks for v1 `AvailableData`, one for each validator.
///
/// Works only up to 65536 validators, and `n_validators` must be at least 2.
pub fn obtain_chunks_v1(n_validators: usize, data: &AvailableData) -> Result<Vec<Vec<u8>>, Error> {
    obtain_chunks(n_validators, data)
}

/// Obtain erasure-coded chunks, one for each validator.
///
/// Works only up to 65536 validators, and `n_validators` must be at least 2.
///
/// # Errors
///
/// * [`Error::BadPayload`] if `data` encodes to zero bytes.
/// * Any error from deriving the code parameters for `n_validators`.
pub fn obtain_chunks<T: Encode>(n_validators: usize, data: &T) -> Result<Vec<Vec<u8>>, Error> {
    let params = code_params(n_validators)?;
    let encoded = data.encode();

    if encoded.is_empty() {
        return Err(Error::BadPayload);
    }

    let shards = params
        .make_encoder()
        .encode::<WrappedShard>(&encoded[..])
        .expect("Payload non-empty, shard sizes are uniform, and validator numbers checked; qed");

    Ok(shards.into_iter().map(|w: WrappedShard| w.into_inner()).collect())
}

/// Reconstruct the v1 available data from a set of chunks.
///
/// Provide an iterator containing chunk data and the corresponding index.
/// The indices of the present chunks must be indicated. If too few chunks
/// are provided, recovery is not possible.
///
/// Works only up to 65536 validators, and `n_validators` must be at least 2.
pub fn reconstruct_v1<'a, I: 'a>(n_validators: usize, chunks: I) -> Result<AvailableData, Error>
where
    I: IntoIterator<Item = (&'a [u8], usize)>,
{
    reconstruct(n_validators, chunks)
}

/// Reconstruct decodable data from a set of chunks.
///
/// Provide an iterator containing chunk data and the corresponding index.
/// The indices of the present chunks must be indicated. If too few chunks
/// are provided, recovery is not possible.
///
/// Works only up to 65536 validators, and `n_validators` must be at least 2.
/// At most `n_validators` items are taken from `chunks`.
///
/// # Errors
///
/// * [`Error::UnevenLength`] if a chunk has an odd byte length.
/// * [`Error::ChunkIndexOutOfBounds`] if a chunk index is not in `0..n_validators`.
/// * [`Error::BadPayload`] if the reconstructed bytes do not decode into `T`.
/// * Any error from deriving the code parameters or from the underlying reconstruction.
pub fn reconstruct<'a, I: 'a, T: Decode>(n_validators: usize, chunks: I) -> Result<T, Error>
where
    I: IntoIterator<Item = (&'a [u8], usize)>,
{
    let params = code_params(n_validators)?;
    let mut received_shards: Vec<Option<WrappedShard>> = vec![None; n_validators];

    for (chunk_data, chunk_idx) in chunks.into_iter().take(n_validators) {
        if !chunk_data.len().is_multiple_of(2) {
            return Err(Error::UnevenLength);
        }

        // The index is caller-supplied: report an error instead of panicking on a bad one.
        let slot = received_shards
            .get_mut(chunk_idx)
            .ok_or(Error::ChunkIndexOutOfBounds { chunk_index: chunk_idx, n_validators })?;
        *slot = Some(WrappedShard::new(chunk_data.to_vec()));
    }

    let payload_bytes = params.make_encoder().reconstruct(received_shards)?;

    Decode::decode(&mut &payload_bytes[..]).map_err(|_| Error::BadPayload)
}

/// An iterator that yields merkle branches and chunk data for all chunks to
/// be sent to other validators.
pub struct Branches<'a, I> {
    /// Backing storage of the trie the branches are read from.
    trie_storage: MemoryDB<Blake2Hasher>,
    /// Root hash of that trie.
    root: H256,
    /// The chunks, in index order.
    chunks: &'a [I],
    /// Index of the next chunk to yield.
    current_pos: usize,
}

impl<'a, I: AsRef<[u8]>> Branches<'a, I> {
    /// Get the trie root.
    pub fn root(&self) -> H256 {
        self.root
    }
}

impl<'a, I: AsRef<[u8]>> Iterator for Branches<'a, I> {
    type Item = (Proof, &'a [u8]);

    /// Yield the proof and chunk data at the current position and advance.
    ///
    /// Returns `None` once the trie holds no entry for the current position, or if the
    /// recorded nodes cannot be converted into a `Proof`.
    fn next(&mut self) -> Option<Self::Item> {
        use pezsp_trie::Recorder;

        let mut recorder = Recorder::<LayoutV0<Blake2Hasher>>::new();
        // Look the key up with a recorder attached so that the visited nodes are captured.
        let res = {
            let trie = TrieDBBuilder::new(&self.trie_storage, &self.root)
                .with_recorder(&mut recorder)
                .build();

            (self.current_pos as u32).using_encoded(|s| trie.get(s))
        };

        match res.expect("all nodes in trie present; qed") {
            Some(_) => {
                let nodes: Vec<Vec<u8>> = recorder.drain().into_iter().map(|r| r.data).collect();
                let chunk = self.chunks.get(self.current_pos).expect(
                    "there is a one-to-one mapping of chunks to valid merkle branches; qed",
                );
                self.current_pos += 1;
                Proof::try_from(nodes).ok().map(|proof| (proof, chunk.as_ref()))
            },
            None => None,
        }
    }
}

/// Construct a trie from chunks of an erasure-coded value.
This returns the root hash and an /// iterator of merkle proofs, one for each validator. pub fn branches<'a, I: 'a>(chunks: &'a [I]) -> Branches<'a, I> where I: AsRef<[u8]>, { let mut trie_storage: MemoryDB = MemoryDB::default(); let mut root = H256::default(); // construct trie mapping each chunk's index to its hash. { let mut trie = TrieDBMutBuilder::new(&mut trie_storage, &mut root).build(); for (i, chunk) in chunks.as_ref().iter().enumerate() { (i as u32).using_encoded(|encoded_index| { let chunk_hash = BlakeTwo256::hash(chunk.as_ref()); trie.insert(encoded_index, chunk_hash.as_ref()) .expect("a fresh trie stored in memory cannot have errors loading nodes; qed"); }) } } Branches { trie_storage, root, chunks, current_pos: 0 } } /// Verify a merkle branch, yielding the chunk hash meant to be present at that /// index. pub fn branch_hash(root: &H256, branch_nodes: &Proof, index: usize) -> Result { let mut trie_storage: MemoryDB = MemoryDB::default(); for node in branch_nodes.iter() { (&mut trie_storage as &mut pezsp_trie::HashDB<_>).insert(EMPTY_PREFIX, node); } let trie = TrieDBBuilder::new(&trie_storage, &root).build(); let res = (index as u32).using_encoded(|key| { trie.get_with(key, |raw_hash: &[u8]| H256::decode(&mut &raw_hash[..])) }); match res { Ok(Some(Ok(hash))) => Ok(hash), Ok(Some(Err(_))) => Err(Error::InvalidBranchProof), // hash failed to decode Ok(None) => Err(Error::BranchOutOfBounds), Err(_) => Err(Error::InvalidBranchProof), } } #[cfg(test)] mod tests { use std::sync::Arc; use super::*; use pezkuwi_pez_node_primitives::{AvailableData, BlockData, PoV}; use pezkuwi_primitives::{HeadData, PersistedValidationData}; use quickcheck::{Arbitrary, Gen, QuickCheck}; // In order to adequately compute the number of entries in the Merkle // trie, we must account for the fixed 16-ary trie structure. 
const KEY_INDEX_NIBBLE_SIZE: usize = 4; #[derive(Clone, Debug)] struct ArbitraryAvailableData(AvailableData); impl Arbitrary for ArbitraryAvailableData { fn arbitrary(g: &mut Gen) -> Self { // Limit the POV len to 1 mib, otherwise the test will take forever let pov_len = (u32::arbitrary(g) % (1024 * 1024)).max(2); let pov = (0..pov_len).map(|_| u8::arbitrary(g)).collect(); let pvd = PersistedValidationData { parent_head: HeadData((0..u16::arbitrary(g)).map(|_| u8::arbitrary(g)).collect()), relay_parent_number: u32::arbitrary(g), relay_parent_storage_root: [u8::arbitrary(g); 32].into(), max_pov_size: u32::arbitrary(g), }; ArbitraryAvailableData(AvailableData { pov: Arc::new(PoV { block_data: BlockData(pov) }), validation_data: pvd, }) } } #[test] fn field_order_is_right_size() { assert_eq!(MAX_VALIDATORS, 65536); } #[test] fn round_trip_works() { let pov = PoV { block_data: BlockData((0..255).collect()) }; let available_data = AvailableData { pov: pov.into(), validation_data: Default::default() }; let chunks = obtain_chunks(10, &available_data).unwrap(); assert_eq!(chunks.len(), 10); // any 4 chunks should work. 
let reconstructed: AvailableData = reconstruct( 10, [(&*chunks[1], 1), (&*chunks[4], 4), (&*chunks[6], 6), (&*chunks[9], 9)] .iter() .cloned(), ) .unwrap(); assert_eq!(reconstructed, available_data); } #[test] fn round_trip_systematic_works() { fn property(available_data: ArbitraryAvailableData, n_validators: u16) { let n_validators = n_validators.max(2); let kpow2 = systematic_recovery_threshold(n_validators as usize).unwrap(); let chunks = obtain_chunks(n_validators as usize, &available_data.0).unwrap(); assert_eq!( reconstruct_from_systematic_v1( n_validators as usize, chunks.into_iter().take(kpow2).collect() ) .unwrap(), available_data.0 ); } QuickCheck::new().quickcheck(property as fn(ArbitraryAvailableData, u16)) } #[test] fn reconstruct_does_not_panic_on_low_validator_count() { let reconstructed = reconstruct_v1(1, [].iter().cloned()); assert_eq!(reconstructed, Err(Error::NotEnoughValidators)); } fn generate_trie_and_generate_proofs(magnitude: u32) { let n_validators = 2_u32.pow(magnitude) as usize; let pov = PoV { block_data: BlockData(vec![2; n_validators / KEY_INDEX_NIBBLE_SIZE]) }; let available_data = AvailableData { pov: pov.into(), validation_data: Default::default() }; let chunks = obtain_chunks(magnitude as usize, &available_data).unwrap(); assert_eq!(chunks.len() as u32, magnitude); let branches = branches(chunks.as_ref()); let root = branches.root(); let proofs: Vec<_> = branches.map(|(proof, _)| proof).collect(); assert_eq!(proofs.len() as u32, magnitude); for (i, proof) in proofs.into_iter().enumerate() { let encode = Encode::encode(&proof); let decode = Decode::decode(&mut &encode[..]).unwrap(); assert_eq!(proof, decode); assert_eq!(encode, Encode::encode(&decode)); assert_eq!(branch_hash(&root, &proof, i).unwrap(), BlakeTwo256::hash(&chunks[i])); } } #[test] fn roundtrip_proof_encoding() { for i in 2..16 { generate_trie_and_generate_proofs(i); } } }