integrate faster erasure code (#2608)

Breaks compatibility for distributing PoV and PersistentValidationData between validators. Ref #2442
2026-06-12 19:21:13 +00:00 · 2021-03-18 13:25:58 +01:00
parent 9047bbb392
commit 928a03c179
4 changed files with 82 additions and 291 deletions
@@ -1044,7 +1044,7 @@ dependencies = [
 "cranelift-codegen",
 "cranelift-entity",
 "cranelift-frontend",
- "itertools",
+ "itertools 0.9.0",
 "log",
 "serde",
 "smallvec 1.6.1",
@@ -1882,6 +1882,12 @@ dependencies = [
 "sp-std",
 ]

+[[package]]
+name = "fs-err"
+version = "2.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bcd1163ae48bda72a20ae26d66a04d3094135cadab911cff418ae5e33f253431"
+
 [[package]]
 name = "fs-swap"
 version = "0.2.5"
@@ -2685,6 +2691,15 @@ dependencies = [
 "either",
 ]

+[[package]]
+name = "itertools"
+version = "0.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "37d572918e350e82412fe766d24b15e6682fb2ed2bbe018280caa810397cb319"
+dependencies = [
+ "either",
+]
+
 [[package]]
 name = "itoa"
 version = "0.4.6"
@@ -5360,7 +5375,7 @@ version = "0.8.29"
 dependencies = [
 "parity-scale-codec",
 "polkadot-primitives",
- "reed-solomon-erasure",
+ "reed-solomon-novelpoly",
 "sp-core",
 "sp-trie",
 "thiserror",
@@ -6510,7 +6525,7 @@ checksum = "32d3ebd75ac2679c2af3a92246639f9fcc8a442ee420719cc4fe195b98dd5fa3"
 dependencies = [
 "bytes 1.0.1",
 "heck",
- "itertools",
+ "itertools 0.9.0",
 "log",
 "multimap",
 "petgraph",
@@ -6527,7 +6542,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "169a15f3008ecb5160cba7d37bcd690a7601b6d30cfb87a117d45e59d52af5d4"
 dependencies = [
 "anyhow",
- "itertools",
+ "itertools 0.9.0",
 "proc-macro2",
 "quote",
 "syn",
@@ -6840,12 +6855,15 @@ dependencies = [
 ]

 [[package]]
-name = "reed-solomon-erasure"
-version = "4.0.2"
+name = "reed-solomon-novelpoly"
+version = "0.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a415a013dd7c5d4221382329a5a3482566da675737494935cbbbcdec04662f9d"
+checksum = "886177a67de8d452f8955a5a1c70f9064e644bcf1862e8bcc3a68064014369be"
 dependencies = [
- "smallvec 1.6.1",
+ "derive_more",
+ "fs-err",
+ "itertools 0.10.0",
+ "thiserror",
 ]

 [[package]]
@@ -10977,6 +10995,6 @@ checksum = "a1e6e8778706838f43f771d80d37787cb2fe06dafe89dd3aebaf6721b9eaec81"
 dependencies = [
 "cc",
 "glob",
- "itertools",
+ "itertools 0.9.0",
 "libc",
 ]
@@ -6,8 +6,8 @@ edition = "2018"

 [dependencies]
 primitives = { package = "polkadot-primitives", path = "../primitives" }
-reed_solomon = { package = "reed-solomon-erasure", version = "4.0.2" }
-parity-scale-codec = { version = "2.0.0", default-features = false, features = ["derive"] }
+novelpoly = { package = "reed-solomon-novelpoly", version = "=0.0.1" }
+parity-scale-codec = { version = "2.0.0", default-features = false, features = ["std", "derive"] }
 sp-core = { git = "https://github.com/paritytech/substrate", branch = "master" }
 trie = { package = "sp-trie", git = "https://github.com/paritytech/substrate", branch = "master" }
 thiserror = "1.0.23"
@@ -25,19 +25,17 @@
 //! The data is coded so any f+1 chunks can be used to reconstruct the full data.

 use parity_scale_codec::{Encode, Decode};
-use reed_solomon::galois_16::{self, ReedSolomon};
 use primitives::v0::{self, Hash as H256, BlakeTwo256, HashT};
 use primitives::v1;
 use sp_core::Blake2Hasher;
 use trie::{EMPTY_PREFIX, MemoryDB, Trie, TrieMut, trie_types::{TrieDBMut, TrieDB}};
 use thiserror::Error;

-use self::wrapped_shard::WrappedShard;
-
-mod wrapped_shard;
+use novelpoly::WrappedShard;
+use novelpoly::CodeParams;

 // we are limited to the field order of GF(2^16), which is 65536
-const MAX_VALIDATORS: usize = <galois_16::Field as reed_solomon::Field>::ORDER;
+const MAX_VALIDATORS: usize = novelpoly::f2e16::FIELD_SIZE;

 /// Errors in erasure coding.
 #[derive(Debug, Clone, PartialEq, Error)]
@@ -75,76 +73,41 @@ pub enum Error {
 	/// Branch out of bounds.
 	#[error("Branch is out of bounds")]
 	BranchOutOfBounds,
-}
-
-#[derive(Debug, PartialEq)]
-struct CodeParams {
-	data_shards: usize,
-	parity_shards: usize,
-}
-
-impl CodeParams {
-	// the shard length needed for a payload with initial size `base_len`.
-	fn shard_len(&self, base_len: usize) -> usize {
-		// how many bytes we actually need.
-		let needed_shard_len = base_len / self.data_shards
-			+ (base_len % self.data_shards != 0) as usize;
-
-		// round up to next even number
-		// (no actual space overhead since we are working in GF(2^16)).
-		needed_shard_len + needed_shard_len % 2
-	}
-
-	fn make_shards_for(&self, payload: &[u8]) -> Vec<WrappedShard> {
-		let shard_len = self.shard_len(payload.len());
-		let mut shards = vec![
-			WrappedShard::new(vec![0; shard_len]);
-			self.data_shards + self.parity_shards
-		];
-
-		for (data_chunk, blank_shard) in payload.chunks(shard_len).zip(&mut shards) {
-			// fill the empty shards with the corresponding piece of the payload,
-			// zero-padded to fit in the shards.
-			let len = std::cmp::min(shard_len, data_chunk.len());
-			let blank_shard: &mut [u8] = blank_shard.as_mut();
-			blank_shard[..len].copy_from_slice(&data_chunk[..len]);
-		}
-
-		shards
-	}
-
-	// make a reed-solomon instance.
-	fn make_encoder(&self) -> ReedSolomon {
-		ReedSolomon::new(self.data_shards, self.parity_shards)
-			.expect("this struct is not created with invalid shard number; qed")
-	}
-}
-
-/// Returns the maximum number of allowed, faulty chunks
-/// which does not prevent recovery given all other pieces
-/// are correct.
-const fn n_faulty(n_validators: usize) -> Result<usize, Error> {
-	if n_validators > MAX_VALIDATORS { return Err(Error::TooManyValidators) }
-	if n_validators <= 1 { return Err(Error::NotEnoughValidators) }
-
-	Ok(n_validators.saturating_sub(1) / 3)
-}
-
-fn code_params(n_validators: usize) -> Result<CodeParams, Error> {
-	let n_faulty = n_faulty(n_validators)?;
-	let n_good = n_validators - n_faulty;
-
-	Ok(CodeParams {
-		data_shards: n_faulty + 1,
-		parity_shards: n_good - 1,
-	})
+	/// Unknown error while reconstructing the data from erasure-coded chunks.
+	#[error("An unknown error has appeared when reconstructing erasure code chunks")]
+	UnknownReconstruction,
+	/// Unknown error while deriving the code parameters from the validator count.
+	#[error("An unknown error has appeared when deriving code parameters from validator count")]
+	UnknownCodeParam,
 }

 /// Obtain a threshold of chunks that should be enough to recover the data.
-pub fn recovery_threshold(n_validators: usize) -> Result<usize, Error> {
-	let n_faulty = n_faulty(n_validators)?;
+pub const fn recovery_threshold(n_validators: usize) -> Result<usize, Error> {
+	if n_validators > MAX_VALIDATORS { return Err(Error::TooManyValidators) }
+	if n_validators <= 1 { return Err(Error::NotEnoughValidators) }

-	Ok(n_faulty + 1)
+	let needed = n_validators.saturating_sub(1) / 3;
+	Ok(needed + 1)
+}
+
+fn code_params(n_validators: usize) -> Result<CodeParams, Error> {
+	// the data must be recoverable from any f + 1 chunks, where f = (n_validators - 1) / 3
+
+	let n_wanted = n_validators;
+	let k_wanted = recovery_threshold(n_wanted)?;
+
+	if n_wanted > MAX_VALIDATORS as usize {
+		return Err(Error::TooManyValidators);
+	}
+
+	CodeParams::derive_parameters(n_wanted, k_wanted)
+		.map_err(|e| {
+			match e {
+				novelpoly::Error::WantedShardCountTooHigh(_) => Error::TooManyValidators,
+				novelpoly::Error::WantedShardCountTooLow(_) => Error::NotEnoughValidators,
+				_ => Error::UnknownCodeParam,
+			}
+		})
 }

 /// Obtain erasure-coded chunks for v0 `AvailableData`, one for each validator.
@@ -178,12 +141,10 @@ fn obtain_chunks<T: Encode>(n_validators: usize, data: &T)
 		return Err(Error::BadPayload);
 	}

-	let mut shards = params.make_shards_for(&encoded[..]);
-
-	params.make_encoder().encode(&mut shards[..])
+	let shards = params.make_encoder().encode::<WrappedShard>(&encoded[..])
 		.expect("Payload non-empty, shard sizes are uniform, and validator numbers checked; qed");

-	Ok(shards.into_iter().map(|w| w.into_inner()).collect())
+	Ok(shards.into_iter().map(|w: WrappedShard| w.into_inner()).collect())
 }

 /// Reconstruct the v0 available data from a set of chunks.
@@ -225,7 +186,7 @@ fn reconstruct<'a, I: 'a, T: Decode>(n_validators: usize, chunks: I) -> Result<T
 	where I: IntoIterator<Item=(&'a [u8], usize)>
 {
 	let params = code_params(n_validators)?;
-	let mut shards: Vec<Option<WrappedShard>> = vec![None; n_validators];
+	let mut received_shards: Vec<Option<WrappedShard>> = vec![None; n_validators];
 	let mut shard_len = None;
 	for (chunk_data, chunk_idx) in chunks.into_iter().take(n_validators) {
 		if chunk_idx >= n_validators {
@@ -242,30 +203,25 @@ fn reconstruct<'a, I: 'a, T: Decode>(n_validators: usize, chunks: I) -> Result<T
 			return Err(Error::NonUniformChunks);
 		}

-		shards[chunk_idx] = Some(WrappedShard::new(chunk_data.to_vec()));
+		received_shards[chunk_idx] = Some(WrappedShard::new(chunk_data.to_vec()));
 	}

-	if let Err(e) = params.make_encoder().reconstruct(&mut shards[..]) {
-		match e {
-			reed_solomon::Error::TooFewShardsPresent => Err(Error::NotEnoughChunks)?,
-			reed_solomon::Error::InvalidShardFlags => Err(Error::WrongValidatorCount)?,
-			reed_solomon::Error::TooManyShards => Err(Error::TooManyChunks)?,
-			reed_solomon::Error::EmptyShard => panic!("chunks are all non-empty; this is checked above; qed"),
-			reed_solomon::Error::IncorrectShardSize => panic!("chunks are all same len; this is checked above; qed"),
-			_ => panic!("reed_solomon encoder returns no more variants for this function; qed"),
+
+	let res = params.make_encoder().reconstruct(received_shards);
+
+	let payload_bytes= match res {
+		Err(e) => match e {
+			novelpoly::Error::NeedMoreShards { .. } => return Err(Error::NotEnoughChunks),
+			novelpoly::Error::ParamterMustBePowerOf2 { .. } => return Err(Error::UnevenLength),
+			novelpoly::Error::WantedShardCountTooHigh(_) => return Err(Error::TooManyValidators),
+			novelpoly::Error::WantedShardCountTooLow(_) => return Err(Error::NotEnoughValidators),
+			novelpoly::Error::PayloadSizeIsZero { .. } => return Err(Error::BadPayload),
+			_ => return Err(Error::UnknownReconstruction),
 		}
-	}
+		Ok(payload_bytes) => payload_bytes,
+	};

-	// lazily decode from the data shards.
-	Decode::decode(&mut ShardInput {
-		remaining_len: shard_len.map(|s| s * params.data_shards).unwrap_or(0),
-		cur_shard: None,
-		shards: shards.iter()
-			.map(|x| x.as_ref())
-			.take(params.data_shards)
-			.map(|x| x.expect("all data shards have been recovered; qed"))
-			.map(|x| x.as_ref()),
-	}).or_else(|_| Err(Error::BadPayload))
+	Decode::decode(&mut &payload_bytes[..]).or_else(|_e| Err(Error::BadPayload))
 }

 /// An iterator that yields merkle branches and chunk data for all chunks to
@@ -333,7 +289,7 @@ pub fn branches<'a, I: 'a>(chunks: &'a [I]) -> Branches<'a, I>
 	Branches {
 		trie_storage,
 		root,
-		chunks: chunks,
+		chunks,
 		current_pos: 0,
 	}
 }
@@ -418,55 +374,6 @@ mod tests {
 		assert_eq!(MAX_VALIDATORS, 65536);
 	}

-	#[test]
-	fn test_code_params() {
-		assert_eq!(code_params(0), Err(Error::NotEnoughValidators));
-
-		assert_eq!(code_params(1), Err(Error::NotEnoughValidators));
-
-		assert_eq!(code_params(2), Ok(CodeParams {
-			data_shards: 1,
-			parity_shards: 1,
-		}));
-
-		assert_eq!(code_params(3), Ok(CodeParams {
-			data_shards: 1,
-			parity_shards: 2,
-		}));
-
-		assert_eq!(code_params(4), Ok(CodeParams {
-			data_shards: 2,
-			parity_shards: 2,
-		}));
-
-		assert_eq!(code_params(100), Ok(CodeParams {
-			data_shards: 34,
-			parity_shards: 66,
-		}));
-	}
-
-	#[test]
-	fn shard_len_is_reasonable() {
-		let mut params = CodeParams {
-			data_shards: 5,
-			parity_shards: 0, // doesn't affect calculation.
-		};
-
-		assert_eq!(params.shard_len(100), 20);
-		assert_eq!(params.shard_len(99), 20);
-
-		// see if it rounds up to 2.
-		assert_eq!(params.shard_len(95), 20);
-		assert_eq!(params.shard_len(94), 20);
-
-		assert_eq!(params.shard_len(89), 18);
-
-		params.data_shards = 7;
-
-		// needs 3 bytes to fit, rounded up to next even number.
-		assert_eq!(params.shard_len(19), 4);
-	}
-
    #[test]
 	fn round_trip_works() {
 		let pov_block = PoVBlock {
@@ -1,134 +0,0 @@
-// Copyright 2019-2020 Parity Technologies (UK) Ltd.
-// This file is part of Polkadot.
-
-// Polkadot is free software: you can redistribute it and/or modify
-// it under the terms of the GNU General Public License as published by
-// the Free Software Foundation, either version 3 of the License, or
-// (at your option) any later version.
-
-// Polkadot is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-
-// You should have received a copy of the GNU General Public License
-// along with Polkadot.  If not, see <http://www.gnu.org/licenses/>.
-
-//! Provides a safe wrapper that gives views into a byte-vec.
-
-/// Wrapper around a `Vec<u8>` that provides views as a `[u8]` and `[[u8; 2]]`.
-#[derive(Clone)]
-pub(crate) struct WrappedShard {
-	inner: Vec<u8>,
-}
-
-impl WrappedShard {
-	/// Wrap `data`.
-	pub(crate) fn new(mut data: Vec<u8>) -> Self {
-		if data.len() % 2 != 0 {
-			data.push(0);
-		}
-
-		WrappedShard { inner: data }
-	}
-
-	/// Unwrap and yield inner data.
-	pub(crate) fn into_inner(self) -> Vec<u8> {
-		self.inner
-	}
-}
-
-impl AsRef<[u8]> for WrappedShard {
-	fn as_ref(&self) -> &[u8] {
-		self.inner.as_ref()
-	}
-}
-
-impl AsMut<[u8]> for WrappedShard {
-	fn as_mut(&mut self) -> &mut [u8] {
-		self.inner.as_mut()
-	}
-}
-
-impl AsRef<[[u8; 2]]> for WrappedShard {
-	fn as_ref(&self) -> &[[u8; 2]] {
-		assert_eq!(self.inner.len() % 2, 0);
-		if self.inner.is_empty() { return &[] }
-		unsafe {
-			::std::slice::from_raw_parts(&self.inner[0] as *const _ as _, self.inner.len() / 2)
-		}
-	}
-}
-
-impl AsMut<[[u8; 2]]> for WrappedShard {
-	fn as_mut(&mut self) -> &mut [[u8; 2]] {
-		let len = self.inner.len();
-		assert_eq!(len % 2, 0);
-
-		if self.inner.is_empty() { return &mut [] }
-		unsafe {
-			::std::slice::from_raw_parts_mut(&mut self.inner[0] as *mut _ as _, len / 2)
-		}
-	}
-}
-
-impl std::iter::FromIterator<[u8; 2]> for WrappedShard {
-	fn from_iter<I: IntoIterator<Item=[u8; 2]>>(iterable: I) -> Self {
-		let iter = iterable.into_iter();
-
-		let (l, _) = iter.size_hint();
-		let mut inner = Vec::with_capacity(l * 2);
-
-		for [a, b] in iter {
-			inner.push(a);
-			inner.push(b);
-		}
-
-		debug_assert_eq!(inner.len() % 2, 0);
-		WrappedShard { inner }
-	}
-}
-
-#[cfg(test)]
-mod tests {
-	use super::WrappedShard;
-
-	#[test]
-	fn wrap_empty_ok() {
-		let mut wrapped = WrappedShard::new(Vec::new());
-		{
-			let _: &mut [u8] = wrapped.as_mut();
-			let _: &mut [[u8; 2]] = wrapped.as_mut();
-		}
-
-		{
-			let _: &[u8] = wrapped.as_ref();
-			let _: &[[u8; 2]] = wrapped.as_ref();
-		}
-	}
-
-	#[test]
-	fn data_order_preserved() {
-		let mut wrapped = WrappedShard::new(vec![1, 2, 3]);
-		{
-			let x: &[u8] = wrapped.as_ref();
-			assert_eq!(x, &[1, 2, 3, 0]);
-		}
-		{
-			let x: &mut [[u8; 2]] = wrapped.as_mut();
-			assert_eq!(x, &mut [[1, 2], [3, 0]]);
-			x[1] = [3, 4];
-		}
-		{
-			let x: &[u8] = wrapped.as_ref();
-			assert_eq!(x, &[1, 2, 3, 4]);
-		}
-	}
-
-	#[test]
-	fn from_iter() {
-		let w: WrappedShard = vec![[1, 2], [3, 4], [5, 6]].into_iter().collect();
-		let x: &[u8] = w.as_ref();
-		assert_eq!(x, &[1, 2, 3, 4, 5, 6])
-	}
-}