Get rid of corpus files (#190)

* Get rid of corpus files

* Update the readme
This commit is contained in:
Omar
2025-10-16 01:40:09 +03:00
committed by GitHub
parent 29bf5304ec
commit 0d7a87a728
17 changed files with 402 additions and 402 deletions
+180 -111
View File
@@ -1,131 +1,200 @@
use std::{
fs::File,
borrow::Cow,
collections::HashMap,
path::{Path, PathBuf},
};
use revive_dt_common::iterators::FilesWithExtensionIterator;
use serde::{Deserialize, Serialize};
use tracing::{debug, info};
use itertools::Itertools;
use revive_dt_common::{
iterators::{EitherIter, FilesWithExtensionIterator},
types::{Mode, ParsedMode, ParsedTestSpecifier},
};
use tracing::{debug, warn};
use crate::metadata::{Metadata, MetadataFile};
use anyhow::Context as _;
use crate::{
case::{Case, CaseIdx},
metadata::{Metadata, MetadataFile},
};
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
#[serde(untagged)]
pub enum Corpus {
SinglePath { name: String, path: PathBuf },
MultiplePaths { name: String, paths: Vec<PathBuf> },
#[derive(Default)]
pub struct Corpus {
test_specifiers: HashMap<ParsedTestSpecifier, Vec<PathBuf>>,
metadata_files: HashMap<PathBuf, MetadataFile>,
}
impl Corpus {
pub fn try_from_path(file_path: impl AsRef<Path>) -> anyhow::Result<Self> {
let mut corpus = File::open(file_path.as_ref())
.map_err(anyhow::Error::from)
.and_then(|file| serde_json::from_reader::<_, Corpus>(file).map_err(Into::into))
.with_context(|| {
format!(
"Failed to open and deserialize corpus file at {}",
file_path.as_ref().display()
)
})?;
let corpus_directory = file_path
.as_ref()
.canonicalize()
.context("Failed to canonicalize the path to the corpus file")?
.parent()
.context("Corpus file has no parent")?
.to_path_buf();
for path in corpus.paths_iter_mut() {
*path = corpus_directory.join(path.as_path())
}
Ok(corpus)
pub fn new() -> Self {
Default::default()
}
pub fn enumerate_tests(&self) -> Vec<MetadataFile> {
let mut tests = self
.paths_iter()
.flat_map(|root_path| {
if !root_path.is_dir() {
Box::new(std::iter::once(root_path.to_path_buf()))
as Box<dyn Iterator<Item = _>>
} else {
Box::new(
FilesWithExtensionIterator::new(root_path)
.with_use_cached_fs(true)
.with_allowed_extension("sol")
.with_allowed_extension("json"),
)
pub fn with_test_specifier(
mut self,
test_specifier: ParsedTestSpecifier,
) -> anyhow::Result<Self> {
match &test_specifier {
ParsedTestSpecifier::FileOrDirectory {
metadata_or_directory_file_path: metadata_file_path,
}
| ParsedTestSpecifier::Case {
metadata_file_path, ..
}
| ParsedTestSpecifier::CaseWithMode {
metadata_file_path, ..
} => {
let metadata_files = enumerate_metadata_files(metadata_file_path);
self.test_specifiers.insert(
test_specifier,
metadata_files
.iter()
.map(|metadata_file| metadata_file.metadata_file_path.clone())
.collect(),
);
for metadata_file in metadata_files.into_iter() {
self.metadata_files
.insert(metadata_file.metadata_file_path.clone(), metadata_file);
}
.map(move |metadata_file_path| (root_path, metadata_file_path))
})
.filter_map(|(root_path, metadata_file_path)| {
Metadata::try_from_file(&metadata_file_path)
.or_else(|| {
debug!(
discovered_from = %root_path.display(),
metadata_file_path = %metadata_file_path.display(),
"Skipping file since it doesn't contain valid metadata"
);
None
})
.map(|metadata| MetadataFile {
metadata_file_path,
corpus_file_path: root_path.to_path_buf(),
content: metadata,
})
.inspect(|metadata_file| {
debug!(
metadata_file_path = %metadata_file.relative_path().display(),
"Loaded metadata file"
}
};
Ok(self)
}
pub fn cases_iterator(
&self,
) -> impl Iterator<Item = (&'_ MetadataFile, CaseIdx, &'_ Case, Cow<'_, Mode>)> + '_ {
let mut iterator = Box::new(std::iter::empty())
as Box<dyn Iterator<Item = (&'_ MetadataFile, CaseIdx, &'_ Case, Cow<'_, Mode>)> + '_>;
for (test_specifier, metadata_file_paths) in self.test_specifiers.iter() {
for metadata_file_path in metadata_file_paths {
let metadata_file = self
.metadata_files
.get(metadata_file_path)
.expect("Must succeed");
match test_specifier {
ParsedTestSpecifier::FileOrDirectory { .. } => {
for (case_idx, case) in metadata_file.cases.iter().enumerate() {
let case_idx = CaseIdx::new(case_idx);
let modes = case.modes.as_ref().or(metadata_file.modes.as_ref());
let modes = match modes {
Some(modes) => EitherIter::A(
ParsedMode::many_to_modes(modes.iter())
.map(Cow::<'static, _>::Owned),
),
None => EitherIter::B(Mode::all().map(Cow::<'static, _>::Borrowed)),
};
iterator = Box::new(
iterator.chain(
modes
.into_iter()
.map(move |mode| (metadata_file, case_idx, case, mode)),
),
)
}
}
ParsedTestSpecifier::Case { case_idx, .. } => {
let Some(case) = metadata_file.cases.get(*case_idx) else {
warn!(
test_specifier = %test_specifier,
metadata_file_path = %metadata_file_path.display(),
case_idx = case_idx,
case_count = metadata_file.cases.len(),
"Specified case not found in metadata file"
);
continue;
};
let case_idx = CaseIdx::new(*case_idx);
let modes = case.modes.as_ref().or(metadata_file.modes.as_ref());
let modes = match modes {
Some(modes) => EitherIter::A(
ParsedMode::many_to_modes(modes.iter())
.map(Cow::<'static, Mode>::Owned),
),
None => EitherIter::B(Mode::all().map(Cow::<'static, _>::Borrowed)),
};
iterator = Box::new(
iterator.chain(
modes
.into_iter()
.map(move |mode| (metadata_file, case_idx, case, mode)),
),
)
})
})
.collect::<Vec<_>>();
tests.sort_by(|a, b| a.metadata_file_path.cmp(&b.metadata_file_path));
tests.dedup_by(|a, b| a.metadata_file_path == b.metadata_file_path);
info!(
len = tests.len(),
corpus_name = self.name(),
"Found tests in Corpus"
);
tests
}
}
ParsedTestSpecifier::CaseWithMode { case_idx, mode, .. } => {
let Some(case) = metadata_file.cases.get(*case_idx) else {
warn!(
test_specifier = %test_specifier,
metadata_file_path = %metadata_file_path.display(),
case_idx = case_idx,
case_count = metadata_file.cases.len(),
"Specified case not found in metadata file"
);
continue;
};
let case_idx = CaseIdx::new(*case_idx);
pub fn name(&self) -> &str {
match self {
Corpus::SinglePath { name, .. } | Corpus::MultiplePaths { name, .. } => name.as_str(),
}
}
pub fn paths_iter(&self) -> impl Iterator<Item = &Path> {
match self {
Corpus::SinglePath { path, .. } => {
Box::new(std::iter::once(path.as_path())) as Box<dyn Iterator<Item = _>>
}
Corpus::MultiplePaths { paths, .. } => {
Box::new(paths.iter().map(|path| path.as_path())) as Box<dyn Iterator<Item = _>>
let mode = Cow::Borrowed(mode);
iterator = Box::new(iterator.chain(std::iter::once((
metadata_file,
case_idx,
case,
mode,
))))
}
}
}
}
iterator.unique_by(|item| (&item.0.metadata_file_path, item.1, item.3.clone()))
}
pub fn paths_iter_mut(&mut self) -> impl Iterator<Item = &mut PathBuf> {
match self {
Corpus::SinglePath { path, .. } => {
Box::new(std::iter::once(path)) as Box<dyn Iterator<Item = _>>
}
Corpus::MultiplePaths { paths, .. } => {
Box::new(paths.iter_mut()) as Box<dyn Iterator<Item = _>>
}
}
}
pub fn path_count(&self) -> usize {
match self {
Corpus::SinglePath { .. } => 1,
Corpus::MultiplePaths { paths, .. } => paths.len(),
}
pub fn metadata_file_count(&self) -> usize {
self.metadata_files.len()
}
}
fn enumerate_metadata_files(path: impl AsRef<Path>) -> Vec<MetadataFile> {
let root_path = path.as_ref();
let mut tests = if !root_path.is_dir() {
Box::new(std::iter::once(root_path.to_path_buf())) as Box<dyn Iterator<Item = _>>
} else {
Box::new(
FilesWithExtensionIterator::new(root_path)
.with_use_cached_fs(true)
.with_allowed_extension("sol")
.with_allowed_extension("json"),
)
}
.map(move |metadata_file_path| (root_path, metadata_file_path))
.filter_map(|(root_path, metadata_file_path)| {
Metadata::try_from_file(&metadata_file_path)
.or_else(|| {
debug!(
discovered_from = %root_path.display(),
metadata_file_path = %metadata_file_path.display(),
"Skipping file since it doesn't contain valid metadata"
);
None
})
.map(|metadata| MetadataFile {
metadata_file_path,
corpus_file_path: root_path.to_path_buf(),
content: metadata,
})
.inspect(|metadata_file| {
debug!(
metadata_file_path = %metadata_file.relative_path().display(),
"Loaded metadata file"
)
})
})
.collect::<Vec<_>>();
tests.sort_by(|a, b| a.metadata_file_path.cmp(&b.metadata_file_path));
tests.dedup_by(|a, b| a.metadata_file_path == b.metadata_file_path);
tests
}