Add a cached fs abstraction (#141)

This commit is contained in:
Omar
2025-08-14 18:21:05 +03:00
committed by GitHub
parent f2045db0e9
commit a59e287fa1
11 changed files with 230 additions and 19 deletions
+49
View File
@@ -0,0 +1,49 @@
//! This module implements a cached file system allowing for results to be stored in-memory rather
//! rather being queried from the file system again.
use std::fs;
use std::io::{Error, Result};
use std::path::{Path, PathBuf};
use moka::sync::Cache;
use once_cell::sync::Lazy;
pub fn read(path: impl AsRef<Path>) -> Result<Vec<u8>> {
static READ_CACHE: Lazy<Cache<PathBuf, Vec<u8>>> = Lazy::new(|| Cache::new(10_000));
let path = path.as_ref().canonicalize()?;
match READ_CACHE.get(path.as_path()) {
Some(content) => Ok(content),
None => {
let content = fs::read(path.as_path())?;
READ_CACHE.insert(path, content.clone());
Ok(content)
}
}
}
pub fn read_to_string(path: impl AsRef<Path>) -> Result<String> {
let content = read(path)?;
String::from_utf8(content).map_err(|_| {
Error::new(
std::io::ErrorKind::InvalidData,
"The contents of the file are not valid UTF8",
)
})
}
pub fn read_dir(path: impl AsRef<Path>) -> Result<Box<dyn Iterator<Item = Result<PathBuf>>>> {
static READ_DIR_CACHE: Lazy<Cache<PathBuf, Vec<PathBuf>>> = Lazy::new(|| Cache::new(10_000));
let path = path.as_ref().canonicalize()?;
match READ_DIR_CACHE.get(path.as_path()) {
Some(entries) => Ok(Box::new(entries.into_iter().map(Ok)) as Box<_>),
None => {
let entries = fs::read_dir(path.as_path())?
.flat_map(|maybe_entry| maybe_entry.map(|entry| entry.path()))
.collect();
READ_DIR_CACHE.insert(path.clone(), entries);
Ok(read_dir(path).unwrap())
}
}
}
@@ -19,6 +19,11 @@ pub struct FilesWithExtensionIterator {
/// this vector then they will be returned when the [`Iterator::next`] method is called. If not
/// then we visit one of the next directories to visit.
files_matching_allowed_extensions: Vec<PathBuf>,
/// This option controls if the the cached file system should be used or not. This could be
/// better for certain cases where the entries in the directories do not change and therefore
/// caching can be used.
use_cached_fs: bool,
}
impl FilesWithExtensionIterator {
@@ -27,6 +32,7 @@ impl FilesWithExtensionIterator {
allowed_extensions: Default::default(),
directories_to_search: vec![root_directory.as_ref().to_path_buf()],
files_matching_allowed_extensions: Default::default(),
use_cached_fs: Default::default(),
}
}
@@ -37,6 +43,11 @@ impl FilesWithExtensionIterator {
self.allowed_extensions.insert(allowed_extension.into());
self
}
pub fn with_use_cached_fs(mut self, use_cached_fs: bool) -> Self {
self.use_cached_fs = use_cached_fs;
self
}
}
impl Iterator for FilesWithExtensionIterator {
@@ -49,16 +60,19 @@ impl Iterator for FilesWithExtensionIterator {
let directory_to_search = self.directories_to_search.pop()?;
// Read all of the entries in the directory. If we failed to read this dir's entires then we
// elect to just ignore it and look in the next directory, we do that by calling the next
// method again on the iterator, which is an intentional decision that we made here instead
// of panicking.
let Ok(dir_entries) = std::fs::read_dir(directory_to_search) else {
return self.next();
let iterator = if self.use_cached_fs {
let Ok(dir_entries) = crate::cached_fs::read_dir(directory_to_search.as_path()) else {
return self.next();
};
Box::new(dir_entries) as Box<dyn Iterator<Item = std::io::Result<PathBuf>>>
} else {
let Ok(dir_entries) = std::fs::read_dir(directory_to_search) else {
return self.next();
};
Box::new(dir_entries.map(|maybe_entry| maybe_entry.map(|entry| entry.path()))) as Box<_>
};
for entry in dir_entries.flatten() {
let entry_path = entry.path();
for entry_path in iterator.flatten() {
if entry_path.is_dir() {
self.directories_to_search.push(entry_path)
} else if entry_path.is_file()
+1
View File
@@ -1,6 +1,7 @@
//! This crate provides common concepts, functionality, types, macros, and more that other crates in
//! the workspace can benefit from.
pub mod cached_fs;
pub mod fs;
pub mod futures;
pub mod iterators;