refactor: benchmarks

This commit is contained in:
Kristofers Solo 2025-04-01 14:29:43 +03:00
parent 385a4bf20e
commit 1a0f34c996
9 changed files with 347 additions and 141 deletions

View File

@ -46,4 +46,5 @@ expect_used = "warn"
[[bench]]
name = "benchmark"
path = "benches/benchmark.rs"
harness = false

View File

@ -1,145 +1,23 @@
use anyhow::Result;
mod common;
mod scenarios;
use common::setup::init_temp_dir;
use criterion::{Criterion, criterion_group, criterion_main};
use csv::Reader;
use serde::{Deserialize, Deserializer};
use std::{
fs::{File, create_dir_all},
path::{Path, PathBuf},
process::Command,
str::FromStr,
u64,
use scenarios::{
basic::benchmark_basic, edge_cases::benchmark_edge_cases,
specific::benchmark_specific_scenarios,
};
use tempfile::TempDir;
use std::time::Duration;
const BASE_DIR: &str = env!("CARGO_MANIFEST_DIR");
/// Value of the snapshot row's `size` column; defaults to 0 when the column is absent.
// `dead_code` is allowed because nothing visible in this file reads the wrapped value;
// the type exists so the row can be deserialized.
#[allow(dead_code)]
#[derive(Debug, Clone, Default, Deserialize)]
struct Size(u64);
/// Value of the snapshot row's `modified` column; defaults to 0 when the column is absent.
// `dead_code` is allowed for the same reason as on `Size`.
#[allow(dead_code)]
#[derive(Debug, Clone, Default, Deserialize)]
struct Modified(u64);
/// Value of the snapshot row's `permissions` column.
///
/// `Default` is implemented by hand for this type, so it is not derived here.
// `dead_code` is allowed for the same reason as on `Size`.
#[allow(dead_code)]
#[derive(Debug, Clone, Deserialize)]
struct Permissions(u32);
impl Default for Permissions {
fn default() -> Self {
Self(644)
}
}
/// Kind of filesystem entry recorded in a snapshot row.
#[derive(Debug, Clone, PartialEq)]
enum EntryType {
    /// The literal string `"dir"`.
    Dir,
    /// The literal string `"file"`.
    File,
    /// The literal string `"symlink"`.
    Symlink,
    /// Any other non-empty string, kept verbatim.
    Other(String),
}

impl FromStr for EntryType {
    type Err = String;

    /// Parses the textual entry type.
    ///
    /// Matching is exact and case-sensitive. An empty string is the only input that
    /// is rejected; every other unrecognised value becomes [`EntryType::Other`].
    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
        if s.is_empty() {
            return Err(String::from("Empty entry type"));
        }
        let kind = match s {
            "dir" => Self::Dir,
            "file" => Self::File,
            "symlink" => Self::Symlink,
            unknown => Self::Other(unknown.to_owned()),
        };
        Ok(kind)
    }
}
impl<'de> Deserialize<'de> for EntryType {
fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
where
D: Deserializer<'de>,
{
let s = String::deserialize(deserializer)?;
Self::from_str(&s).map_err(serde::de::Error::custom)
}
}
/// One row of the snapshot CSV, describing a single filesystem entry to recreate.
// `dead_code` is allowed because only `entry_type` and `path` are read by the code
// visible in this file; the remaining fields exist so the row deserializes.
#[allow(dead_code)]
#[derive(Debug, Clone, Deserialize)]
struct FileEntry {
/// Entry kind, read from the column named `type`.
#[serde(rename = "type")]
entry_type: EntryType,
/// Value of the `directory` column (not read by the visible code).
directory: PathBuf,
/// Path of the entry; joined onto the temporary base directory when recreated.
path: PathBuf,
/// Falls back to `Size::default()` when the column is missing.
#[serde(default)]
size: Size,
/// Falls back to `Modified::default()` when the column is missing.
#[serde(default)]
modified: Modified,
/// Falls back to `Permissions::default()` when the column is missing.
#[serde(default)]
permissions: Permissions,
}
impl FileEntry {
fn to_tempfile(&self, base: &Path) -> Result<()> {
let full_path = base.join(&self.path);
match self.entry_type {
EntryType::Dir => create_dir(&full_path),
EntryType::File => create_file(&full_path),
EntryType::Symlink => Ok(()),
EntryType::Other(_) => Ok(()),
}
}
}
fn create_file(path: &Path) -> Result<()> {
if let Some(parent) = path.parent() {
create_dir(parent)?;
}
File::create(path)?;
Ok(())
}
/// Creates `path` and all missing ancestors, converting the I/O error for callers.
fn create_dir(path: &Path) -> Result<()> {
    Ok(create_dir_all(path)?)
}
fn process_directory(path: &Path) {
let binary_path = PathBuf::from(BASE_DIR).join("target/release/project-finder");
Command::new(binary_path)
.arg(path)
.output()
.expect("failed to run binary");
}
/// Builds a temporary directory tree from the bundled snapshot CSV.
///
/// Each row is deserialized into a `FileEntry` and recreated under a fresh
/// temporary directory. Setup is best-effort: a row that fails to deserialize or to
/// be created is reported on stderr and skipped rather than aborting. Reporting
/// creation failures (previously commented out) keeps an incomplete fixture tree
/// from silently skewing the benchmark.
///
/// # Errors
/// Fails if the temporary directory cannot be created or the CSV cannot be opened.
fn setup_entries() -> Result<TempDir> {
    let temp_dir = TempDir::new()?;
    println!("Temporary directory: {:?}", temp_dir.path());
    let csv_path = PathBuf::from(BASE_DIR)
        .join("benches/fixtures")
        .join("snapshot-2025-03-31_09-20-03.csv");
    let mut rdr = Reader::from_path(csv_path)?;
    for entry in rdr.deserialize::<FileEntry>() {
        match entry {
            Ok(entry) => {
                if let Err(e) = entry.to_tempfile(temp_dir.path()) {
                    eprintln!("Error processing entry: {}", e);
                }
            }
            Err(e) => eprintln!("Failed to deserialize entry: {}", e),
        }
    }
    Ok(temp_dir)
}
/// Benchmarks one run of the binary over the snapshot-derived directory tree.
///
/// # Panics
/// Panics if the fixture tree cannot be set up.
fn benchmark_processing(c: &mut Criterion) {
    // The fixture is built once, outside the measured closure.
    let fixture = setup_entries().expect("Failed to setup file entries");
    let root = fixture.path();
    c.bench_function("process_directory", |bencher| {
        bencher.iter(|| process_directory(root))
    });
}
// Benchmark group `benches` containing only `benchmark_processing`.
// NOTE(review): the invocation that follows declares a group with the same name
// `benches`; confirm that only one of the two is meant to remain.
criterion_group!(benches, benchmark_processing);
// Benchmark group `benches` covering the basic, edge-case and specific scenarios.
criterion_group!(
name = benches;
config = {
// Custom configuration: 10 samples and a 30-second measurement time.
let c = Criterion::default()
.sample_size(10)
.measurement_time(Duration::from_secs(30));
// Make sure the shared fixture directory is initialised while the
// configuration is being built.
init_temp_dir();
c
};
targets = benchmark_basic, benchmark_edge_cases, benchmark_specific_scenarios
);
// Entry point for the bench binary (the `[[bench]]` target sets `harness = false`).
criterion_main!(benches);

2
benches/common/mod.rs Normal file
View File

@ -0,0 +1,2 @@
pub mod setup;
pub mod utils;

188
benches/common/setup.rs Normal file
View File

@ -0,0 +1,188 @@
use crate::common::utils::BASE_DIR;
use anyhow;
use csv::Reader;
use regex::Regex;
use serde::{Deserialize, Deserializer};
use std::{
fs::{self, File, create_dir_all},
path::{Path, PathBuf},
str::FromStr,
sync::OnceLock,
};
use tempfile::TempDir;
/// Process-wide fixture directory shared by the benchmark scenarios.
///
/// NOTE(review): Rust does not run destructors for statics, so the `TempDir` held
/// here is never dropped and the directory is presumably left on disk after the
/// benchmark process exits — confirm whether cleanup is wanted.
pub static TEMP_DIR: OnceLock<TempDir> = OnceLock::new();

/// Populates [`TEMP_DIR`] on first call; later calls do nothing.
///
/// # Panics
/// Panics if the fixture tree cannot be set up.
pub fn init_temp_dir() {
    let build_fixture = || setup_entries().expect("Failed to setup test directory");
    TEMP_DIR.get_or_init(build_fixture);
}
/// Parameter set for one benchmark case.
///
/// The fields correspond one-to-one to the `depth`, `max_results` and `verbose`
/// arguments of `run_binary_with_args`.
#[derive(Debug, Clone)]
pub struct BenchParams {
/// Value passed to the binary as `--depth`.
pub depth: usize,
/// Value passed as `--max-results`; the flag is omitted when this is 0.
pub max_results: usize,
/// When true, `--verbose` is passed to the binary.
pub verbose: bool,
}
/// One row of the snapshot CSV, describing a single filesystem entry to recreate.
// `dead_code` is allowed because only `entry_type` and `path` are read by the code
// visible in this file; the remaining fields exist so the row deserializes.
#[allow(dead_code)]
#[derive(Debug, Clone, Deserialize)]
struct FileEntry {
/// Entry kind, read from the column named `type`.
#[serde(rename = "type")]
entry_type: EntryType,
/// Value of the `directory` column (not read by the visible code).
directory: PathBuf,
/// Path of the entry; joined onto the temporary base directory when recreated.
path: PathBuf,
/// Falls back to `Size::default()` when the column is missing.
#[serde(default)]
size: Size,
/// Falls back to `Modified::default()` when the column is missing.
#[serde(default)]
modified: Modified,
/// Falls back to `Permissions::default()` when the column is missing.
#[serde(default)]
permissions: Permissions,
}
/// Value of the snapshot row's `size` column; an empty or whitespace-only cell is read as 0.
// `dead_code` is allowed because nothing visible in this file reads the wrapped value.
#[allow(dead_code)]
#[derive(Debug, Clone, Default, Deserialize)]
struct Size(#[serde(deserialize_with = "deserialize_u64_from_empty")] u64);
/// Value of the snapshot row's `modified` column; an empty or whitespace-only cell is read as 0.
// `dead_code` is allowed for the same reason as on `Size`.
#[allow(dead_code)]
#[derive(Debug, Clone, Default, Deserialize)]
struct Modified(#[serde(deserialize_with = "deserialize_u64_from_empty")] u64);
/// Value of the snapshot row's `permissions` column; an empty or whitespace-only cell is read as 644.
///
/// `Default` is implemented by hand for this type, so it is not derived here.
// `dead_code` is allowed for the same reason as on `Size`.
#[allow(dead_code)]
#[derive(Debug, Clone, Deserialize)]
struct Permissions(#[serde(deserialize_with = "deserialize_u16_from_empty")] u16);
/// Builds a temporary directory tree from the most recent snapshot CSV in
/// `benches/fixtures`.
///
/// Setup is best-effort: a row that fails to deserialize, or an entry that cannot be
/// created, is reported on stderr and skipped.
///
/// # Errors
/// Fails if the temporary directory cannot be created, no snapshot file is found, or
/// the snapshot cannot be opened.
pub fn setup_entries() -> anyhow::Result<TempDir> {
    let temp_dir = TempDir::new()?;
    println!("Temporary directory: {:?}", temp_dir.path());

    let snapshot_path = last_snaphow_file(&PathBuf::from(BASE_DIR).join("benches/fixtures"))?;
    let mut reader = Reader::from_path(snapshot_path)?;

    for row in reader.deserialize::<FileEntry>() {
        let entry = match row {
            Ok(entry) => entry,
            Err(e) => {
                eprintln!("Failed to deserialize entry: {}", e);
                continue;
            }
        };
        if let Err(e) = entry.to_tempfile(temp_dir.path()) {
            eprintln!("Error processing entry: {}", e);
        }
    }

    Ok(temp_dir)
}
/// Returns the path of the newest `snapshot-YYYY-MM-DD_HH-MM-SS.csv` file in `dir`.
///
/// "Newest" is decided by the timestamp embedded in the file name, not by file
/// metadata. Directory entries that cannot be read, whose names do not match the
/// pattern, or whose numeric fields fail to parse are ignored.
///
/// # Errors
/// Fails if the directory cannot be read or contains no matching file.
fn last_snaphow_file(dir: &Path) -> anyhow::Result<PathBuf> {
    let pattern = Regex::new(r"^snapshot-(\d{4})-(\d{2})-(\d{2})_(\d{2})-(\d{2})-(\d{2})\.csv$")?;

    let newest = fs::read_dir(dir)?
        .filter_map(Result::ok)
        .filter_map(|candidate| {
            let name = candidate.file_name();
            let name = name.to_string_lossy();
            let groups = pattern.captures(&name)?;

            // Year, month, day, hour, minute, second — compared lexicographically.
            let mut stamp = [0u32; 6];
            for (slot, group) in stamp.iter_mut().zip(1..=6) {
                *slot = groups.get(group)?.as_str().parse().ok()?;
            }
            Some((stamp, candidate.path()))
        })
        // On equal timestamps the last candidate wins, as with a stable sort + `last`.
        .max_by_key(|(stamp, _)| *stamp);

    match newest {
        Some((_, path)) => Ok(path),
        None => Err(anyhow::anyhow!("No snapshot files found in directory")),
    }
}
fn deserialize_u64_from_empty<'de, D>(deserializer: D) -> Result<u64, D::Error>
where
D: Deserializer<'de>,
{
let s = String::deserialize(deserializer)?;
if s.trim().is_empty() {
return Ok(0);
}
s.parse().map_err(serde::de::Error::custom)
}
fn deserialize_u16_from_empty<'de, D>(deserializer: D) -> Result<u16, D::Error>
where
D: Deserializer<'de>,
{
let s = String::deserialize(deserializer)?;
if s.trim().is_empty() {
return Ok(644);
}
s.parse().map_err(serde::de::Error::custom)
}
impl Default for Permissions {
fn default() -> Self {
Self(644)
}
}
/// Kind of filesystem entry recorded in a snapshot row.
#[derive(Debug, Clone, PartialEq)]
enum EntryType {
    /// The literal string `"dir"`.
    Dir,
    /// The literal string `"file"`.
    File,
    /// The literal string `"symlink"`.
    Symlink,
    /// Any other non-empty string, kept verbatim.
    Other(String),
}

impl FromStr for EntryType {
    type Err = String;

    /// Parses the textual entry type.
    ///
    /// Matching is exact and case-sensitive. An empty string is the only input that
    /// is rejected; every other unrecognised value becomes [`EntryType::Other`].
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        if s.is_empty() {
            return Err(String::from("Empty entry type"));
        }
        Ok(match s {
            "dir" => Self::Dir,
            "file" => Self::File,
            "symlink" => Self::Symlink,
            unknown => Self::Other(unknown.to_owned()),
        })
    }
}
impl<'de> Deserialize<'de> for EntryType {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
let s = String::deserialize(deserializer)?;
Self::from_str(&s).map_err(serde::de::Error::custom)
}
}
impl FileEntry {
fn to_tempfile(&self, base: &Path) -> anyhow::Result<()> {
let full_path = base.join(&self.path);
match self.entry_type {
EntryType::Dir => create_dir(&full_path),
EntryType::File => create_file(&full_path),
EntryType::Symlink => Ok(()),
EntryType::Other(_) => Ok(()),
}
}
}
fn create_file(path: &Path) -> anyhow::Result<()> {
if let Some(parent) = path.parent() {
create_dir(parent)?;
}
File::create(path)?;
Ok(())
}
/// Creates `path` and all missing ancestors, converting the I/O error for callers.
fn create_dir(path: &Path) -> anyhow::Result<()> {
    Ok(create_dir_all(path)?)
}

62
benches/common/utils.rs Normal file
View File

@ -0,0 +1,62 @@
use std::{
path::{Path, PathBuf},
process::Command,
};
pub const BASE_DIR: &str = env!("CARGO_MANIFEST_DIR");
pub fn run_binary_with_args(
path: &Path,
depth: usize,
max_results: usize,
verbose: bool,
) -> anyhow::Result<()> {
let binary_path = PathBuf::from(BASE_DIR).join("target/release/project-finder");
if !binary_path.exists() {
return Err(anyhow::anyhow!(
"Binary not found at {}. Did you run `cargo build --release`?",
binary_path.display()
));
}
let mut cmd = Command::new(&binary_path);
// Add the path to search
cmd.arg(path);
// Add depth parameter
cmd.arg("--depth").arg(depth.to_string());
// Add max_results parameter if not zero
if max_results > 0 {
cmd.arg("--max-results").arg(max_results.to_string());
}
// Add verbose flag if true
if verbose {
cmd.arg("--verbose");
}
let output = cmd.output().map_err(|e| {
anyhow::anyhow!("Failed to execute binary {}: {}", binary_path.display(), e)
})?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
return Err(anyhow::anyhow!(
"Process failed with status: {}\nStderr: {}",
output.status,
stderr
));
}
Ok(())
}
/// Creates a chain of `depth` nested directories beneath `base`.
///
/// The resulting layout is `base/level_0/level_1/.../level_{depth-1}`; the
/// `level_N` names are this helper's own convention. With `depth == 0` only `base`
/// itself is ensured to exist. Replaces the previous `todo!()` stub, which panicked
/// on every call.
///
/// # Errors
/// Returns the underlying I/O error if any directory cannot be created.
pub fn create_deep_directory(base: &Path, depth: usize) -> anyhow::Result<()> {
    let mut deepest = base.to_path_buf();
    for level in 0..depth {
        deepest.push(format!("level_{}", level));
    }
    // One call creates every missing directory along the chain.
    std::fs::create_dir_all(&deepest)?;
    Ok(())
}
/// Creates `width` sibling directories directly beneath `base`.
///
/// The directories are named `dir_0` .. `dir_{width-1}`; the names are this
/// helper's own convention. `base` is created if missing, so with `width == 0` only
/// `base` itself is ensured to exist. Replaces the previous `todo!()` stub, which
/// panicked on every call.
///
/// # Errors
/// Returns the underlying I/O error if any directory cannot be created.
pub fn create_wide_directory(base: &Path, width: usize) -> anyhow::Result<()> {
    std::fs::create_dir_all(base)?;
    for index in 0..width {
        std::fs::create_dir_all(base.join(format!("dir_{}", index)))?;
    }
    Ok(())
}

View File

@ -0,0 +1,60 @@
use crate::common::{
setup::{BenchParams, TEMP_DIR, init_temp_dir, setup_entries},
utils::{BASE_DIR, run_binary_with_args},
};
use criterion::{BenchmarkId, Criterion};
use std::{
path::{Path, PathBuf},
process::Command,
};
fn process_directory(path: &Path) {
let binary_path = PathBuf::from(BASE_DIR).join("target/release/project-finder");
Command::new(binary_path)
.arg(path)
.output()
.expect("failed to run binary");
}
/// Registers the `basic_scenarios` benchmark group.
///
/// The group contains one benchmark that runs the binary with no extra flags, plus
/// one parameterised benchmark per depth (1 and 5) with no result limit and verbose
/// output off.
///
/// # Panics
/// Panics if the shared fixture directory cannot be set up or if a benchmarked run
/// of the binary fails.
pub fn benchmark_basic(c: &mut Criterion) {
    init_temp_dir();
    let temp_dir = TEMP_DIR.get().unwrap().path();

    let params = [1, 5].map(|depth| BenchParams {
        depth,
        max_results: 0,
        verbose: false,
    });

    let mut group = c.benchmark_group("basic_scenarios");

    group.bench_function("process_directory", |bencher| {
        bencher.iter(|| process_directory(temp_dir))
    });

    for param in params {
        let label = format!(
            "depth{}_max{}_verbose{}",
            param.depth, param.max_results, param.verbose
        );
        let id = BenchmarkId::new(label, param.depth);
        group.bench_with_input(id, &param, |bencher, case| {
            bencher.iter(|| {
                run_binary_with_args(temp_dir, case.depth, case.max_results, case.verbose)
                    .expect("Failed to run binary")
            })
        });
    }

    group.finish();
}

View File

@ -0,0 +1,6 @@
use criterion::Criterion;
/// Registers the `edge_cases` benchmark group.
///
/// The group is currently empty: it is opened and finished without any benchmarks.
pub fn benchmark_edge_cases(c: &mut Criterion) {
    c.benchmark_group("edge_cases").finish();
}

3
benches/scenarios/mod.rs Normal file
View File

@ -0,0 +1,3 @@
pub mod basic;
pub mod edge_cases;
pub mod specific;

View File

@ -0,0 +1,6 @@
use criterion::Criterion;
/// Registers the `specific_scenarios` benchmark group.
///
/// The group is currently empty: it is opened and finished without any benchmarks.
pub fn benchmark_specific_scenarios(c: &mut Criterion) {
    c.benchmark_group("specific_scenarios").finish();
}