feat: improve execution speed

Kristofers Solo 2025-03-21 21:13:15 +02:00
parent 4711a77f7b
commit 859bd1135e
8 changed files with 199 additions and 119 deletions

Cargo.lock (generated)

@@ -17,6 +17,15 @@ version = "2.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627"
 
+[[package]]
+name = "aho-corasick"
+version = "1.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
+dependencies = [
+ "memchr",
+]
+
 [[package]]
 name = "anstream"
 version = "0.6.18"

@@ -424,11 +433,12 @@ dependencies = [
 [[package]]
 name = "project-finder"
-version = "0.1.0"
+version = "0.1.1"
 dependencies = [
  "anyhow",
  "clap",
  "futures",
+ "regex",
  "thiserror",
  "tokio",
  "tracing",

@@ -454,6 +464,35 @@ dependencies = [
  "bitflags",
 ]
 
+[[package]]
+name = "regex"
+version = "1.11.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-automata",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-automata"
+version = "0.4.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-syntax"
+version = "0.8.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
+
 [[package]]
 name = "rustc-demangle"
 version = "0.1.24"

Cargo.toml

@@ -1,7 +1,7 @@
 [package]
 name = "project-finder"
 authors = ["Kristofers Solo <dev@kristofers.xyz>"]
-version = "0.1.0"
+version = "0.1.1"
 edition = "2024"
 description = "Fast project finder for developers"
 repository = "https://github.com/kristoferssolo/project-finder"

@@ -17,6 +17,7 @@ readme = "README.md"
 anyhow = "1"
 clap = { version = "4.5", features = ["derive"] }
 futures = "0.3"
+regex = "1.11"
 thiserror = "2.0"
 tokio = { version = "1", features = ["full"] }
 tracing = "0.1"

README.md

@@ -23,8 +23,6 @@ To use Project Finder, you need the following dependencies installed on your sys
 * **fd:** A simple, fast, and user-friendly alternative to `find`.
   * Installation instructions: [https://github.com/sharkdp/fd#installation](https://github.com/sharkdp/fd#installation)
-* **ripgrep (rg):** A line-oriented search tool that recursively searches directories for a regex pattern.
-  * Installation instructions: [https://github.com/BurntSushi/ripgrep#installation](https://github.com/BurntSushi/ripgrep#installation)
 
 These tools must be available in your system's PATH.

src/commands.rs

@@ -1,20 +1,36 @@
-use crate::errors::{ProjectFinderError, Result};
+use crate::{
+    dependencies::Dependencies,
+    errors::{ProjectFinderError, Result},
+};
+use regex::{Regex, escape};
 use std::{
+    collections::HashMap,
     path::{Path, PathBuf},
     process::Stdio,
 };
-use tokio::process::Command;
+use tokio::{
+    fs::read_to_string,
+    io::{AsyncBufReadExt, BufReader},
+    process::Command,
+};
 use tracing::{debug, warn};
 
-use crate::dependencies::Dependencies;
-
 /// Run fd command to find files and directories
 pub async fn find_files(
     deps: &Dependencies,
     dir: &Path,
-    pattern: &str,
+    patterns: &[&str],
     max_depth: usize,
-) -> Result<Vec<PathBuf>> {
+) -> Result<HashMap<String, Vec<PathBuf>>> {
+    let combined_patterns = format!(
+        "({})",
+        patterns
+            .iter()
+            .map(|pattern| escape(pattern))
+            .collect::<Vec<_>>()
+            .join("|")
+    );
     let mut cmd = Command::new(&deps.fd_path);
 
     cmd.arg("--hidden")
@@ -23,26 +39,49 @@ pub async fn find_files(
         .arg("f")
         .arg("--max-depth")
         .arg(max_depth.to_string())
-        .arg(pattern)
+        .arg(&combined_patterns)
         .arg(dir)
         .stdout(Stdio::piped());
 
-    debug!("Running: fd {} in {}", pattern, dir.display());
+    debug!("Running: fd with combined pattern in {}", dir.display());
 
-    let output = cmd.output().await.map_err(|e| {
-        ProjectFinderError::CommandExecutionFailed(format!("Failed to execute fd: {e}"))
+    let mut child = cmd.spawn().map_err(|e| {
+        ProjectFinderError::CommandExecutionFailed(format!("Failed to spawn fd: {e}"))
     })?;
 
-    if !output.status.success() {
-        let stderr = String::from_utf8_lossy(&output.stderr);
-        warn!("fd command failed: {stderr}");
-        return Ok(Vec::new());
+    // Take the stdout and wrap it with a buffered reader.
+    let stdout = child.stdout.take().ok_or_else(|| {
+        ProjectFinderError::CommandExecutionFailed("Failed to capture stdout".into())
+    })?;
+    let reader = BufReader::new(stdout);
+    let mut lines = reader.lines();
+
+    let mut results = patterns
+        .iter()
+        .map(|pattern| ((*pattern).to_string(), Vec::new()))
+        .collect::<HashMap<_, _>>();
+
+    // Process output as lines arrive.
+    while let Some(line) = lines.next_line().await.map_err(|e| {
+        ProjectFinderError::CommandExecutionFailed(format!("Failed to read stdout: {e}"))
+    })? {
+        let path = PathBuf::from(line);
+        if let Some(file_name) = path.file_name().and_then(|f| f.to_str()) {
+            if let Some(entries) = results.get_mut(file_name) {
+                entries.push(path);
+            }
+        }
     }
 
-    let stdout = String::from_utf8(output.stdout).map_err(ProjectFinderError::Utf8Error)?;
+    // Ensure the process has finished.
+    let status = child.wait().await.map_err(|e| {
+        ProjectFinderError::CommandExecutionFailed(format!("Failed to wait for fd process: {e}"))
+    })?;
+    if !status.success() {
+        warn!("fd command exited with non-zero status: {status}");
+    }
 
-    let paths = stdout.lines().map(PathBuf::from).collect();
-    Ok(paths)
+    Ok(results)
 }
 
 /// Find Git repositories
@@ -88,18 +127,17 @@ pub async fn find_git_repos(
     Ok(paths)
 }
 
-/// Run grep on a file to check for a pattern
-pub async fn grep_file(deps: &Dependencies, file: &Path, pattern: &str) -> Result<bool> {
-    let mut cmd = Command::new(&deps.rg_path);
-
-    cmd.arg("-q") // quiet mode, just return exit code
-        .arg("-e") // explicitly specify pattern
-        .arg(pattern)
-        .arg(file);
-
-    let status = cmd.status().await.map_err(|e| {
-        ProjectFinderError::CommandExecutionFailed(format!("Failed to execute ripgrep: {e}"))
-    })?;
-
-    Ok(status.success())
+pub async fn grep_file_in_memory(file: &Path, pattern: &str) -> Result<bool> {
+    let contents = read_to_string(file).await.map_err(|e| {
+        ProjectFinderError::CommandExecutionFailed(format!(
+            "Failed to read file {}: {e}",
+            file.display()
+        ))
+    })?;
+
+    let re = Regex::new(pattern).map_err(|e| {
+        ProjectFinderError::CommandExecutionFailed(format!("Invalid regex pattern {pattern}: {e}"))
+    })?;
+
+    Ok(re.is_match(&contents))
 }
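
For orientation, a minimal usage sketch of the reworked API, not part of the commit: the wrapping async function, the marker list, and the depth value are assumptions; find_files, grep_file_in_memory, Dependencies, and the crate's Result alias are as defined above.

use std::path::Path;

async fn example(deps: &Dependencies) -> Result<()> {
    // One fd invocation now covers every marker; results come back grouped by file name.
    let by_marker = find_files(deps, Path::new("."), &["Cargo.toml", "package.json"], 5).await?;
    for (marker, paths) in by_marker {
        for path in paths {
            // The old per-file ripgrep subprocess is replaced by an in-memory regex check.
            if marker == "Cargo.toml" && grep_file_in_memory(&path, r"^\[workspace\]").await? {
                println!("cargo workspace root: {}", path.display());
            }
        }
    }
    Ok(())
}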

src/dependencies.rs

@@ -5,14 +5,12 @@ use which::which;
 #[derive(Debug, Clone)]
 pub struct Dependencies {
     pub fd_path: String,
-    pub rg_path: String,
 }
 
 impl Dependencies {
-    pub fn new(fd_path: impl Into<String>, rg_path: impl Into<String>) -> Self {
+    pub fn new(fd_path: impl Into<String>) -> Self {
         Self {
             fd_path: fd_path.into(),
-            rg_path: rg_path.into(),
         }
     }

@@ -25,17 +23,8 @@ impl Dependencies {
             )
         })?;
 
-        let rg_path = which("rg").map_err(|_| {
-            ProjectFinderError::DependencyNotFound(
-                "ripgrep (rg) - install from https://github.com/BurntSushi/ripgrep".into(),
-            )
-        })?;
-
         info!("Found fd at: {}", fd_path.display());
-        info!("Found ripgrep at: {}", rg_path.display());
 
-        Ok(Self::new(
-            fd_path.to_string_lossy(),
-            rg_path.to_string_lossy(),
-        ))
+        Ok(Self::new(fd_path.to_string_lossy()))
     }
 }
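
With ripgrep gone, dependency detection only has to locate fd. A small illustrative sketch of constructing Dependencies by hand (the helper name here is made up; the which call mirrors the code above):

use which::which;

fn detect_fd() -> Option<Dependencies> {
    // Only fd is probed now; ripgrep is no longer required at runtime.
    let fd_path = which("fd").ok()?;
    Some(Dependencies::new(fd_path.to_string_lossy()))
}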

src/finder.rs

@@ -1,8 +1,9 @@
 use crate::{
-    commands::{find_files, find_git_repos, grep_file},
+    commands::{find_files, find_git_repos, grep_file_in_memory},
     config::Config,
     dependencies::Dependencies,
     errors::{ProjectFinderError, Result},
+    marker::MarkerType,
 };
 use futures::future::join_all;
 use std::{

@@ -10,20 +11,35 @@ use std::{
     path::{Path, PathBuf},
     sync::Arc,
 };
-use tokio::sync::Mutex;
+use tokio::{
+    fs::metadata,
+    spawn,
+    sync::{RwLock, Semaphore},
+};
 use tracing::{debug, info};
 
-type ProjectSet = Arc<Mutex<HashSet<PathBuf>>>;
-type WorkspaceCache = Arc<Mutex<HashMap<PathBuf, bool>>>;
-type RootCache = Arc<Mutex<HashMap<(PathBuf, String), PathBuf>>>;
+type ProjectSet = Arc<RwLock<HashSet<PathBuf>>>;
+type WorkspaceCache = Arc<RwLock<HashMap<PathBuf, bool>>>;
+type RootCache = Arc<RwLock<HashMap<(PathBuf, String), PathBuf>>>;
 
-#[derive(Debug, Clone, PartialEq, Eq)]
-pub enum MarkerType {
-    PackageJson,
-    CargoToml,
-    DenoJson,
-    BuildFile(String),
-    OtherConfig(String),
+const MARKER_PATTERNS: [&str; 13] = [
+    "package.json",
+    "pnpm-workspace.yaml",
+    "lerna.json",
+    "Cargo.toml",
+    "go.mod",
+    "pyproject.toml",
+    "CMakeLists.txt",
+    "Makefile",
+    "justfile",
+    "Justfile",
+    "deno.json",
+    "deno.jsonc",
+    "bunfig.toml",
+];
+
+async fn path_exists(path: &Path) -> bool {
+    metadata(path).await.is_ok()
 }
 
 #[derive(Debug, Clone)]
@@ -40,14 +56,14 @@ impl ProjectFinder {
         Self {
             config,
             deps,
-            discovered_projects: Arc::new(Mutex::new(HashSet::new())),
-            workspace_cache: Arc::new(Mutex::new(HashMap::new())),
-            root_cache: Arc::new(Mutex::new(HashMap::new())),
+            discovered_projects: Arc::new(RwLock::new(HashSet::new())),
+            workspace_cache: Arc::new(RwLock::new(HashMap::new())),
+            root_cache: Arc::new(RwLock::new(HashMap::new())),
         }
     }
 
     pub async fn find_projects(&self) -> Result<Vec<PathBuf>> {
-        let semaphore = Arc::new(tokio::sync::Semaphore::new(8)); // Limit to 8 concurrent tasks
+        let semaphore = Arc::new(Semaphore::new(8)); // Limit to 8 concurrent tasks
         let mut handles = vec![];
 
         for path in &self.config.paths {

@@ -65,7 +81,7 @@
             let semaphore_clone = Arc::clone(&semaphore);
 
             // Spawn a task for each directory with semaphore permit
-            let handle = tokio::spawn(async move {
+            let handle = spawn(async move {
                 let _permit = semaphore_clone.acquire().await.map_err(|e| {
                     ProjectFinderError::CommandExecutionFailed(format!(
                         "Failed to acquire semaphore: {e}"

@@ -73,21 +89,19 @@
                 })?;
 
                 finder_clone.process_directory(&path_clone).await
             });
             handles.push(handle);
         }
 
         let handle_results = join_all(handles).await;
         let mut errors = handle_results
             .into_iter()
             .filter_map(|handle_result| match handle_result {
                 Ok(task_result) => task_result.err().map(|e| {
-                    debug!("Task failed: {}", e);
+                    debug!("Task failed: {e}");
                     e
                 }),
                 Err(e) => {
-                    debug!("Task join error: {}", e);
+                    debug!("Task join error: {e}");
                     Some(ProjectFinderError::CommandExecutionFailed(format!(
                         "Task panicked: {e}",
                     )))
@@ -96,16 +110,19 @@
             .collect::<Vec<_>>();
 
         // Return first error if any occurred
+        // Only fail if all tasks failed
         if !errors.is_empty() && errors.len() == self.config.paths.len() {
-            // Only fail if all tasks failed
             return Err(errors.remove(0));
         }
 
         // Return sorted results
-        let mut projects: Vec<PathBuf> = {
-            let projects_guard = self.discovered_projects.lock().await;
-            projects_guard.iter().cloned().collect()
-        };
+        let mut projects = self
+            .discovered_projects
+            .read()
+            .await
+            .iter()
+            .cloned()
+            .collect::<Vec<PathBuf>>();
 
         projects.sort();

@@ -122,33 +139,15 @@
         let git_repos = find_git_repos(&self.deps, dir, self.config.depth).await?;
         {
-            let mut projects = self.discovered_projects.lock().await;
-            projects.extend(git_repos);
+            self.discovered_projects.write().await.extend(git_repos);
         }
 
-        // Find relevant marker files
-        let marker_patterns = [
-            "package.json",
-            "pnpm-workspace.yaml",
-            "lerna.json",
-            "Cargo.toml",
-            "go.mod",
-            "pyproject.toml",
-            "CMakeLists.txt",
-            "Makefile",
-            "justfile",
-            "Justfile",
-            "deno.json",
-            "deno.jsonc",
-            "bunfig.toml",
-        ];
-
-        for pattern in &marker_patterns {
-            let paths = find_files(&self.deps, dir, pattern, self.config.depth).await?;
+        let marker_map = find_files(&self.deps, dir, &MARKER_PATTERNS, self.config.depth).await?;
 
+        for (pattern, paths) in marker_map {
             for path in paths {
                 if let Some(parent_dir) = path.parent() {
-                    self.process_marker(parent_dir, pattern).await?;
+                    self.process_marker(parent_dir, &pattern).await?;
                 }
             }
         }
@@ -158,15 +157,7 @@
     async fn process_marker(&self, dir: &Path, marker_name: &str) -> Result<()> {
         // Determine marker type
-        let marker_type = match marker_name {
-            "package.json" => MarkerType::PackageJson,
-            "Cargo.toml" => MarkerType::CargoToml,
-            "deno.json" | "deno.jsonc" => MarkerType::DenoJson,
-            "Makefile" | "CMakeLists.txt" | "justfile" | "Justfile" => {
-                MarkerType::BuildFile(marker_name.to_string())
-            }
-            _ => MarkerType::OtherConfig(marker_name.to_string()),
-        };
+        let marker_type = marker_name.parse().expect("How did we get here?");
 
         // Find project root
         let project_root = self.find_project_root(dir, &marker_type).await?;

@@ -176,7 +167,7 @@
         // valid nested projects of different types)
         let mut should_add = true;
         {
-            let projects = self.discovered_projects.lock().await;
+            let projects = self.discovered_projects.read().await;
             for known_project in projects.iter() {
                 // Check if this is a direct parent (not just any ancestor)
                 let is_direct_parent = project_root

@@ -195,8 +186,7 @@
         }
 
         if should_add {
-            let mut projects = self.discovered_projects.lock().await;
-            projects.insert(project_root);
+            self.discovered_projects.write().await.insert(project_root);
         }
 
         Ok(())

@@ -206,7 +196,7 @@
         // Check cache
         let cache_key = (dir.to_path_buf(), format!("{marker_type:?}"));
         {
-            let cache = self.root_cache.lock().await;
+            let cache = self.root_cache.read().await;
             if let Some(root) = cache.get(&cache_key) {
                 return Ok(root.clone());
             }
@@ -246,8 +236,8 @@
             }
 
             let cargo_toml = parent.join("Cargo.toml");
-            if cargo_toml.exists()
-                && grep_file(&self.deps, &cargo_toml, r"^\[workspace\]").await?
+            if path_exists(&cargo_toml).await
+                && grep_file_in_memory(&cargo_toml, r"^\[workspace\]").await?
             {
                 result = parent.to_path_buf();
                 break;

@@ -309,7 +299,7 @@
         // Cache the result
         self.root_cache
-            .lock()
+            .write()
             .await
             .insert(cache_key, result.clone());

@@ -319,7 +309,7 @@
     async fn is_workspace_root(&self, dir: &Path) -> Result<bool> {
         // Check cache
         {
-            let cache = self.workspace_cache.lock().await;
+            let cache = self.workspace_cache.read().await;
             if let Some(&result) = cache.get(dir) {
                 return Ok(result);
             }

@@ -348,9 +338,9 @@
         // Check for workspace by pattern matching
         for (file, pattern) in &workspace_patterns {
-            if file.exists() && grep_file(&self.deps, file, pattern).await? {
+            if path_exists(file).await && grep_file_in_memory(file, pattern).await? {
                 self.workspace_cache
-                    .lock()
+                    .write()
                     .await
                     .insert(dir.to_path_buf(), true);
                 return Ok(true);

@@ -359,9 +349,9 @@
         // Check for workspace by file existence
         for file in &workspace_files {
-            if file.exists() {
+            if path_exists(file).await {
                 self.workspace_cache
-                    .lock()
+                    .write()
                     .await
                     .insert(dir.to_path_buf(), true);
                 return Ok(true);

@@ -370,7 +360,7 @@
         // No workspace found
         self.workspace_cache
-            .lock()
+            .write()
             .await
             .insert(dir.to_path_buf(), false);
 
         Ok(false)
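
Moving the caches from Mutex to RwLock means the common case, a cache lookup, no longer serializes the concurrent per-directory tasks; only inserts take the exclusive write lock. A standalone sketch of that pattern with tokio's RwLock (the names below are illustrative, not from the crate):

use std::{collections::HashMap, path::{Path, PathBuf}, sync::Arc};
use tokio::sync::RwLock;

type WorkspaceCache = Arc<RwLock<HashMap<PathBuf, bool>>>;

async fn cached_is_workspace(cache: &WorkspaceCache, dir: &Path) -> Option<bool> {
    // Any number of tasks may hold the read lock at once; a Mutex would make them queue.
    cache.read().await.get(dir).copied()
}

async fn remember(cache: &WorkspaceCache, dir: PathBuf, value: bool) {
    // Writes are exclusive, but they are rare compared to lookups.
    cache.write().await.insert(dir, value);
}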

src/main.rs

@@ -3,11 +3,10 @@ mod config;
 mod dependencies;
 mod errors;
 mod finder;
+mod marker;
 
+use crate::{config::Config, dependencies::Dependencies, finder::ProjectFinder};
 use clap::Parser;
-use config::Config;
-use dependencies::Dependencies;
-use finder::ProjectFinder;
 use std::process::exit;
 use tracing::{Level, error};
 use tracing_subscriber::FmtSubscriber;

src/marker.rs (new file)

@@ -0,0 +1,26 @@
+use std::{convert::Infallible, str::FromStr};
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum MarkerType {
+    PackageJson,
+    CargoToml,
+    DenoJson,
+    BuildFile(String),
+    OtherConfig(String),
+}
+
+impl FromStr for MarkerType {
+    type Err = Infallible;
+
+    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
+        Ok(match s {
+            "package.json" => Self::PackageJson,
+            "Cargo.toml" => Self::CargoToml,
+            "deno.json" | "deno.jsonc" => Self::DenoJson,
+            "Makefile" | "CMakeLists.txt" | "justfile" | "Justfile" => {
+                Self::BuildFile(s.to_string())
+            }
+            _ => Self::OtherConfig(s.to_string()),
+        })
+    }
+}
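
A small illustrative check of the FromStr impl, not part of the commit, showing why process_marker can rely on parse() never failing:

use std::str::FromStr;

fn classify(name: &str) -> MarkerType {
    // Err is Infallible, so this unwrap can never panic.
    MarkerType::from_str(name).unwrap()
}

// classify("Cargo.toml")    == MarkerType::CargoToml
// classify("Makefile")      == MarkerType::BuildFile("Makefile".into())
// classify("settings.json") == MarkerType::OtherConfig("settings.json".into())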