feat: improve execution speed

This commit is contained in:
Kristofers Solo 2025-03-21 21:13:15 +02:00
parent 4711a77f7b
commit 859bd1135e
8 changed files with 199 additions and 119 deletions

41
Cargo.lock generated
View File

@ -17,6 +17,15 @@ version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627"
[[package]]
name = "aho-corasick"
version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
dependencies = [
"memchr",
]
[[package]]
name = "anstream"
version = "0.6.18"
@ -424,11 +433,12 @@ dependencies = [
[[package]]
name = "project-finder"
version = "0.1.0"
version = "0.1.1"
dependencies = [
"anyhow",
"clap",
"futures",
"regex",
"thiserror",
"tokio",
"tracing",
@ -454,6 +464,35 @@ dependencies = [
"bitflags",
]
[[package]]
name = "regex"
version = "1.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.4.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
[[package]]
name = "rustc-demangle"
version = "0.1.24"

View File

@ -1,7 +1,7 @@
[package]
name = "project-finder"
authors = ["Kristofers Solo <dev@kristofers.xyz>"]
version = "0.1.0"
version = "0.1.1"
edition = "2024"
description = "Fast project finder for developers"
repository = "https://github.com/kristoferssolo/project-finder"
@ -17,6 +17,7 @@ readme = "README.md"
anyhow = "1"
clap = { version = "4.5", features = ["derive"] }
futures = "0.3"
regex = "1.11"
thiserror = "2.0"
tokio = { version = "1", features = ["full"] }
tracing = "0.1"

View File

@ -23,8 +23,6 @@ To use Project Finder, you need the following dependencies installed on your sys
* **fd:** A simple, fast, and user-friendly alternative to `find`.
* Installation instructions: [https://github.com/sharkdp/fd#installation](https://github.com/sharkdp/fd#installation)
* **ripgrep (rg):** A line-oriented search tool that recursively searches directories for a regex pattern.
* Installation instructions: [https://github.com/BurntSushi/ripgrep#installation](https://github.com/BurntSushi/ripgrep#installation)
These tools must be available in your system's PATH.

View File

@ -1,20 +1,36 @@
use crate::errors::{ProjectFinderError, Result};
use crate::{
dependencies::Dependencies,
errors::{ProjectFinderError, Result},
};
use regex::{Regex, escape};
use std::{
collections::HashMap,
path::{Path, PathBuf},
process::Stdio,
};
use tokio::process::Command;
use tokio::{
fs::read_to_string,
io::{AsyncBufReadExt, BufReader},
process::Command,
};
use tracing::{debug, warn};
use crate::dependencies::Dependencies;
/// Run fd command to find files and directories
pub async fn find_files(
deps: &Dependencies,
dir: &Path,
pattern: &str,
patterns: &[&str],
max_depth: usize,
) -> Result<Vec<PathBuf>> {
) -> Result<HashMap<String, Vec<PathBuf>>> {
let combined_patterns = format!(
"({})",
patterns
.iter()
.map(|pattern| escape(pattern))
.collect::<Vec<_>>()
.join("|")
);
let mut cmd = Command::new(&deps.fd_path);
cmd.arg("--hidden")
@ -23,26 +39,49 @@ pub async fn find_files(
.arg("f")
.arg("--max-depth")
.arg(max_depth.to_string())
.arg(pattern)
.arg(&combined_patterns)
.arg(dir)
.stdout(Stdio::piped());
debug!("Running: fd {} in {}", pattern, dir.display());
debug!("Running: fd with combined pattern in {}", dir.display());
let output = cmd.output().await.map_err(|e| {
ProjectFinderError::CommandExecutionFailed(format!("Failed to execute fd: {e}"))
let mut child = cmd.spawn().map_err(|e| {
ProjectFinderError::CommandExecutionFailed(format!("Failed to spawn fd: {e}"))
})?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
warn!("fd command failed: {stderr}");
return Ok(Vec::new());
// Take the stdout and wrap it with a buffered reader.
let stdout = child.stdout.take().ok_or_else(|| {
ProjectFinderError::CommandExecutionFailed("Failed to capture stdout".into())
})?;
let reader = BufReader::new(stdout);
let mut lines = reader.lines();
let mut results = patterns
.iter()
.map(|pattern| ((*pattern).to_string(), Vec::new()))
.collect::<HashMap<_, _>>();
// Process output as lines arrive.
while let Some(line) = lines.next_line().await.map_err(|e| {
ProjectFinderError::CommandExecutionFailed(format!("Failed to read stdout: {e}"))
})? {
let path = PathBuf::from(line);
if let Some(file_name) = path.file_name().and_then(|f| f.to_str()) {
if let Some(entries) = results.get_mut(file_name) {
entries.push(path);
}
}
}
let stdout = String::from_utf8(output.stdout).map_err(ProjectFinderError::Utf8Error)?;
// Ensure the process has finished.
let status = child.wait().await.map_err(|e| {
ProjectFinderError::CommandExecutionFailed(format!("Failed to wait process: {e}"))
})?;
if !status.success() {
warn!("fd command exited with non-zero status: {status}");
}
let paths = stdout.lines().map(PathBuf::from).collect();
Ok(paths)
Ok(results)
}
/// Find Git repositories
@ -88,18 +127,17 @@ pub async fn find_git_repos(
Ok(paths)
}
/// Run grep on a file to check for a pattern
pub async fn grep_file(deps: &Dependencies, file: &Path, pattern: &str) -> Result<bool> {
let mut cmd = Command::new(&deps.rg_path);
cmd.arg("-q") // quiet mode, just return exit code
.arg("-e") // explicitly specify pattern
.arg(pattern)
.arg(file);
let status = cmd.status().await.map_err(|e| {
ProjectFinderError::CommandExecutionFailed(format!("Failed to execute ripgrep: {e}"))
pub async fn grep_file_in_memory(file: &Path, pattern: &str) -> Result<bool> {
let contents = read_to_string(file).await.map_err(|e| {
ProjectFinderError::CommandExecutionFailed(format!(
"Failed to read file {}: {e}",
file.display()
))
})?;
Ok(status.success())
let re = Regex::new(pattern).map_err(|e| {
ProjectFinderError::CommandExecutionFailed(format!("Invalid regex patter {pattern}: {e}"))
})?;
Ok(re.is_match(&contents))
}

View File

@ -5,14 +5,12 @@ use which::which;
#[derive(Debug, Clone)]
pub struct Dependencies {
pub fd_path: String,
pub rg_path: String,
}
impl Dependencies {
pub fn new(fd_path: impl Into<String>, rg_path: impl Into<String>) -> Self {
pub fn new(fd_path: impl Into<String>) -> Self {
Self {
fd_path: fd_path.into(),
rg_path: rg_path.into(),
}
}
@ -25,17 +23,8 @@ impl Dependencies {
)
})?;
let rg_path = which("rg").map_err(|_| {
ProjectFinderError::DependencyNotFound(
"ripgrep (rg) - install from https://github.com/BurntSushi/ripgrep".into(),
)
})?;
info!("Found fd at: {}", fd_path.display());
info!("Found ripgrep at: {}", rg_path.display());
Ok(Self::new(
fd_path.to_string_lossy(),
rg_path.to_string_lossy(),
))
Ok(Self::new(fd_path.to_string_lossy()))
}
}

View File

@ -1,8 +1,9 @@
use crate::{
commands::{find_files, find_git_repos, grep_file},
commands::{find_files, find_git_repos, grep_file_in_memory},
config::Config,
dependencies::Dependencies,
errors::{ProjectFinderError, Result},
marker::MarkerType,
};
use futures::future::join_all;
use std::{
@ -10,20 +11,35 @@ use std::{
path::{Path, PathBuf},
sync::Arc,
};
use tokio::sync::Mutex;
use tokio::{
fs::metadata,
spawn,
sync::{RwLock, Semaphore},
};
use tracing::{debug, info};
type ProjectSet = Arc<Mutex<HashSet<PathBuf>>>;
type WorkspaceCache = Arc<Mutex<HashMap<PathBuf, bool>>>;
type RootCache = Arc<Mutex<HashMap<(PathBuf, String), PathBuf>>>;
type ProjectSet = Arc<RwLock<HashSet<PathBuf>>>;
type WorkspaceCache = Arc<RwLock<HashMap<PathBuf, bool>>>;
type RootCache = Arc<RwLock<HashMap<(PathBuf, String), PathBuf>>>;
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum MarkerType {
PackageJson,
CargoToml,
DenoJson,
BuildFile(String),
OtherConfig(String),
/// File names that identify a directory as a candidate project.
///
/// The whole list is handed to `find_files` in a single call, and every name
/// found is later parsed into a `MarkerType` by `process_marker`.
///
/// Results are looked up by exact file name, which is why both `justfile` and
/// `Justfile` are listed separately.
const MARKER_PATTERNS: [&str; 13] = [
"package.json",
"pnpm-workspace.yaml",
"lerna.json",
"Cargo.toml",
"go.mod",
"pyproject.toml",
"CMakeLists.txt",
"Makefile",
"justfile",
"Justfile",
"deno.json",
"deno.jsonc",
"bunfig.toml",
];
/// Report whether `path` can be queried for metadata without blocking the runtime.
///
/// Any error from the metadata lookup is reported as `false`; the cause of the
/// failure is not distinguished.
async fn path_exists(path: &Path) -> bool {
    let probe = metadata(path).await;
    probe.is_ok()
}
#[derive(Debug, Clone)]
@ -40,14 +56,14 @@ impl ProjectFinder {
Self {
config,
deps,
discovered_projects: Arc::new(Mutex::new(HashSet::new())),
workspace_cache: Arc::new(Mutex::new(HashMap::new())),
root_cache: Arc::new(Mutex::new(HashMap::new())),
discovered_projects: Arc::new(RwLock::new(HashSet::new())),
workspace_cache: Arc::new(RwLock::new(HashMap::new())),
root_cache: Arc::new(RwLock::new(HashMap::new())),
}
}
pub async fn find_projects(&self) -> Result<Vec<PathBuf>> {
let semaphore = Arc::new(tokio::sync::Semaphore::new(8)); // Limit to 8 concurrent tasks
let semaphore = Arc::new(Semaphore::new(8)); // Limit to 8 concurrent tasks
let mut handles = vec![];
for path in &self.config.paths {
@ -65,7 +81,7 @@ impl ProjectFinder {
let semaphore_clone = Arc::clone(&semaphore);
// Spawn a task for each directory with semaphore permit
let handle = tokio::spawn(async move {
let handle = spawn(async move {
let _permit = semaphore_clone.acquire().await.map_err(|e| {
ProjectFinderError::CommandExecutionFailed(format!(
"Failed to aquire semaphore: {e}"
@ -73,21 +89,19 @@ impl ProjectFinder {
})?;
finder_clone.process_directory(&path_clone).await
});
handles.push(handle);
}
let handle_results = join_all(handles).await;
let mut errors = handle_results
.into_iter()
.filter_map(|handle_result| match handle_result {
Ok(task_result) => task_result.err().map(|e| {
debug!("Task failed: {}", e);
debug!("Task failed: {e}");
e
}),
Err(e) => {
debug!("Task join error: {}", e);
debug!("Task join error: {e}");
Some(ProjectFinderError::CommandExecutionFailed(format!(
"Task panicked: {e}",
)))
@ -96,16 +110,19 @@ impl ProjectFinder {
.collect::<Vec<_>>();
// Return first error if any occurred
if !errors.is_empty() && errors.len() == self.config.paths.len() {
// Only fail if all tasks failed
if !errors.is_empty() && errors.len() == self.config.paths.len() {
return Err(errors.remove(0));
}
// Return sorted results
let mut projects: Vec<PathBuf> = {
let projects_guard = self.discovered_projects.lock().await;
projects_guard.iter().cloned().collect()
};
let mut projects = self
.discovered_projects
.read()
.await
.iter()
.cloned()
.collect::<Vec<PathBuf>>();
projects.sort();
@ -122,33 +139,15 @@ impl ProjectFinder {
let git_repos = find_git_repos(&self.deps, dir, self.config.depth).await?;
{
let mut projects = self.discovered_projects.lock().await;
projects.extend(git_repos);
self.discovered_projects.write().await.extend(git_repos);
}
// Find relevant marker files
let marker_patterns = [
"package.json",
"pnpm-workspace.yaml",
"lerna.json",
"Cargo.toml",
"go.mod",
"pyproject.toml",
"CMakeLists.txt",
"Makefile",
"justfile",
"Justfile",
"deno.json",
"deno.jsonc",
"bunfig.toml",
];
for pattern in &marker_patterns {
let paths = find_files(&self.deps, dir, pattern, self.config.depth).await?;
let marker_map = find_files(&self.deps, dir, &MARKER_PATTERNS, self.config.depth).await?;
for (pattern, paths) in marker_map {
for path in paths {
if let Some(parent_dir) = path.parent() {
self.process_marker(parent_dir, pattern).await?;
self.process_marker(parent_dir, &pattern).await?;
}
}
}
@ -158,15 +157,7 @@ impl ProjectFinder {
async fn process_marker(&self, dir: &Path, marker_name: &str) -> Result<()> {
// Determine marker type
let marker_type = match marker_name {
"package.json" => MarkerType::PackageJson,
"Cargo.toml" => MarkerType::CargoToml,
"deno.json" | "deno.jsonc" => MarkerType::DenoJson,
"Makefile" | "CMakeLists.txt" | "justfile" | "Justfile" => {
MarkerType::BuildFile(marker_name.to_string())
}
_ => MarkerType::OtherConfig(marker_name.to_string()),
};
let marker_type = marker_name.parse().expect("How did we get here?");
// Find project root
let project_root = self.find_project_root(dir, &marker_type).await?;
@ -176,7 +167,7 @@ impl ProjectFinder {
// valid nested projects of different types)
let mut should_add = true;
{
let projects = self.discovered_projects.lock().await;
let projects = self.discovered_projects.read().await;
for known_project in projects.iter() {
// Check if this is a direct parent (not just any ancestor)
let is_direct_parent = project_root
@ -195,8 +186,7 @@ impl ProjectFinder {
}
if should_add {
let mut projects = self.discovered_projects.lock().await;
projects.insert(project_root);
self.discovered_projects.write().await.insert(project_root);
}
Ok(())
@ -206,7 +196,7 @@ impl ProjectFinder {
// Check cache
let cache_key = (dir.to_path_buf(), format!("{marker_type:?}"));
{
let cache = self.root_cache.lock().await;
let cache = self.root_cache.read().await;
if let Some(root) = cache.get(&cache_key) {
return Ok(root.clone());
}
@ -246,8 +236,8 @@ impl ProjectFinder {
}
let cargo_toml = parent.join("Cargo.toml");
if cargo_toml.exists()
&& grep_file(&self.deps, &cargo_toml, r"^\[workspace\]").await?
if path_exists(&cargo_toml).await
&& grep_file_in_memory(&cargo_toml, r"^\[workspace\]").await?
{
result = parent.to_path_buf();
break;
@ -309,7 +299,7 @@ impl ProjectFinder {
// Cache the result
self.root_cache
.lock()
.write()
.await
.insert(cache_key, result.clone());
@ -319,7 +309,7 @@ impl ProjectFinder {
async fn is_workspace_root(&self, dir: &Path) -> Result<bool> {
// Check cache
{
let cache = self.workspace_cache.lock().await;
let cache = self.workspace_cache.read().await;
if let Some(&result) = cache.get(dir) {
return Ok(result);
}
@ -348,9 +338,9 @@ impl ProjectFinder {
// Check for workspace by pattern matching
for (file, pattern) in &workspace_patterns {
if file.exists() && grep_file(&self.deps, file, pattern).await? {
if path_exists(file).await && grep_file_in_memory(file, pattern).await? {
self.workspace_cache
.lock()
.write()
.await
.insert(dir.to_path_buf(), true);
return Ok(true);
@ -359,9 +349,9 @@ impl ProjectFinder {
// Check for workspace by file existence
for file in &workspace_files {
if file.exists() {
if path_exists(file).await {
self.workspace_cache
.lock()
.write()
.await
.insert(dir.to_path_buf(), true);
return Ok(true);
@ -370,7 +360,7 @@ impl ProjectFinder {
// No workspace found
self.workspace_cache
.lock()
.write()
.await
.insert(dir.to_path_buf(), false);
Ok(false)

View File

@ -3,11 +3,10 @@ mod config;
mod dependencies;
mod errors;
mod finder;
mod marker;
use crate::{config::Config, dependencies::Dependencies, finder::ProjectFinder};
use clap::Parser;
use config::Config;
use dependencies::Dependencies;
use finder::ProjectFinder;
use std::process::exit;
use tracing::{Level, error};
use tracing_subscriber::FmtSubscriber;

26
src/marker.rs Normal file
View File

@ -0,0 +1,26 @@
use std::{convert::Infallible, str::FromStr};
/// Classification of a project marker file, derived from its file name.
///
/// Use [`FromStr`] (`name.parse::<MarkerType>()`) to obtain a value; parsing
/// never fails because unrecognised names fall back to [`MarkerType::OtherConfig`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum MarkerType {
    /// A `package.json` file.
    PackageJson,
    /// A `Cargo.toml` file.
    CargoToml,
    /// A `deno.json` or `deno.jsonc` file.
    DenoJson,
    /// A `Makefile`, `CMakeLists.txt`, `justfile` or `Justfile`; carries the file name.
    BuildFile(String),
    /// Any other file name; carries the name unchanged.
    OtherConfig(String),
}

impl FromStr for MarkerType {
    type Err = Infallible;

    /// Map a marker file name to its [`MarkerType`].
    ///
    /// Matching is exact and case-sensitive. This conversion is infallible.
    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
        const DENO_FILES: [&str; 2] = ["deno.json", "deno.jsonc"];
        const BUILD_FILES: [&str; 4] = ["Makefile", "CMakeLists.txt", "justfile", "Justfile"];

        let marker = if s == "package.json" {
            Self::PackageJson
        } else if s == "Cargo.toml" {
            Self::CargoToml
        } else if DENO_FILES.contains(&s) {
            Self::DenoJson
        } else if BUILD_FILES.contains(&s) {
            Self::BuildFile(s.to_owned())
        } else {
            Self::OtherConfig(s.to_owned())
        };

        Ok(marker)
    }
}