diff --git a/Cargo.lock b/Cargo.lock index 831d0fa..e4d5fc2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,6 +17,15 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + [[package]] name = "anstream" version = "0.6.18" @@ -424,11 +433,12 @@ dependencies = [ [[package]] name = "project-finder" -version = "0.1.0" +version = "0.1.1" dependencies = [ "anyhow", "clap", "futures", + "regex", "thiserror", "tokio", "tracing", @@ -454,6 +464,35 @@ dependencies = [ "bitflags", ] +[[package]] +name = "regex" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" + [[package]] name = "rustc-demangle" version = "0.1.24" diff --git a/Cargo.toml b/Cargo.toml index afab3a8..f8e86d5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "project-finder" authors = ["Kristofers Solo "] -version = "0.1.0" +version = "0.1.1" edition = "2024" description = "Fast project finder for developers" repository = "https://github.com/kristoferssolo/project-finder" @@ -17,6 +17,7 @@ readme = "README.md" anyhow = "1" clap = { version = "4.5", features = ["derive"] } futures = "0.3" +regex = "1.11" thiserror = "2.0" tokio = { version = "1", features = ["full"] } tracing = "0.1" diff --git a/README.md b/README.md index 233b513..575a281 100644 --- a/README.md +++ b/README.md @@ -23,8 +23,6 @@ To use Project Finder, you need the following dependencies installed on your sys * **fd:** A simple, fast, and user-friendly alternative to `find`. * Installation instructions: [https://github.com/sharkdp/fd#installation](https://github.com/sharkdp/fd#installation) -* **ripgrep (rg):** A line-oriented search tool that recursively searches directories for a regex pattern. - * Installation instructions: [https://github.com/BurntSushi/ripgrep#installation](https://github.com/BurntSushi/ripgrep#installation) These tools must be available in your system's PATH. diff --git a/src/commands.rs b/src/commands.rs index d911a28..44f1ae6 100644 --- a/src/commands.rs +++ b/src/commands.rs @@ -1,20 +1,36 @@ -use crate::errors::{ProjectFinderError, Result}; +use crate::{ + dependencies::Dependencies, + errors::{ProjectFinderError, Result}, +}; +use regex::{Regex, escape}; use std::{ + collections::HashMap, path::{Path, PathBuf}, process::Stdio, }; -use tokio::process::Command; +use tokio::{ + fs::read_to_string, + io::{AsyncBufReadExt, BufReader}, + process::Command, +}; use tracing::{debug, warn}; -use crate::dependencies::Dependencies; - /// Run fd command to find files and directories pub async fn find_files( deps: &Dependencies, dir: &Path, - pattern: &str, + patterns: &[&str], max_depth: usize, -) -> Result> { +) -> Result>> { + let combined_patterns = format!( + "({})", + patterns + .iter() + .map(|pattern| escape(pattern)) + .collect::>() + .join("|") + ); + let mut cmd = Command::new(&deps.fd_path); cmd.arg("--hidden") @@ -23,26 +39,49 @@ pub async fn find_files( .arg("f") .arg("--max-depth") .arg(max_depth.to_string()) - .arg(pattern) + .arg(&combined_patterns) .arg(dir) .stdout(Stdio::piped()); - debug!("Running: fd {} in {}", pattern, dir.display()); + debug!("Running: fd with combined pattern in {}", dir.display()); - let output = cmd.output().await.map_err(|e| { - ProjectFinderError::CommandExecutionFailed(format!("Failed to execute fd: {e}")) + let mut child = cmd.spawn().map_err(|e| { + ProjectFinderError::CommandExecutionFailed(format!("Failed to spawn fd: {e}")) })?; - if !output.status.success() { - let stderr = String::from_utf8_lossy(&output.stderr); - warn!("fd command failed: {stderr}"); - return Ok(Vec::new()); + // Take the stdout and wrap it with a buffered reader. + let stdout = child.stdout.take().ok_or_else(|| { + ProjectFinderError::CommandExecutionFailed("Failed to capture stdout".into()) + })?; + let reader = BufReader::new(stdout); + let mut lines = reader.lines(); + + let mut results = patterns + .iter() + .map(|pattern| ((*pattern).to_string(), Vec::new())) + .collect::>(); + + // Process output as lines arrive. + while let Some(line) = lines.next_line().await.map_err(|e| { + ProjectFinderError::CommandExecutionFailed(format!("Failed to read stdout: {e}")) + })? { + let path = PathBuf::from(line); + if let Some(file_name) = path.file_name().and_then(|f| f.to_str()) { + if let Some(entries) = results.get_mut(file_name) { + entries.push(path); + } + } } - let stdout = String::from_utf8(output.stdout).map_err(ProjectFinderError::Utf8Error)?; + // Ensure the process has finished. + let status = child.wait().await.map_err(|e| { + ProjectFinderError::CommandExecutionFailed(format!("Failed to wait process: {e}")) + })?; + if !status.success() { + warn!("fd command exited with non-zero status: {status}"); + } - let paths = stdout.lines().map(PathBuf::from).collect(); - Ok(paths) + Ok(results) } /// Find Git repositories @@ -88,18 +127,17 @@ pub async fn find_git_repos( Ok(paths) } -/// Run grep on a file to check for a pattern -pub async fn grep_file(deps: &Dependencies, file: &Path, pattern: &str) -> Result { - let mut cmd = Command::new(&deps.rg_path); - - cmd.arg("-q") // quiet mode, just return exit code - .arg("-e") // explicitly specify pattern - .arg(pattern) - .arg(file); - - let status = cmd.status().await.map_err(|e| { - ProjectFinderError::CommandExecutionFailed(format!("Failed to execute ripgrep: {e}")) +pub async fn grep_file_in_memory(file: &Path, pattern: &str) -> Result { + let contents = read_to_string(file).await.map_err(|e| { + ProjectFinderError::CommandExecutionFailed(format!( + "Failed to read file {}: {e}", + file.display() + )) })?; - Ok(status.success()) + let re = Regex::new(pattern).map_err(|e| { + ProjectFinderError::CommandExecutionFailed(format!("Invalid regex patter {pattern}: {e}")) + })?; + + Ok(re.is_match(&contents)) } diff --git a/src/dependencies.rs b/src/dependencies.rs index 850fdc6..ca601b5 100644 --- a/src/dependencies.rs +++ b/src/dependencies.rs @@ -5,14 +5,12 @@ use which::which; #[derive(Debug, Clone)] pub struct Dependencies { pub fd_path: String, - pub rg_path: String, } impl Dependencies { - pub fn new(fd_path: impl Into, rg_path: impl Into) -> Self { + pub fn new(fd_path: impl Into) -> Self { Self { fd_path: fd_path.into(), - rg_path: rg_path.into(), } } @@ -25,17 +23,8 @@ impl Dependencies { ) })?; - let rg_path = which("rg").map_err(|_| { - ProjectFinderError::DependencyNotFound( - "ripgrep (rg) - install from https://github.com/BurntSushi/ripgrep".into(), - ) - })?; info!("Found fd at: {}", fd_path.display()); - info!("Found ripgrep at: {}", rg_path.display()); - Ok(Self::new( - fd_path.to_string_lossy(), - rg_path.to_string_lossy(), - )) + Ok(Self::new(fd_path.to_string_lossy())) } } diff --git a/src/finder.rs b/src/finder.rs index cd4506e..4051d17 100644 --- a/src/finder.rs +++ b/src/finder.rs @@ -1,8 +1,9 @@ use crate::{ - commands::{find_files, find_git_repos, grep_file}, + commands::{find_files, find_git_repos, grep_file_in_memory}, config::Config, dependencies::Dependencies, errors::{ProjectFinderError, Result}, + marker::MarkerType, }; use futures::future::join_all; use std::{ @@ -10,20 +11,35 @@ use std::{ path::{Path, PathBuf}, sync::Arc, }; -use tokio::sync::Mutex; +use tokio::{ + fs::metadata, + spawn, + sync::{RwLock, Semaphore}, +}; use tracing::{debug, info}; -type ProjectSet = Arc>>; -type WorkspaceCache = Arc>>; -type RootCache = Arc>>; +type ProjectSet = Arc>>; +type WorkspaceCache = Arc>>; +type RootCache = Arc>>; -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum MarkerType { - PackageJson, - CargoToml, - DenoJson, - BuildFile(String), - OtherConfig(String), +const MARKER_PATTERNS: [&str; 13] = [ + "package.json", + "pnpm-workspace.yaml", + "lerna.json", + "Cargo.toml", + "go.mod", + "pyproject.toml", + "CMakeLists.txt", + "Makefile", + "justfile", + "Justfile", + "deno.json", + "deno.jsonc", + "bunfig.toml", +]; + +async fn path_exists(path: &Path) -> bool { + metadata(path).await.is_ok() } #[derive(Debug, Clone)] @@ -40,14 +56,14 @@ impl ProjectFinder { Self { config, deps, - discovered_projects: Arc::new(Mutex::new(HashSet::new())), - workspace_cache: Arc::new(Mutex::new(HashMap::new())), - root_cache: Arc::new(Mutex::new(HashMap::new())), + discovered_projects: Arc::new(RwLock::new(HashSet::new())), + workspace_cache: Arc::new(RwLock::new(HashMap::new())), + root_cache: Arc::new(RwLock::new(HashMap::new())), } } pub async fn find_projects(&self) -> Result> { - let semaphore = Arc::new(tokio::sync::Semaphore::new(8)); // Limit to 8 concurrent tasks + let semaphore = Arc::new(Semaphore::new(8)); // Limit to 8 concurrent tasks let mut handles = vec![]; for path in &self.config.paths { @@ -65,7 +81,7 @@ impl ProjectFinder { let semaphore_clone = Arc::clone(&semaphore); // Spawn a task for each directory with semaphore permit - let handle = tokio::spawn(async move { + let handle = spawn(async move { let _permit = semaphore_clone.acquire().await.map_err(|e| { ProjectFinderError::CommandExecutionFailed(format!( "Failed to aquire semaphore: {e}" @@ -73,21 +89,19 @@ impl ProjectFinder { })?; finder_clone.process_directory(&path_clone).await }); - handles.push(handle); } let handle_results = join_all(handles).await; - let mut errors = handle_results .into_iter() .filter_map(|handle_result| match handle_result { Ok(task_result) => task_result.err().map(|e| { - debug!("Task failed: {}", e); + debug!("Task failed: {e}"); e }), Err(e) => { - debug!("Task join error: {}", e); + debug!("Task join error: {e}"); Some(ProjectFinderError::CommandExecutionFailed(format!( "Task panicked: {e}", ))) @@ -96,16 +110,19 @@ impl ProjectFinder { .collect::>(); // Return first error if any occurred + // Only fail if all tasks failed if !errors.is_empty() && errors.len() == self.config.paths.len() { - // Only fail if all tasks failed return Err(errors.remove(0)); } // Return sorted results - let mut projects: Vec = { - let projects_guard = self.discovered_projects.lock().await; - projects_guard.iter().cloned().collect() - }; + let mut projects = self + .discovered_projects + .read() + .await + .iter() + .cloned() + .collect::>(); projects.sort(); @@ -122,33 +139,15 @@ impl ProjectFinder { let git_repos = find_git_repos(&self.deps, dir, self.config.depth).await?; { - let mut projects = self.discovered_projects.lock().await; - projects.extend(git_repos); + self.discovered_projects.write().await.extend(git_repos); } - // Find relevant marker files - let marker_patterns = [ - "package.json", - "pnpm-workspace.yaml", - "lerna.json", - "Cargo.toml", - "go.mod", - "pyproject.toml", - "CMakeLists.txt", - "Makefile", - "justfile", - "Justfile", - "deno.json", - "deno.jsonc", - "bunfig.toml", - ]; - - for pattern in &marker_patterns { - let paths = find_files(&self.deps, dir, pattern, self.config.depth).await?; + let marker_map = find_files(&self.deps, dir, &MARKER_PATTERNS, self.config.depth).await?; + for (pattern, paths) in marker_map { for path in paths { if let Some(parent_dir) = path.parent() { - self.process_marker(parent_dir, pattern).await?; + self.process_marker(parent_dir, &pattern).await?; } } } @@ -158,15 +157,7 @@ impl ProjectFinder { async fn process_marker(&self, dir: &Path, marker_name: &str) -> Result<()> { // Determine marker type - let marker_type = match marker_name { - "package.json" => MarkerType::PackageJson, - "Cargo.toml" => MarkerType::CargoToml, - "deno.json" | "deno.jsonc" => MarkerType::DenoJson, - "Makefile" | "CMakeLists.txt" | "justfile" | "Justfile" => { - MarkerType::BuildFile(marker_name.to_string()) - } - _ => MarkerType::OtherConfig(marker_name.to_string()), - }; + let marker_type = marker_name.parse().expect("How did we get here?"); // Find project root let project_root = self.find_project_root(dir, &marker_type).await?; @@ -176,7 +167,7 @@ impl ProjectFinder { // valid nested projects of different types) let mut should_add = true; { - let projects = self.discovered_projects.lock().await; + let projects = self.discovered_projects.read().await; for known_project in projects.iter() { // Check if this is a direct parent (not just any ancestor) let is_direct_parent = project_root @@ -195,8 +186,7 @@ impl ProjectFinder { } if should_add { - let mut projects = self.discovered_projects.lock().await; - projects.insert(project_root); + self.discovered_projects.write().await.insert(project_root); } Ok(()) @@ -206,7 +196,7 @@ impl ProjectFinder { // Check cache let cache_key = (dir.to_path_buf(), format!("{marker_type:?}")); { - let cache = self.root_cache.lock().await; + let cache = self.root_cache.read().await; if let Some(root) = cache.get(&cache_key) { return Ok(root.clone()); } @@ -246,8 +236,8 @@ impl ProjectFinder { } let cargo_toml = parent.join("Cargo.toml"); - if cargo_toml.exists() - && grep_file(&self.deps, &cargo_toml, r"^\[workspace\]").await? + if path_exists(&cargo_toml).await + && grep_file_in_memory(&cargo_toml, r"^\[workspace\]").await? { result = parent.to_path_buf(); break; @@ -309,7 +299,7 @@ impl ProjectFinder { // Cache the result self.root_cache - .lock() + .write() .await .insert(cache_key, result.clone()); @@ -319,7 +309,7 @@ impl ProjectFinder { async fn is_workspace_root(&self, dir: &Path) -> Result { // Check cache { - let cache = self.workspace_cache.lock().await; + let cache = self.workspace_cache.read().await; if let Some(&result) = cache.get(dir) { return Ok(result); } @@ -348,9 +338,9 @@ impl ProjectFinder { // Check for workspace by pattern matching for (file, pattern) in &workspace_patterns { - if file.exists() && grep_file(&self.deps, file, pattern).await? { + if path_exists(file).await && grep_file_in_memory(file, pattern).await? { self.workspace_cache - .lock() + .write() .await .insert(dir.to_path_buf(), true); return Ok(true); @@ -359,9 +349,9 @@ impl ProjectFinder { // Check for workspace by file existence for file in &workspace_files { - if file.exists() { + if path_exists(file).await { self.workspace_cache - .lock() + .write() .await .insert(dir.to_path_buf(), true); return Ok(true); @@ -370,7 +360,7 @@ impl ProjectFinder { // No workspace found self.workspace_cache - .lock() + .write() .await .insert(dir.to_path_buf(), false); Ok(false) diff --git a/src/main.rs b/src/main.rs index 39efc91..c041e6e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,11 +3,10 @@ mod config; mod dependencies; mod errors; mod finder; +mod marker; +use crate::{config::Config, dependencies::Dependencies, finder::ProjectFinder}; use clap::Parser; -use config::Config; -use dependencies::Dependencies; -use finder::ProjectFinder; use std::process::exit; use tracing::{Level, error}; use tracing_subscriber::FmtSubscriber; diff --git a/src/marker.rs b/src/marker.rs new file mode 100644 index 0000000..8c9212a --- /dev/null +++ b/src/marker.rs @@ -0,0 +1,26 @@ +use std::{convert::Infallible, str::FromStr}; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum MarkerType { + PackageJson, + CargoToml, + DenoJson, + BuildFile(String), + OtherConfig(String), +} + +impl FromStr for MarkerType { + type Err = Infallible; + + fn from_str(s: &str) -> std::result::Result { + Ok(match s { + "package.json" => Self::PackageJson, + "Cargo.toml" => Self::CargoToml, + "deno.json" | "deno.jsonc" => Self::DenoJson, + "Makefile" | "CMakeLists.txt" | "justfile" | "Justfile" => { + Self::BuildFile(s.to_string()) + } + _ => Self::OtherConfig(s.to_string()), + }) + } +}