mirror of
https://github.com/kristoferssolo/project-finder.git
synced 2025-10-21 19:50:35 +00:00
feat(snapshot): add filesystem snapshot creator
This commit is contained in:
parent
f15727203d
commit
bee7bd2097
1
.gitignore
vendored
1
.gitignore
vendored
@ -1 +1,2 @@
|
|||||||
/target
|
/target
|
||||||
|
/benches/fixtures
|
||||||
|
|||||||
2
justfile
Normal file
2
justfile
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
# {TIMESTAMP} (single braces) is passed through literally and filled in by
# scripts/snapshot; {{PATHS}} is interpolated by just before the line runs.
# Snapshot PATHS into a timestamped CSV under benches/fixtures
snapshot +PATHS:
    ./scripts/snapshot -o benches/fixtures/"snapshot-{TIMESTAMP}.csv" -f csv {{PATHS}}
|
||||||
222
scripts/snapshot
Executable file
222
scripts/snapshot
Executable file
@ -0,0 +1,222 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
# Directory structure snapshot tool using fd (https://github.com/sharkdp/fd)
|
||||||
|
|
||||||
|
# Check if fd is installed.
# Debian/Ubuntu install the fd-find package with the executable named
# "fdfind", so accept that name too and expose it as `fd` for the rest of the
# script through a thin wrapper function.
if ! command -v fd &>/dev/null; then
  if command -v fdfind &>/dev/null; then
    fd() { command fdfind "$@"; }
  else
    {
      echo "Error: fd is not installed. Please install it first:"
      echo "  - Debian/Ubuntu: sudo apt install fd-find"
      echo "  - Fedora: sudo dnf install fd-find"
      echo "  - Arch: sudo pacman -S fd"
      echo "  - macOS: brew install fd"
      echo "  - Cargo: cargo install fd-find"
    } >&2
    exit 1
  fi
fi

# Check if ripgrep is installed, fall back to grep if not.
# NOTE(review): nothing else in this script reads GREP_CMD; kept in case it is
# meant for future use.
if command -v rg &>/dev/null; then
  GREP_CMD="rg"
else
  GREP_CMD="grep"
fi
|
||||||
|
|
||||||
|
#######################################
# Print the help text and terminate the script.
# Arguments: $1 - exit status (optional, default 0). With a non-zero status
#                 the text is written to stderr instead of stdout.
# Outputs:   help text on stdout (status 0) or stderr (status != 0)
# Returns:   never returns; exits with the given status
#######################################
usage() {
  local status=${1:-0}
  local prog
  prog=$(basename "$0")

  # The shell exits right below, so redirecting its stdout here is harmless.
  if ((status != 0)); then
    exec 1>&2
  fi

  echo "Usage: $prog [OPTIONS] [DIRECTORY...]"
  echo "Create a snapshot of directory structure(s) for benchmarking"
  echo
  echo "With no DIRECTORY, the current directory is used."
  echo
  echo "Options:"
  echo "  -o, --output FILE     Output file (default: stdout)"
  echo "                        Use {DATE} or {TIMESTAMP} for dynamic naming"
  echo "  -f, --format FORMAT   Output format: csv or json (default: csv)"
  echo "  -h, --help            Display this help message"
  echo
  echo "Examples:"
  echo "  $prog ~/projects"
  echo "  $prog -o snapshot.csv -f csv /path/to/dir"
  echo "  $prog -o snapshot-{DATE}.csv ~/dir1 ~/dir2"
  echo "  $prog -f json > snapshot.json"
  exit "$status"
}
|
||||||
|
|
||||||
|
# Default values
DIRECTORIES=()
OUTPUT="/dev/stdout"
FORMAT="csv"

#######################################
# Parse the command line.
# Globals:   OUTPUT (written), FORMAT (written), DIRECTORIES (written)
# Arguments: the script's positional parameters
# Outputs:   error messages on stderr
# Returns:   0 on success; exits 1 on a missing option value or on an
#            argument that is neither a known option nor a directory
#######################################
parse_args() {
  local abs

  while [[ $# -gt 0 ]]; do
    case "$1" in
    -o | --output)
      # Without this check `shift 2` fails when the option is the last
      # argument, shifts nothing, and the loop never terminates.
      if [[ $# -lt 2 ]]; then
        echo "Error: $1 requires an argument" >&2
        exit 1
      fi
      OUTPUT="$2"
      shift 2
      ;;
    -f | --format)
      if [[ $# -lt 2 ]]; then
        echo "Error: $1 requires an argument" >&2
        exit 1
      fi
      FORMAT="$2"
      shift 2
      ;;
    -h | --help)
      usage
      ;;
    *)
      if [[ -d "$1" ]]; then
        # cd + pwd yields an absolute path. CDPATH is cleared because cd
        # prints the directory it picked when CDPATH is used, which would
        # corrupt the captured value.
        if ! abs=$(CDPATH='' cd -- "$1" && pwd); then
          echo "Error: cannot enter directory: $1" >&2
          exit 1
        fi
        DIRECTORIES+=("$abs")
        shift
      else
        echo "Error: Unknown option or invalid directory: $1" >&2
        # usage terminates the shell it runs in; the subshell lets the help
        # text go to stderr while this script still exits non-zero.
        (usage) >&2
        exit 1
      fi
      ;;
    esac
  done

  # No directory given: snapshot the current directory
  if [[ ${#DIRECTORIES[@]} -eq 0 ]]; then
    DIRECTORIES=("$(pwd)")
  fi
}

parse_args "$@"
|
||||||
|
|
||||||
|
# Replace template variables in output filename:
#   {DATE}      -> YYYYMMDD
#   {TIMESTAMP} -> YYYY-MM-DD_HH-MM-SS
DATE_SHORT=$(date +"%Y%m%d")
TIMESTAMP=$(date +"%Y-%m-%d_%H-%M-%S")
OUTPUT="${OUTPUT//\{DATE\}/$DATE_SHORT}"
OUTPUT="${OUTPUT//\{TIMESTAMP\}/$TIMESTAMP}"

# Ensure the output directory exists. Targets directly under /dev (such as the
# default /dev/stdout) need no directory.
OUTPUT_DIR=$(dirname -- "$OUTPUT")
if [[ "$OUTPUT_DIR" != "/dev" ]]; then
  if ! mkdir -p -- "$OUTPUT_DIR"; then
    echo "Error: cannot create output directory: $OUTPUT_DIR" >&2
    exit 1
  fi
fi

# Timestamp for the snapshot (written into the JSON header)
TIMESTAMP_HUMAN=$(date +"%Y-%m-%d %H:%M:%S")
EPOCH=$(date +%s)

# Create a temporary file for processing; removed again on any exit path.
# Stop here if it cannot be created, otherwise later steps would append to and
# read from an empty file name.
if ! TEMP_DATA=$(mktemp); then
  echo "Error: cannot create temporary file" >&2
  exit 1
fi
trap 'rm -f -- "$TEMP_DATA"' EXIT
|
||||||
|
|
||||||
|
#######################################
# Turn a NUL-delimited list of paths into snapshot records.
# Arguments: $1 - root directory, copied verbatim into column 2
# Inputs:    NUL-terminated paths on stdin
# Outputs:   one line per path on stdout:
#              type|root|path|size|mtime|perms
#            type is symlink, file, dir or other; size is in bytes, mtime is
#            epoch seconds, perms is octal. The last three are empty when the
#            path cannot be inspected.
# Notes:     The root is handed to Perl through the environment rather than
#            being pasted into the program text, so quotes, "$" or "@" in a
#            path cannot alter the Perl code. lstat is used so that symlinks
#            are reported as symlinks (with their own metadata) instead of as
#            whatever they point to. A "|" or newline inside a path still
#            breaks the record format.
#######################################
emit_records() {
  SNAPSHOT_ROOT="$1" perl -0 -ne '
    chomp;
    my $path = $_;
    my ($type, $size, $mtime, $perms) = ("other", "", "", "");

    # lstat fills the "_" handle used by the file tests below
    my @st = lstat($path);
    if (@st) {
      if    (-l _) { $type = "symlink"; }
      elsif (-f _) { $type = "file"; }
      elsif (-d _) { $type = "dir"; }

      ($size, $mtime) = @st[7, 9];
      $perms = sprintf("%o", $st[2] & 07777);
    }

    print join("|", $type, $ENV{SNAPSHOT_ROOT}, $path, $size, $mtime, $perms), "\n";
  '
}

# Collect data from all directories: files, directories and symlinks,
# including hidden entries (-H), NUL-separated (-0)
for DIR in "${DIRECTORIES[@]}"; do
  fd . "$DIR" -H -t f -t d -t l -0 | emit_records "$DIR" >>"$TEMP_DATA"
done
|
||||||
|
|
||||||
|
#######################################
# Render the collected records as CSV rows or JSON entry objects.
# Arguments: $1 - "csv" or "json" (anything other than csv is treated as json)
#            $2 - file of "type|root|path|size|mtime|perms" records
# Outputs:   formatted entries on stdout (JSON: no trailing newline)
# Notes:     The root prefix is removed by literal string comparison; using
#            the root as a regular expression would mis-handle ".", "+" and
#            similar characters in directory names. Embedded double quotes are
#            doubled for CSV and backslash-escaped for JSON.
#######################################
format_entries() {
  awk -F'|' -v fmt="$1" '
    # Path relative to root; "." for the root itself; unchanged when the path
    # does not live under root.
    function rel_path(root, path,    prefix) {
      if (path == root) return "."
      prefix = root
      if (substr(prefix, length(prefix)) != "/") prefix = prefix "/"
      if (index(path, prefix) == 1) return substr(path, length(prefix) + 1)
      return path
    }

    function csv_quote(s) {
      gsub(/"/, "\"\"", s)
      return "\"" s "\""
    }

    # Escapes backslash, double quote, tab and carriage return; other control
    # characters are passed through unchanged.
    function json_escape(s,    out, i, c) {
      out = ""
      for (i = 1; i <= length(s); i++) {
        c = substr(s, i, 1)
        if (c == "\\" || c == "\"") out = out "\\" c
        else if (c == "\t") out = out "\\t"
        else if (c == "\r") out = out "\\r"
        else out = out c
      }
      return out
    }

    fmt == "csv" {
      printf "%s,%s,%s,%s,%s,%s\n",
        $1, csv_quote($2), csv_quote(rel_path($2, $3)), $4, $5, $6
      next
    }

    {
      if (seen++) printf ",\n"

      # NOTE(review): a size of 0 is emitted as null, same as a missing size;
      # kept as-is, confirm that consumers expect this.
      size = ($4 == "" || $4 == 0) ? "null" : $4
      modified = ($5 == "") ? "null" : $5
      perms = ($6 == "") ? "null" : "\"" $6 "\""

      printf "    {\n"
      printf "      \"type\": \"%s\",\n", $1
      printf "      \"directory\": \"%s\",\n", json_escape($2)
      printf "      \"path\": \"%s\",\n", json_escape(rel_path($2, $3))
      printf "      \"size\": %s,\n", size
      printf "      \"modified\": %s,\n", modified
      printf "      \"permissions\": %s\n", perms
      printf "    }"
    }
  ' "$2"
}

# Print $1 as a JSON string literal, surrounding quotes included.
json_string() {
  local s=$1
  s=${s//\\/\\\\}
  s=${s//\"/\\\"}
  s=${s//$'\t'/\\t}
  printf '"%s"' "$s"
}

# Write the CSV document (header + one row per record in file $1) to stdout.
write_csv() {
  echo "type,directory,path,size,modified,permissions"
  format_entries csv "$1"
}

#######################################
# Write the JSON document to stdout.
# Globals:   TIMESTAMP_HUMAN, EPOCH, DIRECTORIES (read)
# Arguments: $1 - file of collected records
#######################################
write_json() {
  local data_file=$1
  local dir
  local first=1

  printf '{\n'
  printf '  "timestamp": %s,\n' "$(json_string "$TIMESTAMP_HUMAN")"
  printf '  "epoch": %s,\n' "$EPOCH"
  printf '  "directories": [\n'

  # First output the list of directories
  for dir in "${DIRECTORIES[@]}"; do
    if ((first)); then
      first=0
    else
      printf ',\n'
    fi
    printf '    {\n      "path": %s,\n      "name": %s\n    }' \
      "$(json_string "$dir")" "$(json_string "$(basename -- "$dir")")"
  done

  printf '\n  ],\n  "entries": [\n'
  format_entries json "$data_file"
  printf '\n  ]\n}\n'
}

#######################################
# Create output based on format and report where it was written.
# Globals:   FORMAT, OUTPUT, TEMP_DATA (read)
# Outputs:   the snapshot to OUTPUT; a confirmation on stdout unless OUTPUT is
#            /dev/stdout; errors on stderr
# Returns:   0 on success, 1 on an unknown format or a failed write
#######################################
write_snapshot() {
  case "$FORMAT" in
  csv)
    write_csv "$TEMP_DATA" >"$OUTPUT" || return 1
    ;;
  json)
    write_json "$TEMP_DATA" >"$OUTPUT" || return 1
    ;;
  *)
    echo "Error: Unknown format: $FORMAT" >&2
    echo "Supported formats: csv, json" >&2
    return 1
    ;;
  esac

  # If output is not stdout, print a confirmation message
  if [[ "$OUTPUT" != "/dev/stdout" ]]; then
    echo "Snapshot created: $OUTPUT"
  fi
}

# Keep this as the final command: its status becomes the exit status of the
# script (1 for an unknown format or a failed write).
write_snapshot
|
||||||
Loading…
Reference in New Issue
Block a user