#!/bin/bash
#
# Mirror a directory tree into a flat, content-addressed output directory.
#
# Usage: <script> <source_directory> <output_directory>
#
# Every regular file under <source_directory> is copied to
# <output_directory>/<BLAKE3 hash of its content>, and
# <output_directory>/info.json records, per file, its path relative to the
# source directory ("name"), its size in bytes ("size") and its hash ("hash").
# An existing info.json is used as the base, so unchanged files whose blob is
# still present are not copied again. Entries for files that no longer exist
# are dropped, and top-level regular files in the output directory that are
# neither info.json nor a currently referenced hash are removed.
#
# Requires: b3sum, jq, realpath.
# NOTE(review): the "delete_list" key of a freshly created info.json is written
# as an empty array and is never updated by this script; confirm with the
# consumers of info.json whether that is intended.

set -euo pipefail

# Print an error message to stderr and abort with status 1.
die() {
  printf 'Error: %s\n' "$*" >&2
  exit 1
}

# Apply a jq program (all arguments are passed to jq) to the working copy of
# the index, replacing it only if jq succeeds.
json_edit() {
  jq "$@" "$JSON_CONTENT" > "$JSON_CONTENT.tmp" \
    && mv -- "$JSON_CONTENT.tmp" "$JSON_CONTENT"
}

if [ "$#" -ne 2 ]; then
  echo "Usage: $0 <source_directory> <output_directory>"
  echo "Example: $0 ./my_files ./output"
  exit 1
fi

for tool in b3sum jq realpath; do
  command -v "$tool" > /dev/null || die "required tool not found: $tool"
done

[[ -d "$1" ]] || die "source directory does not exist: $1"
SOURCE_DIR=$(realpath -- "$1") || die "cannot resolve source directory: $1"

# Create the output directory before resolving it: some realpath
# implementations fail on paths that do not exist yet.
mkdir -p -- "$2" || die "cannot create output directory: $2"
OUTPUT_DIR=$(realpath -- "$2") || die "cannot resolve output directory: $2"
INFO_JSON="$OUTPUT_DIR/info.json"

# The cleanup step deletes every top-level file of the output directory that
# is not a referenced hash, so the source must never be the output directory
# or live inside it.
if [[ "${SOURCE_DIR%/}/" == "${OUTPUT_DIR%/}/"* ]]; then
  die "source directory must not be the output directory or inside it"
fi

echo "Processing files from: $SOURCE_DIR"
echo "Output directory: $OUTPUT_DIR"
echo ""

# All scratch files live in one private directory removed on any exit path.
WORK_DIR=$(mktemp -d) || die "mktemp failed"
trap 'rm -rf -- "${WORK_DIR:?}"' EXIT
PROCESSED_HASHES="$WORK_DIR/processed_hashes" # hashes still referenced
JSON_CONTENT="$WORK_DIR/info.json"            # working copy of the index
DELETE_LIST="$WORK_DIR/delete_list"           # hashes that became obsolete
PROCESSED_PATHS="$WORK_DIR/processed_paths"   # relative paths seen this run
: > "$PROCESSED_HASHES"
: > "$DELETE_LIST"
: > "$PROCESSED_PATHS"

if [[ -f "$INFO_JSON" ]]; then
  echo "Found existing info.json, using as base..."
  cp -- "$INFO_JSON" "$JSON_CONTENT"
  jq -e '(.files | type) == "array"' "$JSON_CONTENT" > /dev/null \
    || die "existing info.json has no \"files\" array: $INFO_JSON"
else
  echo "Creating new info.json..."
  echo '{"delete_list":[],"files":[]}' > "$JSON_CONTENT"
fi

cd "$SOURCE_DIR" || exit 1

echo "Finding and processing files..."
while IFS= read -r -d '' file; do
  rel_path="${file#./}"

  # Skip anything inside the output directory (when it is nested in the
  # source). The comparison is a literal prefix test on absolute paths, so it
  # works with the relative names find prints and with glob characters.
  if [[ "${SOURCE_DIR%/}/$rel_path" == "${OUTPUT_DIR%/}/"* ]]; then
    continue
  fi

  echo "Processing: $file"
  printf '%s\n' "$rel_path" >> "$PROCESSED_PATHS"

  # --no-names prints the bare hash, avoiding b3sum's escaped output format
  # for file names that contain backslashes or newlines.
  current_hash=$(b3sum --no-names "$file") || die "failed to hash: $file"
  [[ -n "$current_hash" ]] || die "empty hash for: $file"
  # BSD stat first, GNU stat as fallback.
  size=$(stat -f%z "$file" 2> /dev/null || stat -c%s "$file") \
    || die "cannot determine size of: $file"

  existing_entry=$(jq -c --arg path "$rel_path" \
    'first(.files[] | select(.name == $path))' "$JSON_CONTENT") \
    || die "cannot query index for: $rel_path"

  if [[ -n "$existing_entry" ]]; then
    old_hash=$(jq -r '.hash' <<< "$existing_entry")
    if [[ "$current_hash" == "$old_hash" && -f "$OUTPUT_DIR/$old_hash" ]]; then
      echo "Unchanged, skipping..."
      printf '%s\n' "$old_hash" >> "$PROCESSED_HASHES"
      continue
    fi
    echo "Changed, updating..."
    printf '%s\n' "$old_hash" >> "$DELETE_LIST"
  else
    echo "New file..."
  fi

  cp -- "$file" "$OUTPUT_DIR/$current_hash" || die "failed to copy: $file"
  printf '%s\n' "$current_hash" >> "$PROCESSED_HASHES"

  # Replace any previous entry for this path with the fresh one in one pass.
  json_edit --arg path "$rel_path" \
    --arg size "$size" \
    --arg hash "$current_hash" \
    'del(.files[] | select(.name == $path))
     | .files += [{"name": $path, "size": ($size|tonumber), "hash": $hash}]' \
    || die "failed to update index for: $rel_path"
done < <(find . -type f -print0)

echo "Removing entries for deleted files..."
while IFS= read -r entry; do
  path=$(jq -r '.name' <<< "$entry")
  hash=$(jq -r '.hash' <<< "$entry")
  if ! grep -Fxq -- "$path" "$PROCESSED_PATHS"; then
    echo "Removing entry for deleted file: $path"
    json_edit --arg path "$path" 'del(.files[] | select(.name == $path))' \
      || die "failed to remove index entry for: $path"
    if [[ -f "$OUTPUT_DIR/$hash" ]]; then
      echo "Marking for deletion: $hash"
      printf '%s\n' "$hash" >> "$DELETE_LIST"
    fi
  fi
done < <(jq -c '.files[]' "$JSON_CONTENT")

echo "Writing info.json..."
cp -- "$JSON_CONTENT" "$INFO_JSON" || die "failed to write: $INFO_JSON"

echo "Cleaning up unused files..."
for file in "$OUTPUT_DIR"/*; do
  if [[ "$file" == "$INFO_JSON" ]]; then
    continue
  fi
  # Also skips the literal pattern left behind when the glob matches nothing.
  if [[ ! -f "$file" ]]; then
    continue
  fi
  name="${file##*/}"
  # Fixed-string, whole-line match: file names are data, not regexes.
  if ! grep -Fxq -- "$name" "$PROCESSED_HASHES"; then
    echo "Removing unused file: $name"
    rm -- "$file"
  fi
done

echo "Removing old and deleted files..."
sort -u "$DELETE_LIST" | while IFS= read -r hash; do
  # Ignore blank names, names with path separators and the index itself, so
  # a malformed index cannot make this pass delete anything but a blob.
  if [[ -z "$hash" || "$hash" == */* || "$hash" == "info.json" ]]; then
    continue
  fi
  # A hash can be obsolete for one path yet still be the content of another
  # (or have just been re-copied); never delete a blob that is still in use.
  if grep -Fxq -- "$hash" "$PROCESSED_HASHES"; then
    continue
  fi
  if [[ -f "$OUTPUT_DIR/$hash" ]]; then
    echo "Removing: $hash"
    rm -- "$OUTPUT_DIR/$hash"
  fi
done

echo ""
echo "Done! Files processed and info.json updated."