Skip to content

Commit beb9d4b

Browse files
committed
fix(clean): speed up large cleanup path batches
1 parent ad15f1f commit beb9d4b

File tree

2 files changed

+81
-0
lines changed

2 files changed

+81
-0
lines changed

bin/clean.sh

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,21 @@ end_section() {
222222
# shellcheck disable=SC2329
223223
normalize_paths_for_cleanup() {
224224
local -a input_paths=("$@")
225+
226+
# Fast path for large batches: O(n log n) via sort|awk instead of O(n²) bash loops.
# NOTE: a plain C-locale lexicographic sort does NOT place a parent's children
# contiguously after it — any sibling containing a byte below '/' (0x2F), such as
# '.' or '-', sorts between them (e.g. ".../com.microsoft.Word",
# ".../com.microsoft.Word.BrowserPlugin", ".../com.microsoft.Word/Data"), which
# would let the single-pass "last kept" filter leak child paths through.
# Mapping '/' to \001 (the lowest usable byte) before sorting makes every path's
# descendants sort contiguously right after it, so tracking only the last kept
# path is sufficient; the mapping is undone on output.
if [[ ${#input_paths[@]} -gt 500 ]]; then
    printf '%s\n' "${input_paths[@]}" |
        awk '{sub(/\/+$/, ""); if ($0 != "") print}' |
        tr '/' '\001' |
        LC_ALL=C sort -u |
        awk 'BEGIN { last = "" } {
            # Skip descendants of the last kept path (they are contiguous here).
            if (last != "" && substr($0, 1, length(last) + 1) == last "\001") next
            last = $0; print
        }' |
        tr '\001' '/'
    return
fi
239+
225240
local -a unique_paths=()
226241

227242
for path in "${input_paths[@]}"; do

tests/regression.bats

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,3 +187,69 @@ EOF
187187
")
188188
[[ "$result" == "loaded" ]]
189189
}
190+
191+
# Regression: normalize_paths_for_cleanup must collapse thousands of child
# paths into their parents quickly (the O(n²) bash-loop path used to hang).
@test "normalize_paths_for_cleanup handles large nested batches without hanging" {
    # Perf budget in ms; overridable so slow CI machines can widen it.
    local limit_ms="${MOLE_PERF_NORMALIZE_PATHS_LIMIT_MS:-4000}"

    run env PROJECT_ROOT="$PROJECT_ROOT" LIMIT_MS="$limit_ms" bash --noprofile --norc <<'EOF'
set -euo pipefail

# Extract just the normalize_paths_for_cleanup() definition from bin/clean.sh
# by brace matching, so the function can be exercised in isolation.
# NOTE: python3, not bare `python` — `python` is absent on modern macOS.
extracted_fn="$(mktemp "${TMPDIR:-/tmp}/normalize_paths_for_cleanup.XXXXXX")"
trap 'rm -f -- "$extracted_fn"' EXIT
EXTRACTED_FN="$extracted_fn" python3 - <<'PY'
from pathlib import Path
import os
project_root = Path(os.environ["PROJECT_ROOT"])
text = (project_root / "bin/clean.sh").read_text()
start = text.index("normalize_paths_for_cleanup() {")
depth = 0
end = None
for i in range(start, len(text)):
    ch = text[i]
    if ch == "{":
        depth += 1
    elif ch == "}":
        depth -= 1
        if depth == 0:
            end = i + 1
            break
Path(os.environ["EXTRACTED_FN"]).write_text(text[start:end] + "\n")
PY

source "$extracted_fn"

# Two parent cache dirs (one with a trailing slash) plus 12k children that
# must all collapse into the two parents.
paths=(
    "$HOME/Library/Containers/com.microsoft.Word/Data/Library/Caches"
    "$HOME/Library/Containers/com.microsoft.Excel/Data/Library/Caches/"
)
for ((i = 1; i <= 6000; i++)); do
    paths+=("$HOME/Library/Containers/com.microsoft.Word/Data/Library/Caches/item-$i")
    paths+=("$HOME/Library/Containers/com.microsoft.Excel/Data/Library/Caches/item-$i")
done

# Nanosecond wall clock (portable: macOS `date` lacks %N, bash 3.2 lacks
# EPOCHREALTIME).
now_ns() { python3 -c 'import time; print(time.time_ns())'; }

start_ns=$(now_ns)
normalized=()
while IFS= read -r line; do
    normalized+=("$line")
done < <(normalize_paths_for_cleanup "${paths[@]}")
end_ns=$(now_ns)
elapsed_ms=$(( (end_ns - start_ns) / 1000000 ))

printf 'COUNT=%s ELAPSED_MS=%s\n' "${#normalized[@]}" "$elapsed_ms"
printf '%s\n' "${normalized[@]}"

# Exactly the two parents survive, in either order, within the time budget.
[[ ${#normalized[@]} -eq 2 ]]
[[ "${normalized[0]}" == "$HOME/Library/Containers/com.microsoft.Excel/Data/Library/Caches" || "${normalized[1]}" == "$HOME/Library/Containers/com.microsoft.Excel/Data/Library/Caches" ]]
[[ "${normalized[0]}" == "$HOME/Library/Containers/com.microsoft.Word/Data/Library/Caches" || "${normalized[1]}" == "$HOME/Library/Containers/com.microsoft.Word/Data/Library/Caches" ]]
(( elapsed_ms < LIMIT_MS ))
EOF

    [ "$status" -eq 0 ]
    [[ "$output" == *"COUNT=2"* ]]
}

0 commit comments

Comments
 (0)