|
| 1 | +(ns eca.features.tools.smart-edit |
| 2 | + "Smart file editing with advanced matching strategies. |
| 3 | +
|
| 4 | + This namespace implements multi-tier matching for file edits: |
| 5 | + - Flexible matching (whitespace-agnostic with indentation preservation) |
| 6 | + - Regex matching (tokenized matching with flexible whitespace) |
| 7 | +
|
| 8 | + Note: This only supports SINGLE replacement (no multiple occurrences). |
| 9 | + For multiple replacements, use eca.features.tools.text-match instead." |
| 10 | + (:require [clojure.string :as string] |
| 11 | + [eca.features.tools.text-match :as text-match] |
| 12 | + [eca.logger :as logger]) |
| 13 | + (:import |
| 14 | + [java.util.regex Pattern])) |
| 15 | + |
| 16 | +(set! *warn-on-reflection* true) |
| 17 | + |
| 18 | +;;; Flexible Matching (Whitespace-Agnostic) |
| 19 | + |
(defn- try-flexible-match
  "Match `original-content` against `file-content` line by line, ignoring
  leading/trailing whitespace on every line, and splice in `new-content`
  re-indented to fit the matched region.

  The indentation handed to `text-match/apply-indentation` is detected (via
  `text-match/detect-indentation`) from the first non-blank line of the matched
  region, falling back to the region's first line when every line is blank.

  Arguments:
  - file-content:     full text of the file being edited
  - original-content: text to look for
  - new-content:      replacement text
  - path:             file path, used only in debug log messages

  Returns:
  - {:original-full-content ... :new-full-content ... :strategy :flexible}
    when exactly one region matches
  - {:error :ambiguous, :match-count n, :original-full-content file-content}
    when more than one region matches
  - nil when nothing matches (or the search text yields no lines)

  Note: lines are produced by `string/split-lines` and re-joined with a single
  LF, so trailing empty lines of `file-content` are dropped by the split and
  are not re-added here."
  [file-content original-content new-content path]
  (let [file-lines (vec (string/split-lines file-content))
        ;; Trim every file line once instead of re-trimming each window.
        file-lines-trimmed (mapv string/trim file-lines)
        search-lines-trimmed (mapv string/trim (string/split-lines original-content))
        search-len (count search-lines-trimmed)
        ;; Last index at which a window of search-len lines still fits;
        ;; negative when the file is shorter than the search text.
        last-start (- (count file-lines) search-len)]
    (when (pos? search-len)
      (let [match-indexes (filterv (fn [idx]
                                     (= search-lines-trimmed
                                        (subvec file-lines-trimmed idx (+ idx search-len))))
                                   (range 0 (inc last-start)))
            cnt (count match-indexes)]
        (case cnt
          0 nil
          1 (let [idx (first match-indexes)
                  window (subvec file-lines idx (+ idx search-len))
                  ;; Use indentation from the first non-blank line; fall back to
                  ;; the first line. This is written as an explicit `or` because
                  ;; threading through `(or (first window))` with `->>` puts
                  ;; `(first window)` first, and that value is always truthy.
                  indent-source (or (first (drop-while string/blank? window))
                                    (first window))
                  indentation (text-match/detect-indentation indent-source)
                  new-lines (string/split-lines new-content)
                  indented-new (text-match/apply-indentation (string/join "\n" new-lines) indentation)
                  indented-new-lines (string/split-lines indented-new)
                  result-lines (concat (take idx file-lines)
                                       indented-new-lines
                                       (drop (+ idx search-len) file-lines))]
              (logger/debug "Content matched using flexible matching for" path)
              {:original-full-content file-content
               :new-full-content (string/join "\n" result-lines)
               :strategy :flexible})
          ;; More than one candidate region: refuse to guess.
          (do (logger/debug "Flexible match ambiguous for" path "- matches:" cnt)
              {:error :ambiguous
               :match-count cnt
               :original-full-content file-content}))))))
| 67 | + |
| 68 | +;;; Regex Matching (Tokenized) |
| 69 | + |
(defn- tokenize-by-delimiters
  "Break a search string into minimal tokens for flexible regex matching.

  Each of the delimiter characters ( ) : [ ] { } > < = , ; is surrounded with
  spaces so it becomes a token of its own; the padded text is then split on
  runs of whitespace and blank pieces are discarded.

  Returns a vector of token strings; a nil input yields an empty vector."
  [s]
  (if (nil? s)
    []
    (let [delimiter? (set "():[]{}><=,;")
          ;; Pad every delimiter character with a space on both sides.
          padded (apply str (map (fn [ch]
                                   (if (delimiter? ch)
                                     (str " " ch " ")
                                     ch))
                                 s))]
      (into []
            (remove string/blank?)
            (string/split padded #"\s+")))))
| 85 | + |
(defn- try-regex-match
  "Locate `original-content` in `file-content` with a token-based regex and
  replace the single hit with `new-content`.

  The search text is tokenized with `tokenize-by-delimiters`; every token is
  quoted literally and the tokens are joined with \\s*, prefixed by a multiline
  start-of-line anchor that captures leading spaces/tabs. The captured
  indentation is passed to `text-match/apply-indentation` together with
  `new-content` to build the replacement.

  `path` is used only in debug log messages.

  Returns:
  - nil when the search text has no tokens or the pattern does not match
  - {:error :ambiguous, :match-count n, :original-full-content file-content}
    when the pattern matches more than once
  - {:original-full-content ... :new-full-content ... :strategy :regex}
    when it matches exactly once"
  [file-content original-content new-content path]
  (when-let [tokens (seq (tokenize-by-delimiters original-content))]
    (let [token-regex (->> tokens
                           (map #(Pattern/quote %))
                           (string/join "\\s*"))
          pattern (re-pattern (str "(?m)^([ \\t]*)" token-regex))
          hits (re-seq pattern file-content)
          hit-count (count hits)]
      (cond
        (zero? hit-count)
        nil

        (= 1 hit-count)
        (let [;; Each hit is [whole-match leading-whitespace].
              [_ leading-ws] (first hits)
              indented (text-match/apply-indentation new-content (or leading-ws ""))
              ;; Escape $ and \ so the replacement is inserted literally.
              replacement (java.util.regex.Matcher/quoteReplacement indented)
              updated (string/replace-first file-content pattern replacement)]
          (logger/debug "Content matched using regex matching for" path)
          {:original-full-content file-content
           :new-full-content updated
           :strategy :regex})

        :else
        (do (logger/debug "Regex match ambiguous for" path "- matches:" hit-count)
            {:error :ambiguous
             :match-count hit-count
             :original-full-content file-content})))))
| 115 | + |
(defn apply-smart-edit
  "Apply a single-occurrence edit to `file-content` using multi-tier matching.
  SINGLE REPLACEMENT ONLY - does not support multiple occurrences.

  Matching order:
  1. `text-match/apply-content-change-to-string` on the raw inputs
     (exact + normalized matching per the text-match namespace; the `false`
     argument presumably disables multi-occurrence replacement - confirm
     against text-match)
  2. Flexible match (whitespace-agnostic), on LF-normalized inputs
  3. Regex match (tokenized), on LF-normalized inputs

  The fallbacks (2, 3) run only when step 1 neither produced
  :new-full-content nor reported {:error :ambiguous}. An ambiguous result from
  the flexible matcher is returned as-is and the regex matcher is not tried.

  Arguments:
  - file-content:     current full text of the file
  - original-content: text to replace
  - new-content:      replacement text
  - path:             file path, forwarded to the matchers for logging

  Returns the winning strategy's result map. On success :original-full-content
  is set to the untouched `file-content`, and :new-full-content is passed
  through `text-match/restore-trailing-newline` and
  `text-match/restore-line-ending` with the line ending detected from
  `file-content`. Error maps that carry :original-full-content also have it
  set to the untouched `file-content`. When nothing matched, the text-match
  result is returned."
  [file-content original-content new-content path]
  (let [;; Detect original line ending
        line-ending (text-match/detect-line-ending file-content)
        ;; Normalize to LF for the fallback strategies
        norm-file (text-match/normalize-to-lf file-content)
        norm-orig (text-match/normalize-to-lf original-content)
        norm-new (text-match/normalize-to-lf new-content)

        ;; Try text-match strategies first (exact + normalized)
        text-match-result (text-match/apply-content-change-to-string file-content original-content new-content false path)

        ;; A text-match success or ambiguity is final; otherwise fall back.
        result (if (or (:new-full-content text-match-result)
                       (= :ambiguous (:error text-match-result)))
                 text-match-result
                 (or (try-flexible-match norm-file norm-orig norm-new path)
                     (try-regex-match norm-file norm-orig norm-new path)
                     text-match-result))]
    (cond
      ;; Success: restore trailing newline and line endings, and report the
      ;; exact pre-edit content.
      (:new-full-content result)
      (-> result
          (assoc :original-full-content file-content)
          (update :new-full-content #(text-match/restore-trailing-newline file-content %))
          (update :new-full-content text-match/restore-line-ending line-ending))

      ;; Failure from a fallback strategy: it was given the LF-normalized file,
      ;; so replace its :original-full-content with the real pre-edit content.
      (and (map? result) (contains? result :original-full-content))
      (assoc result :original-full-content file-content)

      :else
      result)))
0 commit comments