Skip to content

Commit f825ef4

Browse files
committed
Improve performance of glob path matcher
This replaces the usage of `fs/glob` with a besboke implementation based on the original. Primarily this implementation allows combining multiple glob patterns to avoid traversing the file tree multiple times. This also directly matches for `deps.edn` files which avoids the need for a second pass on the final results.
1 parent 7aaf056 commit f825ef4

File tree

2 files changed

+90
-11
lines changed

2 files changed

+90
-11
lines changed

packages/kmono-core/src/k16/kmono/core/fs.clj

Lines changed: 89 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,11 @@
22
(:require
33
[babashka.fs :as fs]
44
[clojure.edn :as edn]
5-
[clojure.string :as str]))
5+
[clojure.string :as str])
6+
(:import
7+
java.nio.file.FileSystems
8+
java.nio.file.Path
9+
java.nio.file.PathMatcher))
610

711
(set! *warn-on-reflection* true)
812

@@ -69,13 +73,89 @@
6973
{:file-path file-path}
7074
ex)))))
7175

72-
(defn find-package-directories [root packages-glob]
76+
(defn- escape-glob-chars
77+
"Escapes special glob characters in the input string."
78+
[s]
79+
(let [special-chars #{\\ \* \? \[ \] \{ \}}
80+
escape-char (fn [c]
81+
(if (contains? special-chars c)
82+
(str "\\" c)
83+
(str c)))]
84+
(apply str (mapv escape-char s))))
85+
86+
(defn- glob->matcher ^PathMatcher [root glob]
87+
(let [base-path (-> root fs/absolutize fs/normalize str)
88+
escaped-base-path (escape-glob-chars base-path)
89+
pattern (let [separator (when-not (str/ends-with? base-path fs/file-separator)
90+
(str fs/file-separator))]
91+
(str escaped-base-path separator glob))
92+
pattern (str "glob:" pattern)]
93+
(.getPathMatcher
94+
(FileSystems/getDefault)
95+
pattern)))
96+
97+
(defn- globs->matcher ^PathMatcher [root globs]
98+
(let [matchers (mapv #(glob->matcher root %) globs)]
99+
(proxy [PathMatcher] []
100+
(matches [^Path path]
101+
(reduce
102+
(fn [acc matcher]
103+
(if (PathMatcher/.matches matcher path)
104+
(reduced true)
105+
acc))
106+
false
107+
matchers)))))
108+
109+
(def ^:no-doc ?Path
110+
[:fn {:error/message "Should be an instance of java.nio.file.Path"}
111+
(partial instance? Path)])
112+
113+
;; TODO: It would be nice if this could skip/not traverse into directories that
114+
;; are included in gitignored files
115+
;;
116+
;; This would require either a way to convert `.gitignore` syntax to Java
117+
;; PathMatcher compatible globs or using something like JGit.
118+
(defn find-package-directories
119+
"Find packages in a given `root` that are described by the given set of
120+
`package-globs`."
121+
{:malli/schema [:-> :string [:or :string [:set :string]] [:vector ?Path]]}
122+
[root package-globs]
73123
(let [root (-> (fs/path root)
74124
fs/normalize
75-
fs/absolutize)]
76-
(into [root]
77-
(comp
78-
(filter (fn [path]
79-
(= "deps.edn" (fs/file-name path))))
80-
(map fs/parent))
81-
(fs/glob root packages-glob))))
125+
fs/absolutize)
126+
127+
matcher (if (string? package-globs)
128+
(glob->matcher root package-globs)
129+
(globs->matcher root package-globs))
130+
131+
base-path (-> root fs/absolutize fs/normalize str)
132+
results (atom (transient #{root}))
133+
past-root? (volatile! nil)
134+
135+
match (fn match-path [^Path path]
136+
(when (and (PathMatcher/.matches matcher path)
137+
(= "deps.edn" (fs/file-name path)))
138+
(swap! results conj! (fs/parent path)))
139+
nil)]
140+
141+
(fs/walk-file-tree
142+
base-path
143+
{:follow-links false
144+
:pre-visit-dir (fn [dir _attrs]
145+
(cond
146+
(and @past-root?
147+
(fs/hidden? dir))
148+
:skip-subtree
149+
150+
(not @past-root?)
151+
(do (vreset! past-root? true)
152+
:continue)
153+
154+
:else :continue))
155+
156+
:visit-file (fn [path _attrs]
157+
(when-not (fs/hidden? path)
158+
(match path))
159+
:continue)})
160+
161+
(persistent! @results)))

packages/kmono-core/src/k16/kmono/core/packages.clj

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -142,8 +142,7 @@
142142
(let [globs (:packages workspace-config)
143143
globs (if (string? globs) #{globs} globs)
144144

145-
dirs (mapcat #(core.fs/find-package-directories project-root %)
146-
globs)
145+
dirs (core.fs/find-package-directories project-root globs)
147146

148147
packages
149148
(into {}

0 commit comments

Comments
 (0)