Skip to content

Commit 70948a2

Browse files
mkphilippamarkovics
authored andcommitted
Speed up analysis of gitlibs using git sha, resolve protocol methods (#509)
This significantly speeds up analysis for gitlibs by using the git sha as a hash (thus treating them as immutable) instead of handling them on a per-form level. Also resolve protocol methods to the defining protocol, which would previously not be detected. Lastly drop the location cache which is no longer needed.
1 parent 8cef855 commit 70948a2

File tree

6 files changed

+66
-70
lines changed

6 files changed

+66
-70
lines changed

CHANGELOG.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,14 @@ Changes can be:
1818

1919
Use these features to build a new welcome page that gives more useful information, including links to potential notebooks in the project.
2020

21+
* ⚡️ Speed up analysis of gitlibs using git sha, resolve protocol methods
22+
23+
This significantly speeds up analysis for gitlibs by using the git sha as a hash (thus treating them as immutable) instead of handling them on a per-form level.
24+
25+
Also resolve protocol methods to the defining protocol, which would previously not be detected.
26+
27+
Lastly drop the location cache which is no longer needed.
28+
2129
* 🍕 `clerk/fragment` for splicing a seq of values into the document as if it were produced by results of individual cells. Useful when programmatically generating content.
2230

2331
* 🚨 Change `nextjournal.clerk.render/clerk-eval` to not recompute the currently shown document when using the 1-arity version. Added a second arity that takes an opts map with a `:recompute?` key.

notebooks/emmy.clj

Lines changed: 27 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,39 @@
1-
(ns emmy-repro
1+
(ns emmy
22
(:require [emmy.env :as e]
33
[emmy.mechanics.lagrange]))
44

5-
;; ## BUG 1:
5+
;; `emmy.env` is importing vars via potemkin. This notebook exists to
6+
;; test that the analysis finds the correct locations for imported
7+
;; vars.
68

7-
;; This notebook takes close to 2 seconds to evaluate:
8-
9-
;; Clerk evaluated '/Users/sritchie/code/clj/clerk-demo/notebooks/emmy_repro.clj' in 1853.674042ms.
10-
11-
;; For the final Langrangian in generalized coordinates (the angles of each
12-
;; segment) by composing `L-rect` with a properly transformed `angles->rect`
13-
;; coordinate transform!
14-
15-
;; ## BUG 2:
16-
;;
17-
;; The following form:
18-
19-
#_
209
(let [L (emmy.mechanics.lagrange/L-pendulum 'g 'm 'l)]
2110
(((e/Lagrange-equations L)
2211
(e/literal-function 'theta_1))
2312
't))
2413

25-
;; Evaluates to this:
26-
(e/literal-number
27-
'(- (* 1/2 m 2 l (((expt D 2) theta_1) t) l) (* g m l (- (sin (theta_1 t))))))
14+
(comment
15+
(require '[nextjournal.clerk.analyzer :as analyzer]
16+
'[clj-async-profiler.core :as prof])
17+
18+
19+
(analyzer/unhashed-deps (:->analysis-info @nextjournal.clerk.webserver/!doc))
20+
21+
(analyzer/find-location 'emmy.util.stopwatch/repr)
22+
(analyzer/find-location 'emmy.value/zero?)
23+
(meta #'emmy.value/zero?)
24+
25+
(meta #'emmy.value/Value)
26+
27+
(def parsed
28+
(nextjournal.clerk.parser/parse-file "notebooks/emmy.clj"))
2829

30+
(def analyzed
31+
(analyzer/analyze-doc parsed))
32+
33+
(time (do
34+
(prof/profile (dotimes [i 10] (analyzer/build-graph analyzed)))
35+
:done))
2936

30-
;; But if I include it in a notebook, I get this:
37+
)
3138

32-
;; Execution error (NullPointerException) at clojure.tools.analyzer.jvm.utils/members* (utils.clj:272).
33-
;; Cannot invoke "java.lang.Class.getName()" because the return value of "clojure.lang.IFn.invoke(Object)" is null
39+
#_(nextjournal.clerk/clear-cache!)

notebooks/profile.clj

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
(def parsed
1111
(parser/parse-file "notebooks/rule_30.clj"))
1212

13+
14+
1315
(def analyzed
1416
(analyzer/analyze-doc parsed))
1517

@@ -19,6 +21,8 @@
1921

2022

2123

24+
(prof/profile (analyzer/build-graph analyzed))
25+
2226
(prof/profile (analyzer/build-graph analyzed))
2327

2428
(if-not @prof.ui/current-server

src/nextjournal/clerk.clj

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -526,8 +526,6 @@
526526
* the form of an anonymous expression"
527527
([]
528528
(swap! webserver/!doc dissoc :blob->result)
529-
(reset! analyzer/!file->analysis-cache {})
530-
(reset! analyzer/!ns->loc-cache {})
531529
(if (fs/exists? config/cache-dir)
532530
(do (fs/delete-tree config/cache-dir)
533531
(prn :cache-dir/deleted config/cache-dir))

src/nextjournal/clerk/analyzer.clj

Lines changed: 22 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,10 @@
145145
(form->ex-data form)
146146
e)))))))
147147

148+
(defn ^:private var->protocol [v]
149+
(or (:protocol (meta v))
150+
v))
151+
148152
(defn analyze [form]
149153
(let [!deps (atom #{})
150154
mexpander (fn [form env]
@@ -173,7 +177,7 @@
173177
(when-not (= op :the-var)
174178
(list `deref (symbol var))))))
175179
nodes)
176-
deps (set/union (set/difference (into #{} (map symbol) @!deps) vars)
180+
deps (set/union (set/difference (into #{} (map (comp symbol var->protocol)) @!deps) vars)
177181
deref-deps
178182
(class-deps analyzed)
179183
(when (var? form) #{(symbol form)}))
@@ -227,7 +231,6 @@
227231
#_(type (first (:deps (analyze #'analyze-form))))
228232
#_(-> (analyze '(proxy [clojure.lang.ISeq] [])))
229233

230-
231234
(defn- circular-dependency-error? [e]
232235
(-> e ex-data :reason #{::dep/circular-dependency}))
233236

@@ -371,12 +374,12 @@
371374
(defn analyze-file
372375
([file]
373376
(let [current-file-sha (sha1-base58 (fs/read-all-bytes file))]
374-
(or (when-let [{:as cached-analysis :keys [file-sha]} (@!file->analysis-cache file)]
375-
(when (= file-sha current-file-sha)
376-
cached-analysis))
377-
(let [analysis (analyze-doc {:file-sha current-file-sha} (parser/parse-file {} file))]
378-
(swap! !file->analysis-cache assoc file analysis)
379-
analysis))))
377+
(or (when-let [{:as cached-analysis :keys [file-sha]} (@!file->analysis-cache file)]
378+
(when (= file-sha current-file-sha)
379+
cached-analysis))
380+
(let [analysis (analyze-doc {:file-sha current-file-sha} (parser/parse-file {} file))]
381+
(swap! !file->analysis-cache assoc file analysis)
382+
analysis))))
380383
([state file]
381384
(analyze-doc state (parser/parse-file {} file))))
382385

@@ -470,8 +473,8 @@
470473
(ns->jar ns))
471474
(symbol->jar sym)))))
472475

473-
474476
#_(find-location `inc)
477+
#_(find-location `*print-dup*)
475478
#_(find-location '@nextjournal.clerk.webserver/!doc)
476479
#_(find-location `dep/depend)
477480
#_(find-location 'java.util.UUID)
@@ -480,25 +483,6 @@
480483
#_(find-location 'io.methvin.watcher.hashing.FileHasher/DEFAULT_FILE_HASHER)
481484
#_(find-location 'String)
482485

483-
(defn find-location+cache [!ns->loc sym]
484-
(if (deref? sym)
485-
(find-location+cache !ns->loc (second sym))
486-
(if-let [ns-sym (and (qualified-symbol? sym) (-> sym namespace symbol))]
487-
(or (@!ns->loc ns-sym)
488-
(when-let [loc (find-location sym)]
489-
(swap! !ns->loc assoc ns-sym loc)
490-
loc))
491-
(find-location sym))))
492-
493-
(def !ns->loc-cache (atom {}))
494-
495-
#_(reset! !ns->loc-cache {})
496-
497-
(defn find-location-cached [sym]
498-
(find-location+cache !ns->loc-cache sym))
499-
500-
#_(find-location-cached `inc)
501-
502486
(def hash-jar
503487
(memoize (fn [f]
504488
{:jar f :hash (sha1-base58 (io/input-stream f))})))
@@ -530,22 +514,24 @@
530514
(assoc :counter 0))]
531515
(let [unhashed (unhashed-deps ->analysis-info)
532516
loc->syms (apply dissoc
533-
(group-by find-location-cached unhashed)
517+
(group-by find-location unhashed)
534518
analyzed-file-set)]
535519
(if (and (seq loc->syms) (< counter 10))
536520
(recur (-> (reduce (fn [g [source symbols]]
537-
(if (or (nil? source)
538-
(str/ends-with? source ".jar"))
539-
(update g :->analysis-info merge (into {} (map (juxt identity (constantly (if source (hash-jar source) {})))) symbols))
540-
(-> g
541-
(update :analyzed-file-set conj source)
542-
(merge-analysis-info (analyze-file source)))))
521+
(let [jar? (or (nil? source)
522+
(str/ends-with? source ".jar"))
523+
gitlib-hash (and (not jar?)
524+
(second (re-find #".gitlibs/libs/.*/(\b[0-9a-f]{5,40}\b)/" (fs/unixify source))))]
525+
(if (or jar? gitlib-hash)
526+
(update g :->analysis-info merge (into {} (map (juxt identity (constantly (if source (or gitlib-hash (hash-jar source)) {})))) symbols))
527+
(-> g
528+
(update :analyzed-file-set conj source)
529+
(merge-analysis-info (analyze-file source))))))
543530
state
544531
loc->syms)
545532
(update :counter inc)))
546533
(dissoc state :analyzed-file-set :counter)))))
547534

548-
549535
(comment
550536
(def parsed (parser/parse-file {:doc? true} "src/nextjournal/clerk/webserver.clj"))
551537
(def analysis (time (-> parsed analyze-doc build-graph)))

test/nextjournal/clerk/analyzer_test.clj

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,11 @@
7575
(is (every? symbol? (:deps (ana/analyze '(.hashCode clojure.lang.Compiler)))))
7676

7777
(is (every? symbol? (:deps (ana/analyze '(defprotocol MyProtocol
78-
(-check [_]))))))))
78+
(-check [_])))))))
79+
80+
(testing "protocol methods are resolved to protocol in deps"
81+
(is (= '#{nextjournal.clerk.analyzer/BoundedCountCheck}
82+
(:deps (ana/analyze 'nextjournal.clerk.analyzer/-exceeds-bounded-count-limit?))))))
7983

8084
(deftest read-string-tests
8185
(testing "read-string should read regex's such that value equalility is preserved"
@@ -194,16 +198,6 @@
194198
(testing "weavejester.dependency/graph"
195199
(is (re-find #"dependency-.*\.jar" (ana/find-location 'weavejester.dependency/graph)))))
196200

197-
(deftest find-location+cache
198-
(let [!ns->loc (atom {})]
199-
(testing "populates the cache"
200-
(ana/find-location+cache !ns->loc 'clojure.core/inc)
201-
(is (= (@!ns->loc 'clojure.core) (ana/find-location 'clojure.core/inc))))
202-
203-
(testing "doesn't call `find-location` with cache populated"
204-
(with-redefs [ana/find-location (fn [_] :bogus/value)]
205-
(is (re-find #"clojure-1\..*\.jar" (ana/find-location+cache !ns->loc 'clojure.core/inc)))))))
206-
207201
(defn analyze-string [s]
208202
(-> (parser/parse-clojure-string {:doc? true} s)
209203
ana/analyze-doc

0 commit comments

Comments
 (0)