Skip to content

Commit 80b0770

Browse files
committed
Add more reliable resource leak detector for tests
See docstrings in `aleph.resource-leak-detector` for details. This patch also includes the following changes: * Rename the `:leak-level-paranoid` Leiningen profile to `:leak-detection` since it now also sets everything up to use the new custom detector. * Introduce new test selector `:leak` for tests which intentionally leak (not run by default since they require the `:leak-detection` profile to be active, too). * Introduce intentionally leaking `aleph.http-test/test-leak-in-raw-stream-handler` as an example and to document a case where users are responsible for releasing buffers. * Eliminate all `(netty/leak-detector-level! :paranoid)` calls from tests. Use new leak detection API instead (see below). * Place `(aleph.resource-leak-detector/instrument-tests!)` at the end of all relevant test namespaces to detect leaks (only effective when running with `:leak-detection` profile).
1 parent 1c67732 commit 80b0770

11 files changed

+258
-13
lines changed

project.clj

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,17 +57,24 @@
5757
"-Dorg.slf4j.simpleLogger.log.io.netty.util=error"
5858
"-Dorg.slf4j.simpleLogger.log.io.netty.channel=warn"]}
5959
:test {:jvm-opts ["-Dorg.slf4j.simpleLogger.defaultLogLevel=off"]}
60-
:leak-level-paranoid {:jvm-opts ["-Dio.netty.leakDetectionLevel=PARANOID"]}
60+
:leak-detection {:aot [aleph.resource-leak-detector]
61+
:jvm-opts ["-Dio.netty.leakDetection.level=PARANOID"
62+
"-Dio.netty.customResourceLeakDetector=aleph.resource_leak_detector"
63+
"-Dorg.slf4j.simpleLogger.log.aleph.resource-leak-detector=info"
64+
;; These settings empirically make leak detection more reliable
65+
"-Dio.netty.leakDetection.targetRecords=1"
66+
"-Dio.netty.allocator.type=unpooled"]}
6167
:pedantic {:pedantic? :abort}
6268
:trace {:jvm-opts ["-Dorg.slf4j.simpleLogger.defaultLogLevel=trace"]}
6369
:profile {:dependencies [[com.clojure-goes-fast/clj-async-profiler "1.1.1"]]
6470
:jvm-opts ["-Djdk.attach.allowAttachSelf"]}}
6571
:java-source-paths ["src-java"]
6672
:test-selectors {:default #(not
67-
(some #{:benchmark :stress}
73+
(some #{:benchmark :stress :leak}
6874
(cons (:tag %) (keys %))))
6975
:benchmark :benchmark
7076
:stress :stress
77+
:leak :leak ; requires :leak-detection profile
7178
:all (constantly true)}
7279
:jvm-opts ^:replace ["-server"
7380
"-Xmx2g"

test/aleph/http_continue_test.clj

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
[aleph.flow :as flow]
44
[aleph.http :as http]
55
[aleph.netty :as netty]
6+
[aleph.resource-leak-detector]
67
[aleph.tcp :as tcp]
78
[clj-commons.byte-streams :as bs]
89
[clojure.string :as str]
@@ -105,3 +106,5 @@
105106
(test-reject {:continue-handler
106107
(constantly (d/success-deferred {:status 401}))}
107108
"401 Unauthorized")))
109+
110+
(aleph.resource-leak-detector/instrument-tests!)

test/aleph/http_test.clj

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
[aleph.http :as http]
55
[aleph.http.core :as http.core]
66
[aleph.netty :as netty]
7+
[aleph.resource-leak-detector]
78
[aleph.ssl :as test-ssl]
89
[aleph.tcp :as tcp]
910
[aleph.testutils :refer [str=]]
@@ -58,7 +59,6 @@
5859
;;;
5960

6061
(set! *warn-on-reflection* false)
61-
(netty/leak-detector-level! :paranoid)
6262

6363
(def ^:dynamic ^IPool *pool* nil)
6464
(def ^:dynamic *connection-options* nil)
@@ -1438,3 +1438,18 @@
14381438
:http-versions [:http1]})]
14391439
(is (instance? IllegalArgumentException result))
14401440
(is (= "use-h2c? may only be true when HTTP/2 is enabled." (ex-message result))))))
1441+
1442+
(deftest ^:leak test-leak-in-raw-stream-handler
1443+
;; NOTE: Expecting 2 leaks because `with-raw-handler` will run its body for both http1 and
1444+
;; http2. It would be nicer to put this assertion into the body but the http1 server seems to
1445+
;; only really leak the buffer after shutting down.
1446+
(aleph.resource-leak-detector/with-expected-leaks 2
1447+
(with-raw-handler basic-handler
1448+
(let [resp @(http-put "/string"
1449+
;; NOTE: The request handler doesn't consume this body.
1450+
;; As per the :raw-stream? contract, this leads to a buffer leak.
1451+
{:body "Hello, world!"})]
1452+
(is (= 200 (:status resp)))
1453+
(is (= "String!" (bs/to-string (:body resp))))))))
1454+
1455+
(aleph.resource-leak-detector/instrument-tests!)

test/aleph/http_timeout_test.clj

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
(ns aleph.http-timeout-test
22
(:require [aleph.http :as http]
3+
[aleph.resource-leak-detector]
34
[clj-commons.byte-streams :as bs]
45
[clojure.test :refer [deftest testing is]]
56
[manifold.stream :as s])
@@ -95,3 +96,4 @@
9596
(finally
9697
(.close ^AutoCloseable server))))))
9798

99+
(aleph.resource-leak-detector/instrument-tests!)

test/aleph/netty_test.clj

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
(ns aleph.netty-test
22
(:require
33
[aleph.netty :as netty]
4+
[aleph.resource-leak-detector]
45
[clojure.test :refer [deftest is]]
56
[manifold.stream :as s])
67
(:import
@@ -52,3 +53,5 @@
5253
(is (-> config .isAutoRead))
5354
(netty/put! channel s 6)
5455
(is (not (-> config .isAutoRead)))))
56+
57+
(aleph.resource-leak-detector/instrument-tests!)

test/aleph/resource_leak_detector.clj

Lines changed: 210 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,210 @@
1+
(ns aleph.resource-leak-detector
2+
"Provides a Netty leak detector which is more reliable than the default implementation.
3+
4+
Its chance of detecting leaks is around 95%. See `with-leak-collection` for details.
5+
6+
Since it adds considerable runtime overhead, its main purpose is to be used in tests to assert
7+
intentional presence of leaks and to check for unintended leaks. The main API entry points for
8+
this are `with-leak-collection`, `with-expected-leaks` and `instrument-tests!`.
9+
10+
To enable it, pass `-Dio.netty.customResourceLeakDetector=aleph.resource_leak_detector` to the
11+
JVM. You will also have to pass `-Dio.netty.leakDetection.level=PARANOID`.
12+
13+
Most reliable results are achieved by also passing the following options:
14+
`-Dio.netty.leakDetection.targetRecords=1`
15+
`-Dio.netty.allocator.type=unpooled`
16+
17+
All of the above is also provided by the `:leak-detection` Leiningen profile.
18+
19+
NOTE: Currently only improves reliability for detecting leaked ByteBufs. Other types of leaked
20+
resources will still be detected but not with the same reliability. Search Netty's codebase for
21+
invocations of `newResourceLeakDetector` to see which other candidate resources types there are."
22+
(:gen-class
23+
:extends io.netty.util.ResourceLeakDetector)
24+
(:require
25+
[clojure.test :as test]
26+
[clojure.tools.logging :as log])
27+
(:import
28+
(io.netty.buffer AbstractByteBufAllocator)
29+
(io.netty.util
30+
ResourceLeakDetector
31+
ResourceLeakDetector$Level
32+
ResourceLeakDetectorFactory)))
33+
34+
(defn enabled?
35+
"Checks whether the resource leak detector is enabled.
36+
37+
See `aleph.resource-leak-detector` docstring on how to enable it."
38+
[]
39+
(= "aleph.resource_leak_detector"
40+
(System/getProperty "io.netty.customResourceLeakDetector")))
41+
42+
(def active-resource-leak-detector-class
43+
(delay
44+
(class (.newResourceLeakDetector (ResourceLeakDetectorFactory/instance) String))))
45+
46+
(def active?
47+
(delay
48+
(= (Class/forName "aleph.resource_leak_detector")
49+
@active-resource-leak-detector-class)))
50+
51+
(defn ensure-consistent-config! []
52+
(when-not @active?
53+
(if (enabled?)
54+
(throw (RuntimeException.
55+
(str "`aleph.resource_leak_detector` is enabled but the active resource leak detector is "
56+
"`" (.getName ^Class @active-resource-leak-detector-class)"`. This indicates that "
57+
"`io.netty.util.ResourceLeakDetectorFactory` ran into an initialization error. "
58+
"Enable Netty debug logging to diagnose the cause.")))
59+
(throw (RuntimeException.
60+
(str "Attempted to use `aleph.resource-leak-detector` API but it is not enabled. "
61+
"Pass `-Dio.netty.customResourceLeakDetector=aleph.resource_leak_detector` to enable.")))))
62+
(when-not (= ResourceLeakDetector$Level/PARANOID (ResourceLeakDetector/getLevel))
63+
(throw (RuntimeException.
64+
(str "`aleph.resource_leak_detector` requires `-Dio.netty.leakDetection.level=PARANOID`. "
65+
"Current level is `" (ResourceLeakDetector/getLevel) "`.")))))
66+
67+
(def +max-probe-gc-runs+
68+
"Maximum number of times the GC will be run to detect a leaked probe."
69+
10)
70+
71+
(def +probe-hint-marker+
72+
"ALEPH LEAK DETECTOR PROBE")
73+
74+
(defn hint-record-pattern [hint-pattern]
75+
(re-pattern (str "(?m)^\\s*Hint: " hint-pattern "$")))
76+
77+
(def +probe-hint-pattern+
78+
(hint-record-pattern (str +probe-hint-marker+ " \\d+")))
79+
80+
(defn probe? [leak]
81+
(re-find +probe-hint-pattern+ (:records leak)))
82+
83+
(defn contains-hint? [hint leak]
84+
(re-find (hint-record-pattern hint) (:records leak)))
85+
86+
(defn remove-probes [leaks]
87+
(remove probe? leaks))
88+
89+
(let [cnt (atom 0)]
90+
(defn gen-probe-hint []
91+
(str +probe-hint-marker+ " " (swap! cnt inc))))
92+
93+
(defn leak-probe! [hint]
94+
(-> AbstractByteBufAllocator/DEFAULT
95+
(.buffer 1)
96+
(.touch hint)))
97+
98+
(def current-leaks)
99+
100+
(defn force-leak-detection! []
101+
(System/gc)
102+
(System/runFinalization)
103+
;; Transitively trigger a track() invocation which in turn works
104+
;; off the leaked references queue.
105+
(-> AbstractByteBufAllocator/DEFAULT (.buffer 1) .release))
106+
107+
(defn await-probe! [probe-hint]
108+
(loop [n +max-probe-gc-runs+]
109+
(force-leak-detection!)
110+
(if (zero? n)
111+
(throw (RuntimeException. "Gave up awaiting leak probe. Try increasing +max-probe-gc-runs+."))
112+
(when-not (some (partial contains-hint? probe-hint) @current-leaks)
113+
(recur (dec n))))))
114+
115+
(defn with-leak-collection
116+
"Invokes thunk `f` and tries hard to collect any resource leaks it may have caused.
117+
118+
It works as follows: After invoking `f`, it intentionally leaks a (small) buffer, marked as a
119+
probe. It then runs the garbage collector and polls the leak detector in a loop until it reports a
120+
leak which matches the probe. Eventually, it invokes `handle-leaks` with a sequence of any other
121+
detected leaks it collected along the way (empty when none were detected).
122+
123+
A leak is represented as a map with the following keys:
124+
- `:resource-type` is a string with the name of the leaked resource type (e.g. \"ByteBuf\")
125+
- `:records` is a multi-line string which holds the trace of the leak.
126+
127+
Requires the leak detector to be `enabled?`.
128+
129+
When nested, each child establishes a fresh leak collection scope. However, this only works within
130+
the same thread, so any asynchronous processes started by and outliving `f` will leak into the
131+
parent scope(s)."
132+
[f handle-leaks]
133+
(ensure-consistent-config!)
134+
(with-redefs [current-leaks (atom [])]
135+
(f)
136+
(let [hint (gen-probe-hint)]
137+
(leak-probe! hint)
138+
(await-probe! hint)
139+
(handle-leaks (remove-probes @current-leaks)))))
140+
141+
(defn -needReport [_this]
142+
true)
143+
144+
(defn -reportTracedLeak [_this resource-type records]
145+
(swap! current-leaks conj {:resource-type resource-type
146+
:records records}))
147+
148+
;; NOTE: Since we require level PARANOID, this should never be called in practice.
149+
(defn -reportUntracedLeak [_this resource-type]
150+
(swap! current-leaks conj {:resource-type resource-type
151+
:records "[untraced]"}))
152+
153+
(defn log-leaks! [leaks]
154+
(doseq [{:keys [resource-type records]} leaks]
155+
;; Log message cribbed from io.netty.util.ResourceLeakDetector's (protected) reportTracedLeak method
156+
(log/error (str "LEAK: " resource-type ".release() was not called before it's garbage-collected.")
157+
(str "See https://netty.io/wiki/reference-counted-objects.html for more information." records))))
158+
159+
(defmacro with-expected-leaks
160+
"Runs `body` and expects it to produce exactly `expected-leak-count` leaks. Intended for use in tests
161+
which intentionally leak resources.
162+
163+
Requires the leak detector to be `enabled?`."
164+
[expected-leak-count & body]
165+
`(with-leak-collection
166+
(fn [] ~@body)
167+
;; NOTE: Using a raw symbol here instead of a gensym to get nicer test failures.
168+
(fn [~'leaks]
169+
(when-not (test/is (= ~expected-leak-count (count ~'leaks)) "Unexpected leak count! See log output for details.")
170+
(log-leaks! ~'leaks)))))
171+
172+
(defn- report-test-leaks! [leaks]
173+
(when (seq leaks)
174+
(log-leaks! leaks)
175+
;; We include the assertion here within the `when` form so that we don't add a mystery assertion
176+
;; to every passing test (which is the common case).
177+
(test/is (zero? (count leaks))
178+
"Leak detected! See log output for details.")))
179+
180+
(defn- instrument-test-fn [tf]
181+
(if (::instrumented? tf)
182+
tf
183+
(with-meta
184+
(fn []
185+
(with-leak-collection tf report-test-leaks!))
186+
{::instrumented? true})))
187+
188+
(defn instrument-tests!
189+
"If `enabled?`, instruments all tests in the current namespace with leak detection by wrapping them
190+
in `with-leak-collection`. If leaks are detected, a corresponding (failing) assertion is injected
191+
into the test and the leak reports are logged at level `error`.
192+
193+
Usually placed at the end of a test namespace.
194+
195+
Note that this is intentionally not implemented as a fixture since there is no clean way to make a
196+
test fail from within a fixture: Neither a failing assertion nor throwing an exception will
197+
preserve which particular test caused it. See
198+
e.g. https://github.com/technomancy/leiningen/issues/2694 for an example of this."
199+
[]
200+
(when (enabled?)
201+
(->> (ns-interns *ns*)
202+
vals
203+
(filter (comp :test meta))
204+
(run! (fn [tv]
205+
(alter-meta! tv update :test instrument-test-fn))))))
206+
207+
(if (enabled?)
208+
(log/info "enabled.")
209+
(log/info "disabled. This means resource leaks will be reported less accurately."
210+
"Pass `-Dio.netty.customResourceLeakDetector=aleph.resource_leak_detector` to enable."))

test/aleph/ring_test.clj

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,10 @@
22
(:require
33
[aleph.http :as http]
44
[aleph.netty :as netty]
5+
[aleph.resource-leak-detector]
56
[clj-commons.byte-streams :as bs]
67
[clojure.test :refer [deftest is]]))
78

8-
(netty/leak-detector-level! :paranoid)
9-
109
(defn create-url
1110
([path]
1211
(create-url 8080 path))
@@ -92,3 +91,5 @@
9291
(deftest test-host-header
9392
(with-server [:headers :host]
9493
(is (= "localhost:8080" (request)))))
94+
95+
(aleph.resource-leak-detector/instrument-tests!)

test/aleph/tcp_ssl_test.clj

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
(ns aleph.tcp-ssl-test
22
(:require
33
[aleph.netty :as netty]
4+
[aleph.resource-leak-detector]
45
[aleph.ssl :as ssl]
56
[aleph.tcp :as tcp]
67
[aleph.tcp-test :refer [with-server]]
@@ -13,8 +14,6 @@
1314
(java.util.concurrent TimeoutException)
1415
(javax.net.ssl SSLHandshakeException)))
1516

16-
(netty/leak-detector-level! :paranoid)
17-
1817
(set! *warn-on-reflection* false)
1918

2019
(defn ssl-echo-handler [ssl-session]
@@ -148,3 +147,5 @@
148147
(is (not (d/realized? c)))
149148
(deliver continue-handshake true)
150149
(is (deref c 1000 false))))))
150+
151+
(aleph.resource-leak-detector/instrument-tests!)

test/aleph/tcp_test.clj

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,12 @@
11
(ns aleph.tcp-test
22
(:require
33
[aleph.netty :as netty]
4+
[aleph.resource-leak-detector]
45
[aleph.tcp :as tcp]
56
[clj-commons.byte-streams :as bs]
67
[clojure.test :refer [deftest testing is]]
78
[manifold.stream :as s]))
89

9-
(netty/leak-detector-level! :paranoid)
10-
1110
(defn echo-handler [s _]
1211
(s/connect s s))
1312

@@ -55,3 +54,5 @@
5554
(is (= "foo" (bs/to-string @(s/take! c)))))))
5655
(catch Exception _
5756
(is (not (netty/io-uring-available?)))))))
57+
58+
(aleph.resource-leak-detector/instrument-tests!)

test/aleph/udp_test.clj

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,13 @@
22
(:require
33
[aleph.netty :as netty]
44
[aleph.udp :as udp]
5+
[aleph.resource-leak-detector]
56
[clj-commons.byte-streams :as bs]
67
[clojure.test :refer [deftest testing is use-fixtures]]
78
[manifold.stream :as s])
89
(:import
910
(java.net ServerSocket)))
1011

11-
(netty/leak-detector-level! :paranoid)
12-
1312
(def ^:dynamic *port* nil)
1413

1514
(defn rand-port []
@@ -73,3 +72,5 @@
7372
(s/close! s))))
7473
(catch Exception _
7574
(is (not (netty/io-uring-available?)))))))
75+
76+
(aleph.resource-leak-detector/instrument-tests!)

0 commit comments

Comments
 (0)