Skip to content

Commit 732cb0d

Browse files
Add cpu profiling start/stop endpoints (#11)
* Add cpu profiling start/stop endpoints * Group related code together * fixup! Add cpu profiling start/stop endpoints * Move work to be outside of profiling * Move more work to be outside of profiling * Test `pprof=true` param * Fix comment typo in test/runtests.jl
1 parent 74a92f5 commit 732cb0d

File tree

2 files changed

+131
-47
lines changed

2 files changed

+131
-47
lines changed

src/PerformanceProfilingHttpEndpoints.jl

Lines changed: 84 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,21 @@ using Serialization: serialize
2121
#
2222
#----------------------------------------------------------
2323

24+
function _http_response(binary_data, filename)
25+
return HTTP.Response(200, [
26+
"Content-Type" => "application/octet-stream"
27+
"Content-Disposition" => "attachment; filename=$(repr(filename))"
28+
], body = binary_data)
29+
end
30+
31+
###
32+
### CPU
33+
###
34+
2435
default_n() = "1e8"
2536
default_delay() = "0.01"
2637
default_duration() = "10.0"
2738
default_pprof() = "true"
28-
default_alloc_sample_rate() = "0.0001"
2939

3040
cpu_profile_error_message() = """Need to provide query params:
3141
- duration=$(default_duration())
@@ -50,24 +60,6 @@ controlled by `n=`. If you assume an average stack depth of 100, and you were ai
5060
5161
The default `n` is 1e8, which should be big enough for most profiles.
5262
"""
53-
allocs_profile_error_message() = """Need to provide query params:
54-
- duration=$(default_duration())
55-
- sample_rate=$(default_alloc_sample_rate())
56-
57-
Hint: A good goal is to shoot for around 1,000 to 10,000 samples. So if you know what
58-
duration you want to profile for, and you *already have an expectation for how much your
59-
program will allocate,* you can pick a sample_rate via `sample_rate = 1,000 / expected_allocations`.
60-
61-
For example, if you expect your program will actually perform 1 million allocations:
62-
1_000 / 1_000_000 = 0.001
63-
for `duration=30&sample_rate=0.001`
64-
65-
Note that if your sample_rate gets too large, you can really slow down the program you're
66-
profiling, and thus end up with an inaccurate profile.
67-
68-
Finally, if you think your program only allocates a small amount, you can capture *all*
69-
allocations by passing sample_rate=1.
70-
"""
7163

7264
function cpu_profile_endpoint(req::HTTP.Request)
7365
uri = HTTP.URI(req.target)
@@ -82,45 +74,90 @@ function cpu_profile_endpoint(req::HTTP.Request)
8274
delay = parse(Float64, get(qp, "delay", default_delay()))
8375
duration = parse(Float64, get(qp, "duration", default_duration()))
8476
with_pprof = parse(Bool, get(qp, "pprof", default_pprof()))
85-
8677
return _do_cpu_profile(n, delay, duration, with_pprof)
8778
end
8879

80+
function cpu_profile_start_endpoint(req::HTTP.Request)
81+
uri = HTTP.URI(req.target)
82+
qp = HTTP.queryparams(uri)
83+
84+
# Run the profile
85+
n = convert(Int, parse(Float64, get(qp, "n", default_n())))
86+
delay = parse(Float64, get(qp, "delay", default_delay()))
87+
return _start_cpu_profile(n, delay)
88+
end
89+
90+
function cpu_profile_stop_endpoint(req::HTTP.Request)
91+
Profile.stop_timer()
92+
@info "Stopping CPU Profiling from PerformanceProfilingHttpEndpoints"
93+
uri = HTTP.URI(req.target)
94+
qp = HTTP.queryparams(uri)
95+
with_pprof = parse(Bool, get(qp, "pprof", default_pprof()))
96+
filename = "cpu_profile"
97+
return _cpu_profile_response(filename; with_pprof)
98+
end
99+
89100
function _do_cpu_profile(n, delay, duration, with_pprof)
90101
@info "Starting CPU Profiling from PerformanceProfilingHttpEndpoints with configuration:" n delay duration
91-
92102
Profile.clear()
93-
94103
Profile.init(n, delay)
95-
96104
Profile.@profile sleep(duration)
105+
filename = "cpu_profile-duration=$duration&delay=$delay&n=$n"
106+
return _cpu_profile_response(filename; with_pprof)
107+
end
97108

98-
data = Profile.retrieve()
109+
function _start_cpu_profile(n, delay)
110+
@info "Starting CPU Profiling from PerformanceProfilingHttpEndpoints with configuration:" n delay
111+
resp = HTTP.Response(200, "CPU profiling started.")
112+
Profile.clear()
113+
Profile.init(n, delay)
114+
Profile.start_timer()
115+
return resp
116+
end
117+
118+
function _cpu_profile_response(filename; with_pprof::Bool)
99119
if with_pprof
100120
prof_name = tempname()
101121
PProf.pprof(out=prof_name, web=false)
102122
prof_name = "$prof_name.pb.gz"
103-
return _http_response(read(prof_name),
104-
"cpu_profile-duration=$duration&delay=$delay&n=$n.pb.gz")
123+
return _http_response(read(prof_name), "$filename.pb.gz")
105124
else
106125
iobuf = IOBuffer()
126+
data = Profile.retrieve()
107127
serialize(iobuf, data)
108-
return _http_response(take!(iobuf),
109-
"cpu_profile&duration=$duration&delay=$delay&n=$n.prof.bin")
128+
return _http_response(take!(iobuf), "$filename.prof.bin")
110129
end
111130
end
112131

113-
function _http_response(binary_data, filename)
114-
return HTTP.Response(200, [
115-
"Content-Type" => "application/octet-stream"
116-
"Content-Disposition" => "attachment; filename=$(repr(filename))"
117-
], body = binary_data)
118-
end
132+
###
133+
### Allocs
134+
###
119135

120136
function heap_snapshot_endpoint(req::HTTP.Request)
121137
# TODO: implement this once https://github.com/JuliaLang/julia/pull/42286 is merged
122138
end
123139

140+
default_alloc_sample_rate() = "0.0001"
141+
142+
allocs_profile_error_message() = """Need to provide query params:
143+
- duration=$(default_duration())
144+
- sample_rate=$(default_alloc_sample_rate())
145+
146+
Hint: A good goal is to shoot for around 1,000 to 10,000 samples. So if you know what
147+
duration you want to profile for, and you *already have an expectation for how much your
148+
program will allocate,* you can pick a sample_rate via `sample_rate = 1,000 / expected_allocations`.
149+
150+
For example, if you expect your program will actually perform 1 million allocations:
151+
1_000 / 1_000_000 = 0.001
152+
for `duration=30&sample_rate=0.001`
153+
154+
Note that if your sample_rate gets too large, you can really slow down the program you're
155+
profiling, and thus end up with an inaccurate profile.
156+
157+
Finally, if you think your program only allocates a small amount, you can capture *all*
158+
allocations by passing sample_rate=1.
159+
"""
160+
124161
@static if !(isdefined(Profile, :Allocs) && isdefined(PProf, :Allocs))
125162

126163
for f in (:allocations_profile_endpoint, :allocations_start_endpoint, :allocations_stop_endpoint)
@@ -170,9 +207,10 @@ end
170207

171208
function _start_alloc_profile(sample_rate)
172209
@info "Starting allocation Profiling from PerformanceProfilingHttpEndpoints with configuration:" sample_rate
210+
resp = HTTP.Response(200, "Allocation profiling started.")
173211
Profile.Allocs.clear()
174212
Profile.Allocs.start(; sample_rate)
175-
return HTTP.Response(200, "Allocation profiling started.")
213+
return resp
176214
end
177215

178216
function _stop_alloc_profile()
@@ -185,10 +223,16 @@ end
185223

186224
end # if isdefined
187225

226+
###
227+
### Server
228+
###
229+
188230
function serve_profiling_server(;addr="127.0.0.1", port=16825, verbose=false, kw...)
189231
verbose >= 0 && @info "Starting HTTP profiling server on port $port"
190232
router = HTTP.Router()
191233
HTTP.register!(router, "/profile", cpu_profile_endpoint)
234+
HTTP.register!(router, "/profile_start", cpu_profile_start_endpoint)
235+
HTTP.register!(router, "/profile_stop", cpu_profile_stop_endpoint)
192236
HTTP.register!(router, "/allocs_profile", allocations_profile_endpoint)
193237
HTTP.register!(router, "/allocs_profile_start", allocations_start_endpoint)
194238
HTTP.register!(router, "/allocs_profile_stop", allocations_stop_endpoint)
@@ -200,8 +244,13 @@ end
200244
# up profiling compilation!
201245
function __init__()
202246
precompile(serve_profiling_server, ()) || error("precompilation of package functions is not supposed to fail")
247+
203248
precompile(cpu_profile_endpoint, (HTTP.Request,)) || error("precompilation of package functions is not supposed to fail")
249+
precompile(cpu_profile_start_endpoint, (HTTP.Request,)) || error("precompilation of package functions is not supposed to fail")
250+
precompile(cpu_profile_stop_endpoint, (HTTP.Request,)) || error("precompilation of package functions is not supposed to fail")
204251
precompile(_do_cpu_profile, (Int,Float64,Float64,Bool)) || error("precompilation of package functions is not supposed to fail")
252+
precompile(_start_cpu_profile, (Int,Float64,)) || error("precompilation of package functions is not supposed to fail")
253+
205254
precompile(allocations_profile_endpoint, (HTTP.Request,)) || error("precompilation of package functions is not supposed to fail")
206255
precompile(allocations_start_endpoint, (HTTP.Request,)) || error("precompilation of package functions is not supposed to fail")
207256
precompile(allocations_stop_endpoint, (HTTP.Request,)) || error("precompilation of package functions is not supposed to fail")

test/runtests.jl

Lines changed: 47 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -18,26 +18,61 @@ const url = "http://127.0.0.1:$port"
1818
@testset "CPU profiling" begin
1919
done = Threads.Atomic{Bool}(false)
2020
# Schedule some work that's known to be expensive, to profile it
21-
t = @async begin
22-
for _ in 1:200
21+
workload() = @async begin
22+
for _ in 1:1000
2323
if done[] return end
2424
InteractiveUtils.peakflops()
2525
yield() # yield to allow the tests to run
2626
end
2727
end
2828

29-
req = HTTP.get("$url/profile?duration=3&pprof=false")
30-
@test req.status == 200
31-
@test length(req.body) > 0
29+
@testset "profile endpoint" begin
30+
done[] = false
31+
t = workload()
32+
req = HTTP.get("$url/profile?duration=3&pprof=false")
33+
@test req.status == 200
34+
@test length(req.body) > 0
35+
36+
data, lidict = deserialize(IOBuffer(req.body))
37+
# Test that the profile seems like valid profile data
38+
@test data isa Vector{UInt64}
39+
@test lidict isa Dict{UInt64, Vector{Base.StackTraces.StackFrame}}
40+
41+
@info "Finished `profile` tests, waiting for peakflops workload to finish."
42+
done[] = true
43+
wait(t) # handle errors
44+
end
45+
46+
@testset "profile_start/stop endpoints" begin
47+
done[] = false
48+
t = workload()
49+
req = HTTP.get("$url/profile_start")
50+
@test req.status == 200
51+
@test String(req.body) == "CPU profiling started."
52+
53+
sleep(3) # Allow workload to run a while before we stop profiling.
3254

33-
data, lidict = deserialize(IOBuffer(req.body))
34-
# Test that the profile seems like valid profile data
35-
@test data isa Vector{UInt64}
36-
@test lidict isa Dict{UInt64, Vector{Base.StackTraces.StackFrame}}
55+
req = HTTP.get("$url/profile_stop?pprof=false")
56+
@test req.status == 200
57+
data, lidict = deserialize(IOBuffer(req.body))
58+
# Test that the profile seems like valid profile data
59+
@test data isa Vector{UInt64}
60+
@test lidict isa Dict{UInt64, Vector{Base.StackTraces.StackFrame}}
3761

38-
@info "Finished tests, waiting for peakflops workload to finish."
39-
done[] = true
40-
wait(t) # handle errors
62+
@info "Finished `profile_start/stop` tests, waiting for peakflops workload to finish."
63+
done[] = true
64+
wait(t) # handle errors
65+
66+
# We retrieve data via PProf directly if `pprof=true`; make sure that path's tested.
67+
# This second call to `profile_stop` should still return the profile, even though
68+
# the profiler is already stopped, as it's `profile_start` that calls `clear()`.
69+
req = HTTP.get("$url/profile_stop?pprof=true")
70+
@test req.status == 200
71+
# Test that there's something here
72+
# TODO: actually parse the profile
73+
data = read(IOBuffer(req.body), String)
74+
@test length(data) > 100
75+
end
4176
end
4277

4378
@testset "Allocation profiling" begin

0 commit comments

Comments
 (0)