Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 84 additions & 35 deletions src/PerformanceProfilingHttpEndpoints.jl
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,21 @@ using Serialization: serialize
#
#----------------------------------------------------------

# Wrap `binary_data` in a 200 response that browsers treat as a file download.
# `repr(filename)` yields a double-quoted filename for Content-Disposition.
function _http_response(binary_data, filename)
    headers = [
        "Content-Type" => "application/octet-stream",
        "Content-Disposition" => "attachment; filename=$(repr(filename))",
    ]
    return HTTP.Response(200, headers, body = binary_data)
end

###
### CPU
###

# Defaults for the HTTP query parameters. Each is kept as a *string* because it
# is substituted for an absent query-string entry and then parsed through the
# same `parse(...)` path as user-supplied values.
default_delay() = "0.01"
default_duration() = "10.0"
default_n() = "1e8"
default_pprof() = "true"
default_alloc_sample_rate() = "0.0001"

cpu_profile_error_message() = """Need to provide query params:
- duration=$(default_duration())
Expand All @@ -50,24 +60,6 @@ controlled by `n=`. If you assume an average stack depth of 100, and you were ai

The default `n` is 1e8, which should be big enough for most profiles.
"""
allocs_profile_error_message() = """Need to provide query params:
- duration=$(default_duration())
- sample_rate=$(default_alloc_sample_rate())

Hint: A good goal is to shoot for around 1,000 to 10,000 samples. So if you know what
duration you want to profile for, and you *already have an expectation for how much your
program will allocate,* you can pick a sample_rate via `sample_rate = 1,000 / expected_allocations`.

For example, if you expect your program will actually perform 1 million allocations:
1_000 / 1_000_000 = 0.001
for `duration=30&sample_rate=0.001`

Note that if your sample_rate gets too large, you can really slow down the program you're
profiling, and thus end up with an inaccurate profile.

Finally, if you think your program only allocates a small amount, you can capture *all*
allocations by passing sample_rate=1.
"""

function cpu_profile_endpoint(req::HTTP.Request)
uri = HTTP.URI(req.target)
Expand All @@ -82,45 +74,90 @@ function cpu_profile_endpoint(req::HTTP.Request)
delay = parse(Float64, get(qp, "delay", default_delay()))
duration = parse(Float64, get(qp, "duration", default_duration()))
with_pprof = parse(Bool, get(qp, "pprof", default_pprof()))

return _do_cpu_profile(n, delay, duration, with_pprof)
end

# HTTP handler for `/profile_start`: begin an open-ended CPU profile.
# Accepts the same `n` (buffer size) and `delay` (sample period, seconds)
# query params as `/profile`; results are collected via `/profile_stop`.
function cpu_profile_start_endpoint(req::HTTP.Request)
    qp = HTTP.queryparams(HTTP.URI(req.target))
    # `n` may be written in scientific notation (e.g. "1e8"), so parse as
    # Float64 first, then convert to Int.
    buffer_size = convert(Int, parse(Float64, get(qp, "n", default_n())))
    sample_delay = parse(Float64, get(qp, "delay", default_delay()))
    return _start_cpu_profile(buffer_size, sample_delay)
end

# HTTP handler for `/profile_stop`: halt the sampler started by
# `/profile_start` and return the accumulated profile as a download.
function cpu_profile_stop_endpoint(req::HTTP.Request)
    # Stop sampling first, before any request parsing, so the handler's own
    # work is kept out of the profile.
    Profile.stop_timer()
    @info "Stopping CPU Profiling from PerformanceProfilingHttpEndpoints"
    qp = HTTP.queryparams(HTTP.URI(req.target))
    emit_pprof = parse(Bool, get(qp, "pprof", default_pprof()))
    return _cpu_profile_response("cpu_profile"; with_pprof = emit_pprof)
end

# Run a fixed-duration CPU profile and return it as an HTTP response.
#
# `n` is the profiler's sample-buffer size, `delay` the sampling period in
# seconds, and `duration` how long (seconds) this task blocks while the whole
# process is sampled. When `with_pprof` is true the result is rendered via
# PProf (.pb.gz); otherwise raw `Profile.retrieve()` data is serialized.
#
# Fix: removed a stray top-level `data = Profile.retrieve()` left over after
# the function's end, which would have run at module load time.
function _do_cpu_profile(n, delay, duration, with_pprof)
    @info "Starting CPU Profiling from PerformanceProfilingHttpEndpoints with configuration:" n delay duration
    Profile.clear()
    Profile.init(n, delay)
    # Sleep for `duration`; the sampler profiles the whole process meanwhile.
    Profile.@profile sleep(duration)
    filename = "cpu_profile-duration=$duration&delay=$delay&n=$n"
    return _cpu_profile_response(filename; with_pprof)
end
# Begin open-ended CPU profiling with buffer size `n` and sample period
# `delay` (seconds); pair with `/profile_stop` to collect the results.
function _start_cpu_profile(n, delay)
    @info "Starting CPU Profiling from PerformanceProfilingHttpEndpoints with configuration:" n delay
    # Construct the response *before* the profiler is running — presumably so
    # the work of building it is not captured in the profile.
    # NOTE(review): confirm that intent; the ordering is deliberate here.
    response = HTTP.Response(200, "CPU profiling started.")
    Profile.clear()
    Profile.init(n, delay)
    Profile.start_timer()
    return response
end

# Package the most recently collected CPU profile as an HTTP download.
#
# With `with_pprof = true` the profile is exported through PProf to a gzipped
# protobuf named `<filename>.pb.gz`; otherwise the raw output of
# `Profile.retrieve()` is serialized to `<filename>.prof.bin`.
#
# Fix: removed dead duplicate `return` lines (diff residue) that referenced
# undefined locals `duration`/`delay`/`n`.
function _cpu_profile_response(filename; with_pprof::Bool)
    if with_pprof
        prof_name = tempname()
        PProf.pprof(out = prof_name, web = false)
        # PProf appends the extension to the requested output path.
        prof_name = "$prof_name.pb.gz"
        return _http_response(read(prof_name), "$filename.pb.gz")
    else
        iobuf = IOBuffer()
        data = Profile.retrieve()
        serialize(iobuf, data)
        return _http_response(take!(iobuf), "$filename.prof.bin")
    end
end

function _http_response(binary_data, filename)
return HTTP.Response(200, [
"Content-Type" => "application/octet-stream"
"Content-Disposition" => "attachment; filename=$(repr(filename))"
], body = binary_data)
end
###
### Allocs
###

# Placeholder endpoint for heap snapshots.
# TODO: implement this once https://github.com/JuliaLang/julia/pull/42286 is merged
function heap_snapshot_endpoint(req::HTTP.Request)
    # Fix: return an explicit 501 rather than falling through and returning
    # `nothing`, which the HTTP server cannot render as a response.
    return HTTP.Response(501, "Heap snapshots are not implemented yet.")
end

default_alloc_sample_rate() = "0.0001"

# Help text returned when `/allocs_profile` is called without the required
# query parameters; interpolates the default values so the message stays in
# sync with `default_duration()` / `default_alloc_sample_rate()`.
allocs_profile_error_message() = """Need to provide query params:
- duration=$(default_duration())
- sample_rate=$(default_alloc_sample_rate())

Hint: A good goal is to shoot for around 1,000 to 10,000 samples. So if you know what
duration you want to profile for, and you *already have an expectation for how much your
program will allocate,* you can pick a sample_rate via `sample_rate = 1,000 / expected_allocations`.

For example, if you expect your program will actually perform 1 million allocations:
1_000 / 1_000_000 = 0.001
for `duration=30&sample_rate=0.001`

Note that if your sample_rate gets too large, you can really slow down the program you're
profiling, and thus end up with an inaccurate profile.

Finally, if you think your program only allocates a small amount, you can capture *all*
allocations by passing sample_rate=1.
"""

@static if !(isdefined(Profile, :Allocs) && isdefined(PProf, :Allocs))

for f in (:allocations_profile_endpoint, :allocations_start_endpoint, :allocations_stop_endpoint)
Expand Down Expand Up @@ -170,9 +207,10 @@ end

function _start_alloc_profile(sample_rate)
@info "Starting allocation Profiling from PerformanceProfilingHttpEndpoints with configuration:" sample_rate
resp = HTTP.Response(200, "Allocation profiling started.")
Profile.Allocs.clear()
Profile.Allocs.start(; sample_rate)
return HTTP.Response(200, "Allocation profiling started.")
return resp
Comment on lines +210 to +213
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

genius

end

function _stop_alloc_profile()
Expand All @@ -185,10 +223,16 @@ end

end # if isdefined

###
### Server
###

function serve_profiling_server(;addr="127.0.0.1", port=16825, verbose=false, kw...)
verbose >= 0 && @info "Starting HTTP profiling server on port $port"
router = HTTP.Router()
HTTP.register!(router, "/profile", cpu_profile_endpoint)
HTTP.register!(router, "/profile_start", cpu_profile_start_endpoint)
HTTP.register!(router, "/profile_stop", cpu_profile_stop_endpoint)
HTTP.register!(router, "/allocs_profile", allocations_profile_endpoint)
HTTP.register!(router, "/allocs_profile_start", allocations_start_endpoint)
HTTP.register!(router, "/allocs_profile_stop", allocations_stop_endpoint)
Expand All @@ -200,8 +244,13 @@ end
# up profiling compilation!
function __init__()
precompile(serve_profiling_server, ()) || error("precompilation of package functions is not supposed to fail")

precompile(cpu_profile_endpoint, (HTTP.Request,)) || error("precompilation of package functions is not supposed to fail")
precompile(cpu_profile_start_endpoint, (HTTP.Request,)) || error("precompilation of package functions is not supposed to fail")
precompile(cpu_profile_stop_endpoint, (HTTP.Request,)) || error("precompilation of package functions is not supposed to fail")
precompile(_do_cpu_profile, (Int,Float64,Float64,Bool)) || error("precompilation of package functions is not supposed to fail")
precompile(_start_cpu_profile, (Int,Float64,)) || error("precompilation of package functions is not supposed to fail")

precompile(allocations_profile_endpoint, (HTTP.Request,)) || error("precompilation of package functions is not supposed to fail")
precompile(allocations_start_endpoint, (HTTP.Request,)) || error("precompilation of package functions is not supposed to fail")
precompile(allocations_stop_endpoint, (HTTP.Request,)) || error("precompilation of package functions is not supposed to fail")
Expand Down
59 changes: 47 additions & 12 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,26 +18,61 @@ const url = "http://127.0.0.1:$port"
@testset "CPU profiling" begin
done = Threads.Atomic{Bool}(false)
# Schedule some work that's known to be expensive, to profile it
t = @async begin
for _ in 1:200
workload() = @async begin
for _ in 1:1000
if done[] return end
InteractiveUtils.peakflops()
yield() # yield to allow the tests to run
end
end

req = HTTP.get("$url/profile?duration=3&pprof=false")
@test req.status == 200
@test length(req.body) > 0
@testset "profile endpoint" begin
done[] = false
t = workload()
req = HTTP.get("$url/profile?duration=3&pprof=false")
@test req.status == 200
@test length(req.body) > 0

data, lidict = deserialize(IOBuffer(req.body))
# Test that the profile seems like valid profile data
@test data isa Vector{UInt64}
@test lidict isa Dict{UInt64, Vector{Base.StackTraces.StackFrame}}

@info "Finished `profile` tests, waiting for peakflops workload to finish."
done[] = true
wait(t) # handle errors
end

@testset "profile_start/stop endpoints" begin
done[] = false
t = workload()
req = HTTP.get("$url/profile_start")
@test req.status == 200
@test String(req.body) == "CPU profiling started."

sleep(3) # Allow workload to run a while before we stop profiling.

data, lidict = deserialize(IOBuffer(req.body))
# Test that the profile seems like valid profile data
@test data isa Vector{UInt64}
@test lidict isa Dict{UInt64, Vector{Base.StackTraces.StackFrame}}
req = HTTP.get("$url/profile_stop?pprof=false")
@test req.status == 200
data, lidict = deserialize(IOBuffer(req.body))
# Test that the profile seems like valid profile data
@test data isa Vector{UInt64}
@test lidict isa Dict{UInt64, Vector{Base.StackTraces.StackFrame}}

@info "Finished tests, waiting for peakflops workload to finish."
done[] = true
wait(t) # handle errors
@info "Finished `profile_start/stop` tests, waiting for peakflops workload to finish."
done[] = true
wait(t) # handle errors

# We retrieve data via PProf directly if `pprof=true`; make sure that path's tested.
# This second call to `profile_stop` should still return the profile, even though
# the profiler is already stopped, as it's `profile_start` that calls `clear()`.
req = HTTP.get("$url/profile_stop?pprof=true")
@test req.status == 200
# Test that there's something here
# TODO: actually parse the profile
data = read(IOBuffer(req.body), String)
@test length(data) > 100
end
end

@testset "Allocation profiling" begin
Expand Down