Skip to content

Commit 45748b8

Browse files
authored
[Profile] fix overhead counts in format=:flat (#49824)
Regression caused by #41742, which inverted the loop without inverting the logic. Also fixes a number of related formatting mistakes. Fixes #49732.
1 parent ee0199f commit 45748b8

File tree

1 file changed

+46
-34
lines changed

1 file changed

+46
-34
lines changed

stdlib/Profile/src/Profile.jl

Lines changed: 46 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -227,11 +227,15 @@ function print(io::IO,
227227
elseif Sys.iswindows() && in(groupby, [:thread, [:task, :thread], [:thread, :task]])
228228
@warn "Profiling on windows is limited to the main thread. Other threads have not been sampled and will not show in the report"
229229
end
230-
any_nosamples = false
231-
println(io, "Overhead ╎ [+additional indent] Count File:Line; Function")
232-
println(io, "=========================================================")
230+
any_nosamples = true
231+
if format === :tree
232+
Base.print(io, "Overhead ╎ [+additional indent] Count File:Line; Function\n")
233+
Base.print(io, "=========================================================\n")
234+
end
233235
if groupby == [:task, :thread]
234-
for taskid in intersect(get_task_ids(data), tasks)
236+
taskids = intersect(get_task_ids(data), tasks)
237+
isempty(taskids) && (any_nosamples = true)
238+
for taskid in taskids
235239
threadids = intersect(get_thread_ids(data, taskid), threads)
236240
if length(threadids) == 0
237241
any_nosamples = true
@@ -247,7 +251,9 @@ function print(io::IO,
247251
end
248252
end
249253
elseif groupby == [:thread, :task]
250-
for threadid in intersect(get_thread_ids(data), threads)
254+
threadids = intersect(get_thread_ids(data), threads)
255+
isempty(threadids) && (any_nosamples = true)
256+
for threadid in threadids
251257
taskids = intersect(get_task_ids(data, threadid), tasks)
252258
if length(taskids) == 0
253259
any_nosamples = true
@@ -264,15 +270,19 @@ function print(io::IO,
264270
end
265271
elseif groupby === :task
266272
threads = 1:typemax(Int)
267-
for taskid in intersect(get_task_ids(data), tasks)
273+
taskids = intersect(get_task_ids(data), tasks)
274+
isempty(taskids) && (any_nosamples = true)
275+
for taskid in taskids
268276
printstyled(io, "Task $(Base.repr(taskid)) "; bold=true, color=Base.debug_color())
269277
nosamples = print(io, data, lidict, pf, format, threads, taskid, true)
270278
nosamples && (any_nosamples = true)
271279
println(io)
272280
end
273281
elseif groupby === :thread
274282
tasks = 1:typemax(UInt)
275-
for threadid in intersect(get_thread_ids(data), threads)
283+
threadids = intersect(get_thread_ids(data), threads)
284+
isempty(threadids) && (any_nosamples = true)
285+
for threadid in threadids
276286
printstyled(io, "Thread $threadid "; bold=true, color=Base.info_color())
277287
nosamples = print(io, data, lidict, pf, format, threadid, tasks, true)
278288
nosamples && (any_nosamples = true)
@@ -387,6 +397,7 @@ function getdict!(dict::LineInfoDict, data::Vector{UInt})
387397
n_unique_ips = length(unique_ips)
388398
n_unique_ips == 0 && return dict
389399
iplookups = similar(unique_ips, Vector{StackFrame})
400+
sort!(unique_ips) # help each thread to get a disjoint set of libraries, as much if possible
390401
@sync for indexes_part in Iterators.partition(eachindex(unique_ips), div(n_unique_ips, Threads.threadpoolsize(), RoundUp))
391402
Threads.@spawn begin
392403
for i in indexes_part
@@ -653,7 +664,7 @@ function parse_flat(::Type{T}, data::Vector{UInt64}, lidict::Union{LineInfoDict,
653664
m = Int[]
654665
lilist_idx = Dict{T, Int}()
655666
recursive = Set{T}()
656-
first = true
667+
leaf = 0
657668
totalshots = 0
658669
startframe = length(data)
659670
skip = false
@@ -677,12 +688,16 @@ function parse_flat(::Type{T}, data::Vector{UInt64}, lidict::Union{LineInfoDict,
677688
skip = false
678689
totalshots += 1
679690
empty!(recursive)
680-
first = true
691+
if leaf != 0
692+
m[leaf] += 1
693+
end
694+
leaf = 0
681695
startframe = i
682696
elseif !skip
683697
frames = lidict[ip]
684698
nframes = (frames isa Vector ? length(frames) : 1)
685-
for j = 1:nframes
699+
# the last lookup is the non-inlined root frame, the first is the inlined leaf frame
700+
for j = nframes:-1:1
686701
frame = (frames isa Vector ? frames[j] : frames)
687702
!C && frame.from_c && continue
688703
key = (T === UInt64 ? ip : frame)
@@ -696,10 +711,7 @@ function parse_flat(::Type{T}, data::Vector{UInt64}, lidict::Union{LineInfoDict,
696711
push!(recursive, key)
697712
n[idx] += 1
698713
end
699-
if first
700-
m[idx] += 1
701-
first = false
702-
end
714+
leaf = idx
703715
end
704716
end
705717
end
@@ -710,30 +722,31 @@ end
710722
function flat(io::IO, data::Vector{UInt64}, lidict::Union{LineInfoDict, LineInfoFlatDict}, cols::Int, fmt::ProfileFormat,
711723
threads::Union{Int,AbstractVector{Int}}, tasks::Union{UInt,AbstractVector{UInt}}, is_subsection::Bool)
712724
lilist, n, m, totalshots, nsleeping = parse_flat(fmt.combine ? StackFrame : UInt64, data, lidict, fmt.C, threads, tasks)
725+
if false # optional: drop the "non-interpretable" ones
726+
keep = map(frame -> frame != UNKNOWN && frame.line != 0, lilist)
727+
lilist = lilist[keep]
728+
n = n[keep]
729+
m = m[keep]
730+
end
713731
util_perc = (1 - (nsleeping / totalshots)) * 100
732+
filenamemap = Dict{Symbol,String}()
714733
if isempty(lilist)
715734
if is_subsection
716735
Base.print(io, "Total snapshots: ")
717736
printstyled(io, "$(totalshots)", color=Base.warn_color())
718-
Base.println(io, " (", round(Int, util_perc), "% utilization)")
737+
Base.print(io, ". Utilization: ", round(Int, util_perc), "%\n")
719738
else
720739
warning_empty()
721740
end
722741
return true
723742
end
724-
if false # optional: drop the "non-interpretable" ones
725-
keep = map(frame -> frame != UNKNOWN && frame.line != 0, lilist)
726-
lilist = lilist[keep]
727-
n = n[keep]
728-
m = m[keep]
729-
end
730-
filenamemap = Dict{Symbol,String}()
731-
print_flat(io, lilist, n, m, cols, filenamemap, fmt)
732-
Base.print(io, "Total snapshots: ", totalshots, " (", round(Int, util_perc), "% utilization")
743+
is_subsection || print_flat(io, lilist, n, m, cols, filenamemap, fmt)
744+
Base.print(io, "Total snapshots: ", totalshots, ". Utilization: ", round(Int, util_perc), "%")
733745
if is_subsection
734-
println(io, ")")
746+
println(io)
747+
print_flat(io, lilist, n, m, cols, filenamemap, fmt)
735748
else
736-
println(io, " across all threads and tasks. Use the `groupby` kwarg to break down by thread and/or task)")
749+
Base.print(io, " across all threads and tasks. Use the `groupby` kwarg to break down by thread and/or task.\n")
737750
end
738751
return false
739752
end
@@ -1054,8 +1067,8 @@ function print_tree(io::IO, bt::StackFrameTree{T}, cols::Int, fmt::ProfileFormat
10541067
filenamemap = Dict{Symbol,String}()
10551068
worklist = [(bt, 0, 0, "")]
10561069
if !is_subsection
1057-
println(io, "Overhead ╎ [+additional indent] Count File:Line; Function")
1058-
println(io, "=========================================================")
1070+
Base.print(io, "Overhead ╎ [+additional indent] Count File:Line; Function\n")
1071+
Base.print(io, "=========================================================\n")
10591072
end
10601073
while !isempty(worklist)
10611074
(bt, level, noisefloor, str) = popfirst!(worklist)
@@ -1101,24 +1114,23 @@ function tree(io::IO, data::Vector{UInt64}, lidict::Union{LineInfoFlatDict, Line
11011114
root, nsleeping = tree!(StackFrameTree{UInt64}(), data, lidict, fmt.C, fmt.recur, threads, tasks)
11021115
end
11031116
util_perc = (1 - (nsleeping / root.count)) * 100
1104-
!is_subsection && print_tree(io, root, cols, fmt, is_subsection)
1117+
is_subsection || print_tree(io, root, cols, fmt, is_subsection)
11051118
if isempty(root.down)
11061119
if is_subsection
11071120
Base.print(io, "Total snapshots: ")
11081121
printstyled(io, "$(root.count)", color=Base.warn_color())
1109-
Base.println(io, ". Utilization: ", round(Int, util_perc), "%")
1122+
Base.print(io, ". Utilization: ", round(Int, util_perc), "%\n")
11101123
else
11111124
warning_empty()
11121125
end
11131126
return true
1114-
else
1115-
Base.print(io, "Total snapshots: ", root.count, ". Utilization: ", round(Int, util_perc), "%")
11161127
end
1128+
Base.print(io, "Total snapshots: ", root.count, ". Utilization: ", round(Int, util_perc), "%")
11171129
if is_subsection
1118-
println(io)
1130+
Base.println(io)
11191131
print_tree(io, root, cols, fmt, is_subsection)
11201132
else
1121-
println(io, " across all threads and tasks. Use the `groupby` kwarg to break down by thread and/or task")
1133+
Base.print(io, " across all threads and tasks. Use the `groupby` kwarg to break down by thread and/or task.\n")
11221134
end
11231135
return false
11241136
end

0 commit comments

Comments
 (0)