@@ -7,6 +7,8 @@ module Profile
77
88import Base. StackTraces: lookup, UNKNOWN, show_spec_linfo, StackFrame
99
10+ const nmeta = 4 # number of metadata fields per block (threadid, taskid, cpu_cycle_clock, thread_sleeping)
11+
1012# deprecated functions: use `getdict` instead
# Accept an instruction pointer given as a raw unsigned integer address: reinterpret it
# as a `Ptr{Cvoid}` and forward to the pointer-based `lookup` method.
function lookup(ip::UInt)
    address = convert(Ptr{Cvoid}, ip)
    return lookup(address)
end
1214
4143
4244Configure the `delay` between backtraces (measured in seconds), and the number `n` of instruction pointers that may be
4345stored per thread. Each instruction pointer corresponds to a single line of code; backtraces generally consist of a long
44- list of instruction pointers. Note that 5 spaces for instruction pointers per backtrace are used to store metadata and a marker.
45- Current settings can be obtained by calling this function with no arguments, and each can be set independently using keywords
46- or in the order `(n, delay)`.
46+ list of instruction pointers. Note that 6 spaces for instruction pointers per backtrace are used to store metadata and two
47+ NULL end markers. Current settings can be obtained by calling this function with no arguments, and each can be set independently
48+ using keywords or in the order `(n, delay)`.
4749
4850!!! compat "Julia 1.8"
4951 As of Julia 1.8, this function allocates space for `n` instruction pointers per thread being profiled.
263265function get_task_ids (data:: Vector{<:Unsigned} , threadid = nothing )
264266 taskids = UInt[]
265267 for i in length (data): - 1 : 1
266- if data[i] == 0 # find start of block
267- if isnothing (threadid) || data[i - 4 ] == threadid
268- taskid = data[i - 3 ]
268+ if is_block_end ( data, i)
269+ if isnothing (threadid) || data[i - 5 ] == threadid
270+ taskid = data[i - 4 ]
269271 ! in (taskid, taskids) && push! (taskids, taskid)
270272 end
271273 end
@@ -276,16 +278,23 @@ end
# Collect the distinct thread ids recorded in the block metadata of `data`, returned in
# ascending order. When `taskid` is given, only blocks whose recorded task id equals it
# contribute to the result.
function get_thread_ids(data::Vector{<:Unsigned}, taskid = nothing)
    found = Int[]
    # Walk the buffer from the back, stopping only at block-end positions.
    for idx in length(data):-1:1
        is_block_end(data, idx) || continue
        # The task id sits 4 slots before the block end; skip blocks of other tasks.
        (taskid === nothing || data[idx - 4] == taskid) || continue
        # The thread id sits 5 slots before the block end.
        tid = data[idx - 5]
        tid in found || push!(found, tid)
    end
    return sort(found)
end
288290
"""
    is_block_end(data, i)

Return `true` when index `i` of `data` is the final entry of a profile block.

A block is terminated by two consecutive zero (NULL) entries and `i` must point at the
second one. Two zeros are required because 32-bit linux has been seen to have rogue NULL
instruction pointers, so a single zero is not a reliable end marker.

Indices that leave no room for the `nmeta` metadata fields plus both NULL markers are
never reported as block ends, so callers may read back as far as `data[i - (nmeta + 1)]`
without going out of bounds.
"""
function is_block_end(data, i)
    # The tail of a block is `nmeta` metadata slots followed by two NULLs, so the earliest
    # possible end index is `nmeta + 2`. Rejecting anything smaller keeps the callers'
    # look-behind reads (down to `i - (nmeta + 1)`) in bounds even on malformed buffers.
    i < nmeta + 2 && return false
    return data[i] == 0 && data[i - 1] == 0
end
297+
289298"""
290299 print([io::IO = stdout,] data::Vector, lidict::LineInfoDict; kwargs...)
291300
@@ -509,22 +518,17 @@ function fetch(;include_meta = false)
509518 else
510519 nblocks = 0
511520 for i = 2 : length (data)
512- if data[i] == 0 && in (data[i - 1 ], [1 ,2 ])
513- # detect block ends and count them
514- # linux 32 has been seen to have rogue ips equal to 0 so also check for the previous entry looking like an idle
515- # state metadata entry which can only be 1 or 2
521+ if is_block_end (data, i) # detect block ends and count them
516522 nblocks += 1
517523 end
518524 end
519- nmeta = 4 # number of metadata fields (threadid, taskid, cpu_cycle_clock, thread_sleeping)
520- data_stripped = Vector {UInt} (undef, length (data) - (nblocks * nmeta))
525+ data_stripped = Vector {UInt} (undef, length (data) - (nblocks * (nmeta + 1 )))
521526 j = length (data_stripped)
522527 i = length (data)
523528 while i > 0 && j > 0
524529 data_stripped[j] = data[i]
525- if i > 1 && data[i] == 0 && in (data[i - 1 ], [1 ,2 ])
526- # detect block end (same approach as above)
527- i -= nmeta
530+ if is_block_end (data, i)
531+ i -= (nmeta + 1 ) # metadata fields and the extra NULL IP
528532 end
529533 i -= 1
530534 j -= 1
@@ -551,14 +555,14 @@ function parse_flat(::Type{T}, data::Vector{UInt64}, lidict::Union{LineInfoDict,
551555 skip = false
552556 nsleeping = 0
553557 for i in startframe: - 1 : 1
554- startframe - 1 <= i <= startframe - 4 && continue # skip metadata (it's read ahead below)
558+ startframe - 1 <= i <= startframe - (nmeta + 1 ) && continue # skip metadata (it's read ahead below) and extra block-end NULL IP
555559 ip = data[i]
556- if i > 1 && ip == 0 && in (data[i - 1 ], [ 1 , 2 ]) # check that the field next to the zero is the idle metadata entry
560+ if is_block_end (data, i)
557561 # read metadata
558- thread_sleeping = data[i - 1 ] - 1 # subtract 1 as state is incremented to avoid being equal to 0
559- # cpu_cycle_clock = data[i - 2 ]
560- taskid = data[i - 3 ]
561- threadid = data[i - 4 ]
562+ thread_sleeping = data[i - 2 ] - 1 # subtract 1 as state is incremented to avoid being equal to 0
563+ # cpu_cycle_clock = data[i - 3 ]
564+ taskid = data[i - 4 ]
565+ threadid = data[i - 5 ]
562566 if ! in (threadid, threads) || ! in (taskid, tasks)
563567 skip = true
564568 continue
@@ -799,14 +803,14 @@ function tree!(root::StackFrameTree{T}, all::Vector{UInt64}, lidict::Union{LineI
799803 skip = false
800804 nsleeping = 0
801805 for i in startframe: - 1 : 1
802- startframe - 1 <= i <= startframe - 4 && continue # skip metadata (its read ahead below)
806+ startframe - 1 <= i <= startframe - (nmeta + 1 ) && continue # skip metadata (its read ahead below) and extra block end NULL IP
803807 ip = all[i]
804- if i > 1 && ip == 0 && in (all[i - 1 ], [ 1 , 2 ]) # check that the field next to the zero is the idle metadata entry
808+ if is_block_end (all, i)
805809 # read metadata
806- thread_sleeping = all[i - 1 ] - 1 # subtract 1 as state is incremented to avoid being equal to 0
807- # cpu_cycle_clock = all[i - 2 ]
808- taskid = all[i - 3 ]
809- threadid = all[i - 4 ]
810+ thread_sleeping = all[i - 2 ] - 1 # subtract 1 as state is incremented to avoid being equal to 0
811+ # cpu_cycle_clock = all[i - 3 ]
812+ taskid = all[i - 4 ]
813+ threadid = all[i - 5 ]
810814 if ! in (threadid, threads) || ! in (taskid, tasks)
811815 skip = true
812816 continue
0 commit comments