Skip to content

Commit 56c31cd

Browse files
German Gomez authored and acmel committed
perf arm-spe: Implement find_snapshot callback
The head pointer of the AUX buffer managed by the arm_spe_pmu.c driver is not monotonically increasing, therefore the find_snapshot callback is needed in order to find the trace data within the AUX buffer and avoid wasting space in the perf.data file. The pointer is assumed to have wrapped if the buffer contains non-zero data at the end. If it has wrapped, the entire contents of the AUX buffer are stored in the perf.data file. Otherwise only the data up to the head pointer is stored. Reviewed-by: James Clark <[email protected]> Reviewed-by: Leo Yan <[email protected]> Signed-off-by: German Gomez <[email protected]> Acked-by: Namhyung Kim <[email protected]> Cc: Alexander Shishkin <[email protected]> Cc: Jiri Olsa <[email protected]> Cc: John Garry <[email protected]> Cc: Mark Rutland <[email protected]> Cc: Mathieu Poirier <[email protected]> Cc: Will Deacon <[email protected]> Cc: [email protected] Link: https://lore.kernel.org/r/[email protected] Tested-by: Leo Yan <[email protected]> Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
1 parent 0901b56 commit 56c31cd

File tree

1 file changed

+145
-0
lines changed

1 file changed

+145
-0
lines changed

tools/perf/arch/arm64/util/arm-spe.c

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include "../../../util/auxtrace.h"
2424
#include "../../../util/record.h"
2525
#include "../../../util/arm-spe.h"
26+
#include <tools/libc_compat.h> // reallocarray
2627

2728
#define KiB(x) ((x) * 1024)
2829
#define MiB(x) ((x) * 1024 * 1024)
@@ -31,6 +32,8 @@ struct arm_spe_recording {
3132
struct auxtrace_record itr;
3233
struct perf_pmu *arm_spe_pmu;
3334
struct evlist *evlist;
35+
int wrapped_cnt;
36+
bool *wrapped;
3437
};
3538

3639
static void arm_spe_set_timestamp(struct auxtrace_record *itr,
@@ -299,6 +302,146 @@ static int arm_spe_snapshot_finish(struct auxtrace_record *itr)
299302
return -EINVAL;
300303
}
301304

305+
static int arm_spe_alloc_wrapped_array(struct arm_spe_recording *ptr, int idx)
306+
{
307+
bool *wrapped;
308+
int cnt = ptr->wrapped_cnt, new_cnt, i;
309+
310+
/*
311+
* No need to allocate, so return early.
312+
*/
313+
if (idx < cnt)
314+
return 0;
315+
316+
/*
317+
* Make ptr->wrapped as big as idx.
318+
*/
319+
new_cnt = idx + 1;
320+
321+
/*
322+
* Free'ed in arm_spe_recording_free().
323+
*/
324+
wrapped = reallocarray(ptr->wrapped, new_cnt, sizeof(bool));
325+
if (!wrapped)
326+
return -ENOMEM;
327+
328+
/*
329+
* init new allocated values.
330+
*/
331+
for (i = cnt; i < new_cnt; i++)
332+
wrapped[i] = false;
333+
334+
ptr->wrapped_cnt = new_cnt;
335+
ptr->wrapped = wrapped;
336+
337+
return 0;
338+
}
339+
340+
/*
 * Tell whether the AUX ring buffer has been written all the way to its end
 * at least once, i.e. whether head has wrapped around.
 *
 * @buffer:      start of the AUX ring buffer
 * @buffer_size: size of the ring buffer in bytes
 * @head:        current head offset reported for the buffer
 *
 * Returns true when head is at or beyond the buffer size, or when non-zero
 * data is found in the tail of the buffer at or after head; false otherwise.
 *
 * The detection relies on mmap'ed pages being zeroed and on a fresh mapping
 * being used for every new session: any non-zero byte near the end of the
 * buffer then means head has already been there.
 */
static bool arm_spe_buffer_has_wrapped(unsigned char *buffer,
				       size_t buffer_size, u64 head)
{
	/* Size of the tail window that is inspected (chosen arbitrarily). */
	const size_t tail_len = 512;
	size_t pos;

	/*
	 * Defensively handle the case where head might be continually
	 * increasing - if its value is equal to or greater than the size of
	 * the ring buffer, it has certainly wrapped around.
	 */
	if (head >= buffer_size)
		return true;

	/*
	 * Start of the window to inspect: the last tail_len bytes of the
	 * buffer, or the whole buffer when it is shorter than that (avoids
	 * the unsigned underflow of "buffer_size - tail_len").
	 */
	pos = buffer_size > tail_len ? buffer_size - tail_len : 0;

	/*
	 * head lies inside the window: everything before it is data that was
	 * just written, so only the bytes from head onwards are of interest.
	 */
	if (head > pos)
		pos = (size_t)head;

	/*
	 * Byte-wise up to the next 64-bit offset, so that an unaligned start
	 * never causes bytes located before it to be inspected.
	 */
	for (; pos < buffer_size && pos % sizeof(u64); pos++) {
		if (buffer[pos])
			return true;
	}

	/* Speed things up by using 64 bit transactions for the aligned part. */
	for (; buffer_size - pos >= sizeof(u64); pos += sizeof(u64)) {
		if (*(u64 *)(buffer + pos))
			return true;
	}

	/* Remaining bytes when buffer_size is not a multiple of 64 bits. */
	for (; pos < buffer_size; pos++) {
		if (buffer[pos])
			return true;
	}

	return false;
}
390+
391+
/*
 * find_snapshot callback: adjust *head and *old so that they delimit the
 * trace data to be taken from the AUX buffer for this snapshot.
 *
 * @itr:  auxtrace record embedded in a struct arm_spe_recording
 * @idx:  index of the mmap being handled; selects the wrapped[] slot
 * @mm:   the AUX mmap; only mm->len is read here
 * @data: start of the AUX buffer contents
 * @head: in/out, current head of the buffer
 * @old:  in/out, previous head of the buffer
 *
 * Returns 0 on success, or the error returned by
 * arm_spe_alloc_wrapped_array() when the tracking array cannot be grown.
 */
static int arm_spe_find_snapshot(struct auxtrace_record *itr, int idx,
				 struct auxtrace_mmap *mm, unsigned char *data,
				 u64 *head, u64 *old)
{
	struct arm_spe_recording *sper =
			container_of(itr, struct arm_spe_recording, itr);
	int ret;

	/*
	 * First time this mmap index is seen: make room to remember
	 * whether its buffer has wrapped.
	 */
	if (idx >= sper->wrapped_cnt) {
		ret = arm_spe_alloc_wrapped_array(sper, idx);
		if (ret)
			return ret;
	}

	/*
	 * Once a buffer has been seen to wrap the flag stays set, so the
	 * buffer contents are only inspected while it is still clear.
	 */
	if (!sper->wrapped[idx] &&
	    arm_spe_buffer_has_wrapped(data, mm->len, *head))
		sper->wrapped[idx] = true;

	pr_debug3("%s: mmap index %d old head %zu new head %zu size %zu\n",
		  __func__, idx, (size_t)*old, (size_t)*head, mm->len);

	/*
	 * Not wrapped: *head and *old are used unchanged, so only the data
	 * between them is snapshot'ed and perf.data is not bloated with
	 * zeros.
	 */
	if (!sper->wrapped[idx])
		return 0;

	/*
	 * Wrapped: the entire AUX buffer is taken. Make sure *head is at
	 * least one buffer length, then place *old exactly one buffer length
	 * behind it.
	 */
	if (*head < mm->len)
		*head += mm->len;
	*old = *head - mm->len;

	return 0;
}
444+
302445
static u64 arm_spe_reference(struct auxtrace_record *itr __maybe_unused)
303446
{
304447
struct timespec ts;
@@ -313,6 +456,7 @@ static void arm_spe_recording_free(struct auxtrace_record *itr)
313456
struct arm_spe_recording *sper =
314457
container_of(itr, struct arm_spe_recording, itr);
315458

459+
free(sper->wrapped);
316460
free(sper);
317461
}
318462

@@ -336,6 +480,7 @@ struct auxtrace_record *arm_spe_recording_init(int *err,
336480
sper->itr.pmu = arm_spe_pmu;
337481
sper->itr.snapshot_start = arm_spe_snapshot_start;
338482
sper->itr.snapshot_finish = arm_spe_snapshot_finish;
483+
sper->itr.find_snapshot = arm_spe_find_snapshot;
339484
sper->itr.parse_snapshot_options = arm_spe_parse_snapshot_options;
340485
sper->itr.recording_options = arm_spe_recording_options;
341486
sper->itr.info_priv_size = arm_spe_info_priv_size;

0 commit comments

Comments
 (0)