Skip to content

Commit 9118259

Browse files
captain5050acmel
authored andcommitted
perf jevents: Compress the pmu_events_table
The pmu_events array requires 15 pointers per entry which in position
independent code need relocating. Change the array to be an array of
offsets within a big C string. Only the offset of the first variable is
required, subsequent variables are stored in order after the \0
terminator (requiring a byte per variable rather than 4 bytes per
offset).

The file size savings are:
no jevents - the same 19,788,464bytes
x86 jevents - ~16.7% file size saving 23,744,288bytes vs 28,502,632bytes
all jevents - ~19.5% file size saving 24,469,056bytes vs 30,379,920bytes
default build options plus NO_LIBBFD=1.

For example, the x86 build savings come from .rela.dyn and
.data.rel.ro becoming smaller by 3,157,032bytes and 3,042,016bytes
respectively. .rodata increases by 1,432,448bytes, giving an overall
4,766,600bytes saving.

To make metric strings more shareable, the topic is changed from say
'skx metrics' to just 'metrics'.

To try to help with the memory layout the pmu_events are ordered as used
by perf qsort comparator functions.

Signed-off-by: Ian Rogers <[email protected]>
Cc: Adrian Hunter <[email protected]>
Cc: Alexander Shishkin <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: James Clark <[email protected]>
Cc: Jiri Olsa <[email protected]>
Cc: John Garry <[email protected]>
Cc: Kan Liang <[email protected]>
Cc: Leo Yan <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Mike Leach <[email protected]>
Cc: Namhyung Kim <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Ravi Bangoria <[email protected]>
Cc: Stephane Eranian <[email protected]>
Cc: Will Deacon <[email protected]>
Cc: Xing Zhengjun <[email protected]>
Cc: [email protected]
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
1 parent d3abd7b commit 9118259

File tree

1 file changed

+162
-45
lines changed

1 file changed

+162
-45
lines changed

tools/perf/pmu-events/jevents.py

Lines changed: 162 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@
66
import json
77
import os
88
import sys
9-
from typing import (Callable, Optional, Sequence)
9+
from typing import (Callable, Dict, Optional, Sequence, Set, Tuple)
10+
import collections
1011

1112
# Global command line arguments.
1213
_args = None
@@ -20,6 +21,19 @@
2021
_close_table = False
2122
# Events to write out when the table is closed
2223
_pending_events = []
24+
# Global BigCString shared by all structures.
25+
_bcs = None
26+
# Order specific JsonEvent attributes will be visited.
27+
_json_event_attributes = [
28+
# cmp_sevent related attributes.
29+
'name', 'pmu', 'topic', 'desc', 'metric_name', 'metric_group',
30+
# Seems useful, put it early.
31+
'event',
32+
# Short things in alphabetical order.
33+
'aggr_mode', 'compat', 'deprecated', 'perpkg', 'unit',
34+
# Longer things (the last won't be iterated over during decompress).
35+
'metric_constraint', 'metric_expr', 'long_desc'
36+
]
2337

2438

2539
def removesuffix(s: str, suffix: str) -> str:
@@ -39,6 +53,66 @@ def file_name_to_table_name(parents: Sequence[str], dirname: str) -> str:
3953
tblname += '_' + dirname
4054
return tblname.replace('-', '_')
4155

56+
def c_len(s: str) -> int:
  r"""Return the length of s as a C string.

  s is the text that will be placed between double quotes in the
  generated C source, so backslash escapes are still spelled out in it.

  This doesn't handle all escape characters properly. It first assumes
  all \ are for escaping, it then adjusts as it will have over counted
  \\. The code uses \000 rather than \0 as a terminator as an adjacent
  number would be folded into a string of \0 (ie. "\0" + "5" doesn't
  equal a terminator followed by the number 5 but the escape of
  \05). The code adjusts for \000 but not properly for all octal, hex
  or unicode values.

  Raises:
    UnicodeEncodeError: if s cannot be encoded as UTF-8. The offending
      string is reported on stderr before the error is re-raised.
  """
  try:
    utf = s.encode(encoding='utf-8', errors='strict')
  except UnicodeEncodeError:
    # Report on stderr: stdout may be the generated C file itself, and
    # a diagnostic written there would corrupt the output.
    print(f'broken string {s}', file=sys.stderr)
    raise
  # Bytes, minus one per backslash (escape introducer), plus one per
  # escaped backslash pair (counted twice above), minus the two extra
  # digits of every \000 terminator.
  return len(utf) - utf.count(b'\\') + utf.count(b'\\\\') - (utf.count(b'\\000') * 2)
73+
74+
class BigCString:
  """A class to hold many strings concatenated together.

  Generating a large number of stand-alone C strings creates a large
  number of relocations in position independent code. The BigCString
  is a helper for this case. It builds a single string which within it
  are all the other C strings (to avoid memory issues the string
  itself is held as a list of strings). The offsets within the big
  string are recorded and when stored to disk these don't need
  relocation.

  Usage: call add() for every string, then compute() once; afterwards
  big_string holds the C source fragments and offsets maps each added
  string to its offset within the concatenated C string.
  """
  # Every distinct string that has been added.
  strings: Set[str]
  # C source fragments (comments, quotes and string bodies) in output order.
  big_string: Sequence[str]
  # Offset of each added string within the concatenated C string.
  offsets: Dict[str, int]

  def __init__(self):
    self.strings = set()
    # Give the computed attributes a value up front so that reading
    # them before compute() yields empty containers rather than raising
    # AttributeError.
    self.big_string = []
    self.offsets = {}

  def add(self, s: str) -> None:
    """Called to add to the big string."""
    self.strings.add(s)

  def compute(self) -> None:
    """Called once all strings are added to compute the string and offsets."""

    # big_string_offset is the current location within the C string
    # being appended to - comments, etc. don't count. big_string is
    # the string contents represented as a list. Strings are immutable
    # in Python and so appending to one causes memory issues, while
    # lists are mutable.
    big_string_offset = 0
    self.big_string = []
    self.offsets = {}
    # Emit all strings in a sorted manner.
    for s in sorted(self.strings):
      self.offsets[s] = big_string_offset
      self.big_string.append(f'/* offset={big_string_offset} */ "')
      self.big_string.append(s)
      self.big_string.append('"\n')
      # Advance by the length the string has once compiled as C, not by
      # its length as Python text.
      big_string_offset += c_len(s)
115+
_bcs = BigCString()
42116

43117
class JsonEvent:
44118
"""Representation of an event loaded from a json file dictionary."""
@@ -203,26 +277,18 @@ def __repr__(self) -> str:
203277
s += f'\t{attr} = {value},\n'
204278
return s + '}'
205279

280+
def build_c_string(self) -> str:
  """Return this event's attributes packed into one C string body.

  The attributes are visited in _json_event_attributes order. Each one
  contributes its value followed by a \\000 escape; an attribute whose
  value is falsy contributes only the \\000 escape.
  """
  pieces = []
  for attr_name in _json_event_attributes:
    value = getattr(self, attr_name)
    pieces.append(f'{value}\\000' if value else '\\000')
  return ''.join(pieces)
286+
206287
def to_c_string(self) -> str:
207288
"""Representation of the event as a C struct initializer."""
208289

209-
def attr_string(attr: str, value: str) -> str:
210-
return f'\t.{attr} = \"{value}\",\n'
211-
212-
def str_if_present(self, attr: str) -> str:
213-
if not getattr(self, attr):
214-
return ''
215-
return attr_string(attr, getattr(self, attr))
216-
217-
s = '{\n'
218-
for attr in [
219-
'aggr_mode', 'compat', 'deprecated', 'desc', 'event', 'long_desc',
220-
'metric_constraint', 'metric_expr', 'metric_group', 'metric_name',
221-
'name', 'perpkg', 'pmu', 'topic', 'unit'
222-
]:
223-
s += str_if_present(self, attr)
224-
s += '},\n'
225-
return s
290+
s = self.build_c_string()
291+
return f'{{ { _bcs.offsets[s] } }}, /* {s} */\n'
226292

227293

228294
def read_json_events(path: str, topic: str) -> Sequence[JsonEvent]:
@@ -237,7 +303,6 @@ def read_json_events(path: str, topic: str) -> Sequence[JsonEvent]:
237303
event.topic = topic
238304
return result
239305

240-
241306
def preprocess_arch_std_files(archpath: str) -> None:
242307
"""Read in all architecture standard events."""
243308
global _arch_std_events
@@ -253,7 +318,7 @@ def print_events_table_prefix(tblname: str) -> None:
253318
global _close_table
254319
if _close_table:
255320
raise IOError('Printing table prefix but last table has no suffix')
256-
_args.output_file.write(f'static const struct pmu_event {tblname}[] = {{\n')
321+
_args.output_file.write(f'static const struct compact_pmu_event {tblname}[] = {{\n')
257322
_close_table = True
258323

259324

@@ -268,13 +333,13 @@ def add_events_table_entries(item: os.DirEntry, topic: str) -> None:
268333
def print_events_table_suffix() -> None:
269334
"""Optionally close events table."""
270335

271-
def event_cmp_key(j: JsonEvent):
272-
def fix_none(s: str):
336+
def event_cmp_key(j: JsonEvent) -> Tuple[bool, str, str, str, str]:
337+
def fix_none(s: Optional[str]) -> str:
273338
if s is None:
274339
return ''
275340
return s
276341

277-
return (not j.desc is None, fix_none(j.topic), fix_none(j.name), fix_none(j.pmu),
342+
return (j.desc is not None, fix_none(j.topic), fix_none(j.name), fix_none(j.pmu),
278343
fix_none(j.metric_name))
279344

280345
global _close_table
@@ -286,23 +351,37 @@ def fix_none(s: str):
286351
_args.output_file.write(event.to_c_string())
287352
_pending_events = []
288353

289-
_args.output_file.write("""{
290-
\t.name = 0,
291-
\t.event = 0,
292-
\t.desc = 0,
293-
},
294-
};
295-
""")
354+
_args.output_file.write('};\n\n')
296355
_close_table = False
297356

357+
def get_topic(topic: str) -> str:
  """Return the topic for a JSON file name.

  Any name ending in 'metrics.json' maps to the single topic 'metrics'.
  Otherwise the '.json' suffix is removed and dashes become spaces.
  """
  is_metrics_file = topic.endswith('metrics.json')
  return 'metrics' if is_metrics_file else removesuffix(topic, '.json').replace('-', ' ')
361+
362+
def preprocess_one_file(parents: Sequence[str], item: os.DirEntry) -> None:
  """Add the C string of every event in one JSON file to the global BigCString.

  Nothing is done for directories, for entries in the base directory or
  more than four levels deep, or for entries that are not regular files
  named '*.json' (a readme.txt, for instance).
  """
  if item.is_dir():
    return

  # The base dir has no parents; anything beyond four levels is too deep.
  depth = len(parents)
  if not 1 <= depth <= 4:
    return

  # Only regular files with a .json extension carry events.
  if not (item.is_file() and item.name.endswith('.json')):
    return

  file_topic = get_topic(item.name)
  for json_event in read_json_events(item.path, file_topic):
    _bcs.add(json_event.build_c_string())
298380

299381
def process_one_file(parents: Sequence[str], item: os.DirEntry) -> None:
300382
"""Process a JSON file during the main walk."""
301383
global _sys_event_tables
302384

303-
def get_topic(topic: str) -> str:
304-
return removesuffix(topic, '.json').replace('-', ' ')
305-
306385
def is_leaf_dir(path: str) -> bool:
307386
for item in os.scandir(path):
308387
if item.is_dir():
@@ -337,7 +416,8 @@ def print_mapping_table(archs: Sequence[str]) -> None:
337416
_args.output_file.write("""
338417
/* Struct used to make the PMU event table implementation opaque to callers. */
339418
struct pmu_events_table {
340-
const struct pmu_event *entries;
419+
const struct compact_pmu_event *entries;
420+
size_t length;
341421
};
342422
343423
/*
@@ -365,7 +445,10 @@ def print_mapping_table(archs: Sequence[str]) -> None:
365445
_args.output_file.write("""{
366446
\t.arch = "testarch",
367447
\t.cpuid = "testcpu",
368-
\t.table = { pme_test_soc_cpu },
448+
\t.table = {
449+
\t.entries = pme_test_soc_cpu,
450+
\t.length = ARRAY_SIZE(pme_test_soc_cpu),
451+
\t}
369452
},
370453
""")
371454
else:
@@ -380,15 +463,18 @@ def print_mapping_table(archs: Sequence[str]) -> None:
380463
_args.output_file.write(f"""{{
381464
\t.arch = "{arch}",
382465
\t.cpuid = "{cpuid}",
383-
\t.table = {{ {tblname} }}
466+
\t.table = {{
467+
\t\t.entries = {tblname},
468+
\t\t.length = ARRAY_SIZE({tblname})
469+
\t}}
384470
}},
385471
""")
386472
first = False
387473

388474
_args.output_file.write("""{
389475
\t.arch = 0,
390476
\t.cpuid = 0,
391-
\t.table = { 0 },
477+
\t.table = { 0, 0 },
392478
}
393479
};
394480
""")
@@ -406,23 +492,41 @@ def print_system_mapping_table() -> None:
406492
""")
407493
for tblname in _sys_event_tables:
408494
_args.output_file.write(f"""\t{{
409-
\t\t.table = {{ {tblname} }},
495+
\t\t.table = {{
496+
\t\t\t.entries = {tblname},
497+
\t\t\t.length = ARRAY_SIZE({tblname})
498+
\t\t}},
410499
\t\t.name = \"{tblname}\",
411500
\t}},
412501
""")
413502
_args.output_file.write("""\t{
414-
\t\t.table = { 0 }
503+
\t\t.table = { 0, 0 }
415504
\t},
416505
};
417506
418-
int pmu_events_table_for_each_event(const struct pmu_events_table *table, pmu_event_iter_fn fn,
507+
static void decompress(int offset, struct pmu_event *pe)
508+
{
509+
\tconst char *p = &big_c_string[offset];
510+
""")
511+
for attr in _json_event_attributes:
512+
_args.output_file.write(f"""
513+
\tpe->{attr} = (*p == '\\0' ? NULL : p);
514+
""")
515+
if attr == _json_event_attributes[-1]:
516+
continue
517+
_args.output_file.write('\twhile (*p++);')
518+
_args.output_file.write("""}
519+
520+
int pmu_events_table_for_each_event(const struct pmu_events_table *table,
521+
pmu_event_iter_fn fn,
419522
void *data)
420523
{
421-
for (const struct pmu_event *pe = &table->entries[0];
422-
pe->name || pe->metric_group || pe->metric_name;
423-
pe++) {
424-
int ret = fn(pe, table, data);
524+
for (size_t i = 0; i < table->length; i++) {
525+
struct pmu_event pe;
526+
int ret;
425527
528+
decompress(table->entries[i].offset, &pe);
529+
ret = fn(&pe, table, data);
426530
if (ret)
427531
return ret;
428532
}
@@ -531,7 +635,7 @@ def ftw(path: str, parents: Sequence[str],
531635
help='Root of tree containing architecture directories containing json files'
532636
)
533637
ap.add_argument(
534-
'output_file', type=argparse.FileType('w'), nargs='?', default=sys.stdout)
638+
'output_file', type=argparse.FileType('w', encoding='utf-8'), nargs='?', default=sys.stdout)
535639
_args = ap.parse_args()
536640

537641
_args.output_file.write("""
@@ -541,6 +645,10 @@ def ftw(path: str, parents: Sequence[str],
541645
#include <string.h>
542646
#include <stddef.h>
543647
648+
struct compact_pmu_event {
649+
int offset;
650+
};
651+
544652
""")
545653
archs = []
546654
for item in os.scandir(_args.starting_dir):
@@ -556,6 +664,15 @@ def ftw(path: str, parents: Sequence[str],
556664
for arch in archs:
557665
arch_path = f'{_args.starting_dir}/{arch}'
558666
preprocess_arch_std_files(arch_path)
667+
ftw(arch_path, [], preprocess_one_file)
668+
669+
_bcs.compute()
670+
_args.output_file.write('static const char *const big_c_string =\n')
671+
for s in _bcs.big_string:
672+
_args.output_file.write(s)
673+
_args.output_file.write(';\n\n')
674+
for arch in archs:
675+
arch_path = f'{_args.starting_dir}/{arch}'
559676
ftw(arch_path, [], process_one_file)
560677
print_events_table_suffix()
561678

0 commit comments

Comments
 (0)