From ec523e8b5e3896d5791bb35fbe952ce60f16ec2c Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Sat, 25 Oct 2025 00:02:01 -0700 Subject: [PATCH 1/2] [libc][hdrgen] Sort identifiers with leading underscores specially This makes the sorting behavior more uniform: functions and macros are always sorted (separately), not only when merging. This changes the sort order used for functions and other things sorted by their symbol names. Symbols are sorted alphabetically without regard to leading underscores, and then for identifiers that differ only in the number of leading underscores, the fewer underscores the earlier in the sort order. For the functions declared in a generated header, adjacent names with and without underscores will be grouped together without blank lines. This is implemented by factoring the name field, equality, and sorting support out of the various entity classes into a new common superclass (hdrgen.Symbol). --- libc/utils/hdrgen/hdrgen/enumeration.py | 16 ++------ libc/utils/hdrgen/hdrgen/function.py | 16 ++------ libc/utils/hdrgen/hdrgen/header.py | 14 +++++-- libc/utils/hdrgen/hdrgen/macro.py | 16 ++------ libc/utils/hdrgen/hdrgen/main.py | 1 + libc/utils/hdrgen/hdrgen/object.py | 16 ++------ libc/utils/hdrgen/hdrgen/symbol.py | 41 +++++++++++++++++++ libc/utils/hdrgen/hdrgen/type.py | 20 +++------ .../hdrgen/tests/expected_output/sorting.h | 24 +++++++++++ libc/utils/hdrgen/tests/input/sorting.yaml | 20 +++++++++ libc/utils/hdrgen/tests/test_integration.py | 7 ++++ 11 files changed, 120 insertions(+), 71 deletions(-) create mode 100644 libc/utils/hdrgen/hdrgen/symbol.py create mode 100644 libc/utils/hdrgen/tests/expected_output/sorting.h create mode 100644 libc/utils/hdrgen/tests/input/sorting.yaml diff --git a/libc/utils/hdrgen/hdrgen/enumeration.py b/libc/utils/hdrgen/hdrgen/enumeration.py index 198720826720c..1e0f64aec1eda 100644 --- a/libc/utils/hdrgen/hdrgen/enumeration.py +++ b/libc/utils/hdrgen/hdrgen/enumeration.py @@ -6,24 +6,14 @@ # # ==-------------------------------------------------------------------------==# -from functools import total_ordering +from hdrgen.symbol import Symbol -@total_ordering -class Enumeration: +class Enumeration(Symbol): def __init__(self, name, value): - self.name = name + super().__init__(name) self.value = value - def __eq__(self, other): - return self.name == other.name - - def __lt__(self, other): - return self.name < other.name - - def __hash__(self): - return self.name.__hash__() - def __str__(self): if self.value != None: return f"{self.name} = {self.value}" diff --git a/libc/utils/hdrgen/hdrgen/function.py b/libc/utils/hdrgen/hdrgen/function.py index f039996584e31..4de3406cc408e 100644 --- a/libc/utils/hdrgen/hdrgen/function.py +++ b/libc/utils/hdrgen/hdrgen/function.py @@ -7,7 +7,7 @@ # ==-------------------------------------------------------------------------==# import re -from functools import total_ordering +from hdrgen.symbol import Symbol from hdrgen.type import Type @@ -37,14 +37,13 @@ NONIDENTIFIER = re.compile("[^a-zA-Z0-9_]+") -@total_ordering -class Function: +class Function(Symbol): def __init__( self, return_type, name, arguments, standards, guard=None, attributes=[] ): + super().__init__(name) assert return_type self.return_type = return_type - self.name = name self.arguments = [ arg if isinstance(arg, str) else arg["type"] for arg in arguments ] @@ -53,15 +52,6 @@ def __init__( self.guard = guard self.attributes = attributes or [] - def __eq__(self, other): - return self.name == other.name - - def __lt__(self, other): - return self.name < other.name - - def __hash__(self): - return self.name.__hash__() - def signature_types(self): def collapse(type_string): assert type_string diff --git a/libc/utils/hdrgen/hdrgen/header.py b/libc/utils/hdrgen/hdrgen/header.py index 558ee58469207..f592327f06ad6 100644 --- a/libc/utils/hdrgen/hdrgen/header.py +++ b/libc/utils/hdrgen/hdrgen/header.py @@ -147,8 +147,8 @@ def includes(self): } | { COMPILER_HEADER_TYPES.get( - typ.type_name, - PurePosixPath("llvm-libc-types") / f"{typ.type_name}.h", + typ.name, + PurePosixPath("llvm-libc-types") / f"{typ.name}.h", ) for typ in self.all_types() } @@ -227,7 +227,7 @@ def relpath(file): ) ] - for macro in self.macros: + for macro in sorted(self.macros): # When there is nothing to define, the Macro object converts to str # as an empty string. Don't emit a blank line for those cases. if str(macro): @@ -242,7 +242,12 @@ def relpath(file): content.append("\n__BEGIN_C_DECLS\n") current_guard = None - for function in self.functions: + last_name = None + for function in sorted(self.functions): + # If the last function's name was the same after underscores, + # elide the blank line between the declarations. + if last_name == function.name_without_underscores(): + content.pop() if function.guard == None and current_guard == None: content.append(str(function) + " __NOEXCEPT;") content.append("") @@ -264,6 +269,7 @@ def relpath(file): content.append(f"#ifdef {current_guard}") content.append(str(function) + " __NOEXCEPT;") content.append("") + last_name = function.name_without_underscores() if current_guard != None: content.pop() content.append(f"#endif // {current_guard}") diff --git a/libc/utils/hdrgen/hdrgen/macro.py b/libc/utils/hdrgen/hdrgen/macro.py index e42e82845694d..4664d9fb00494 100644 --- a/libc/utils/hdrgen/hdrgen/macro.py +++ b/libc/utils/hdrgen/hdrgen/macro.py @@ -6,25 +6,15 @@ # # ==-------------------------------------------------------------------------==# -from functools import total_ordering +from hdrgen.symbol import Symbol -@total_ordering -class Macro: +class Macro(Symbol): def __init__(self, name, value=None, header=None): - self.name = name + super().__init__(name) self.value = value self.header = header - def __eq__(self, other): - return self.name == other.name - - def __lt__(self, other): - return self.name < other.name - - def __hash__(self): - return self.name.__hash__() - def __str__(self): if self.header != None: return "" diff --git a/libc/utils/hdrgen/hdrgen/main.py b/libc/utils/hdrgen/hdrgen/main.py index 25df41e506a1f..c12e89ef771d1 100755 --- a/libc/utils/hdrgen/hdrgen/main.py +++ b/libc/utils/hdrgen/hdrgen/main.py @@ -105,6 +105,7 @@ def merge_from(paths): return 2 header.merge(merge_from_header) + assert header.name, f"`header: name.h` line is required in {yaml_file}" return header if args.json: diff --git a/libc/utils/hdrgen/hdrgen/object.py b/libc/utils/hdrgen/hdrgen/object.py index a311c37168d60..a2ab496bed013 100644 --- a/libc/utils/hdrgen/hdrgen/object.py +++ b/libc/utils/hdrgen/hdrgen/object.py @@ -6,23 +6,13 @@ # # ==-------------------------------------------------------------------------==# -from functools import total_ordering +from hdrgen.symbol import Symbol -@total_ordering -class Object: +class Object(Symbol): def __init__(self, name, type): - self.name = name + super().__init__(name) self.type = type - def __eq__(self, other): - return self.name == other.name - - def __lt__(self, other): - return self.name < other.name - - def __hash__(self): - return self.name.__hash__() - def __str__(self): return f"extern {self.type} {self.name};" diff --git a/libc/utils/hdrgen/hdrgen/symbol.py b/libc/utils/hdrgen/hdrgen/symbol.py new file mode 100644 index 0000000000000..28e9def128e47 --- /dev/null +++ b/libc/utils/hdrgen/hdrgen/symbol.py @@ -0,0 +1,41 @@ +# ====-- Symbol class for libc function headers----------------*- python -*--==# +# +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ==-------------------------------------------------------------------------==# + +from functools import total_ordering + + +@total_ordering +class Symbol: + """ + Symbol is the common superclass for each kind of entity named by an + identifier. It provides the name field, and defines sort ordering, + hashing, and equality based only on the name. The sorting is pretty + presentation order for identifiers, which is to say it first sorts + lexically but ignores leading underscores and secondarily sorts with the + fewest underscores first. + """ + + def __init__(self, name): + assert name + self.name = name + + def __eq__(self, other): + return self.name == other.name + + def __hash__(self): + return self.name.__hash__() + + def name_without_underscores(self): + return self.name.lstrip("_") + + def name_sort_key(self): + ident = self.name_without_underscores() + return ident, len(self.name) - len(ident) + + def __lt__(self, other): + return self.name_sort_key() < other.name_sort_key() diff --git a/libc/utils/hdrgen/hdrgen/type.py b/libc/utils/hdrgen/hdrgen/type.py index 0c0af8569c61e..20c1881a9379a 100644 --- a/libc/utils/hdrgen/hdrgen/type.py +++ b/libc/utils/hdrgen/hdrgen/type.py @@ -6,20 +6,10 @@ # # ==-------------------------------------------------------------------------==# -from functools import total_ordering +from hdrgen.symbol import Symbol -@total_ordering -class Type: - def __init__(self, type_name): - assert type_name - self.type_name = type_name - - def __eq__(self, other): - return self.type_name == other.type_name - - def __lt__(self, other): - return self.type_name < other.type_name - - def __hash__(self): - return self.type_name.__hash__() +class Type(Symbol): + # A type so far carries no specific information beyond its name. + def __init__(self, name): + super().__init__(name) diff --git a/libc/utils/hdrgen/tests/expected_output/sorting.h b/libc/utils/hdrgen/tests/expected_output/sorting.h new file mode 100644 index 0000000000000..a091a421b2c3f --- /dev/null +++ b/libc/utils/hdrgen/tests/expected_output/sorting.h @@ -0,0 +1,24 @@ +//===-- Standard C header --===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// + +#ifndef _LLVM_LIBC_SORTING_H +#define _LLVM_LIBC_SORTING_H + +#include "__llvm-libc-common.h" + +__BEGIN_C_DECLS + +void func_with_aliases(int) __NOEXCEPT; +void _func_with_aliases(int) __NOEXCEPT; +void __func_with_aliases(int) __NOEXCEPT; + +void gunk(const char *) __NOEXCEPT; + +__END_C_DECLS + +#endif // _LLVM_LIBC_SORTING_H diff --git a/libc/utils/hdrgen/tests/input/sorting.yaml b/libc/utils/hdrgen/tests/input/sorting.yaml new file mode 100644 index 0000000000000..3c26cde9e6c41 --- /dev/null +++ b/libc/utils/hdrgen/tests/input/sorting.yaml @@ -0,0 +1,20 @@ +header: sorting.h +standards: + - stdc +functions: + - name: gunk + return_type: void + arguments: + - type: const char * + - name: _func_with_aliases + return_type: void + arguments: + - type: int + - name: func_with_aliases + return_type: void + arguments: + - type: int + - name: __func_with_aliases + return_type: void + arguments: + - type: int diff --git a/libc/utils/hdrgen/tests/test_integration.py b/libc/utils/hdrgen/tests/test_integration.py index c6e76d826a3a4..b975d8ff007b1 100644 --- a/libc/utils/hdrgen/tests/test_integration.py +++ b/libc/utils/hdrgen/tests/test_integration.py @@ -75,6 +75,13 @@ def test_generate_json(self): self.compare_files(output_file, expected_output_file) + def test_sorting(self): + yaml_file = self.source_dir / "input" / "sorting.yaml" + expected_output_file = self.source_dir / "expected_output" / "sorting.h" + output_file = self.output_dir / "sorting.h" + self.run_script(yaml_file, output_file) + self.compare_files(output_file, expected_output_file) + def main(): parser = argparse.ArgumentParser(description="TestHeaderGenIntegration arguments") From 2670f301a5ef83e7f533e2f0f615164d4b0a99d1 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Thu, 30 Oct 2025 10:20:38 -0700 Subject: [PATCH 2/2] Quote "NULL" string values in YAML --- libc/include/locale.yaml | 2 +- libc/include/stdio.yaml | 2 +- libc/include/stdlib.yaml | 2 +- libc/include/string.yaml | 2 +- libc/include/time.yaml | 2 +- libc/include/wchar.yaml | 8 ++++---- 6 files changed, 9 insertions(+), 9 deletions(-) diff --git a/libc/include/locale.yaml b/libc/include/locale.yaml index 4566984ad83af..3c3998eb07aa4 100644 --- a/libc/include/locale.yaml +++ b/libc/include/locale.yaml @@ -1,7 +1,7 @@ header: locale.h header_template: locale.h.def macros: - - macro_name: NULL + - macro_name: "NULL" macro_header: null-macro.h types: - type_name: locale_t diff --git a/libc/include/stdio.yaml b/libc/include/stdio.yaml index 394437ba3bbcd..c50b4ecb0bf08 100644 --- a/libc/include/stdio.yaml +++ b/libc/include/stdio.yaml @@ -1,7 +1,7 @@ header: stdio.h header_template: stdio.h.def macros: - - macro_name: NULL + - macro_name: "NULL" macro_header: null-macro.h - macro_name: stdout macro_value: stdout diff --git a/libc/include/stdlib.yaml b/libc/include/stdlib.yaml index 3b2ff13c684b1..495eb7e1317b6 100644 --- a/libc/include/stdlib.yaml +++ b/libc/include/stdlib.yaml @@ -5,7 +5,7 @@ standards: merge_yaml_files: - stdlib-malloc.yaml macros: - - macro_name: NULL + - macro_name: "NULL" macro_header: null-macro.h types: - type_name: __atexithandler_t diff --git a/libc/include/string.yaml b/libc/include/string.yaml index 0bf297ee747a4..22010f4afa812 100644 --- a/libc/include/string.yaml +++ b/libc/include/string.yaml @@ -2,7 +2,7 @@ header: string.h standards: - stdc macros: - - macro_name: NULL + - macro_name: "NULL" macro_header: null-macro.h types: - type_name: locale_t diff --git a/libc/include/time.yaml b/libc/include/time.yaml index 2f8024298fad1..88e50d1288238 100644 --- a/libc/include/time.yaml +++ b/libc/include/time.yaml @@ -1,7 +1,7 @@ header: time.h header_template: time.h.def macros: - - macro_name: NULL + - macro_name: "NULL" macro_header: null-macro.h types: - type_name: struct_timeval diff --git a/libc/include/wchar.yaml b/libc/include/wchar.yaml index b8a0a748cd3ad..c8b9e21b56b28 100644 --- a/libc/include/wchar.yaml +++ b/libc/include/wchar.yaml @@ -1,7 +1,7 @@ header: wchar.h header_template: wchar.h.def macros: - - macro_name: NULL + - macro_name: "NULL" macro_header: null-macro.h types: - type_name: FILE @@ -188,8 +188,8 @@ functions: standards: - stdc return_type: wchar_t * - arguments: - - type: wchar_t *__restrict + arguments: + - type: wchar_t *__restrict - type: const wchar_t *__restrict - type: size_t - name: wmemmove @@ -212,7 +212,7 @@ functions: standards: - stdc return_type: wchar_t * - arguments: + arguments: - type: wchar_t *__restrict - type: const wchar_t *__restrict - name: wcslcat