From cf1cab9082d261d6f13b88edb4f01ea98e01152b Mon Sep 17 00:00:00 2001 From: Geoffrey Thomas Date: Mon, 18 Aug 2025 17:39:17 -0400 Subject: [PATCH 1/2] CI experiment: Build on latest Debian Mostly just want to see what the symbol validation failures are.... --- cpython-unix/base.Dockerfile | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/cpython-unix/base.Dockerfile b/cpython-unix/base.Dockerfile index 7e666e045..96aba06a5 100644 --- a/cpython-unix/base.Dockerfile +++ b/cpython-unix/base.Dockerfile @@ -1,5 +1,5 @@ -# Debian Jessie. -FROM debian@sha256:32ad5050caffb2c7e969dac873bce2c370015c2256ff984b70c1c08b3a2816a0 +# Debian 13 "Trixie" +FROM debian:trixie MAINTAINER Gregory Szorc RUN groupadd -g 1000 build && \ @@ -16,12 +16,7 @@ ENV HOME=/build \ CMD ["/bin/bash", "--login"] WORKDIR '/build' - -# Jessie's signing keys expired in late 2022. So need to add [trusted=yes] to force trust. -# Jessie stopped publishing snapshots in March 2023. -RUN for s in debian_jessie debian_jessie-updates debian-security_jessie/updates; do \ - echo "deb [trusted=yes] http://snapshot.debian.org/archive/${s%_*}/20230322T152120Z/ ${s#*_} main"; \ - done > /etc/apt/sources.list && \ +RUN \ ( echo 'quiet "true";'; \ echo 'APT::Get::Assume-Yes "true";'; \ echo 'APT::Install-Recommends "false";'; \ From 74c5ba482804c318307a94aad274ae9684e0d099 Mon Sep 17 00:00:00 2001 From: Geoffrey Thomas Date: Thu, 21 Aug 2025 02:20:23 +0000 Subject: [PATCH 2/2] Add a tool for reducing glibc versions --- cpython-unix/antiquator/.gitignore | 10 ++ cpython-unix/antiquator/Makefile | 23 +++ cpython-unix/antiquator/README.md | 40 +++++ cpython-unix/antiquator/bits/mman-shared.h | 7 + cpython-unix/antiquator/clang.config | 2 + cpython-unix/antiquator/elf-init.c | 107 ++++++++++++ cpython-unix/antiquator/elf-initfini.h | 9 + cpython-unix/antiquator/gcc.specs | 37 ++++ cpython-unix/antiquator/libc_start_main.c | 25 +++ .../antiquator/make_shadow_libraries.py | 163 ++++++++++++++++++ 
cpython-unix/antiquator/wrap-linker | 92 ++++++++++ 11 files changed, 515 insertions(+) create mode 100644 cpython-unix/antiquator/.gitignore create mode 100644 cpython-unix/antiquator/Makefile create mode 100644 cpython-unix/antiquator/README.md create mode 100644 cpython-unix/antiquator/bits/mman-shared.h create mode 100644 cpython-unix/antiquator/clang.config create mode 100644 cpython-unix/antiquator/elf-init.c create mode 100644 cpython-unix/antiquator/elf-initfini.h create mode 100644 cpython-unix/antiquator/gcc.specs create mode 100644 cpython-unix/antiquator/libc_start_main.c create mode 100755 cpython-unix/antiquator/make_shadow_libraries.py create mode 100755 cpython-unix/antiquator/wrap-linker diff --git a/cpython-unix/antiquator/.gitignore b/cpython-unix/antiquator/.gitignore new file mode 100644 index 000000000..84f3c3ee6 --- /dev/null +++ b/cpython-unix/antiquator/.gitignore @@ -0,0 +1,10 @@ +lib*_placeholder.c +lib*_placeholder.versions +lib*_placeholder.so +*.o +*.a +glibc-*/ +glibc_*.debian.tar.xz +glibc_*.dsc +glibc_*.orig.tar.xz +glibc_*.orig.tar.xz.asc diff --git a/cpython-unix/antiquator/Makefile b/cpython-unix/antiquator/Makefile new file mode 100644 index 000000000..2b722097e --- /dev/null +++ b/cpython-unix/antiquator/Makefile @@ -0,0 +1,23 @@ +GLIBC ?= glibc-2.39/ + +LIBS := libanl libc libdl libm libpthread libresolv librt libutil +HELPERS := elf-init.o libc_start_main.o + +all: ${LIBS:=_placeholder.so} libantiquator_helpers.a(${HELPERS}) + +%.so: %.c %.versions + ${CC} -fPIC -shared -o $@ $< -Wl,--version-script,$*.versions + +${LIBS:=_placeholder.c} ${LIBS:=_placeholder.versions} &: make_shadow_libraries.py ${GLIBC} + ./make_shadow_libraries.py ${GLIBC} . + +clean: + -${RM} ${LIBS:=_placeholder.so} ${LIBS:=_placeholder.c} ${LIBS:=_placeholder.versions} ${HELPERS} libantiquator_helpers.a + +elf-init.o: CFLAGS += -I.
+ +glibc-%/: + apt-get source glibc + +.PHONY: all clean +.SECONDARY: diff --git a/cpython-unix/antiquator/README.md b/cpython-unix/antiquator/README.md new file mode 100644 index 000000000..0fac41989 --- /dev/null +++ b/cpython-unix/antiquator/README.md @@ -0,0 +1,40 @@ +Antiquator - Use a newer glibc to build for older glibc versions +=== + +This is a set of utilities to enable building binaries using a newer +glibc that can run on older glibc versions: if a symbol exists in an +older version, prefer that implementation, and if a symbol does not +exist in an older version, enable weak linking against it (runtime value +is NULL if not found). + +First, run `make` to build shadow libraries from glibc sources. If you +already have a glibc checkout, you can use `make GLIBC=/path/to/glibc`. +This should be as new as your compile-time glibc; newer will probably +work fine but is probably not helpful, since you won't pick up any +symbols from it. + +Then set the `antiquator` environment variable to this directory. (This +needs to be an exported environment variable.) + +Finally, use `gcc -specs ${antiquator}/gcc.specs` or `clang --config +${antiquator}/clang.config` for compiling and linking (e.g. add those options +to `CFLAGS` and `LDFLAGS`). + +The clang implementation uses `-fuse-ld` internally; to pick the actual +linker, you can use `-Wl,-fuse-ld=...`. + +Requirements +--- + +You need a relatively recent toolchain (binutils 2.35 or newer) and +patchelf. + +lld does not seem to work as a linker. This might be an lld bug. (bfd ld and +gold both seem to work.) + +Credits +--- + +elf-init.c is taken from glibc 2.33, the last version that had it. It is +unmodified from the version that was in glibc and is used under the license +exception stated in the file.
diff --git a/cpython-unix/antiquator/bits/mman-shared.h b/cpython-unix/antiquator/bits/mman-shared.h new file mode 100644 index 000000000..d0616a674 --- /dev/null +++ b/cpython-unix/antiquator/bits/mman-shared.h @@ -0,0 +1,7 @@ +#include_next <bits/mman-shared.h> + +#define weaken(sym) extern __typeof(sym) sym __attribute__((weak)) + +#ifdef _GNU_SOURCE +weaken(memfd_create); +#endif diff --git a/cpython-unix/antiquator/clang.config b/cpython-unix/antiquator/clang.config new file mode 100644 index 000000000..703420a2c --- /dev/null +++ b/cpython-unix/antiquator/clang.config @@ -0,0 +1,2 @@ +-isystem <CFGDIR> +-fuse-ld=<CFGDIR>/wrap-linker diff --git a/cpython-unix/antiquator/elf-init.c b/cpython-unix/antiquator/elf-init.c new file mode 100644 index 000000000..6e96ab7fc --- /dev/null +++ b/cpython-unix/antiquator/elf-init.c @@ -0,0 +1,107 @@ +/* Startup support for ELF initializers/finalizers in the main executable. + Copyright (C) 2002-2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + In addition to the permissions in the GNU Lesser General Public + License, the Free Software Foundation gives you unlimited + permission to link the compiled version of this file with other + programs, and to distribute those programs without any restriction + coming from the use of this file. (The GNU Lesser General Public + License restrictions do apply in other respects; for example, they + cover modification of the file, and distribution when not linked + into another program.) + + Note that people who make modified versions of this file are not + obligated to grant this special exception for their modified + versions; it is their choice whether to do so.
The GNU Lesser + General Public License gives permission to release a modified + version without this exception; this exception also makes it + possible to release a modified version which carries forward this + exception. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <stddef.h> +#include <elf-initfini.h> + + +/* These magic symbols are provided by the linker. */ +extern void (*__preinit_array_start []) (int, char **, char **) + attribute_hidden; +extern void (*__preinit_array_end []) (int, char **, char **) + attribute_hidden; +extern void (*__init_array_start []) (int, char **, char **) + attribute_hidden; +extern void (*__init_array_end []) (int, char **, char **) + attribute_hidden; +extern void (*__fini_array_start []) (void) attribute_hidden; +extern void (*__fini_array_end []) (void) attribute_hidden; + + +#if ELF_INITFINI +/* These function symbols are provided for the .init/.fini section entry + points automagically by the linker. */ +extern void _init (void); +extern void _fini (void); +#endif + + +/* These functions are passed to __libc_start_main by the startup code. + These get statically linked into each program. For dynamically linked + programs, this module will come from libc_nonshared.a and differs from + the libc.a module in that it doesn't call the preinit array. */ + + +void +__libc_csu_init (int argc, char **argv, char **envp) +{ + /* For dynamically linked executables the preinit array is executed by + the dynamic linker (before initializing any shared object). */ + +#ifndef LIBC_NONSHARED + /* For static executables, preinit happens right before init.
*/ + { + const size_t size = __preinit_array_end - __preinit_array_start; + size_t i; + for (i = 0; i < size; i++) + (*__preinit_array_start [i]) (argc, argv, envp); + } +#endif + +#if ELF_INITFINI + _init (); +#endif + + const size_t size = __init_array_end - __init_array_start; + for (size_t i = 0; i < size; i++) + (*__init_array_start [i]) (argc, argv, envp); +} + +/* This function should not be used anymore. We run the executable's + destructor now just like any other. We cannot remove the function, + though. */ +void +__libc_csu_fini (void) +{ +#ifndef LIBC_NONSHARED + size_t i = __fini_array_end - __fini_array_start; + while (i-- > 0) + (*__fini_array_start [i]) (); + +# if ELF_INITFINI + _fini (); +# endif +#endif +} diff --git a/cpython-unix/antiquator/elf-initfini.h b/cpython-unix/antiquator/elf-initfini.h new file mode 100644 index 000000000..a1bdd4e56 --- /dev/null +++ b/cpython-unix/antiquator/elf-initfini.h @@ -0,0 +1,9 @@ +// Somewhat confusing name - this means "this code is going into +// libc_nonshared.a", the library of static code that is linked when +// you're using libc.so. So effectively it means you _are_ targeting a +// shared link and not a static link. +#define LIBC_NONSHARED 1 + +#define ELF_INITFINI 1 + +#define attribute_hidden __attribute__((visibility("hidden"))) diff --git a/cpython-unix/antiquator/gcc.specs b/cpython-unix/antiquator/gcc.specs new file mode 100644 index 000000000..71b89aa68 --- /dev/null +++ b/cpython-unix/antiquator/gcc.specs @@ -0,0 +1,37 @@ +# Spec file for GCC. `gcc -specs ${antiquator}/gcc.specs` +# +# There are two ways to add to a spec instead of replacing it. One is to +# use +, which appends. But we need -lc_placeholder to come before -lc.
+# The usual trick for that (documented in the GCC manual) is +# %rename lib old_lib +# *lib: -lc_placeholder %(old_lib) +# but GCC gets mad if old_lib already exists, and contrary to the +# documentation you cannot delete a spec, and if you set -specs in both +# CFLAGS and LDFLAGS, then this spec file runs twice if you build a +# program in one command with $(CC) $(CFLAGS) $(LDFLAGS), which autoconf +# does for test programs, causing it to fail. So, instead take advantage +# of %(mflib), an unused variable in the built-in specs in a spot we +# like (from the old "Mudflap" precursor to ASan, which was removed from +# GCC over a decade ago but they left this variable in the specs). Just +# in case anyone else had the same clever idea, we append to it, but we +# expect it to be empty. (Everything we add is idempotent, just wasteful +# to run twice.) +*cpp: ++ -isystem %:getenv(antiquator /) + +*mflib: ++ -L %:getenv(antiquator /) \ +--push-state --as-needed \ +-lantiquator_helpers \ +-lanl_placeholder \ +-lc_placeholder \ +-ldl_placeholder \ +-lm_placeholder \ +-lpthread_placeholder \ +-lresolv_placeholder \ +-lrt_placeholder \ +-lutil_placeholder \ +--pop-state + +*post_link: + +%:getenv(antiquator /wrap-linker) -fuse-ld=true %{o*} diff --git a/cpython-unix/antiquator/libc_start_main.c b/cpython-unix/antiquator/libc_start_main.c new file mode 100644 index 000000000..d59a9aa2a --- /dev/null +++ b/cpython-unix/antiquator/libc_start_main.c @@ -0,0 +1,25 @@ +#include + +extern int __libc_csu_init(int argc, char **argv, char **envp); + +#define LIBC_START_MAIN_ARGS \ + int (*main) (int, char **, char **), \ + int argc, char **argv, \ + __typeof (main) init, \ + void (*fini) (void), \ + void (*rtld_fini) (void), void *stack_end + +extern int real_libc_start_main(LIBC_START_MAIN_ARGS); + +// The static linker needs to find this under the name +// __libc_start_main, so that crt1.o calls this one instead of the real +// one in libc. 
But after we rename real_libc_start_main with patchelf +// to __libc_start_main, the dynamic linker needs to _not_ find this one +// and instead find the real one. To accomplish this, we give it a +// non-default symbol version that does not match the symbol version +// that we actually want. +__attribute__((symver("__libc_start_main@ANTIQUATOR_SHIM"))) +int __libc_start_main(LIBC_START_MAIN_ARGS) { + return real_libc_start_main(main, argc, argv, __libc_csu_init, fini, rtld_fini, stack_end); +} + diff --git a/cpython-unix/antiquator/make_shadow_libraries.py b/cpython-unix/antiquator/make_shadow_libraries.py new file mode 100755 index 000000000..2ef36c260 --- /dev/null +++ b/cpython-unix/antiquator/make_shadow_libraries.py @@ -0,0 +1,163 @@ +#!/usr/bin/env -S uv run --no-project +# /// script +# requires-python = ">=3.10" +# dependencies = [ +# "tree-sitter", +# "tree-sitter-c", +# ] +# /// + +from collections.abc import Generator +import dataclasses +import os +import pathlib +import sys +import textwrap +from typing import Self + +import tree_sitter +import tree_sitter_c + +C_LANGUAGE = tree_sitter.Language(tree_sitter_c.language()) +PARSER = tree_sitter.Parser(C_LANGUAGE) + + +class QueryDataclass: + def __init_subclass__(cls, query, **kwargs): + super().__init_subclass__(**kwargs) + cls._QUERY = tree_sitter.Query(C_LANGUAGE, query) + + @classmethod + def matches(cls, node: tree_sitter.Node) -> Generator[Self]: + qc = tree_sitter.QueryCursor(cls._QUERY) + fields = dataclasses.fields(cls) + for _, m in qc.matches(node): + yield cls(*(m[f.name][0].text.decode() for f in fields)) + +class GlibcVersion: + def __init__(self, ver: str): + self._ver = ver + if not ver.startswith("GLIBC_"): + self._components = None + return + self._components = tuple(int(i) for i in ver.removeprefix("GLIBC_").split("_")) + if self._components < (2, 2, 5): # TODO x86-64 only + self._components = (2, 2, 5) + + def __lt__(self, other: Self): + if self._components and other._components: + 
return self._components < other._components + else: + return str(self) < str(other) + + def __str__(self): + if self._components: + return "GLIBC_" + ".".join(str(i) for i in self._components) + else: + return self._ver + + def __repr__(self): + return f"" + + +@dataclasses.dataclass +class Symbol( + QueryDataclass, + query="""(call_expression + function: (identifier) @macro + (#any-of? @macro "versioned_symbol" "compat_symbol") + arguments: (argument_list + (identifier) @lib + (identifier) @local + (identifier) @symbol + (identifier) @version + ) + )""", +): + macro: str + lib: str + local: str + symbol: str + version: str + + +@dataclasses.dataclass +class Weaken( + QueryDataclass, + query="""(call_expression + function: (identifier) @macro + (#eq? @macro "weaken") + arguments: (argument_list + (identifier) @symbol + ) + )""", +): + symbol: str + + +if __name__ == "__main__": + if len(sys.argv) != 3: + sys.exit("usage: make_shadow_libraries.py path/to/glibc/source path/to/output") + glibc = pathlib.Path(sys.argv[1]) + output = pathlib.Path(sys.argv[2]) + + versioned = {} + compat = [] + + for file in glibc.glob("**/*.c"): + t = PARSER.parse(file.read_bytes()) + for symbol in Symbol.matches(t.root_node): + match symbol: + case Symbol(macro="compat_symbol", lib="libc", symbol="dlinfo", version="GLIBC_2_3_3"): + # typo in glibc source + symbol.lib = "libdl" + case Symbol(symbol="__libdl_version_placeholder"): + # used to populate the stub libdl, not relevant to us + continue + case Symbol(version="GLIBC_PRIVATE"): + # not relevant to us, hopefully + continue + + if symbol.macro == "versioned_symbol": + versioned[symbol.local] = symbol + else: + print(symbol) + compat.append(symbol) + + versioned["__real_libc_start_main"] = None + compat.append(Symbol("compat_symbol", "libc", "__real_libc_start_main", "real_libc_start_main", "GLIBC_2_2_5")) + + # glibc before 2.30 doesn't properly handle weak versioned symbols, + # so we have to de-version them. 
(Note the order of where + # _dl_lookup_symbol_x in elf/dl-lookup.c handles STB_WEAK vs. + # versioned symbols.) + weak_unversioned = set() + for file in output.glob("**/*.h"): + t = PARSER.parse(file.read_bytes()) + for weaken in Weaken.matches(t.root_node): + weak_unversioned.add(weaken.symbol) + + libs: dict[str, dict[str, GlibcVersion]] = {} + for symbol in compat: + if symbol.local in versioned: + lib = libs.setdefault(symbol.lib, {}) + name = symbol.symbol + version = GlibcVersion(symbol.version) + if name in lib: + # min? max? max up to glibc 2.17 or whatever? + lib[name] = min(lib[name], version) + else: + lib[name] = version + + for lib, symbols in libs.items(): + with open(output / f"{lib}_placeholder.c", "w") as f: + print("__attribute__((", file=f) + for symbol, version in symbols.items(): + print(f'symver("{symbol}@@{version}"),', file=f) + print(")) void placeholder(void) {}", file=f) + if lib == "libc": + for symbol in weak_unversioned: + print(f"void {symbol}(void) {{}}", file=f) + with open(output / f"{lib}_placeholder.versions", "w") as f: + for version in set(str(version) for version in symbols.values()): + print(f"{version} {{}};", file=f) diff --git a/cpython-unix/antiquator/wrap-linker b/cpython-unix/antiquator/wrap-linker new file mode 100755 index 000000000..fed03b774 --- /dev/null +++ b/cpython-unix/antiquator/wrap-linker @@ -0,0 +1,92 @@ +#!/bin/bash + +set -eu + +linker_cmdline=(ld) +outputs=() +while [ "$#" -gt 0 ]; do + arg="$1" + shift + if [ "${arg::9}" = "-fuse-ld=" ]; then + linker_cmdline[0]="${arg:9}" + else + if [ "${linker_cmdline[-1]}" = "-o" ]; then + outputs+=("$arg") + fi + linker_cmdline+=("$arg") + fi +done + +if [ "${#outputs[@]}" -eq 0 ]; then + outputs=(a.out) +fi + +set -x + +"${linker_cmdline[0]}" \ + -L"${antiquator}" \ + --push-state --as-needed \ + -lantiquator_helpers \ + -lanl_placeholder \ + -lc_placeholder \ + -ldl_placeholder \ + -lm_placeholder \ + -lpthread_placeholder \ + -lresolv_placeholder \ + 
-lrt_placeholder \ + -lutil_placeholder \ + --pop-state \ + "${linker_cmdline[@]:1}" + +patchelf \ + --replace-needed libanl_placeholder.so libanl.so.1 \ + --replace-needed libc_placeholder.so libc.so.6 \ + --replace-needed libdl_placeholder.so libdl.so.2 \ + --replace-needed libm_placeholder.so libm.so.6 \ + --replace-needed libpthread_placeholder.so libpthread.so.0 \ + --replace-needed libresolv_placeholder.so libresolv.so.2 \ + --replace-needed librt_placeholder.so librt.so.1 \ + --replace-needed libutil_placeholder.so libutil.so.1 \ + --rename-dynamic-symbols <(echo {real,_}_libc_start_main) \ + "${outputs[@]}" + +# The following code is unnecessary because of the weak symbol +# de-versioning in make_shadow_libraries.py, but it works. If we ever +# raise our glibc baseline to 2.30+ and the linkers still aren't fixed, +# we can re-enable this (... or rewrite it in not-bash). +exit 0 +# Set VER_FLG_WEAK on the vernaux entry for a symbol version if all the +# symbols of that version are weak. It turns out that linkers don't +# actually have support for setting this flag, but glibc processes it +# fine. (gold has a comment noting the oversight.) +for output in "${outputs[@]}"; do + # First, note down which symbol versions we see referenced from at + # least one weak symbol and nothing else.
+ declare -A seen strong_seen + while read -a line; do + if [ "${line[6]:-}" == "UND" ] && [[ "${line[7]:-}" == *@* ]]; then + version=${line[7]##*@} + seen["$version"]=1 + if [ "${line[4]}" != "WEAK" ]; then + strong_seen["$version"]=1 + fi + fi + done < <(readelf --dyn-syms --wide "$output") + + verneed=0 + readelf --version-info "$output" | while read -a line; do + if [ "${line[*]::3}" = "Version needs section" ]; then + verneed=1 + elif [ "${#line[@]}" -eq 0 ]; then + verneed=0 + elif [ "$verneed" -eq 1 ] && [ "${line[2]}" = "Offset:" ]; then + base_offset=$((line[3])) + elif [ "$verneed" -eq 1 ] && [ "${line[1]}" = "Name:" ]; then + if [ "${seen[${line[2]}]:-0}" -eq 1 ] && [ "${strong_seen[${line[2]}]:-0}" -eq 0 ]; then + # vna_flags is 4 bytes into Elfxx_Vernaux + offset=$((base_offset + "${line[0]%:}" + 4)) + echo 0200 | xxd -r -p | dd of="$output" bs=1 seek="$offset" conv=notrunc status=none + fi + fi + done +done