From 3543d8d1a1d8334ded8a2265e941dfadc8ce2f0e Mon Sep 17 00:00:00 2001 From: Greg Curtis Date: Tue, 3 Sep 2024 11:51:32 -0400 Subject: [PATCH] patchpkg: restore removed refs to build deps Add a `devbox patch --restore-refs` flag that attempts to restore removed store path references. It works by finding store paths that have had their hashes replaced with e's (`/nix/store/eeee...-foo`) and then searching the package's build environment for store paths with matching names. For example, for the removed reference `/nix/store/eee...-foo-1.2.3` search all environment variables for a path ending in `foo-1.2.3`, then overwrite the removed reference with the one we found. --- internal/boxcli/patch.go | 10 +-- internal/patchpkg/builder.go | 93 ++++++++++++++++++++- internal/patchpkg/patch.go | 65 +++++++++----- internal/patchpkg/search.go | 82 ++++++++++++++++++ internal/shellgen/tmpl/glibc-patch.nix.tmpl | 30 ++++++- 5 files changed, 248 insertions(+), 32 deletions(-) create mode 100644 internal/patchpkg/search.go diff --git a/internal/boxcli/patch.go b/internal/boxcli/patch.go index 3de18be17e3..f5df298ea60 100644 --- a/internal/boxcli/patch.go +++ b/internal/boxcli/patch.go @@ -6,21 +6,17 @@ import ( ) func patchCmd() *cobra.Command { - var glibc string + builder := &patchpkg.DerivationBuilder{} cmd := &cobra.Command{ Use: "patch ", Short: "Apply Devbox patches to a package to fix common linker errors", Args: cobra.ExactArgs(1), Hidden: true, RunE: func(cmd *cobra.Command, args []string) error { - builder, err := patchpkg.NewDerivationBuilder() - if err != nil { - return err - } - builder.Glibc = glibc return builder.Build(cmd.Context(), args[0]) }, } - cmd.Flags().StringVar(&glibc, "glibc", "", "patch binaries to use a different glibc") + cmd.Flags().StringVar(&builder.Glibc, "glibc", "", "patch binaries to use a different glibc") + cmd.Flags().BoolVar(&builder.RestoreRefs, "restore-refs", false, "restore references to removed store paths") return cmd } diff --git a/internal/patchpkg/builder.go b/internal/patchpkg/builder.go index 8de4131cbab..a42522579ce 100644 --- a/internal/patchpkg/builder.go +++ b/internal/patchpkg/builder.go @@ -4,6 +4,7 @@ package patchpkg import ( "bufio" "bytes" + "cmp" "context" _ "embed" "fmt" @@ -15,6 +16,7 @@ import ( "os/exec" "path" "path/filepath" + "regexp" ) //go:embed glibc-patch.bash @@ -30,7 +32,10 @@ type DerivationBuilder struct { // it's set, the builder will patch ELF binaries to use its shared // libraries and dynamic linker. Glibc string - glibcPatcher glibcPatcher + glibcPatcher *glibcPatcher + + RestoreRefs bool + bytePatches map[string][]fileSlice } // NewDerivationBuilder initializes a new DerivationBuilder from the current @@ -73,10 +78,40 @@ func (d *DerivationBuilder) Build(ctx context.Context, pkgStorePath string) erro } func (d *DerivationBuilder) build(ctx context.Context, pkg, out *packageFS) error { + if d.RestoreRefs { + // Find store path references to build inputs that were removed + // from Python. + refs, err := d.findRemovedRefs(ctx, pkg) + if err != nil { + return err + } + + // Group the references we want to restore by file path. + d.bytePatches = make(map[string][]fileSlice, len(refs)) + for _, ref := range refs { + d.bytePatches[ref.path] = append(d.bytePatches[ref.path], ref) + } + + // If any of those references have shared libraries, add them + // back to Python's RPATH. 
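+		// This only runs when a glibc patcher was configured
+		// (d.glibcPatcher is non-nil), and store paths are deduplicated so
+		// that each one is prepended to the RPATH only once, even if
+		// several files reference it.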
+ if d.glibcPatcher != nil { + nixStore := cmp.Or(os.Getenv("NIX_STORE"), "/nix/store") + seen := make(map[string]bool) + for _, ref := range refs { + storePath := filepath.Join(nixStore, string(ref.data)) + if seen[storePath] { + continue + } + seen[storePath] = true + d.glibcPatcher.prependRPATH(newPackageFS(storePath)) + } + } + } + var err error for path, entry := range allFiles(pkg, ".") { if ctx.Err() != nil { - return err + return ctx.Err() } switch { @@ -156,6 +191,13 @@ func (d *DerivationBuilder) copyFile(ctx context.Context, pkg, out *packageFS, p if err != nil { return err } + + for _, patch := range d.bytePatches[path] { + _, err := dst.WriteAt(patch.data, patch.offset) + if err != nil { + return err + } + } return dst.Close() } @@ -172,7 +214,7 @@ func (d *DerivationBuilder) copySymlink(pkg, out *packageFS, path string) error } func (d *DerivationBuilder) needsGlibcPatch(file *bufio.Reader, filePath string) bool { - if d.Glibc == "" { + if d.Glibc == "" || d.glibcPatcher == nil { return false } if path.Dir(filePath) != "bin" { @@ -188,6 +230,51 @@ func (d *DerivationBuilder) needsGlibcPatch(file *bufio.Reader, filePath string) return magic[0] == 0x7F && magic[1] == 'E' && magic[2] == 'L' && magic[3] == 'F' } +func (d *DerivationBuilder) findRemovedRefs(ctx context.Context, pkg *packageFS) ([]fileSlice, error) { + var refs []fileSlice + matches, err := fs.Glob(pkg, "lib/python*/_sysconfigdata__linux*.py") + if err != nil { + return nil, err + } + for _, name := range matches { + if ctx.Err() != nil { + return nil, ctx.Err() + } + matches, err := searchFile(pkg, name, reRemovedRefs) + if err != nil { + return nil, err + } + refs = append(refs, matches...) + } + + pkgNameToHash := make(map[string]string, len(refs)) + for _, ref := range refs { + if ctx.Err() != nil { + return nil, ctx.Err() + } + + name := string(ref.data[33:]) + if hash, ok := pkgNameToHash[name]; ok { + copy(ref.data, hash) + continue + } + + re, err := regexp.Compile(`[0123456789abcdfghijklmnpqrsvwxyz]{32}-` + regexp.QuoteMeta(name) + `([$"'{}/[\] \t\r\n]|$)`) + if err != nil { + return nil, err + } + match := searchEnv(re) + if match == "" { + return nil, fmt.Errorf("can't find hash to restore store path reference %q in %q: regexp %q returned 0 matches", ref.data, ref.path, re) + } + hash := match[:32] + pkgNameToHash[name] = hash + copy(ref.data, hash) + slog.DebugContext(ctx, "restored store ref", "ref", ref) + } + return refs, nil +} + // packageFS is the tree of files for a package in the Nix store. type packageFS struct { fs.FS diff --git a/internal/patchpkg/patch.go b/internal/patchpkg/patch.go index 3555bd6fd7f..1d88e603b1f 100644 --- a/internal/patchpkg/patch.go +++ b/internal/patchpkg/patch.go @@ -9,6 +9,7 @@ import ( "os/exec" "path" "slices" + "strings" ) // glibcPatcher patches ELF binaries to use an alternative version of glibc. @@ -16,49 +17,76 @@ type glibcPatcher struct { // ld is the absolute path to the new dynamic linker (ld.so). ld string - // lib is the absolute path to the lib directory containing the new libc - // shared objects (libc.so). - lib string + // rpath is the new RPATH with the directories containing the new libc + // shared objects (libc.so) and other libraries. + rpath []string } // newGlibcPatcher creates a new glibcPatcher and verifies that it can find the // shared object files in glibc. 
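+//
+// It returns a pointer so that a nil patcher can signal "no glibc patching",
+// and it records the libc directories in an rpath list so that prependRPATH
+// can add more library directories later.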
-func newGlibcPatcher(glibc *packageFS) (patcher glibcPatcher, err error) { +func newGlibcPatcher(glibc *packageFS) (*glibcPatcher, error) { + patcher := &glibcPatcher{} + // Verify that we can find a directory with libc in it. glob := "lib*/libc.so*" matches, _ := fs.Glob(glibc, glob) if len(matches) == 0 { - return glibcPatcher{}, fmt.Errorf("cannot find libc.so file matching %q", glob) + return nil, fmt.Errorf("cannot find libc.so file matching %q", glob) } for i := range matches { matches[i] = path.Dir(matches[i]) } slices.Sort(matches) // pick the shortest name: lib < lib32 < lib64 < libx32 - patcher.lib, err = glibc.OSPath(matches[0]) + + lib, err := glibc.OSPath(matches[0]) if err != nil { - return glibcPatcher{}, err + return nil, err } - slog.Debug("found new libc directory", "path", patcher.lib) + patcher.rpath = append(patcher.rpath, lib) + slog.Debug("found new libc directory", "path", lib) // Verify that we can find the new dynamic linker. glob = "lib*/ld-linux*.so*" matches, _ = fs.Glob(glibc, glob) if len(matches) == 0 { - return glibcPatcher{}, fmt.Errorf("cannot find ld.so file matching %q", glob) + return nil, fmt.Errorf("cannot find ld.so file matching %q", glob) } slices.Sort(matches) patcher.ld, err = glibc.OSPath(matches[0]) if err != nil { - return glibcPatcher{}, err + return nil, err } slog.Debug("found new dynamic linker", "path", patcher.ld) return patcher, nil } +func (g *glibcPatcher) prependRPATH(libPkg *packageFS) { + glob := "lib*/*.so*" + matches, _ := fs.Glob(libPkg, glob) + if len(matches) == 0 { + slog.Debug("not prepending package to RPATH because no shared libraries were found", "pkg", libPkg.storePath) + return + } + for i := range matches { + matches[i] = path.Dir(matches[i]) + } + slices.Sort(matches) + matches = slices.Compact(matches) + for i := range matches { + var err error + matches[i], err = libPkg.OSPath(matches[i]) + if err != nil { + continue + } + } + g.rpath = append(matches, g.rpath...) + slog.Debug("prepended package lib dirs to RPATH", "pkg", libPkg.storePath, "dirs", matches) +} + // patch applies glibc patches to a binary and writes the patched result to // outPath. It does not modify the original binary in-place. -func (g glibcPatcher) patch(ctx context.Context, path, outPath string) error { +func (g *glibcPatcher) patch(ctx context.Context, path, outPath string) error { cmd := &patchelf{PrintInterpreter: true} out, err := cmd.run(ctx, path) if err != nil { @@ -71,18 +99,13 @@ func (g glibcPatcher) patch(ctx context.Context, path, outPath string) error { if err != nil { return err } - oldRpath := string(out) + oldRpath := strings.Split(string(out), ":") cmd = &patchelf{ SetInterpreter: g.ld, + SetRPATH: append(g.rpath, oldRpath...), Output: outPath, } - if len(oldRpath) == 0 { - cmd.SetRPATH = g.lib - } else { - cmd.SetRPATH = g.lib + ":" + oldRpath - } - slog.Debug("patching glibc on binary", "path", path, "outPath", cmd.Output, "old_interp", oldInterp, "new_interp", cmd.SetInterpreter, @@ -94,7 +117,7 @@ func (g glibcPatcher) patch(ctx context.Context, path, outPath string) error { // patchelf runs the patchelf command. type patchelf struct { - SetRPATH string + SetRPATH []string PrintRPATH bool SetInterpreter string @@ -106,8 +129,8 @@ type patchelf struct { // run runs patchelf on an ELF binary and returns its output. 
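+// Entries in SetRPATH are joined with ":" before being passed to
+// patchelf --set-rpath.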
func (p *patchelf) run(ctx context.Context, elf string) ([]byte, error) { cmd := exec.CommandContext(ctx, lookPath("patchelf")) - if p.SetRPATH != "" { - cmd.Args = append(cmd.Args, "--force-rpath", "--set-rpath", p.SetRPATH) + if len(p.SetRPATH) != 0 { + cmd.Args = append(cmd.Args, "--force-rpath", "--set-rpath", strings.Join(p.SetRPATH, ":")) } if p.PrintRPATH { cmd.Args = append(cmd.Args, "--print-rpath") diff --git a/internal/patchpkg/search.go b/internal/patchpkg/search.go new file mode 100644 index 00000000000..a9667c27c9a --- /dev/null +++ b/internal/patchpkg/search.go @@ -0,0 +1,82 @@ +package patchpkg + +import ( + "fmt" + "io" + "io/fs" + "os" + "regexp" + "strings" + "sync" +) + +// maxFileSize limits the amount of data to load from a file when +// searching. +const maxFileSize = 1 << 30 // 1 GiB + +// reRemovedRefs matches a removed Nix store path where the hash is +// overwritten with e's (making it an invalid nix hash). +var reRemovedRefs = regexp.MustCompile(`e{32}-[^$"'{}/[\] \t\r\n]+`) + +// fileSlice is a slice of data within a file. +type fileSlice struct { + path string + data []byte + offset int64 +} + +func (f fileSlice) String() string { + return fmt.Sprintf("%s@%d: %s", f.path, f.offset, f.data) +} + +// searchFile searches a single file for a regular expression. It limits the +// search to the first [maxFileSize] bytes of the file to avoid consuming too +// much memory. +func searchFile(fsys fs.FS, path string, re *regexp.Regexp) ([]fileSlice, error) { + f, err := fsys.Open(path) + if err != nil { + return nil, err + } + defer f.Close() + + r := &io.LimitedReader{R: f, N: maxFileSize} + data, err := io.ReadAll(r) + if err != nil { + return nil, err + } + + locs := re.FindAllIndex(data, -1) + if len(locs) == 0 { + return nil, nil + } + + matches := make([]fileSlice, len(locs)) + for i := range locs { + start, end := locs[i][0], locs[i][1] + matches[i] = fileSlice{ + path: path, + data: data[start:end], + offset: int64(start), + } + } + return matches, nil +} + +var envValues = sync.OnceValue(func() []string { + env := os.Environ() + values := make([]string, len(env)) + for i := range env { + _, values[i], _ = strings.Cut(env[i], "=") + } + return values +}) + +func searchEnv(re *regexp.Regexp) string { + for _, env := range envValues() { + match := re.FindString(env) + if match != "" { + return match + } + } + return "" +} diff --git a/internal/shellgen/tmpl/glibc-patch.nix.tmpl b/internal/shellgen/tmpl/glibc-patch.nix.tmpl index 9032539819b..d07d1b042c3 100644 --- a/internal/shellgen/tmpl/glibc-patch.nix.tmpl +++ b/internal/shellgen/tmpl/glibc-patch.nix.tmpl @@ -44,6 +44,34 @@ name = pkg.name; system = pkg.system; + # buildDependencies is the package's build dependencies as a list of + # store paths. It includes transitive dependencies. + # + # Setting this environment variable provides a corpus of store paths + # that the `devbox patch --restore-refs` flag can use to restore + # references to Python build-time dependencies. + buildDependencies = + let + # mkNodes makes tree nodes for a list of derivation (package) + # outputs. A node is just the package with a "key" attribute added + # to it so it works with builtins.genericClosure. + mkNodes = builtins.map (drv: drv // { key = drv.outPath; }); + + # mkTree recursively traverses the buildInputs of the package we're + # patching. It returns a list of nodes, where each node represents + # a package output path in the dependency tree. 
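+          # builtins.genericClosure deduplicates nodes by their "key"
+          # attribute, so each output path is visited only once even when it
+          # is reachable through several dependency chains.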
+ mkTree = builtins.genericClosure { + # Start with the package's buildInputs + the packages in its + # stdenv. + startSet = mkNodes (pkg.buildInputs ++ pkg.stdenv.initialPath); + + # For each package, generate nodes for all of its outputs + # (node.all) and all of its buildInputs. Then visit those nodes. + operator = node: mkNodes (node.all or [ ] ++ node.buildInputs or [ ]); + }; + in + builtins.map (drv: drv.outPath) mkTree; + # Programs needed by glibc-patch.bash. inherit (nixpkgs-glibc.legacyPackages."${system}") bash coreutils glibc gnused patchelf ripgrep; @@ -64,7 +92,7 @@ DEVBOX_DEBUG = 1; builder = "${devbox}/bin/devbox"; - args = [ "patch" "--glibc" glibc pkg ]; + args = [ "patch" "--restore-refs" "--glibc" glibc pkg ]; }; in {
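
The following standalone Go program is a minimal sketch of the hash-restoration
idea described in the commit message. It is illustrative only and not part of
the patch: the variable names, the fake buildDependencies value, and the
made-up hash are hypothetical, and the second regexp is a slightly simplified
version of the per-name pattern built in findRemovedRefs.

	// restorerefs_sketch.go: a standalone illustration, not part of the patch.
	package main

	import (
		"fmt"
		"os"
		"regexp"
		"strings"
	)

	// Mirrors reRemovedRefs in internal/patchpkg/search.go: a store path whose
	// hash was overwritten with 32 e's.
	var reRemoved = regexp.MustCompile(`e{32}-[^$"'{}/[\] \t\r\n]+`)

	func main() {
		// Hypothetical file contents containing a removed reference.
		removed := "/nix/store/" + strings.Repeat("e", 32) + "-foo-1.2.3/lib"
		data := []byte(`LIBDIR = "` + removed + `"`)

		// Hypothetical build environment that still mentions the real store
		// path (the hash here is made up).
		realHash := strings.Repeat("0a1b", 8)
		os.Setenv("buildDependencies", "/nix/store/"+realHash+"-foo-1.2.3")

		for _, loc := range reRemoved.FindAllIndex(data, -1) {
			ref := data[loc[0]:loc[1]] // "eee...e-foo-1.2.3"
			name := string(ref[33:])   // skip the 32 e's and the dash

			// Look for "<32-char nix base32 hash>-<name>" in any environment
			// variable, like findRemovedRefs does via searchEnv. (The real
			// pattern also anchors what may follow the name.)
			re := regexp.MustCompile(
				`[0123456789abcdfghijklmnpqrsvwxyz]{32}-` + regexp.QuoteMeta(name))
			for _, env := range os.Environ() {
				if m := re.FindString(env); m != "" {
					copy(ref, m[:32]) // overwrite the e's with the real hash
					break
				}
			}
		}
		fmt.Println(string(data))
		// LIBDIR = "/nix/store/0a1b0a1b0a1b0a1b0a1b0a1b0a1b0a1b-foo-1.2.3/lib"
	}

Running it prints the LIBDIR line with the e's replaced by the hash found in
the environment, which is the same byte-level overwrite that the builder later
applies through bytePatches in copyFile.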