Skip to content

Commit f95694f

Browse files
committed
patchpkg: restore removed refs to build deps
Add a `devbox patch --restore-refs` flag that attempts to restore removed store path references. It works by finding store paths that have had their hashes replaced with e's (`/nix/store/eeee...-foo`) and then searching the package's build environment for store paths with matching names. For example, for the removed reference `/nix/store/eee...-foo-1.2.3` search all environment variables for a path ending in `foo-1.2.3`, then overwrite the removed reference with the one we found.
1 parent 6772d6a commit f95694f

File tree

5 files changed

+248
-32
lines changed

5 files changed

+248
-32
lines changed

internal/boxcli/patch.go

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,21 +6,17 @@ import (
66
)
77

88
func patchCmd() *cobra.Command {
9-
var glibc string
9+
builder := &patchpkg.DerivationBuilder{}
1010
cmd := &cobra.Command{
1111
Use: "patch <store-path>",
1212
Short: "Apply Devbox patches to a package to fix common linker errors",
1313
Args: cobra.ExactArgs(1),
1414
Hidden: true,
1515
RunE: func(cmd *cobra.Command, args []string) error {
16-
builder, err := patchpkg.NewDerivationBuilder()
17-
if err != nil {
18-
return err
19-
}
20-
builder.Glibc = glibc
2116
return builder.Build(cmd.Context(), args[0])
2217
},
2318
}
24-
cmd.Flags().StringVar(&glibc, "glibc", "", "patch binaries to use a different glibc")
19+
cmd.Flags().StringVar(&builder.Glibc, "glibc", "", "patch binaries to use a different glibc")
20+
cmd.Flags().BoolVar(&builder.RestoreRefs, "restore-refs", false, "restore references to removed store paths")
2521
return cmd
2622
}

internal/patchpkg/builder.go

Lines changed: 90 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ package patchpkg
44
import (
55
"bufio"
66
"bytes"
7+
"cmp"
78
"context"
89
_ "embed"
910
"fmt"
@@ -15,6 +16,7 @@ import (
1516
"os/exec"
1617
"path"
1718
"path/filepath"
19+
"regexp"
1820
)
1921

2022
//go:embed glibc-patch.bash
@@ -30,7 +32,10 @@ type DerivationBuilder struct {
3032
// it's set, the builder will patch ELF binaries to use its shared
3133
// libraries and dynamic linker.
3234
Glibc string
33-
glibcPatcher glibcPatcher
35+
glibcPatcher *glibcPatcher
36+
37+
RestoreRefs bool
38+
bytePatches map[string][]fileSlice
3439
}
3540

3641
// NewDerivationBuilder initializes a new DerivationBuilder from the current
@@ -73,10 +78,40 @@ func (d *DerivationBuilder) Build(ctx context.Context, pkgStorePath string) erro
7378
}
7479

7580
func (d *DerivationBuilder) build(ctx context.Context, pkg, out *packageFS) error {
81+
if d.RestoreRefs {
82+
// Find store path references to build inputs that were removed
83+
// from Python.
84+
refs, err := d.findRemovedRefs(ctx, pkg)
85+
if err != nil {
86+
return err
87+
}
88+
89+
// Group the references we want to restore by file path.
90+
d.bytePatches = make(map[string][]fileSlice, len(refs))
91+
for _, ref := range refs {
92+
d.bytePatches[ref.path] = append(d.bytePatches[ref.path], ref)
93+
}
94+
95+
// If any of those references have shared libraries, add them
96+
// back to Python's RPATH.
97+
if d.glibcPatcher != nil {
98+
nixStore := cmp.Or(os.Getenv("NIX_STORE"), "/nix/store")
99+
seen := make(map[string]bool)
100+
for _, ref := range refs {
101+
storePath := filepath.Join(nixStore, string(ref.data))
102+
if seen[storePath] {
103+
continue
104+
}
105+
seen[storePath] = true
106+
d.glibcPatcher.prependRPATH(newPackageFS(storePath))
107+
}
108+
}
109+
}
110+
76111
var err error
77112
for path, entry := range allFiles(pkg, ".") {
78113
if ctx.Err() != nil {
79-
return err
114+
return ctx.Err()
80115
}
81116

82117
switch {
@@ -156,6 +191,13 @@ func (d *DerivationBuilder) copyFile(ctx context.Context, pkg, out *packageFS, p
156191
if err != nil {
157192
return err
158193
}
194+
195+
for _, patch := range d.bytePatches[path] {
196+
_, err := dst.WriteAt(patch.data, patch.offset)
197+
if err != nil {
198+
return err
199+
}
200+
}
159201
return dst.Close()
160202
}
161203

@@ -172,7 +214,7 @@ func (d *DerivationBuilder) copySymlink(pkg, out *packageFS, path string) error
172214
}
173215

174216
func (d *DerivationBuilder) needsGlibcPatch(file *bufio.Reader, filePath string) bool {
175-
if d.Glibc == "" {
217+
if d.Glibc == "" || d.glibcPatcher == nil {
176218
return false
177219
}
178220
if path.Dir(filePath) != "bin" {
@@ -186,6 +228,51 @@ func (d *DerivationBuilder) needsGlibcPatch(file *bufio.Reader, filePath string)
186228
return magic[0] == 0x7F && magic[1] == 'E' && magic[2] == 'L' && magic[3] == 'F'
187229
}
188230

231+
func (d *DerivationBuilder) findRemovedRefs(ctx context.Context, pkg *packageFS) ([]fileSlice, error) {
232+
var refs []fileSlice
233+
matches, err := fs.Glob(pkg, "lib/python*/_sysconfigdata__linux*.py")
234+
if err != nil {
235+
return nil, err
236+
}
237+
for _, name := range matches {
238+
if ctx.Err() != nil {
239+
return nil, ctx.Err()
240+
}
241+
matches, err := searchFile(pkg, name, reRemovedRefs)
242+
if err != nil {
243+
return nil, err
244+
}
245+
refs = append(refs, matches...)
246+
}
247+
248+
pkgNameToHash := make(map[string]string, len(refs))
249+
for _, ref := range refs {
250+
if ctx.Err() != nil {
251+
return nil, ctx.Err()
252+
}
253+
254+
name := string(ref.data[33:])
255+
if hash, ok := pkgNameToHash[name]; ok {
256+
copy(ref.data, hash)
257+
continue
258+
}
259+
260+
re, err := regexp.Compile(`[0123456789abcdfghijklmnpqrsvwxyz]{32}-` + regexp.QuoteMeta(name) + `([$"'{}/[\] \t\r\n]|$)`)
261+
if err != nil {
262+
return nil, err
263+
}
264+
match := searchEnv(re)
265+
if match == "" {
266+
return nil, fmt.Errorf("can't find hash to restore store path reference %q in %q: regexp %q returned 0 matches", ref.data, ref.path, re)
267+
}
268+
hash := match[:32]
269+
pkgNameToHash[name] = hash
270+
copy(ref.data, hash)
271+
slog.DebugContext(ctx, "restored store ref", "ref", ref)
272+
}
273+
return refs, nil
274+
}
275+
189276
// packageFS is the tree of files for a package in the Nix store.
190277
type packageFS struct {
191278
fs.FS

internal/patchpkg/patch.go

Lines changed: 44 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -9,56 +9,84 @@ import (
99
"os/exec"
1010
"path"
1111
"slices"
12+
"strings"
1213
)
1314

1415
// glibcPatcher patches ELF binaries to use an alternative version of glibc.
1516
type glibcPatcher struct {
1617
// ld is the absolute path to the new dynamic linker (ld.so).
1718
ld string
1819

19-
// lib is the absolute path to the lib directory containing the new libc
20-
// shared objects (libc.so).
21-
lib string
20+
// rpath is the new RPATH with the directories containing the new libc
21+
// shared objects (libc.so) and other libraries.
22+
rpath []string
2223
}
2324

2425
// newGlibcPatcher creates a new glibcPatcher and verifies that it can find the
2526
// shared object files in glibc.
26-
func newGlibcPatcher(glibc *packageFS) (patcher glibcPatcher, err error) {
27+
func newGlibcPatcher(glibc *packageFS) (*glibcPatcher, error) {
28+
patcher := &glibcPatcher{}
29+
2730
// Verify that we can find a directory with libc in it.
2831
glob := "lib*/libc.so*"
2932
matches, _ := fs.Glob(glibc, glob)
3033
if len(matches) == 0 {
31-
return glibcPatcher{}, fmt.Errorf("cannot find libc.so file matching %q", glob)
34+
return nil, fmt.Errorf("cannot find libc.so file matching %q", glob)
3235
}
3336
for i := range matches {
3437
matches[i] = path.Dir(matches[i])
3538
}
3639
slices.Sort(matches) // pick the shortest name: lib < lib32 < lib64 < libx32
37-
patcher.lib, err = glibc.OSPath(matches[0])
40+
41+
lib, err := glibc.OSPath(matches[0])
3842
if err != nil {
39-
return glibcPatcher{}, err
43+
return nil, err
4044
}
41-
slog.Debug("found new libc directory", "path", patcher.lib)
45+
patcher.rpath = append(patcher.rpath, lib)
46+
slog.Debug("found new libc directory", "path", lib)
4247

4348
// Verify that we can find the new dynamic linker.
4449
glob = "lib*/ld-linux*.so*"
4550
matches, _ = fs.Glob(glibc, glob)
4651
if len(matches) == 0 {
47-
return glibcPatcher{}, fmt.Errorf("cannot find ld.so file matching %q", glob)
52+
return nil, fmt.Errorf("cannot find ld.so file matching %q", glob)
4853
}
4954
slices.Sort(matches)
5055
patcher.ld, err = glibc.OSPath(matches[0])
5156
if err != nil {
52-
return glibcPatcher{}, err
57+
return nil, err
5358
}
5459
slog.Debug("found new dynamic linker", "path", patcher.ld)
5560

5661
return patcher, nil
5762
}
5863

64+
func (g *glibcPatcher) prependRPATH(libPkg *packageFS) {
65+
glob := "lib*/*.so*"
66+
matches, _ := fs.Glob(libPkg, glob)
67+
if len(matches) == 0 {
68+
slog.Debug("not prepending package to RPATH because no shared libraries were found", "pkg", libPkg.storePath)
69+
return
70+
}
71+
for i := range matches {
72+
matches[i] = path.Dir(matches[i])
73+
}
74+
slices.Sort(matches)
75+
matches = slices.Compact(matches)
76+
for i := range matches {
77+
var err error
78+
matches[i], err = libPkg.OSPath(matches[i])
79+
if err != nil {
80+
continue
81+
}
82+
}
83+
g.rpath = append(matches, g.rpath...)
84+
slog.Debug("prepended package lib dirs to RPATH", "pkg", libPkg.storePath, "dirs", matches)
85+
}
86+
5987
// patch applies glibc patches to a binary and writes the patched result to
6088
// outPath. It does not modify the original binary in-place.
61-
func (g glibcPatcher) patch(ctx context.Context, path, outPath string) error {
89+
func (g *glibcPatcher) patch(ctx context.Context, path, outPath string) error {
6290
cmd := &patchelf{PrintInterpreter: true}
6391
out, err := cmd.run(ctx, path)
6492
if err != nil {
@@ -71,18 +99,13 @@ func (g glibcPatcher) patch(ctx context.Context, path, outPath string) error {
7199
if err != nil {
72100
return err
73101
}
74-
oldRpath := string(out)
102+
oldRpath := strings.Split(string(out), ":")
75103

76104
cmd = &patchelf{
77105
SetInterpreter: g.ld,
106+
SetRPATH: append(g.rpath, oldRpath...),
78107
Output: outPath,
79108
}
80-
if len(oldRpath) == 0 {
81-
cmd.SetRPATH = g.lib
82-
} else {
83-
cmd.SetRPATH = g.lib + ":" + oldRpath
84-
}
85-
86109
slog.Debug("patching glibc on binary",
87110
"path", path, "outPath", cmd.Output,
88111
"old_interp", oldInterp, "new_interp", cmd.SetInterpreter,
@@ -94,7 +117,7 @@ func (g glibcPatcher) patch(ctx context.Context, path, outPath string) error {
94117

95118
// patchelf runs the patchelf command.
96119
type patchelf struct {
97-
SetRPATH string
120+
SetRPATH []string
98121
PrintRPATH bool
99122

100123
SetInterpreter string
@@ -106,8 +129,8 @@ type patchelf struct {
106129
// run runs patchelf on an ELF binary and returns its output.
107130
func (p *patchelf) run(ctx context.Context, elf string) ([]byte, error) {
108131
cmd := exec.CommandContext(ctx, lookPath("patchelf"))
109-
if p.SetRPATH != "" {
110-
cmd.Args = append(cmd.Args, "--force-rpath", "--set-rpath", p.SetRPATH)
132+
if len(p.SetRPATH) != 0 {
133+
cmd.Args = append(cmd.Args, "--force-rpath", "--set-rpath", strings.Join(p.SetRPATH, ":"))
111134
}
112135
if p.PrintRPATH {
113136
cmd.Args = append(cmd.Args, "--print-rpath")

internal/patchpkg/search.go

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
package patchpkg
2+
3+
import (
4+
"fmt"
5+
"io"
6+
"io/fs"
7+
"os"
8+
"regexp"
9+
"strings"
10+
"sync"
11+
)
12+
13+
// maxFileSize is the largest number of bytes that will be read from a single
// file when searching it.
const maxFileSize = 1 << 30 // 1 GiB

// reRemovedRefs matches the base name of a Nix store path whose hash has been
// removed: 32 e's, a dash, and then the package name, which runs until the
// first quote, dollar sign, brace, bracket, slash or whitespace character.
// A run of 32 e's is how a removed hash is recognized here; it is not a valid
// Nix hash.
var reRemovedRefs = regexp.MustCompile(
	`e{32}-[^$"'{}/[\] \t\r\n]+`,
)
20+
21+
// fileSlice is a run of bytes together with the file and position it belongs
// to.
type fileSlice struct {
	// path names the file the bytes belong to.
	path string
	// data holds the bytes themselves.
	data []byte
	// offset is the position of the first byte of data within the file.
	offset int64
}

// String renders the slice as "path@offset: data" for logs and error
// messages.
func (f fileSlice) String() string {
	const layout = "%s@%d: %s"
	return fmt.Sprintf(layout, f.path, f.offset, f.data)
}
31+
32+
// searchFile searches a single file for a regular expression. It limits the
33+
// search to the first [maxFileSize] bytes of the file to avoid consuming too
34+
// much memory.
35+
func searchFile(fsys fs.FS, path string, re *regexp.Regexp) ([]fileSlice, error) {
36+
f, err := fsys.Open(path)
37+
if err != nil {
38+
return nil, err
39+
}
40+
defer f.Close()
41+
42+
r := &io.LimitedReader{R: f, N: maxFileSize}
43+
data, err := io.ReadAll(r)
44+
if err != nil {
45+
return nil, err
46+
}
47+
48+
locs := re.FindAllIndex(data, -1)
49+
if len(locs) == 0 {
50+
return nil, nil
51+
}
52+
53+
matches := make([]fileSlice, len(locs))
54+
for i := range locs {
55+
start, end := locs[i][0], locs[i][1]
56+
matches[i] = fileSlice{
57+
path: path,
58+
data: data[start:end],
59+
offset: int64(start),
60+
}
61+
}
62+
return matches, nil
63+
}
64+
65+
// envValues returns the value of every variable in the process environment,
// without the names. The environment is read on the first call only; later
// calls return the same cached slice.
var envValues = sync.OnceValue(func() []string {
	environ := os.Environ()
	vals := make([]string, 0, len(environ))
	for _, kv := range environ {
		// Everything after the first "=" is the value; an entry without
		// "=" contributes an empty string.
		_, v, _ := strings.Cut(kv, "=")
		vals = append(vals, v)
	}
	return vals
})
73+
74+
func searchEnv(re *regexp.Regexp) string {
75+
for _, env := range envValues() {
76+
match := re.FindString(env)
77+
if match != "" {
78+
return match
79+
}
80+
}
81+
return ""
82+
}

0 commit comments

Comments
 (0)