Skip to content

Commit 4c9eb73

Browse files
authored
Address more extraction edge cases; improve naming and consistency (#733)
* Address more gzip, tar, and tar.gz edge cases Signed-off-by: egibs <[email protected]> * Add ordering comment; move tar case to top Signed-off-by: egibs <[email protected]> * Add .gzip for completeness, move above .zip Signed-off-by: egibs <[email protected]> * Address PR comments Signed-off-by: egibs <[email protected]> * Fix deb.go symlink validation Signed-off-by: egibs <[email protected]> * Move deb.go and tar.go extraction logic to helper functions Signed-off-by: egibs <[email protected]> * Re-add check for nonexistent targets Signed-off-by: egibs <[email protected]> * Universal naming/consistency changes Signed-off-by: egibs <[email protected]> * Run go mod tidy Signed-off-by: egibs <[email protected]> --------- Signed-off-by: egibs <[email protected]>
1 parent 3b49925 commit 4c9eb73

File tree

10 files changed

+181
-115
lines changed

10 files changed

+181
-115
lines changed

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ require (
1616
github.com/google/go-cmp v0.6.0
1717
github.com/google/go-containerregistry v0.20.2
1818
github.com/hillu/go-yara/v4 v4.3.3
19+
github.com/klauspost/compress v1.17.11
1920
github.com/olekukonko/tablewriter v0.0.5
2021
github.com/shirou/gopsutil/v4 v4.24.11
2122
github.com/ulikunitz/xz v0.5.12
@@ -40,7 +41,6 @@ require (
4041
github.com/ebitengine/purego v0.8.1 // indirect
4142
github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f // indirect
4243
github.com/go-ole/go-ole v1.3.0 // indirect
43-
github.com/klauspost/compress v1.17.11 // indirect
4444
github.com/kr/pretty v0.2.1 // indirect
4545
github.com/lucasb-eyer/go-colorful v1.2.0 // indirect
4646
github.com/lufia/plan9stats v0.0.0-20240909124753-873cd0166683 // indirect

pkg/archive/archive.go

Lines changed: 74 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
package archive
22

33
import (
4+
"archive/tar"
45
"context"
56
"fmt"
7+
"io"
68
"io/fs"
79
"os"
810
"path/filepath"
@@ -190,13 +192,16 @@ func ExtractArchiveToTempDir(ctx context.Context, path string) (string, error) {
190192
}
191193

192194
func ExtractionMethod(ext string) func(context.Context, string, string) error {
195+
// The ordering of these statements is important, especially for extensions
196+
// that are substrings of other extensions (e.g., `.gz` and `.tar.gz` or `.tgz`)
193197
switch ext {
194-
case ".jar", ".zip", ".whl":
195-
return ExtractZip
196-
case ".gz":
197-
return ExtractGzip
198+
// New cases should go below this line so that the lengthier tar extensions are evaluated first
198199
case ".apk", ".gem", ".tar", ".tar.bz2", ".tar.gz", ".tgz", ".tar.xz", ".tbz", ".xz":
199200
return ExtractTar
201+
case ".gz", ".gzip":
202+
return ExtractGzip
203+
case ".jar", ".zip", ".whl":
204+
return ExtractZip
200205
case ".bz2", ".bzip2":
201206
return ExtractBz2
202207
case ".rpm":
@@ -207,3 +212,68 @@ func ExtractionMethod(ext string) func(context.Context, string, string) error {
207212
return nil
208213
}
209214
}
215+
216+
// handleDirectory extracts valid directories within .deb or .tar archives.
217+
func handleDirectory(target string) error {
218+
if err := os.MkdirAll(target, 0o700); err != nil {
219+
return fmt.Errorf("failed to create directory: %w", err)
220+
}
221+
return nil
222+
}
223+
224+
// handleFile extracts valid files within .deb or .tar archives.
225+
func handleFile(target string, tr *tar.Reader) error {
226+
if err := os.MkdirAll(filepath.Dir(target), 0o700); err != nil {
227+
return fmt.Errorf("failed to create parent directory: %w", err)
228+
}
229+
230+
out, err := os.OpenFile(target, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o600)
231+
if err != nil {
232+
return fmt.Errorf("failed to create file: %w", err)
233+
}
234+
defer out.Close()
235+
236+
written, err := io.Copy(out, io.LimitReader(tr, maxBytes))
237+
if err != nil {
238+
return fmt.Errorf("failed to copy file: %w", err)
239+
}
240+
if written >= maxBytes {
241+
return fmt.Errorf("file exceeds maximum allowed size (%d bytes): %s", maxBytes, target)
242+
}
243+
244+
return nil
245+
}
246+
247+
// handleSymlink creates valid symlinks when extracting .deb or .tar archives.
248+
func handleSymlink(dir, linkName, target string) error {
249+
// Skip symlinks for targets that do not exist
250+
_, err := os.Readlink(target)
251+
if os.IsNotExist(err) {
252+
return nil
253+
}
254+
255+
fullLink := filepath.Join(dir, linkName)
256+
257+
// Remove existing symlinks
258+
if _, err := os.Lstat(fullLink); err == nil {
259+
if err := os.Remove(fullLink); err != nil {
260+
return fmt.Errorf("failed to remove existing symlink: %w", err)
261+
}
262+
}
263+
264+
if err := os.Symlink(target, fullLink); err != nil {
265+
return fmt.Errorf("failed to create symlink: %w", err)
266+
}
267+
268+
linkReal, err := filepath.EvalSymlinks(fullLink)
269+
if err != nil {
270+
os.Remove(fullLink)
271+
return fmt.Errorf("failed to evaluate symlink: %w", err)
272+
}
273+
if !IsValidPath(linkReal, dir) {
274+
os.Remove(fullLink)
275+
return fmt.Errorf("symlink points outside temporary directory: %s", linkReal)
276+
}
277+
278+
return nil
279+
}

pkg/archive/bz2.go

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,20 +38,28 @@ func ExtractBz2(ctx context.Context, d, f string) error {
3838
uncompressed := strings.TrimSuffix(filepath.Base(f), ".bz2")
3939
uncompressed = strings.TrimSuffix(uncompressed, ".bzip2")
4040
target := filepath.Join(d, uncompressed)
41+
if !IsValidPath(target, d) {
42+
return fmt.Errorf("invalid file path: %s", target)
43+
}
4144
if err := os.MkdirAll(d, 0o700); err != nil {
4245
return fmt.Errorf("failed to create directory for file: %w", err)
4346
}
4447

4548
// #nosec G115 // ignore Type conversion which leads to integer overflow
4649
// header.Mode is int64 and FileMode is uint32
47-
out, err := os.OpenFile(target, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0o600)
50+
out, err := os.OpenFile(target, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o600)
4851
if err != nil {
4952
return fmt.Errorf("failed to create file: %w", err)
5053
}
5154
defer out.Close()
52-
if _, err := io.Copy(out, io.LimitReader(br, maxBytes)); err != nil {
53-
out.Close()
55+
56+
written, err := io.Copy(out, io.LimitReader(br, maxBytes))
57+
if err != nil {
5458
return fmt.Errorf("failed to copy file: %w", err)
5559
}
60+
if written >= maxBytes {
61+
return fmt.Errorf("file exceeds maximum allowed size (%d bytes): %s", maxBytes, target)
62+
}
63+
5664
return nil
5765
}

pkg/archive/deb.go

Lines changed: 8 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -46,49 +46,21 @@ func ExtractDeb(ctx context.Context, d, f string) error {
4646
}
4747

4848
target := filepath.Join(d, clean)
49+
if !IsValidPath(target, d) {
50+
return fmt.Errorf("invalid file path: %s", target)
51+
}
4952

5053
switch header.Typeflag {
5154
case tar.TypeDir:
52-
// #nosec G115 // ignore Type conversion which leads to integer overflow
53-
// header.Mode is int64 and FileMode is uint32
54-
if err := os.MkdirAll(target, os.FileMode(header.Mode)); err != nil {
55-
return fmt.Errorf("failed to create directory: %w", err)
55+
if err := handleDirectory(target); err != nil {
56+
return fmt.Errorf("failed to extract directory: %w", err)
5657
}
5758
case tar.TypeReg:
58-
if err := os.MkdirAll(filepath.Dir(target), 0o700); err != nil {
59-
return fmt.Errorf("failed to create parent directory: %w", err)
60-
}
61-
62-
// #nosec G115
63-
out, err := os.OpenFile(target, os.O_RDWR|os.O_CREATE|os.O_TRUNC, os.FileMode(header.Mode))
64-
if err != nil {
65-
return fmt.Errorf("failed to create file: %w", err)
66-
}
67-
68-
if _, err := io.Copy(out, io.LimitReader(df.Data, maxBytes)); err != nil {
69-
out.Close()
70-
return fmt.Errorf("failed to copy file: %w", err)
71-
}
72-
73-
if err := out.Close(); err != nil {
74-
return fmt.Errorf("failed to close file: %w", err)
59+
if err := handleFile(target, df.Data); err != nil {
60+
return fmt.Errorf("failed to extract file: %w", err)
7561
}
7662
case tar.TypeSymlink:
77-
// Skip symlinks for targets that do not exist
78-
_, err = os.Readlink(target)
79-
if os.IsNotExist(err) {
80-
continue
81-
}
82-
// Ensure that symlinks are not relative path traversals
83-
// #nosec G305 // L208 handles the check
84-
linkReal, err := filepath.EvalSymlinks(filepath.Join(d, header.Linkname))
85-
if err != nil {
86-
return fmt.Errorf("failed to evaluate symlink: %w", err)
87-
}
88-
if !IsValidPath(linkReal, d) {
89-
return fmt.Errorf("symlink points outside temporary directory: %s", linkReal)
90-
}
91-
if err := os.Symlink(linkReal, target); err != nil {
63+
if err := handleSymlink(d, header.Linkname, target); err != nil {
9264
return fmt.Errorf("failed to create symlink: %w", err)
9365
}
9466
}

pkg/archive/gzip.go

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,23 @@ import (
99
"path/filepath"
1010

1111
"github.com/chainguard-dev/clog"
12+
"github.com/chainguard-dev/malcontent/pkg/programkind"
1213
)
1314

1415
// extractGzip extracts .gz archives.
1516
func ExtractGzip(ctx context.Context, d string, f string) error {
17+
// Check whether the provided file is a valid gzip archive
18+
var isGzip bool
19+
if ft, err := programkind.File(f); err == nil && ft != nil {
20+
if ft.MIME == "application/gzip" {
21+
isGzip = true
22+
}
23+
}
24+
25+
if !isGzip {
26+
return fmt.Errorf("not a valid gzip archive")
27+
}
28+
1629
logger := clog.FromContext(ctx).With("dir", d, "file", f)
1730
logger.Debug("extracting gzip")
1831

@@ -30,22 +43,29 @@ func ExtractGzip(ctx context.Context, d string, f string) error {
3043

3144
base := filepath.Base(f)
3245
target := filepath.Join(d, base[:len(base)-len(filepath.Ext(base))])
46+
if !IsValidPath(target, d) {
47+
return fmt.Errorf("invalid file path: %s", target)
48+
}
3349

3450
gr, err := gzip.NewReader(gf)
3551
if err != nil {
3652
return fmt.Errorf("failed to create gzip reader: %w", err)
3753
}
3854
defer gr.Close()
3955

40-
ef, err := os.Create(target)
56+
out, err := os.Create(target)
4157
if err != nil {
4258
return fmt.Errorf("failed to create extracted file: %w", err)
4359
}
44-
defer ef.Close()
60+
defer out.Close()
4561

46-
if _, err := io.Copy(ef, io.LimitReader(gr, maxBytes)); err != nil {
62+
written, err := io.Copy(out, io.LimitReader(gr, maxBytes))
63+
if err != nil {
4764
return fmt.Errorf("failed to copy file: %w", err)
4865
}
66+
if written >= maxBytes {
67+
return fmt.Errorf("file exceeds maximum allowed size (%d bytes): %s", maxBytes, target)
68+
}
4969

5070
return nil
5171
}

pkg/archive/rpm.go

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,9 @@ func ExtractRPM(ctx context.Context, d, f string) error {
8686
}
8787

8888
target := filepath.Join(d, clean)
89+
if !IsValidPath(target, d) {
90+
return fmt.Errorf("invalid file path: %s", target)
91+
}
8992

9093
if header.FileInfo().IsDir() {
9194
if err := os.MkdirAll(target, os.FileMode(header.Mode)); err != nil {
@@ -98,15 +101,18 @@ func ExtractRPM(ctx context.Context, d, f string) error {
98101
return fmt.Errorf("failed to create parent directory: %w", err)
99102
}
100103

101-
out, err := os.OpenFile(target, os.O_RDWR|os.O_CREATE|os.O_TRUNC, os.FileMode(header.Mode))
104+
out, err := os.OpenFile(target, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o600)
102105
if err != nil {
103106
return fmt.Errorf("failed to create file: %w", err)
104107
}
105108

106-
if _, err := io.Copy(out, io.LimitReader(cr, maxBytes)); err != nil {
107-
out.Close()
109+
written, err := io.Copy(out, io.LimitReader(cr, maxBytes))
110+
if err != nil {
108111
return fmt.Errorf("failed to copy file: %w", err)
109112
}
113+
if written >= maxBytes {
114+
return fmt.Errorf("file exceeds maximum allowed size (%d bytes): %s", maxBytes, target)
115+
}
110116

111117
if err := out.Close(); err != nil {
112118
return fmt.Errorf("failed to close file: %w", err)

0 commit comments

Comments
 (0)