Skip to content

Commit 82cb955

Browse files
authored
Merge pull request #27 from pterodactyl-china/copilot/fix-http-request-error
Fix GBK filename decoding and file handle leaks in archive decompression
2 parents 86fabec + 89e880c commit 82cb955

File tree

3 files changed

+250
-8
lines changed

3 files changed

+250
-8
lines changed
Lines changed: 227 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,227 @@
1+
package archiverext
2+
3+
import (
4+
"io"
5+
"io/fs"
6+
"time"
7+
"unicode/utf8"
8+
9+
"github.com/klauspost/compress/zip"
10+
"golang.org/x/text/encoding/simplifiedchinese"
11+
)
12+
13+
// ZipFS is a wrapper around zip.Reader that implements fs.FS with
14+
// automatic filename decoding. It handles GBK-encoded filenames
15+
// (common in Chinese Windows systems) by converting them to UTF-8.
16+
type ZipFS struct {
17+
reader *zip.Reader
18+
file io.Closer
19+
}
20+
21+
// NewZipFS creates a new ZipFS from a ReaderAt and size.
22+
func NewZipFS(r io.ReaderAt, size int64) (*ZipFS, error) {
23+
zr, err := zip.NewReader(r, size)
24+
if err != nil {
25+
return nil, err
26+
}
27+
28+
// Keep a reference to the underlying file if it's a Closer
29+
var closer io.Closer
30+
if c, ok := r.(io.Closer); ok {
31+
closer = c
32+
}
33+
34+
return &ZipFS{reader: zr, file: closer}, nil
35+
}
36+
37+
// Close closes the underlying file if it's a Closer.
38+
func (z *ZipFS) Close() error {
39+
if z.file != nil {
40+
return z.file.Close()
41+
}
42+
return nil
43+
}
44+
45+
// Open opens the named file from the ZIP archive.
46+
// It automatically decodes GBK-encoded filenames to UTF-8.
47+
func (z *ZipFS) Open(name string) (fs.File, error) {
48+
// Try to open with the name as-is first (for UTF-8 filenames)
49+
var targetFile *zip.File
50+
for _, f := range z.reader.File {
51+
if f.Name == name {
52+
targetFile = f
53+
break
54+
}
55+
}
56+
57+
// If not found, try to find a file with a matching decoded name
58+
if targetFile == nil {
59+
for _, f := range z.reader.File {
60+
decodedName := decodeZipFilename(f.Name)
61+
if decodedName == name {
62+
targetFile = f
63+
break
64+
}
65+
}
66+
}
67+
68+
if targetFile == nil {
69+
return nil, &fs.PathError{Op: "open", Path: name, Err: fs.ErrNotExist}
70+
}
71+
72+
rc, err := targetFile.Open()
73+
if err != nil {
74+
return nil, err
75+
}
76+
77+
return &zipFile{
78+
ReadCloser: rc,
79+
file: targetFile,
80+
}, nil
81+
}
82+
83+
// ReadDir reads the directory named by name from the ZIP archive.
84+
// It returns directory entries with decoded filenames.
85+
func (z *ZipFS) ReadDir(name string) ([]fs.DirEntry, error) {
86+
// For zip files, we need to build the directory listing
87+
// by examining all files and filtering by prefix
88+
var entries []fs.DirEntry
89+
90+
// Normalize the directory name
91+
if name == "." {
92+
name = ""
93+
} else if name != "" && name[len(name)-1] != '/' {
94+
name = name + "/"
95+
}
96+
97+
seen := make(map[string]bool)
98+
for _, f := range z.reader.File {
99+
decodedName := decodeZipFilename(f.Name)
100+
101+
// Skip files not in this directory
102+
if name != "" && !hasPrefix(decodedName, name) {
103+
continue
104+
}
105+
106+
// Get the relative path within this directory
107+
relPath := decodedName
108+
if name != "" {
109+
relPath = decodedName[len(name):]
110+
}
111+
112+
// Skip if this is the directory itself
113+
if relPath == "" {
114+
continue
115+
}
116+
117+
// Extract the first path component
118+
var entryName string
119+
idx := indexOf(relPath, '/')
120+
if idx >= 0 {
121+
entryName = relPath[:idx]
122+
} else {
123+
entryName = relPath
124+
}
125+
126+
// Skip duplicates
127+
if seen[entryName] {
128+
continue
129+
}
130+
seen[entryName] = true
131+
132+
// Determine if this entry is a directory by checking if there's more path after it
133+
// or if the original file is marked as a directory
134+
isDir := idx >= 0 || f.FileInfo().IsDir()
135+
136+
entries = append(entries, &zipDirEntry{
137+
name: entryName,
138+
isDir: isDir,
139+
info: &f.FileHeader,
140+
})
141+
}
142+
143+
return entries, nil
144+
}
145+
146+
// Stat returns file information for the named file from the ZIP archive.
147+
func (z *ZipFS) Stat(name string) (fs.FileInfo, error) {
148+
if name == "." {
149+
// Return info for root directory
150+
return &zipRootInfo{}, nil
151+
}
152+
153+
// Try to find the file with decoded name
154+
for _, f := range z.reader.File {
155+
decodedName := decodeZipFilename(f.Name)
156+
if decodedName == name || decodedName == name+"/" {
157+
return f.FileInfo(), nil
158+
}
159+
}
160+
161+
return nil, &fs.PathError{Op: "stat", Path: name, Err: fs.ErrNotExist}
162+
}
163+
164+
// decodeZipFilename decodes a filename from a ZIP archive, automatically
165+
// detecting and converting GBK-encoded filenames to UTF-8.
166+
func decodeZipFilename(filename string) string {
167+
// Check if it's already valid UTF-8
168+
if utf8.ValidString(filename) {
169+
return filename
170+
}
171+
172+
// Not valid UTF-8, try to decode as GBK
173+
decoded, err := simplifiedchinese.GBK.NewDecoder().String(filename)
174+
if err != nil {
175+
// GBK decoding failed, return original
176+
return filename
177+
}
178+
179+
// Successfully decoded from GBK
180+
return decoded
181+
}
182+
183+
// Helper functions
184+
func hasPrefix(s, prefix string) bool {
185+
return len(s) >= len(prefix) && s[:len(prefix)] == prefix
186+
}
187+
188+
func indexOf(s string, c byte) int {
189+
for i := 0; i < len(s); i++ {
190+
if s[i] == c {
191+
return i
192+
}
193+
}
194+
return -1
195+
}
196+
197+
// zipFile wraps an io.ReadCloser from a zip.File and implements fs.File
198+
type zipFile struct {
199+
io.ReadCloser
200+
file *zip.File
201+
}
202+
203+
func (zf *zipFile) Stat() (fs.FileInfo, error) {
204+
return zf.file.FileInfo(), nil
205+
}
206+
207+
// zipDirEntry implements fs.DirEntry for ZIP file entries
208+
type zipDirEntry struct {
209+
name string
210+
isDir bool
211+
info *zip.FileHeader
212+
}
213+
214+
func (e *zipDirEntry) Name() string { return e.name }
215+
func (e *zipDirEntry) IsDir() bool { return e.isDir }
216+
func (e *zipDirEntry) Type() fs.FileMode { return e.info.Mode().Type() }
217+
func (e *zipDirEntry) Info() (fs.FileInfo, error) { return e.info.FileInfo(), nil }
218+
219+
// zipRootInfo implements fs.FileInfo for the root directory
220+
type zipRootInfo struct{}
221+
222+
func (i *zipRootInfo) Name() string { return "." }
223+
func (i *zipRootInfo) Size() int64 { return 0 }
224+
func (i *zipRootInfo) Mode() fs.FileMode { return fs.ModeDir | 0755 }
225+
func (i *zipRootInfo) ModTime() time.Time { return time.Time{} }
226+
func (i *zipRootInfo) IsDir() bool { return true }
227+
func (i *zipRootInfo) Sys() any { return nil }

server/filesystem/compress.go

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ import (
1313
"unicode/utf8"
1414

1515
"emperror.dev/errors"
16-
"github.com/klauspost/compress/zip"
1716
"github.com/mholt/archives"
1817
"golang.org/x/text/encoding/simplifiedchinese"
1918

@@ -81,11 +80,10 @@ func (fs *Filesystem) archiverFileSystem(ctx context.Context, p string) (iofs.FS
8180
if format != nil {
8281
switch ff := format.(type) {
8382
case archives.Zip:
84-
// zip.Reader is more performant than ArchiveFS, because zip.Reader caches content information
85-
// and zip.Reader can open several content files concurrently because of io.ReaderAt requirement
86-
// while ArchiveFS can't.
87-
// zip.Reader doesn't suffer from issue #330 and #310 according to local test (but they should be fixed anyway)
88-
return zip.NewReader(f, info.Size())
83+
// Use our custom ZipFS wrapper that handles GBK-encoded filenames
84+
// This is more performant than ArchiveFS, because it caches content information
85+
// and can open several content files concurrently because of io.ReaderAt requirement.
86+
return archiverext.NewZipFS(f, info.Size())
8987
case archives.Extraction:
9088
return &archives.ArchiveFS{Stream: io.NewSectionReader(f, 0, info.Size()), Format: ff, Context: ctx}, nil
9189
case archives.Compression:
@@ -115,6 +113,11 @@ func (fs *Filesystem) SpaceAvailableForDecompression(ctx context.Context, dir st
115113
return err
116114
}
117115

116+
// Close the filesystem after we're done to release file handles
117+
if closer, ok := fsys.(io.Closer); ok {
118+
defer closer.Close()
119+
}
120+
118121
// Create a context with timeout to prevent long delays on large archives
119122
timeoutCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
120123
defer cancel()

server/filesystem/compress_test.go

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,18 @@ func TestFilesystem_DecompressFile_GBK(t *testing.T) {
7777
g.Assert(err).IsNil()
7878
})
7979

80+
g.It("can check space for a zip with GBK-encoded filenames", func() {
81+
// copy the file to the new FS
82+
c, err := os.ReadFile("./testdata/test-gbk.zip")
83+
g.Assert(err).IsNil()
84+
err = rfs.CreateServerFile("./test-gbk.zip", c)
85+
g.Assert(err).IsNil()
86+
87+
// check space availability
88+
err = fs.SpaceAvailableForDecompression(context.Background(), "/", "test-gbk.zip")
89+
g.Assert(err).IsNil()
90+
})
91+
8092
g.AfterEach(func() {
8193
_ = fs.TruncateRootDirectory()
8294
})
@@ -105,10 +117,10 @@ func TestDecodeFilename(t *testing.T) {
105117
utf8String := "测试文件.txt"
106118
gbkString, err := simplifiedchinese.GBK.NewEncoder().String(utf8String)
107119
g.Assert(err).IsNil()
108-
120+
109121
// Verify it's not valid UTF-8
110122
g.Assert(utf8.ValidString(gbkString)).IsFalse()
111-
123+
112124
// Decode and verify it matches original UTF-8
113125
output := decodeFilename(gbkString)
114126
g.Assert(output).Equal(utf8String)

0 commit comments

Comments
 (0)