Skip to content

Commit 5821053

Browse files
committed
cache: update to Go tip as of April 2023
As of commit 0fd6ae548f550bdbee4a434285ff052fb9dc7417. Besides rewriting import paths, we swapped base.Fatalf with log.Fatalf, and replaced cfg.Getenv with os.Getenv, adding a note about the difference in behavior. The old code already had this limitation. We hadn't updated this package since it was first copied in 2018, so quite a few changes have taken place. Of note, it now supports mmap; leave that out for now, to keep this commit simple and to leave adding the mmap package for another patch. A minor API change is that Trim now returns an error. While technically a breaking change, the vast majority of users will be simply calling the API without expecting a result, and that will continue to work like it did before. Checking for errors on trim is useful, which is why upstream added it. Finally, the cache now uses lockedfile, which we already copied over.
1 parent eeed7e8 commit 5821053

File tree

6 files changed

+231
-217
lines changed

6 files changed

+231
-217
lines changed

cache/cache.go

Lines changed: 138 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,14 @@ import (
1212
"errors"
1313
"fmt"
1414
"io"
15-
"io/ioutil"
15+
"io/fs"
1616
"os"
1717
"path/filepath"
1818
"strconv"
1919
"strings"
2020
"time"
21+
22+
"github.com/rogpeppe/go-internal/lockedfile"
2123
)
2224

2325
// An ActionID is a cache action key, the hash of a complete description of a
@@ -31,7 +33,6 @@ type OutputID [HashSize]byte
3133
// A Cache is a package cache, backed by a file system directory tree.
3234
type Cache struct {
3335
dir string
34-
log *os.File
3536
now func() time.Time
3637
}
3738

@@ -52,21 +53,16 @@ func Open(dir string) (*Cache, error) {
5253
return nil, err
5354
}
5455
if !info.IsDir() {
55-
return nil, &os.PathError{Op: "open", Path: dir, Err: fmt.Errorf("not a directory")}
56+
return nil, &fs.PathError{Op: "open", Path: dir, Err: fmt.Errorf("not a directory")}
5657
}
5758
for i := 0; i < 256; i++ {
5859
name := filepath.Join(dir, fmt.Sprintf("%02x", i))
59-
if err := os.MkdirAll(name, 0o777); err != nil {
60+
if err := os.MkdirAll(name, 0777); err != nil {
6061
return nil, err
6162
}
6263
}
63-
f, err := os.OpenFile(filepath.Join(dir, "log.txt"), os.O_WRONLY|os.O_APPEND|os.O_CREATE, 0o666)
64-
if err != nil {
65-
return nil, err
66-
}
6764
c := &Cache{
6865
dir: dir,
69-
log: f,
7066
now: time.Now,
7167
}
7268
return c, nil
@@ -77,7 +73,22 @@ func (c *Cache) fileName(id [HashSize]byte, key string) string {
7773
return filepath.Join(c.dir, fmt.Sprintf("%02x", id[0]), fmt.Sprintf("%x", id)+"-"+key)
7874
}
7975

80-
var errMissing = errors.New("cache entry not found")
76+
// An entryNotFoundError indicates that a cache entry was not found, with an
77+
// optional underlying reason.
78+
type entryNotFoundError struct {
79+
Err error
80+
}
81+
82+
func (e *entryNotFoundError) Error() string {
83+
if e.Err == nil {
84+
return "cache entry not found"
85+
}
86+
return fmt.Sprintf("cache entry not found: %v", e.Err)
87+
}
88+
89+
func (e *entryNotFoundError) Unwrap() error {
90+
return e.Err
91+
}
8192

8293
const (
8394
// action entry file is "v1 <hex id> <hex out> <decimal size space-padded to 20 bytes> <unixnano space-padded to 20 bytes>\n"
@@ -96,6 +107,8 @@ const (
96107
// GODEBUG=gocacheverify=1.
97108
var verify = false
98109

110+
var errVerifyMode = errors.New("gocacheverify=1")
111+
99112
// DebugTest is set when GODEBUG=gocachetest=1 is in the environment.
100113
var DebugTest = false
101114

@@ -124,7 +137,7 @@ func initEnv() {
124137
// saved file for that output ID is still available.
125138
func (c *Cache) Get(id ActionID) (Entry, error) {
126139
if verify {
127-
return Entry{}, errMissing
140+
return Entry{}, &entryNotFoundError{Err: errVerifyMode}
128141
}
129142
return c.get(id)
130143
}
@@ -137,52 +150,62 @@ type Entry struct {
137150

138151
// get is Get but does not respect verify mode, so that Put can use it.
139152
func (c *Cache) get(id ActionID) (Entry, error) {
140-
missing := func() (Entry, error) {
141-
fmt.Fprintf(c.log, "%d miss %x\n", c.now().Unix(), id)
142-
return Entry{}, errMissing
153+
missing := func(reason error) (Entry, error) {
154+
return Entry{}, &entryNotFoundError{Err: reason}
143155
}
144156
f, err := os.Open(c.fileName(id, "a"))
145157
if err != nil {
146-
return missing()
158+
return missing(err)
147159
}
148160
defer f.Close()
149161
entry := make([]byte, entrySize+1) // +1 to detect whether f is too long
150-
if n, err := io.ReadFull(f, entry); n != entrySize || err != io.ErrUnexpectedEOF {
151-
return missing()
162+
if n, err := io.ReadFull(f, entry); n > entrySize {
163+
return missing(errors.New("too long"))
164+
} else if err != io.ErrUnexpectedEOF {
165+
if err == io.EOF {
166+
return missing(errors.New("file is empty"))
167+
}
168+
return missing(err)
169+
} else if n < entrySize {
170+
return missing(errors.New("entry file incomplete"))
152171
}
153172
if entry[0] != 'v' || entry[1] != '1' || entry[2] != ' ' || entry[3+hexSize] != ' ' || entry[3+hexSize+1+hexSize] != ' ' || entry[3+hexSize+1+hexSize+1+20] != ' ' || entry[entrySize-1] != '\n' {
154-
return missing()
173+
return missing(errors.New("invalid header"))
155174
}
156175
eid, entry := entry[3:3+hexSize], entry[3+hexSize:]
157176
eout, entry := entry[1:1+hexSize], entry[1+hexSize:]
158177
esize, entry := entry[1:1+20], entry[1+20:]
159178
etime, entry := entry[1:1+20], entry[1+20:]
160179
var buf [HashSize]byte
161-
if _, err := hex.Decode(buf[:], eid); err != nil || buf != id {
162-
return missing()
180+
if _, err := hex.Decode(buf[:], eid); err != nil {
181+
return missing(fmt.Errorf("decoding ID: %v", err))
182+
} else if buf != id {
183+
return missing(errors.New("mismatched ID"))
163184
}
164185
if _, err := hex.Decode(buf[:], eout); err != nil {
165-
return missing()
186+
return missing(fmt.Errorf("decoding output ID: %v", err))
166187
}
167188
i := 0
168189
for i < len(esize) && esize[i] == ' ' {
169190
i++
170191
}
171192
size, err := strconv.ParseInt(string(esize[i:]), 10, 64)
172-
if err != nil || size < 0 {
173-
return missing()
193+
if err != nil {
194+
return missing(fmt.Errorf("parsing size: %v", err))
195+
} else if size < 0 {
196+
return missing(errors.New("negative size"))
174197
}
175198
i = 0
176199
for i < len(etime) && etime[i] == ' ' {
177200
i++
178201
}
179202
tm, err := strconv.ParseInt(string(etime[i:]), 10, 64)
180-
if err != nil || size < 0 {
181-
return missing()
203+
if err != nil {
204+
return missing(fmt.Errorf("parsing timestamp: %v", err))
205+
} else if tm < 0 {
206+
return missing(errors.New("negative timestamp"))
182207
}
183208

184-
fmt.Fprintf(c.log, "%d get %x\n", c.now().Unix(), id)
185-
186209
c.used(c.fileName(id, "a"))
187210

188211
return Entry{buf, size, time.Unix(0, tm)}, nil
@@ -197,8 +220,11 @@ func (c *Cache) GetFile(id ActionID) (file string, entry Entry, err error) {
197220
}
198221
file = c.OutputFile(entry.OutputID)
199222
info, err := os.Stat(file)
200-
if err != nil || info.Size() != entry.Size {
201-
return "", Entry{}, errMissing
223+
if err != nil {
224+
return "", Entry{}, &entryNotFoundError{Err: err}
225+
}
226+
if info.Size() != entry.Size {
227+
return "", Entry{}, &entryNotFoundError{Err: errors.New("file incomplete")}
202228
}
203229
return file, entry, nil
204230
}
@@ -211,13 +237,35 @@ func (c *Cache) GetBytes(id ActionID) ([]byte, Entry, error) {
211237
if err != nil {
212238
return nil, entry, err
213239
}
214-
data, _ := ioutil.ReadFile(c.OutputFile(entry.OutputID))
240+
data, _ := os.ReadFile(c.OutputFile(entry.OutputID))
215241
if sha256.Sum256(data) != entry.OutputID {
216-
return nil, entry, errMissing
242+
return nil, entry, &entryNotFoundError{Err: errors.New("bad checksum")}
217243
}
218244
return data, entry, nil
219245
}
220246

247+
/*
248+
TODO: consider copying cmd/go/internal/mmap over for this method
249+
250+
// GetMmap looks up the action ID in the cache and returns
251+
// the corresponding output bytes.
252+
// GetMmap should only be used for data that can be expected to fit in memory.
253+
func (c *Cache) GetMmap(id ActionID) ([]byte, Entry, error) {
254+
entry, err := c.Get(id)
255+
if err != nil {
256+
return nil, entry, err
257+
}
258+
md, err := mmap.Mmap(c.OutputFile(entry.OutputID))
259+
if err != nil {
260+
return nil, Entry{}, err
261+
}
262+
if int64(len(md.Data)) != entry.Size {
263+
return nil, Entry{}, &entryNotFoundError{Err: errors.New("file incomplete")}
264+
}
265+
return md.Data, entry, nil
266+
}
267+
*/
268+
221269
// OutputFile returns the name of the cache file storing output with the given OutputID.
222270
func (c *Cache) OutputFile(out OutputID) string {
223271
file := c.fileName(out, "d")
@@ -261,16 +309,23 @@ func (c *Cache) used(file string) {
261309
}
262310

263311
// Trim removes old cache entries that are likely not to be reused.
264-
func (c *Cache) Trim() {
312+
func (c *Cache) Trim() error {
265313
now := c.now()
266314

267315
// We maintain in dir/trim.txt the time of the last completed cache trim.
268316
// If the cache has been trimmed recently enough, do nothing.
269317
// This is the common case.
270-
data, _ := ioutil.ReadFile(filepath.Join(c.dir, "trim.txt"))
271-
t, err := strconv.ParseInt(strings.TrimSpace(string(data)), 10, 64)
272-
if err == nil && now.Sub(time.Unix(t, 0)) < trimInterval {
273-
return
318+
// If the trim file is corrupt (detected when the file can't be parsed or the
319+
// trim time is too far in the future), attempt the trim anyway. It's possible that
320+
// the cache was full when the corruption happened. Attempting a trim on
321+
// an empty cache is cheap, so there wouldn't be a big performance hit in that case.
322+
if data, err := lockedfile.Read(filepath.Join(c.dir, "trim.txt")); err == nil {
323+
if t, err := strconv.ParseInt(strings.TrimSpace(string(data)), 10, 64); err == nil {
324+
lastTrim := time.Unix(t, 0)
325+
if d := now.Sub(lastTrim); d < trimInterval && d > -mtimeInterval {
326+
return nil
327+
}
328+
}
274329
}
275330

276331
// Trim each of the 256 subdirectories.
@@ -282,7 +337,15 @@ func (c *Cache) Trim() {
282337
c.trimSubdir(subdir, cutoff)
283338
}
284339

285-
ioutil.WriteFile(filepath.Join(c.dir, "trim.txt"), []byte(fmt.Sprintf("%d", now.Unix())), 0o666)
340+
// Report a failure to record the trim time to the caller. If the complete
341+
// timestamp isn't written, the cache will appear older than it is, and we'll trim it again next time.
342+
var b bytes.Buffer
343+
fmt.Fprintf(&b, "%d", now.Unix())
344+
if err := lockedfile.Write(filepath.Join(c.dir, "trim.txt"), &b, 0666); err != nil {
345+
return err
346+
}
347+
348+
return nil
286349
}
287350

288351
// trimSubdir trims a single cache subdirectory.
@@ -326,7 +389,7 @@ func (c *Cache) putIndexEntry(id ActionID, out OutputID, size int64, allowVerify
326389
// in verify mode we are double-checking that the cache entries
327390
// are entirely reproducible. As just noted, this may be unrealistic
328391
// in some cases but the check is also useful for shaking out real bugs.
329-
entry := []byte(fmt.Sprintf("v1 %x %x %20d %20d\n", id, out, size, time.Now().UnixNano()))
392+
entry := fmt.Sprintf("v1 %x %x %20d %20d\n", id, out, size, time.Now().UnixNano())
330393
if verify && allowVerify {
331394
old, err := c.get(id)
332395
if err == nil && (old.OutputID != out || old.Size != size) {
@@ -336,13 +399,35 @@ func (c *Cache) putIndexEntry(id ActionID, out OutputID, size int64, allowVerify
336399
}
337400
}
338401
file := c.fileName(id, "a")
339-
if err := ioutil.WriteFile(file, entry, 0o666); err != nil {
402+
403+
// Write the index entry to its file in the cache directory.
404+
mode := os.O_WRONLY | os.O_CREATE
405+
f, err := os.OpenFile(file, mode, 0666)
406+
if err != nil {
407+
return err
408+
}
409+
_, err = f.WriteString(entry)
410+
if err == nil {
411+
// Truncate the file only *after* writing it.
412+
// (This should be a no-op, but truncate just in case of previous corruption.)
413+
//
414+
// This differs from os.WriteFile, which truncates to 0 *before* writing
415+
// via os.O_TRUNC. Truncating only after writing ensures that a second write
416+
// of the same content to the same file is idempotent, and does not — even
417+
// temporarily! — undo the effect of the first write.
418+
err = f.Truncate(int64(len(entry)))
419+
}
420+
if closeErr := f.Close(); err == nil {
421+
err = closeErr
422+
}
423+
if err != nil {
424+
// TODO(bcmills): This Remove potentially races with another go command writing to file.
425+
// Can we eliminate it?
340426
os.Remove(file)
341427
return err
342428
}
343429
os.Chtimes(file, c.now(), c.now()) // mainly for tests
344430

345-
fmt.Fprintf(c.log, "%d put %x %x %d\n", c.now().Unix(), id, out, size)
346431
return nil
347432
}
348433

@@ -413,7 +498,7 @@ func (c *Cache) copyFile(file io.ReadSeeker, out OutputID, size int64) error {
413498
if err == nil && info.Size() > size { // shouldn't happen but fix in case
414499
mode |= os.O_TRUNC
415500
}
416-
f, err := os.OpenFile(name, mode, 0o666)
501+
f, err := os.OpenFile(name, mode, 0666)
417502
if err != nil {
418503
return err
419504
}
@@ -471,3 +556,15 @@ func (c *Cache) copyFile(file io.ReadSeeker, out OutputID, size int64) error {
471556

472557
return nil
473558
}
559+
560+
// FuzzDir returns a subdirectory within the cache for storing fuzzing data.
561+
// The subdirectory may not exist.
562+
//
563+
// This directory is managed by the internal/fuzz package. Files in this
564+
// directory aren't removed by the 'go clean -cache' command or by Trim.
565+
// They may be removed with 'go clean -fuzzcache'.
566+
//
567+
// TODO(#48526): make Trim remove unused files from this directory.
568+
func (c *Cache) FuzzDir() string {
569+
return filepath.Join(c.dir, "fuzz")
570+
}

0 commit comments

Comments
 (0)