Commit 7cf25c6

Update registry cache to (ab)use Data field of Descriptor objects
This allows us to store full descriptors (necessary to implement `Resolve*`) while giving a net decrease in the number of fields we have to juggle / keep in sync. It does mean consumers need to be careful about how they use the `Descriptor` objects we return (especially with regard to `Data`), but it also makes `Data` readily available to them if they want it (something I'd like to use in the future). It's a net win anyhow: the upstream objects might've contained `Data` fields themselves, so this forces us to deal with them in a sane way we're comfortable with instead of potentially including them verbatim unintentionally. 🚀
1 parent: 7335921
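To illustrate the consumer-side care the message alludes to, here is a minimal sketch (not part of the commit; `usableData` is a hypothetical helper, though its length-vs-size check mirrors the one `synthesize-index.go` gains below):

```go
// usableData reports whether a Descriptor's inline Data can be trusted:
// it must be present and its length must match the declared Size;
// otherwise the caller should fetch the content by digest instead.
func usableData(desc ociregistry.Descriptor) ([]byte, bool) {
	if desc.Data != nil && int64(len(desc.Data)) == desc.Size {
		return desc.Data, true
	}
	return nil, false
}
```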

3 files changed: 54 additions, 46 deletions

registry/cache.go (31 additions, 40 deletions)
```diff
@@ -21,8 +21,7 @@ func RegistryCache(r ociregistry.Interface) ociregistry.Interface {
 		registry: r, // TODO support "nil" here so this can be a poor-man's ocimem implementation? 👀 see also https://github.com/cue-labs/oci/issues/24
 		has:      map[string]bool{},
 		tags:     map[string]ociregistry.Digest{},
-		types:    map[ociregistry.Digest]string{},
-		data:     map[ociregistry.Digest][]byte{},
+		data:     map[ociregistry.Digest]ociregistry.Descriptor{},
 	}
 }
 
@@ -32,11 +31,10 @@ type registryCache struct {
 	registry ociregistry.Interface
 
 	// https://github.com/cue-labs/oci/issues/24
-	mu    sync.Mutex                    // TODO some kind of per-object/name/digest mutex so we don't request the same object from the upstream registry concurrently (on *top* of our maps mutex)?
-	has   map[string]bool               // "repo/name@digest" => true (whether a given repo has the given digest)
-	tags  map[string]ociregistry.Digest // "repo/name:tag" => digest
-	types map[ociregistry.Digest]string // digest => "mediaType" (most recent *storing* / "cache-miss" lookup wins, in the case of upstream/cross-repo ambiguity)
-	data  map[ociregistry.Digest][]byte // digest => data
+	mu   sync.Mutex                                     // TODO some kind of per-object/name/digest mutex so we don't request the same object from the upstream registry concurrently (on *top* of our maps mutex)?
+	has  map[string]bool                                // "repo/name@digest" => true (whether a given repo has the given digest)
+	tags map[string]ociregistry.Digest                  // "repo/name:tag" => digest
+	data map[ociregistry.Digest]ociregistry.Descriptor // digest => mediaType+size(+data) (most recent *storing* / "cache-miss" lookup wins, in the case of upstream/cross-repo ambiguity)
 }
 
 func cacheKeyDigest(repo string, digest ociregistry.Digest) string {
@@ -52,41 +50,38 @@ func (rc *registryCache) getBlob(ctx context.Context, repo string, digest ocireg
 	rc.mu.Lock()
 	defer rc.mu.Unlock()
 
-	if b, ok := rc.data[digest]; ok && rc.has[cacheKeyDigest(repo, digest)] {
-		return ocimem.NewBytesReader(b, ociregistry.Descriptor{
-			MediaType: rc.types[digest],
-			Digest:    digest,
-			Size:      int64(len(b)),
-		}), nil
+	if desc, ok := rc.data[digest]; ok && desc.Data != nil && rc.has[cacheKeyDigest(repo, digest)] {
+		return ocimem.NewBytesReader(desc.Data, desc), nil
 	}
 
 	r, err := f(ctx, repo, digest)
 	if err != nil {
 		return nil, err
 	}
-	//defer r.Close()
+	// defer r.Close() happens later when we know we aren't making Close the caller's responsibility
 
 	desc := r.Descriptor()
+	digest = desc.Digest // if this isn't a no-op, we've got a naughty registry
 
-	rc.has[cacheKeyDigest(repo, desc.Digest)] = true
-	rc.types[desc.Digest] = desc.MediaType
+	rc.has[cacheKeyDigest(repo, digest)] = true
+
+	if desc.Size > manifestSizeLimit {
+		rc.data[digest] = desc
+		return r, nil
+	}
+	defer r.Close()
 
-	b, err := io.ReadAll(r)
+	desc.Data, err = io.ReadAll(r)
 	if err != nil {
-		r.Close()
 		return nil, err
 	}
 	if err := r.Close(); err != nil {
 		return nil, err
 	}
 
-	if len(b) <= manifestSizeLimit {
-		rc.data[desc.Digest] = b
-	} else {
-		delete(rc.data, desc.Digest)
-	}
+	rc.data[digest] = desc
 
-	return ocimem.NewBytesReader(b, desc), nil
+	return ocimem.NewBytesReader(desc.Data, desc), nil
 }
 
 func (rc *registryCache) GetBlob(ctx context.Context, repo string, digest ociregistry.Digest) (ociregistry.BlobReader, error) {
@@ -104,43 +99,39 @@ func (rc *registryCache) GetTag(ctx context.Context, repo string, tag string) (o
 	tagKey := cacheKeyTag(repo, tag)
 
 	if digest, ok := rc.tags[tagKey]; ok {
-		if b, ok := rc.data[digest]; ok {
-			return ocimem.NewBytesReader(b, ociregistry.Descriptor{
-				MediaType: rc.types[digest],
-				Digest:    digest,
-				Size:      int64(len(b)),
-			}), nil
+		if desc, ok := rc.data[digest]; ok && desc.Data != nil {
+			return ocimem.NewBytesReader(desc.Data, desc), nil
 		}
 	}
 
 	r, err := rc.registry.GetTag(ctx, repo, tag)
 	if err != nil {
 		return nil, err
 	}
-	//defer r.Close()
+	// defer r.Close() happens later when we know we aren't making Close the caller's responsibility
 
 	desc := r.Descriptor()
 
 	rc.has[cacheKeyDigest(repo, desc.Digest)] = true
 	rc.tags[tagKey] = desc.Digest
-	rc.types[desc.Digest] = desc.MediaType
 
-	b, err := io.ReadAll(r)
+	if desc.Size > manifestSizeLimit {
+		rc.data[desc.Digest] = desc
+		return r, nil
+	}
+	defer r.Close()
+
+	desc.Data, err = io.ReadAll(r)
 	if err != nil {
-		r.Close()
 		return nil, err
 	}
 	if err := r.Close(); err != nil {
 		return nil, err
 	}
 
-	if len(b) <= manifestSizeLimit {
-		rc.data[desc.Digest] = b
-	} else {
-		delete(rc.data, desc.Digest)
-	}
+	rc.data[desc.Digest] = desc
 
-	return ocimem.NewBytesReader(b, desc), nil
+	return ocimem.NewBytesReader(desc.Data, desc), nil
 }
 
 // TODO more methods (currently only implements what's actually necessary for SynthesizeIndex)
```
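As a usage sketch (hypothetical: the repository name and tag are illustrative, `ocimem.New` just provides a convenient in-memory upstream, and error handling is abbreviated), the reworked cache behaves like this from a consumer's point of view:

```go
package registry

import (
	"context"
	"io"

	"cuelabs.dev/go/oci/ociregistry/ocimem"
)

func exampleCacheUsage(ctx context.Context) error {
	upstream := ocimem.New() // any ociregistry.Interface would do
	cached := RegistryCache(upstream)

	r, err := cached.GetTag(ctx, "example/repo", "latest")
	if err != nil {
		return err
	}
	defer r.Close()

	desc := r.Descriptor()
	// For objects at or under manifestSizeLimit, the cache fills in
	// desc.Data, so subsequent lookups are served from memory; consumers
	// must treat Data as optional and avoid returning it verbatim.
	if desc.Data != nil && int64(len(desc.Data)) == desc.Size {
		return nil // full content already in hand
	}
	_, err = io.ReadAll(r) // larger objects still stream from upstream
	return err
}
```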

registry/read-helpers.go (2 additions, 0 deletions)
```diff
@@ -20,6 +20,8 @@ func readJSONHelper(r ociregistry.BlobReader, v interface{}) error {
 		return err
 	}
 
+	// TODO if desc.Data != nil and len() == desc.Size, we should probably check/use that? 👀
+
 	// make sure we can't possibly read (much) more than we're supposed to
 	limited := &io.LimitedReader{
 		R: r,
```
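One possible shape for that TODO (a sketch only; it assumes `readJSONHelper` keeps its current structure and that `encoding/json` is available in the file):

```go
// Hypothetical early-out: if the descriptor already carries the full
// content (as the registry cache above now arranges), decode it directly
// rather than re-reading the stream.
if desc := r.Descriptor(); desc.Data != nil && int64(len(desc.Data)) == desc.Size {
	return json.Unmarshal(desc.Data, v)
}
```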

registry/synthesize-index.go (21 additions, 6 deletions)
```diff
@@ -9,6 +9,7 @@ import (
 	"github.com/docker-library/bashbrew/architecture"
 
 	"cuelabs.dev/go/oci/ociregistry"
+	"cuelabs.dev/go/oci/ociregistry/ocimem"
 	ocispec "github.com/opencontainers/image-spec/specs-go/v1"
 )
 
@@ -127,6 +128,11 @@ func SynthesizeIndex(ctx context.Context, ref Reference) (*ocispec.Index, error)
 			}
 		}
 
+		// TODO if m.Size > 2048 {
+		// make sure we don't return any (big) data fields, now that we know we don't need them for sure (they might exist in the index we queried, but they're also used as an implementation detail in our registry cache code to store the original upstream data)
+		m.Data = nil
+		// }
+
 		index.Manifests[i] = m
 		seen[string(m.Digest)] = &index.Manifests[i]
 		i++
@@ -158,9 +164,13 @@ func normalizeManifestPlatform(ctx context.Context, m *ocispec.Descriptor, r oci
 	case ocispec.MediaTypeImageManifest, mediaTypeDockerImageManifest:
 		var err error
 		if r == nil {
-			r, err = client.GetManifest(ctx, ref.Repository, m.Digest)
-			if err != nil {
-				return err
+			if m.Data != nil && int64(len(m.Data)) == m.Size {
+				r = ocimem.NewBytesReader(m.Data, *m)
+			} else {
+				r, err = client.GetManifest(ctx, ref.Repository, m.Digest)
+				if err != nil {
+					return err
+				}
 			}
 			defer r.Close()
 		}
@@ -172,9 +182,14 @@ func normalizeManifestPlatform(ctx context.Context, m *ocispec.Descriptor, r oci
 
 	switch manifest.Config.MediaType {
 	case ocispec.MediaTypeImageConfig, mediaTypeDockerImageConfig:
-		r, err := client.GetBlob(ctx, ref.Repository, manifest.Config.Digest)
-		if err != nil {
-			return err
+		var r ociregistry.BlobReader
+		if manifest.Config.Data != nil && int64(len(manifest.Config.Data)) == manifest.Config.Size {
+			r = ocimem.NewBytesReader(manifest.Config.Data, manifest.Config)
+		} else {
+			r, err = client.GetBlob(ctx, ref.Repository, manifest.Config.Digest)
+			if err != nil {
+				return err
+			}
 		}
 		defer r.Close()
```
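Net effect: callers of `SynthesizeIndex` get an index whose manifests are scrubbed of `Data`, while `normalizeManifestPlatform` transparently skips upstream round-trips whenever a descriptor already carries plausible `Data`. A hypothetical call site (assuming `Reference` can be built with just a `Repository` field):

```go
index, err := SynthesizeIndex(ctx, Reference{Repository: "example/repo"})
if err != nil {
	return err
}
for _, m := range index.Manifests {
	// m.Data is nil by construction here (scrubbed above), so the index
	// is safe to marshal and return without leaking cached blob contents.
	fmt.Println(m.Digest, m.MediaType)
}
```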
