
Commit b0dcc10

Implement more fine-grained mutexes in registryCache
In short, this keeps the "data mutex" as-is, but only acquires it when we have some data to shove in (and only for the limited instructions necessary to update the data), and adds a new mutex per-tag or per-digest to prevent concurrent requests for the exact same upstream content (which is the main reason for this cache in the first place, so this feels appropriate). Without this, our current mutex means that any other efforts to parallelize will ultimately bottleneck on our registry cache.

In order to test this effectively, I've added a `--parallel` flag to `lookup` which just hyper-aggressively runs every single lookup in parallel inside a goroutine.

My first test of this was doing a lookup of the first tag of every DOI repository (so ~147 concurrent lookups in total), and I found it was consistently ~1m57s both with and without this change. Our Hub rate limiter is pegged at ~100/min, which seems consistent with that result. When I increased that limit to ~200/min, I was able to achieve a small speedup with this change (~43s down to ~30s). In other words, for this to actually be effective as a speedup against Docker Hub if we implement parallel deploy, for example, we'll *also* have to increase our rate limit (which I think is fairly safe now that we handle 429 by explicitly emptying the limiter).

In order to *really* test this effectively, I took a different approach. I spun up a local registry (`docker run -dit -p 127.0.0.1:5000:5000 -p [::1]:5000:5000 --name registry registry`) and copied `hello-world:linux` into it 10,000 times (something like `crane cp hello-world:linux localhost:5000/hello && echo localhost:5000/hello:{1..10000} | xargs -rtn1 -P$(nproc) crane cp localhost:5000/hello` -- this could also be done with `jq` and `deploy` if you are ambitious 👀). Then I used `time ./bin/lookup --parallel $(crane ls --full-ref localhost:5000/hello) > /dev/null` to establish some benchmarks.

Without this change, it's pretty consistently in the 15-20s range on my local system. With this change, it drops down an order of magnitude, into the 3-6s range.
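The registryCache change itself lands in the commit's second file, which isn't reproduced in the diff below (only `cmd/lookup/main.go` is). As a rough sketch of the per-key locking described above -- every identifier here is invented for illustration, not the actual meta-scripts code -- the pattern looks something like this:

// Illustrative sketch only: the real registryCache lives in the commit's other file,
// and these names (cache, keyMutex, get, fetch) are invented for this example.
package cache

import "sync"

type cache struct {
	dataMu sync.Mutex        // the existing "data mutex": held only while touching the data map
	data   map[string][]byte // cached upstream content, keyed by tag or digest

	keysMu sync.Mutex             // guards the keys map below
	keys   map[string]*sync.Mutex // one mutex per tag/digest, to serialize identical upstream requests
}

// keyMutex returns the per-key mutex for key, creating it on first use.
func (c *cache) keyMutex(key string) *sync.Mutex {
	c.keysMu.Lock()
	defer c.keysMu.Unlock()
	if c.keys == nil {
		c.keys = map[string]*sync.Mutex{}
	}
	mu, ok := c.keys[key]
	if !ok {
		mu = &sync.Mutex{}
		c.keys[key] = mu
	}
	return mu
}

// get serializes concurrent requests for the same key (so only one upstream fetch
// happens), while requests for different keys run in parallel; the data mutex is
// only held for the few instructions needed to read or store the map entry,
// never across the fetch itself.
func (c *cache) get(key string, fetch func() ([]byte, error)) ([]byte, error) {
	mu := c.keyMutex(key)
	mu.Lock()
	defer mu.Unlock()

	c.dataMu.Lock()
	val, ok := c.data[key]
	c.dataMu.Unlock()
	if ok {
		return val, nil
	}

	val, err := fetch() // the slow network request happens outside the data mutex
	if err != nil {
		return nil, err
	}

	c.dataMu.Lock()
	if c.data == nil {
		c.data = map[string][]byte{}
	}
	c.data[key] = val
	c.dataMu.Unlock()
	return val, nil
}

The "only one upstream request per key" half of this is essentially what golang.org/x/sync/singleflight provides, if pulling in a dependency were preferable to a hand-rolled mutex map.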
1 parent c8b6c1a · commit b0dcc10

2 files changed, +192 -76 lines


cmd/lookup/main.go

Lines changed: 65 additions & 37 deletions

@@ -8,6 +8,7 @@ import (
 	"io"
 	"os"
 	"os/signal"
+	"sync"
 
 	"github.com/docker-library/meta-scripts/registry"
 )
@@ -22,6 +23,16 @@ func main() {
 	)
 
 	args := os.Args[1:]
+
+	var (
+		parallel = false
+		wg       sync.WaitGroup
+	)
+	if len(args) > 0 && args[0] == "--parallel" {
+		args = args[1:]
+		parallel = true
+	}
+
 	for len(args) > 0 {
 		img := args[0]
 		args = args[1:]
@@ -35,53 +46,66 @@
 			continue
 		}
 
-		ref, err := registry.ParseRef(img)
-		if err != nil {
-			panic(err)
-		}
-
-		var obj any
-		if opts == zeroOpts {
-			// if we have no explicit type and didn't request a HEAD, invoke SynthesizeIndex instead of Lookup
-			obj, err = registry.SynthesizeIndex(ctx, ref)
-			if err != nil {
-				panic(err)
-			}
-		} else {
-			r, err := registry.Lookup(ctx, ref, &opts)
+		do := func(opts registry.LookupOptions) {
+			ref, err := registry.ParseRef(img)
 			if err != nil {
 				panic(err)
 			}
-			if r != nil {
-				desc := r.Descriptor()
-				if opts.Head {
-					obj = desc
-				} else {
-					b, err := io.ReadAll(r)
-					if err != nil {
-						r.Close()
-						panic(err)
-					}
-					if opts.Type == registry.LookupTypeManifest {
-						// if it was a manifest lookup, cast the byte slice to json.RawMessage so we get the actual JSON (not base64)
-						obj = json.RawMessage(b)
-					} else {
-						obj = b
-					}
-				}
-				err = r.Close()
+
+			var obj any
+			if opts == zeroOpts {
+				// if we have no explicit type and didn't request a HEAD, invoke SynthesizeIndex instead of Lookup
+				obj, err = registry.SynthesizeIndex(ctx, ref)
 				if err != nil {
 					panic(err)
 				}
 			} else {
-				obj = nil
+				r, err := registry.Lookup(ctx, ref, &opts)
+				if err != nil {
+					panic(err)
+				}
+				if r != nil {
+					desc := r.Descriptor()
+					if opts.Head {
+						obj = desc
+					} else {
+						b, err := io.ReadAll(r)
+						if err != nil {
+							r.Close()
+							panic(err)
+						}
+						if opts.Type == registry.LookupTypeManifest {
+							// if it was a manifest lookup, cast the byte slice to json.RawMessage so we get the actual JSON (not base64)
+							obj = json.RawMessage(b)
+						} else {
+							obj = b
+						}
+					}
+					err = r.Close()
+					if err != nil {
+						panic(err)
+					}
+				} else {
+					obj = nil
+				}
+			}
+
+			e := json.NewEncoder(os.Stdout)
+			e.SetIndent("", "\t")
+			if err := e.Encode(obj); err != nil {
+				panic(err)
 			}
 		}
 
-		e := json.NewEncoder(os.Stdout)
-		e.SetIndent("", "\t")
-		if err := e.Encode(obj); err != nil {
-			panic(err)
+		if parallel {
+			wg.Add(1)
+			go func(opts registry.LookupOptions) {
+				defer wg.Done()
+				// TODO synchronize output so that it still arrives in-order? maybe the randomness is part of the charm?
+				do(opts)
+			}(opts)
+		} else {
+			do(opts)
 		}
 
 		// reset state
@@ -91,4 +115,8 @@ func main() {
 	if opts != zeroOpts {
 		panic("dangling --type, --head, etc (without a following reference for it to apply to)")
 	}
+
+	if parallel {
+		wg.Wait()
+	}
 }
