Skip to content

Commit c35143a

Browse files
retlehs and claude authored
Shared content-addressed store for R2 deploys (#13)
* Use shared content-addressed store for R2 deploys

  Content-addressed p/ files are now stored once in a shared top-level prefix instead of being copied into every release. Deploys diff against the previous build locally to identify new files, reducing R2 operations from ~140k to ~2-5k per deploy.

  - SyncToR2 partitions files into shared (p/$hash) and per-release (p2/)
  - Local diff skips unchanged shared files (zero R2 ops)
  - Migration safety: reads R2 root to detect old layout, forces full upload when shared prefix doesn't exist yet
  - Skip logic requires r2_synced_at on previous build to avoid skipping files from builds that were never synced
  - RewritePackagesJSON only prefixes metadata-url; providers-url and provider-includes point at shared p/ prefix
  - Cleanup preserves shared p/ files (GC deferred to follow-up)
  - Removes CopyObject logic (no longer needed)

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* Fix gofmt alignment in r2_test.go

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent dfe2f89 commit c35143a

File tree

4 files changed

+241
-116
lines changed

4 files changed

+241
-116
lines changed

cmd/wpcomposer/cmd/deploy.go

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,9 @@ func secondsPtrSince(start time.Time) *int {
2525
return &v
2626
}
2727

28-
func syncToR2Timed(cmd *cobra.Command, buildDir, buildID, previousBuildID string) error {
28+
func syncToR2Timed(cmd *cobra.Command, buildDir, buildID, previousBuildDir string) error {
2929
started := time.Now()
30-
err := deploy.SyncToR2(cmd.Context(), application.Config.R2, buildDir, buildID, previousBuildID, application.Logger)
30+
err := deploy.SyncToR2(cmd.Context(), application.Config.R2, buildDir, buildID, previousBuildDir, application.Logger)
3131
deployR2SyncSeconds = secondsPtrSince(started)
3232
if err != nil {
3333
return fmt.Errorf("R2 sync failed: %w", err)
@@ -42,7 +42,7 @@ func runDeploy(cmd *cobra.Command, args []string) error {
4242
repoDir := filepath.Join("storage", "repository")
4343
cleanup, _ := cmd.Flags().GetBool("cleanup")
4444
toR2, _ := cmd.Flags().GetBool("to-r2")
45-
previousBuildID, _ := deploy.CurrentBuildID(repoDir)
45+
previousBuildDir := previousBuildDirFor(repoDir)
4646

4747
r2Cleanup, _ := cmd.Flags().GetBool("r2-cleanup")
4848
retainCount, _ := cmd.Flags().GetInt("retain")
@@ -99,7 +99,7 @@ func runDeploy(cmd *cobra.Command, args []string) error {
9999

100100
// Sync to R2 first, then promote locally
101101
if toR2 || application.Config.R2.Enabled {
102-
if err := syncToR2Timed(cmd, buildDir, target, previousBuildID); err != nil {
102+
if err := syncToR2Timed(cmd, buildDir, target, previousBuildDir); err != nil {
103103
return err
104104
}
105105
}
@@ -130,7 +130,7 @@ func runDeploy(cmd *cobra.Command, args []string) error {
130130

131131
// Sync to R2 first, then promote locally
132132
if toR2 || application.Config.R2.Enabled {
133-
if err := syncToR2Timed(cmd, buildDir, buildID, previousBuildID); err != nil {
133+
if err := syncToR2Timed(cmd, buildDir, buildID, previousBuildDir); err != nil {
134134
return err
135135
}
136136
}
@@ -142,6 +142,25 @@ func runDeploy(cmd *cobra.Command, args []string) error {
142142
return nil
143143
}
144144

145+
// previousBuildDirFor returns the build directory for the currently promoted
146+
// build, but only if that build was previously synced to R2 (has r2_synced_at).
147+
// Returns "" if no build is promoted, the lookup fails, or the promoted build was never R2-synced.
148+
// This prevents skipping shared p/ uploads for builds that only exist locally.
149+
func previousBuildDirFor(repoDir string) string {
150+
id, _ := deploy.CurrentBuildID(repoDir)
151+
if id == "" {
152+
return ""
153+
}
154+
var synced *string
155+
err := application.DB.QueryRow(
156+
`SELECT r2_synced_at FROM builds WHERE id = ?`, id,
157+
).Scan(&synced)
158+
if err != nil || synced == nil {
159+
return ""
160+
}
161+
return deploy.BuildDirFromID(repoDir, id)
162+
}
163+
145164
func recordR2Sync(cmd *cobra.Command, buildID string) {
146165
now := time.Now().UTC().Format(time.RFC3339)
147166
_, err := application.DB.ExecContext(cmd.Context(),

docs/r2-deployment.md

Lines changed: 18 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,22 @@ WP Composer deploys built repository artifacts to Cloudflare R2 for serving via
44

55
## Versioned Deploy Model
66

7-
Each deploy uploads all files to an immutable release prefix (`releases/<build-id>/`). The only mutable object in the bucket is the root `packages.json`, which acts as an atomic pointer to the current release.
7+
Content-addressed `p/` files (containing `$` in the filename) are stored once in a shared top-level prefix and never re-uploaded. Per-release files (`p2/`, `packages.json`, `manifest.json`) go under an immutable release prefix. The only mutable object in the bucket is the root `packages.json`, which acts as an atomic pointer to the current release.
88

99
```
1010
packages.json ← atomic pointer (only mutable object)
11-
releases/20260314-150405/packages.json ← snapshot reference (immutable)
12-
releases/20260314-150405/manifest.json
13-
releases/20260314-150405/p/wp-plugin/akismet$abc.json
14-
releases/20260314-150405/p2/wp-plugin/akismet.json ← immutable within release
11+
p/wp-plugin/akismet$abc123.json ← shared, uploaded once ever (immutable)
12+
p/providers-week$def456.json ← shared, uploaded once ever (immutable)
13+
releases/20260314-150405/packages.json ← per-release snapshot (immutable)
14+
releases/20260314-150405/manifest.json ← per-release (immutable)
15+
releases/20260314-150405/p2/wp-plugin/akismet.json ← per-release (immutable)
1516
```
1617

17-
The root `packages.json` is rewritten on each deploy so that `metadata-url`, `providers-url`, and `provider-includes` keys point into the new release prefix. This single PUT is the atomic switch — clients either see the old release or the new one, never a mix.
18+
Since ~95% of files are content-addressed and identical across releases, the deploy diffs the current build against the previous build locally and only uploads new shared files. This reduces R2 operations from ~140k to ~2-5k (only new `p/` files + all `p2/` files + indexes), with zero R2 calls for unchanged files.
19+
20+
On the first deploy with this layout (upgrading from the old release-prefixed model, or no previous build), all shared `p/` files are uploaded. The deploy detects the old layout by reading the current root `packages.json` from R2 — if `providers-url` still points into `releases/`, the shared prefix doesn't exist yet and the local diff is bypassed. The local diff is also bypassed when the previously promoted build was never synced to R2 (no `r2_synced_at` recorded), because its shared files may exist only locally. Subsequent deploys benefit from the local diff immediately.
21+
22+
The root `packages.json` is rewritten on each deploy so that `metadata-url` points into the new release prefix. `providers-url` and `provider-includes` point at the shared top-level `p/` prefix. This single PUT is the atomic switch — clients either see the old release or the new one, never a mix.
1823

1924
## Prerequisites
2025

@@ -63,8 +68,8 @@ Find your account ID in the Cloudflare dashboard under **R2 > Overview**.
6368
When deploying to R2 (`wpcomposer deploy --to-r2`):
6469

6570
1. Validates the build (packages.json and manifest.json must exist).
66-
2. Uploads all files under `releases/<build-id>/` with appropriate `Cache-Control` headers. Provider and package files upload first; `packages.json` uploads last within the release prefix. Each upload retries up to 3 times with exponential backoff.
67-
3. Rewrites `packages.json` URL templates to point at the new release prefix.
71+
2. Diffs shared `p/` files against the previous build directory to find new content-addressed files. Uploads only new `p/` files to the shared top-level prefix (zero R2 ops for unchanged files). Uploads per-release files (`p2/`, indexes) under `releases/<build-id>/`. Each upload retries up to 3 times with exponential backoff.
72+
3. Rewrites `packages.json`: `metadata-url` points into the release prefix; `providers-url` and `provider-includes` point at the shared `p/` prefix.
6873
4. Uploads the rewritten `packages.json` as the root — the atomic switch.
6974
5. Promotes the local build symlink (for rollback capability).
7075

@@ -80,22 +85,15 @@ When using a Cloudflare custom domain on the R2 bucket, cache behavior is contro
8085
|---|---|---|
8186
| `packages.json` (root) | `max-age=300` | Atomic pointer, only mutable object |
8287
| `releases/*` (everything) | `max-age=31536000, immutable` | Entire release prefix is immutable |
83-
84-
Legacy flat-path cases (backward compat during transition):
85-
86-
| Path pattern | Cache-Control | Rationale |
87-
|---|---|---|
88-
| `manifest.json` | `max-age=300` | Build metadata |
89-
| `p/*$hash.json` | `max-age=31536000, immutable` | Content-addressed, never changes |
90-
| `p2/*.json` | `max-age=300` | No hash in URL, must revalidate |
88+
| `p/*$hash.json` (shared) | `max-age=31536000, immutable` | Content-addressed, never changes |
9189

9290
## URL Requirements
9391

9492
The generated root `packages.json` on R2 contains URLs pointing into the current release prefix (per-release files) and into the shared top-level `p/` prefix (content-addressed files):
9593

96-
- `metadata-url`: `/releases/<build-id>/p2/%package%.json`
97-
- `providers-url`: `/releases/<build-id>/p/%package%$%hash%.json`
98-
- `provider-includes` keys: `releases/<build-id>/p/providers-*$hash.json`
94+
- `metadata-url`: `/releases/<build-id>/p2/%package%.json` (per-release)
95+
- `providers-url`: `/p/%package%$%hash%.json` (shared, not prefixed)
96+
- `provider-includes` keys: `p/providers-*$hash.json` (shared, not prefixed)
9997
- `notify-batch`: absolute URL pointing to the **app domain** (not R2, not rewritten)
10098

10199
## AWS CLI Setup (Manual Operations)
@@ -138,7 +136,7 @@ wpcomposer deploy --cleanup --r2-cleanup
138136
wpcomposer deploy --cleanup --r2-cleanup --grace-hours 6
139137
```
140138

141-
`--r2-cleanup` is required — plain `--cleanup` only removes local build directories. The cleanup reads R2 state directly (no local filesystem dependency), identifies release prefixes, and deletes those outside the keep set. It also deletes legacy flat files (anything not under `releases/` except root `packages.json`).
139+
`--r2-cleanup` is required — plain `--cleanup` only removes local build directories. The cleanup reads R2 state directly (no local filesystem dependency), identifies release prefixes, and deletes those outside the keep set. It also deletes legacy flat files (anything not under `releases/` except root `packages.json` and shared content-addressed `p/` files). Shared `p/` files are preserved — GC of orphaned shared files is deferred to a future release.
142140

143141
The keep set is: live release (from root `packages.json`) + releases within `--grace-hours` + top `--retain` most recent. The retain count has a hard minimum of 5 — even if `--retain` is set lower, at least 5 recent releases are always preserved.
144142

0 commit comments

Comments
 (0)