Skip to content

Commit c8a5ab5

Browse files
retlehsclaude
andauthored
Fix SVN discovery never detecting package changes (#6)
* Fix SVN discovery never detecting package changes SVN discovery parsed plugin/theme slugs from the HTML listing but never extracted timestamps (LastCommitted was always nil). After the initial sync set last_synced_at on all packages, the update query `last_committed > last_synced_at` could never match — so no package was ever re-synced, resulting in perpetual "Changed: 0" builds with stale metadata. Fix by tracking SVN revision numbers and using the DAV REPORT endpoint to fetch the changelog between runs: 1. Parse the current SVN revision from the listing page header 2. Store last-seen revision per type in a new site_meta table 3. On each run, fetch the SVN log (REPORT) for revisions since last run 4. Extract changed slugs from the log and mark their last_committed 5. The update step then picks up only packages that actually changed Also includes: - Backfill migration setting last_committed = last_synced_at for existing rows with NULL last_committed - 600s timeout for DAV REPORT requests (catch-up runs can be large) - Skip changelog marking when --limit is set (test/partial runs) - Error on malformed stored revision instead of silent fallback Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * Fix gofmt struct field alignment in svn.go Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent dc1286a commit c8a5ab5

File tree

6 files changed

+405
-19
lines changed

6 files changed

+405
-19
lines changed

cmd/wpcomposer/cmd/discover.go

Lines changed: 75 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package cmd
33
import (
44
"context"
55
"fmt"
6+
"strconv"
67
"sync/atomic"
78

89
"github.com/spf13/cobra"
@@ -119,14 +120,23 @@ func discoverFromSVN(ctx context.Context, pkgType string, limit int) error {
119120
type svnSource struct {
120121
url string
121122
pkgType string
123+
metaKey string // site_meta key for storing last-seen SVN revision
122124
}
123125

124126
var sources []svnSource
125127
if pkgType == "all" || pkgType == "plugin" {
126-
sources = append(sources, svnSource{url: "https://plugins.svn.wordpress.org/", pkgType: "plugin"})
128+
sources = append(sources, svnSource{
129+
url: "https://plugins.svn.wordpress.org/",
130+
pkgType: "plugin",
131+
metaKey: "svn_revision_plugin",
132+
})
127133
}
128134
if pkgType == "all" || pkgType == "theme" {
129-
sources = append(sources, svnSource{url: "https://themes.svn.wordpress.org/", pkgType: "theme"})
135+
sources = append(sources, svnSource{
136+
url: "https://themes.svn.wordpress.org/",
137+
pkgType: "theme",
138+
metaKey: "svn_revision_theme",
139+
})
130140
}
131141

132142
var totalCount, totalFailed int
@@ -153,7 +163,7 @@ func discoverFromSVN(ctx context.Context, pkgType string, limit int) error {
153163
batch = batch[:0]
154164
}
155165

156-
err := client.ParseSVNListing(ctx, src.url, func(entry wporg.SVNEntry) error {
166+
result, err := client.ParseSVNListing(ctx, src.url, func(entry wporg.SVNEntry) error {
157167
if limit > 0 && totalCount >= limit {
158168
return errLimitReached
159169
}
@@ -178,6 +188,16 @@ func discoverFromSVN(ctx context.Context, pkgType string, limit int) error {
178188
return fmt.Errorf("SVN discovery for %s: %w", src.pkgType, err)
179189
}
180190

191+
// Use SVN revision log to find which packages changed since last run.
192+
// Skip when --limit is set (partial/test runs shouldn't mutate global
193+
// change state or trigger large update workloads).
194+
if limit == 0 && result != nil && result.Revision > 0 {
195+
if markErr := markChangedFromSVNLog(ctx, client, src, result.Revision); markErr != nil {
196+
application.Logger.Warn("SVN changelog fetch failed, skipping change detection",
197+
"type", src.pkgType, "error", markErr)
198+
}
199+
}
200+
181201
totalFailed += failed
182202
application.Logger.Info("SVN discovery done", "type", src.pkgType, "succeeded", count, "failed", failed)
183203

@@ -196,6 +216,58 @@ func discoverFromSVN(ctx context.Context, pkgType string, limit int) error {
196216
return nil
197217
}
198218

219+
// markChangedFromSVNLog fetches the SVN log between the last-seen revision and
220+
// the current revision, extracts which slugs changed, and marks them in the DB
221+
// so they'll be picked up by the update step.
222+
func markChangedFromSVNLog(ctx context.Context, client *wporg.Client, src struct {
223+
url string
224+
pkgType string
225+
metaKey string
226+
}, currentRev int64) error {
227+
lastRevStr, err := packages.GetMeta(ctx, application.DB, src.metaKey)
228+
if err != nil {
229+
return fmt.Errorf("reading last revision: %w", err)
230+
}
231+
232+
var lastRev int64
233+
if lastRevStr != "" {
234+
var parseErr error
235+
lastRev, parseErr = strconv.ParseInt(lastRevStr, 10, 64)
236+
if parseErr != nil {
237+
return fmt.Errorf("malformed stored revision %q for %s: %w", lastRevStr, src.metaKey, parseErr)
238+
}
239+
}
240+
241+
if lastRev > 0 && lastRev < currentRev {
242+
application.Logger.Info("fetching SVN changelog",
243+
"type", src.pkgType, "from_rev", lastRev, "to_rev", currentRev)
244+
245+
slugs, err := client.FetchSVNChangedSlugs(ctx, src.url, lastRev+1, currentRev)
246+
if err != nil {
247+
return err
248+
}
249+
250+
if len(slugs) > 0 {
251+
affected, err := packages.MarkPackagesChanged(ctx, application.DB, src.pkgType, slugs)
252+
if err != nil {
253+
return fmt.Errorf("marking changed packages: %w", err)
254+
}
255+
application.Logger.Info("marked changed packages from SVN log",
256+
"type", src.pkgType, "slugs_in_log", len(slugs), "packages_marked", affected)
257+
}
258+
} else if lastRev == 0 {
259+
application.Logger.Info("no previous SVN revision stored, skipping changelog (first run)",
260+
"type", src.pkgType, "current_rev", currentRev)
261+
}
262+
263+
// Store current revision for next run.
264+
if err := packages.SetMeta(ctx, application.DB, src.metaKey, strconv.FormatInt(currentRev, 10)); err != nil {
265+
return fmt.Errorf("storing revision: %w", err)
266+
}
267+
268+
return nil
269+
}
270+
199271
var errLimitReached = fmt.Errorf("limit reached")
200272

201273
func init() {

internal/packages/package.go

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -356,6 +356,48 @@ func RefreshSiteStats(ctx context.Context, db *sql.DB) error {
356356
return nil
357357
}
358358

359+
// MarkPackagesChanged sets last_committed = now for the given slugs of a
360+
// specific type, so they'll be picked up by GetPackagesNeedingUpdate.
361+
func MarkPackagesChanged(ctx context.Context, db *sql.DB, pkgType string, slugs []string) (int64, error) {
362+
if len(slugs) == 0 {
363+
return 0, nil
364+
}
365+
366+
now := time.Now().UTC().Format(time.RFC3339)
367+
368+
tx, err := db.BeginTx(ctx, nil)
369+
if err != nil {
370+
return 0, fmt.Errorf("beginning transaction: %w", err)
371+
}
372+
defer func() { _ = tx.Rollback() }()
373+
374+
stmt, err := tx.PrepareContext(ctx, `
375+
UPDATE packages
376+
SET last_committed = ?, provider_group = ?, updated_at = ?
377+
WHERE type = ? AND name = ? AND is_active = 1`)
378+
if err != nil {
379+
return 0, fmt.Errorf("preparing statement: %w", err)
380+
}
381+
defer func() { _ = stmt.Close() }()
382+
383+
t, _ := time.Parse(time.RFC3339, now)
384+
pg := ComputeProviderGroup(&t)
385+
var affected int64
386+
for _, slug := range slugs {
387+
res, err := stmt.ExecContext(ctx, now, pg, now, pkgType, slug)
388+
if err != nil {
389+
return affected, fmt.Errorf("marking package %s/%s changed: %w", pkgType, slug, err)
390+
}
391+
n, _ := res.RowsAffected()
392+
affected += n
393+
}
394+
395+
if err := tx.Commit(); err != nil {
396+
return 0, fmt.Errorf("committing: %w", err)
397+
}
398+
return affected, nil
399+
}
400+
359401
func boolToInt(b bool) int {
360402
if b {
361403
return 1

internal/packages/site_meta.go

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
package packages
2+
3+
import (
4+
"context"
5+
"database/sql"
6+
"fmt"
7+
)
8+
9+
// GetMeta retrieves a value from site_meta by key. Returns "" if not found.
10+
func GetMeta(ctx context.Context, db *sql.DB, key string) (string, error) {
11+
var value string
12+
err := db.QueryRowContext(ctx, `SELECT value FROM site_meta WHERE key = ?`, key).Scan(&value)
13+
if err == sql.ErrNoRows {
14+
return "", nil
15+
}
16+
if err != nil {
17+
return "", fmt.Errorf("getting site_meta %q: %w", key, err)
18+
}
19+
return value, nil
20+
}
21+
22+
// SetMeta inserts or updates a value in site_meta.
23+
func SetMeta(ctx context.Context, db *sql.DB, key, value string) error {
24+
_, err := db.ExecContext(ctx, `
25+
INSERT INTO site_meta (key, value) VALUES (?, ?)
26+
ON CONFLICT(key) DO UPDATE SET value = excluded.value`,
27+
key, value)
28+
if err != nil {
29+
return fmt.Errorf("setting site_meta %q: %w", key, err)
30+
}
31+
return nil
32+
}

0 commit comments

Comments
 (0)