Skip to content

Commit aa7346c

Browse files
authored
Refactor LFS GC functions
- Remove options that currently aren't set on `GarbageCollectLFSMetaObjectsOptions` and `IterateLFSMetaObjectsForRepoOptions`. - Simplify `IterateRepositoryIDsWithLFSMetaObjects` and `IterateLFSMetaObjectsForRepo`. - `IterateLFSMetaObjectsForRepo` was previously able to get in a loop (`gc-lfs` doctor check was able to reproduce this) because the code expected that the records would be updated to not match the SQL query, but that wasn't the case. Simply enforce that only records higher than the latest `id` from the previous iteration are allowed. - For `gc-lfs` doctor check this was because `UpdatedLessRecentlyThan` option was not set, which caused that records just marked as active in the iteration weren't being filtered. - Add unit tests - Most likely a regression from 2cc3a63. - The bug with `gc-lfs` was found on Codeberg. (cherry picked from commit 7ffa7f5)
1 parent 7e5fed7 commit aa7346c

File tree

5 files changed

+132
-56
lines changed

5 files changed

+132
-56
lines changed
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
-
2+
3+
id: 1000
4+
oid: 9d172e5c64b4f0024b9901ec6afe9ea052f3c9b6ff9f4b07956d8c48c86fca82
5+
size: 25
6+
repository_id: 1
7+
created_unix: 1712309123

models/git/lfs.go

Lines changed: 15 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -337,58 +337,47 @@ func GetRepoLFSSize(ctx context.Context, repoID int64) (int64, error) {
337337
func IterateRepositoryIDsWithLFSMetaObjects(ctx context.Context, f func(ctx context.Context, repoID, count int64) error) error {
338338
batchSize := setting.Database.IterateBufferSize
339339
sess := db.GetEngine(ctx)
340-
id := int64(0)
340+
var start int
341341
type RepositoryCount struct {
342342
RepositoryID int64
343343
Count int64
344344
}
345345
for {
346346
counts := make([]*RepositoryCount, 0, batchSize)
347-
sess.Select("repository_id, COUNT(id) AS count").
347+
if err := sess.Select("repository_id, COUNT(id) AS count").
348348
Table("lfs_meta_object").
349-
Where("repository_id > ?", id).
350349
GroupBy("repository_id").
351-
OrderBy("repository_id ASC")
352-
353-
if err := sess.Limit(batchSize, 0).Find(&counts); err != nil {
350+
OrderBy("repository_id ASC").Limit(batchSize, start).Find(&counts); err != nil {
354351
return err
355352
}
356353
if len(counts) == 0 {
357354
return nil
358355
}
356+
start += len(counts)
359357

360358
for _, count := range counts {
361359
if err := f(ctx, count.RepositoryID, count.Count); err != nil {
362360
return err
363361
}
364362
}
365-
id = counts[len(counts)-1].RepositoryID
366363
}
367364
}
368365

369366
// IterateLFSMetaObjectsForRepoOptions provides options for IterateLFSMetaObjectsForRepo
370367
type IterateLFSMetaObjectsForRepoOptions struct {
371-
OlderThan timeutil.TimeStamp
372-
UpdatedLessRecentlyThan timeutil.TimeStamp
373-
OrderByUpdated bool
374-
LoopFunctionAlwaysUpdates bool
368+
OlderThan timeutil.TimeStamp
369+
UpdatedLessRecentlyThan timeutil.TimeStamp
375370
}
376371

377372
// IterateLFSMetaObjectsForRepo provides a iterator for LFSMetaObjects per Repo
378-
func IterateLFSMetaObjectsForRepo(ctx context.Context, repoID int64, f func(context.Context, *LFSMetaObject, int64) error, opts *IterateLFSMetaObjectsForRepoOptions) error {
379-
var start int
373+
func IterateLFSMetaObjectsForRepo(ctx context.Context, repoID int64, f func(context.Context, *LFSMetaObject) error, opts *IterateLFSMetaObjectsForRepoOptions) error {
380374
batchSize := setting.Database.IterateBufferSize
381375
engine := db.GetEngine(ctx)
382-
type CountLFSMetaObject struct {
383-
Count int64
384-
LFSMetaObject `xorm:"extends"`
385-
}
386-
387376
id := int64(0)
388377

389378
for {
390-
beans := make([]*CountLFSMetaObject, 0, batchSize)
391-
sess := engine.Table("lfs_meta_object").Select("`lfs_meta_object`.*, COUNT(`l1`.oid) AS `count`").
379+
beans := make([]*LFSMetaObject, 0, batchSize)
380+
sess := engine.Table("lfs_meta_object").Select("`lfs_meta_object`.*").
392381
Join("INNER", "`lfs_meta_object` AS l1", "`lfs_meta_object`.oid = `l1`.oid").
393382
Where("`lfs_meta_object`.repository_id = ?", repoID)
394383
if !opts.OlderThan.IsZero() {
@@ -397,25 +386,19 @@ func IterateLFSMetaObjectsForRepo(ctx context.Context, repoID int64, f func(cont
397386
if !opts.UpdatedLessRecentlyThan.IsZero() {
398387
sess.And("`lfs_meta_object`.updated_unix < ?", opts.UpdatedLessRecentlyThan)
399388
}
400-
sess.GroupBy("`lfs_meta_object`.id")
401-
if opts.OrderByUpdated {
402-
sess.OrderBy("`lfs_meta_object`.updated_unix ASC")
403-
} else {
404-
sess.And("`lfs_meta_object`.id > ?", id)
405-
sess.OrderBy("`lfs_meta_object`.id ASC")
406-
}
407-
if err := sess.Limit(batchSize, start).Find(&beans); err != nil {
389+
sess.GroupBy("`lfs_meta_object`.id").
390+
And("`lfs_meta_object`.id > ?", id).
391+
OrderBy("`lfs_meta_object`.id ASC")
392+
393+
if err := sess.Limit(batchSize, 0).Find(&beans); err != nil {
408394
return err
409395
}
410396
if len(beans) == 0 {
411397
return nil
412398
}
413-
if !opts.LoopFunctionAlwaysUpdates {
414-
start += len(beans)
415-
}
416399

417400
for _, bean := range beans {
418-
if err := f(ctx, &bean.LFSMetaObject, bean.Count); err != nil {
401+
if err := f(ctx, bean); err != nil {
419402
return err
420403
}
421404
}

models/git/lfs_test.go

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
// Copyright 2024 The Forgejo Authors. All rights reserved.
2+
// SPDX-License-Identifier: MIT
3+
4+
package git
5+
6+
import (
7+
"context"
8+
"path/filepath"
9+
"testing"
10+
11+
"code.gitea.io/gitea/models/db"
12+
"code.gitea.io/gitea/models/unittest"
13+
"code.gitea.io/gitea/modules/setting"
14+
"code.gitea.io/gitea/modules/test"
15+
16+
"github.com/stretchr/testify/assert"
17+
)
18+
19+
func TestIterateRepositoryIDsWithLFSMetaObjects(t *testing.T) {
20+
defer unittest.OverrideFixtures(
21+
unittest.FixturesOptions{
22+
Dir: filepath.Join(setting.AppWorkPath, "models/fixtures/"),
23+
Base: setting.AppWorkPath,
24+
Dirs: []string{"models/git/TestIterateRepositoryIDsWithLFSMetaObjects/"},
25+
},
26+
)()
27+
assert.NoError(t, unittest.PrepareTestDatabase())
28+
29+
type repocount struct {
30+
repoid int64
31+
count int64
32+
}
33+
expected := []repocount{{1, 1}, {54, 4}}
34+
35+
t.Run("Normal batch size", func(t *testing.T) {
36+
defer test.MockVariableValue(&setting.Database.IterateBufferSize, 20)()
37+
cases := []repocount{}
38+
39+
err := IterateRepositoryIDsWithLFSMetaObjects(db.DefaultContext, func(ctx context.Context, repoID, count int64) error {
40+
cases = append(cases, repocount{repoID, count})
41+
return nil
42+
})
43+
assert.NoError(t, err)
44+
assert.EqualValues(t, expected, cases)
45+
})
46+
47+
t.Run("Low batch size", func(t *testing.T) {
48+
defer test.MockVariableValue(&setting.Database.IterateBufferSize, 1)()
49+
cases := []repocount{}
50+
51+
err := IterateRepositoryIDsWithLFSMetaObjects(db.DefaultContext, func(ctx context.Context, repoID, count int64) error {
52+
cases = append(cases, repocount{repoID, count})
53+
return nil
54+
})
55+
assert.NoError(t, err)
56+
assert.EqualValues(t, expected, cases)
57+
})
58+
}
59+
60+
func TestIterateLFSMetaObjectsForRepo(t *testing.T) {
61+
assert.NoError(t, unittest.PrepareTestDatabase())
62+
63+
expectedIDs := []int64{1, 2, 3, 4}
64+
65+
t.Run("Normal batch size", func(t *testing.T) {
66+
defer test.MockVariableValue(&setting.Database.IterateBufferSize, 20)()
67+
actualIDs := []int64{}
68+
69+
err := IterateLFSMetaObjectsForRepo(db.DefaultContext, 54, func(ctx context.Context, lo *LFSMetaObject) error {
70+
actualIDs = append(actualIDs, lo.ID)
71+
return nil
72+
}, &IterateLFSMetaObjectsForRepoOptions{})
73+
assert.NoError(t, err)
74+
assert.EqualValues(t, expectedIDs, actualIDs)
75+
})
76+
77+
t.Run("Low batch size", func(t *testing.T) {
78+
defer test.MockVariableValue(&setting.Database.IterateBufferSize, 1)()
79+
actualIDs := []int64{}
80+
81+
err := IterateLFSMetaObjectsForRepo(db.DefaultContext, 54, func(ctx context.Context, lo *LFSMetaObject) error {
82+
actualIDs = append(actualIDs, lo.ID)
83+
return nil
84+
}, &IterateLFSMetaObjectsForRepoOptions{})
85+
assert.NoError(t, err)
86+
assert.EqualValues(t, expectedIDs, actualIDs)
87+
88+
t.Run("Batch handles updates", func(t *testing.T) {
89+
actualIDs := []int64{}
90+
91+
err := IterateLFSMetaObjectsForRepo(db.DefaultContext, 54, func(ctx context.Context, lo *LFSMetaObject) error {
92+
actualIDs = append(actualIDs, lo.ID)
93+
_, err := db.DeleteByID[LFSMetaObject](ctx, lo.ID)
94+
assert.NoError(t, err)
95+
return nil
96+
}, &IterateLFSMetaObjectsForRepoOptions{})
97+
assert.NoError(t, err)
98+
assert.EqualValues(t, expectedIDs, actualIDs)
99+
})
100+
})
101+
}

services/doctor/lfs.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ func garbageCollectLFSCheck(ctx context.Context, logger log.Logger, autofix bool
4444
OlderThan: time.Now().Add(-24 * time.Hour * 7),
4545
// We don't set the UpdatedLessRecentlyThan because we want to do a full GC
4646
}); err != nil {
47+
logger.Error("Couldn't garabage collect LFS objects: %v", err)
4748
return err
4849
}
4950

services/repository/lfs.go

Lines changed: 8 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ package repository
55

66
import (
77
"context"
8-
"errors"
98
"fmt"
109
"time"
1110

@@ -21,12 +20,10 @@ import (
2120

2221
// GarbageCollectLFSMetaObjectsOptions provides options for GarbageCollectLFSMetaObjects function
2322
type GarbageCollectLFSMetaObjectsOptions struct {
24-
LogDetail func(format string, v ...any)
25-
AutoFix bool
26-
OlderThan time.Time
27-
UpdatedLessRecentlyThan time.Time
28-
NumberToCheckPerRepo int64
29-
ProportionToCheckPerRepo float64
23+
LogDetail func(format string, v ...any)
24+
AutoFix bool
25+
OlderThan time.Time
26+
UpdatedLessRecentlyThan time.Time
3027
}
3128

3229
// GarbageCollectLFSMetaObjects garbage collects LFS objects for all repositories
@@ -49,9 +46,6 @@ func GarbageCollectLFSMetaObjects(ctx context.Context, opts GarbageCollectLFSMet
4946
return err
5047
}
5148

52-
if newMinimum := int64(float64(count) * opts.ProportionToCheckPerRepo); newMinimum > opts.NumberToCheckPerRepo && opts.NumberToCheckPerRepo != 0 {
53-
opts.NumberToCheckPerRepo = newMinimum
54-
}
5549
return GarbageCollectLFSMetaObjectsForRepo(ctx, repo, opts)
5650
})
5751
}
@@ -78,13 +72,9 @@ func GarbageCollectLFSMetaObjectsForRepo(ctx context.Context, repo *repo_model.R
7872
defer gitRepo.Close()
7973

8074
store := lfs.NewContentStore()
81-
errStop := errors.New("STOPERR")
8275
objectFormat := git.ObjectFormatFromName(repo.ObjectFormatName)
8376

84-
err = git_model.IterateLFSMetaObjectsForRepo(ctx, repo.ID, func(ctx context.Context, metaObject *git_model.LFSMetaObject, count int64) error {
85-
if opts.NumberToCheckPerRepo > 0 && total > opts.NumberToCheckPerRepo {
86-
return errStop
87-
}
77+
err = git_model.IterateLFSMetaObjectsForRepo(ctx, repo.ID, func(ctx context.Context, metaObject *git_model.LFSMetaObject) error {
8878
total++
8979
pointerSha := git.ComputeBlobHash(objectFormat, []byte(metaObject.Pointer.StringContent()))
9080

@@ -123,16 +113,10 @@ func GarbageCollectLFSMetaObjectsForRepo(ctx context.Context, repo *repo_model.R
123113
//
124114
// It is likely that a week is potentially excessive but it should definitely be enough that any
125115
// unassociated LFS object is genuinely unassociated.
126-
OlderThan: timeutil.TimeStamp(opts.OlderThan.Unix()),
127-
UpdatedLessRecentlyThan: timeutil.TimeStamp(opts.UpdatedLessRecentlyThan.Unix()),
128-
OrderByUpdated: true,
129-
LoopFunctionAlwaysUpdates: true,
116+
OlderThan: timeutil.TimeStamp(opts.OlderThan.Unix()),
117+
UpdatedLessRecentlyThan: timeutil.TimeStamp(opts.UpdatedLessRecentlyThan.Unix()),
130118
})
131-
132-
if err == errStop {
133-
opts.LogDetail("Processing stopped at %d total LFSMetaObjects in %-v", total, repo)
134-
return nil
135-
} else if err != nil {
119+
if err != nil {
136120
return err
137121
}
138122
return nil

0 commit comments

Comments
 (0)