Skip to content

Commit d2a7905

Browse files
author
Gusted
committed
Merge pull request '[v7.0/forgejo] Refactor LFS GC functions' (go-gitea#3072) from bp-v7.0/forgejo-7ffa7f5 into v7.0/forgejo
Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/3072 Reviewed-by: Gusted <[email protected]>
2 parents 8e27c91 + aa7346c commit d2a7905

File tree

5 files changed

+132
-56
lines changed

5 files changed

+132
-56
lines changed
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
-
2+
3+
id: 1000
4+
oid: 9d172e5c64b4f0024b9901ec6afe9ea052f3c9b6ff9f4b07956d8c48c86fca82
5+
size: 25
6+
repository_id: 1
7+
created_unix: 1712309123

models/git/lfs.go

Lines changed: 15 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -337,58 +337,47 @@ func GetRepoLFSSize(ctx context.Context, repoID int64) (int64, error) {
337337
func IterateRepositoryIDsWithLFSMetaObjects(ctx context.Context, f func(ctx context.Context, repoID, count int64) error) error {
338338
batchSize := setting.Database.IterateBufferSize
339339
sess := db.GetEngine(ctx)
340-
id := int64(0)
340+
var start int
341341
type RepositoryCount struct {
342342
RepositoryID int64
343343
Count int64
344344
}
345345
for {
346346
counts := make([]*RepositoryCount, 0, batchSize)
347-
sess.Select("repository_id, COUNT(id) AS count").
347+
if err := sess.Select("repository_id, COUNT(id) AS count").
348348
Table("lfs_meta_object").
349-
Where("repository_id > ?", id).
350349
GroupBy("repository_id").
351-
OrderBy("repository_id ASC")
352-
353-
if err := sess.Limit(batchSize, 0).Find(&counts); err != nil {
350+
OrderBy("repository_id ASC").Limit(batchSize, start).Find(&counts); err != nil {
354351
return err
355352
}
356353
if len(counts) == 0 {
357354
return nil
358355
}
356+
start += len(counts)
359357

360358
for _, count := range counts {
361359
if err := f(ctx, count.RepositoryID, count.Count); err != nil {
362360
return err
363361
}
364362
}
365-
id = counts[len(counts)-1].RepositoryID
366363
}
367364
}
368365

369366
// IterateLFSMetaObjectsForRepoOptions provides options for IterateLFSMetaObjectsForRepo
370367
type IterateLFSMetaObjectsForRepoOptions struct {
371-
OlderThan timeutil.TimeStamp
372-
UpdatedLessRecentlyThan timeutil.TimeStamp
373-
OrderByUpdated bool
374-
LoopFunctionAlwaysUpdates bool
368+
OlderThan timeutil.TimeStamp
369+
UpdatedLessRecentlyThan timeutil.TimeStamp
375370
}
376371

377372
// IterateLFSMetaObjectsForRepo provides a iterator for LFSMetaObjects per Repo
378-
func IterateLFSMetaObjectsForRepo(ctx context.Context, repoID int64, f func(context.Context, *LFSMetaObject, int64) error, opts *IterateLFSMetaObjectsForRepoOptions) error {
379-
var start int
373+
func IterateLFSMetaObjectsForRepo(ctx context.Context, repoID int64, f func(context.Context, *LFSMetaObject) error, opts *IterateLFSMetaObjectsForRepoOptions) error {
380374
batchSize := setting.Database.IterateBufferSize
381375
engine := db.GetEngine(ctx)
382-
type CountLFSMetaObject struct {
383-
Count int64
384-
LFSMetaObject `xorm:"extends"`
385-
}
386-
387376
id := int64(0)
388377

389378
for {
390-
beans := make([]*CountLFSMetaObject, 0, batchSize)
391-
sess := engine.Table("lfs_meta_object").Select("`lfs_meta_object`.*, COUNT(`l1`.oid) AS `count`").
379+
beans := make([]*LFSMetaObject, 0, batchSize)
380+
sess := engine.Table("lfs_meta_object").Select("`lfs_meta_object`.*").
392381
Join("INNER", "`lfs_meta_object` AS l1", "`lfs_meta_object`.oid = `l1`.oid").
393382
Where("`lfs_meta_object`.repository_id = ?", repoID)
394383
if !opts.OlderThan.IsZero() {
@@ -397,25 +386,19 @@ func IterateLFSMetaObjectsForRepo(ctx context.Context, repoID int64, f func(cont
397386
if !opts.UpdatedLessRecentlyThan.IsZero() {
398387
sess.And("`lfs_meta_object`.updated_unix < ?", opts.UpdatedLessRecentlyThan)
399388
}
400-
sess.GroupBy("`lfs_meta_object`.id")
401-
if opts.OrderByUpdated {
402-
sess.OrderBy("`lfs_meta_object`.updated_unix ASC")
403-
} else {
404-
sess.And("`lfs_meta_object`.id > ?", id)
405-
sess.OrderBy("`lfs_meta_object`.id ASC")
406-
}
407-
if err := sess.Limit(batchSize, start).Find(&beans); err != nil {
389+
sess.GroupBy("`lfs_meta_object`.id").
390+
And("`lfs_meta_object`.id > ?", id).
391+
OrderBy("`lfs_meta_object`.id ASC")
392+
393+
if err := sess.Limit(batchSize, 0).Find(&beans); err != nil {
408394
return err
409395
}
410396
if len(beans) == 0 {
411397
return nil
412398
}
413-
if !opts.LoopFunctionAlwaysUpdates {
414-
start += len(beans)
415-
}
416399

417400
for _, bean := range beans {
418-
if err := f(ctx, &bean.LFSMetaObject, bean.Count); err != nil {
401+
if err := f(ctx, bean); err != nil {
419402
return err
420403
}
421404
}

models/git/lfs_test.go

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
// Copyright 2024 The Forgejo Authors. All rights reserved.
2+
// SPDX-License-Identifier: MIT
3+
4+
package git
5+
6+
import (
7+
"context"
8+
"path/filepath"
9+
"testing"
10+
11+
"code.gitea.io/gitea/models/db"
12+
"code.gitea.io/gitea/models/unittest"
13+
"code.gitea.io/gitea/modules/setting"
14+
"code.gitea.io/gitea/modules/test"
15+
16+
"github.com/stretchr/testify/assert"
17+
)
18+
19+
func TestIterateRepositoryIDsWithLFSMetaObjects(t *testing.T) {
20+
defer unittest.OverrideFixtures(
21+
unittest.FixturesOptions{
22+
Dir: filepath.Join(setting.AppWorkPath, "models/fixtures/"),
23+
Base: setting.AppWorkPath,
24+
Dirs: []string{"models/git/TestIterateRepositoryIDsWithLFSMetaObjects/"},
25+
},
26+
)()
27+
assert.NoError(t, unittest.PrepareTestDatabase())
28+
29+
type repocount struct {
30+
repoid int64
31+
count int64
32+
}
33+
expected := []repocount{{1, 1}, {54, 4}}
34+
35+
t.Run("Normal batch size", func(t *testing.T) {
36+
defer test.MockVariableValue(&setting.Database.IterateBufferSize, 20)()
37+
cases := []repocount{}
38+
39+
err := IterateRepositoryIDsWithLFSMetaObjects(db.DefaultContext, func(ctx context.Context, repoID, count int64) error {
40+
cases = append(cases, repocount{repoID, count})
41+
return nil
42+
})
43+
assert.NoError(t, err)
44+
assert.EqualValues(t, expected, cases)
45+
})
46+
47+
t.Run("Low batch size", func(t *testing.T) {
48+
defer test.MockVariableValue(&setting.Database.IterateBufferSize, 1)()
49+
cases := []repocount{}
50+
51+
err := IterateRepositoryIDsWithLFSMetaObjects(db.DefaultContext, func(ctx context.Context, repoID, count int64) error {
52+
cases = append(cases, repocount{repoID, count})
53+
return nil
54+
})
55+
assert.NoError(t, err)
56+
assert.EqualValues(t, expected, cases)
57+
})
58+
}
59+
60+
func TestIterateLFSMetaObjectsForRepo(t *testing.T) {
61+
assert.NoError(t, unittest.PrepareTestDatabase())
62+
63+
expectedIDs := []int64{1, 2, 3, 4}
64+
65+
t.Run("Normal batch size", func(t *testing.T) {
66+
defer test.MockVariableValue(&setting.Database.IterateBufferSize, 20)()
67+
actualIDs := []int64{}
68+
69+
err := IterateLFSMetaObjectsForRepo(db.DefaultContext, 54, func(ctx context.Context, lo *LFSMetaObject) error {
70+
actualIDs = append(actualIDs, lo.ID)
71+
return nil
72+
}, &IterateLFSMetaObjectsForRepoOptions{})
73+
assert.NoError(t, err)
74+
assert.EqualValues(t, expectedIDs, actualIDs)
75+
})
76+
77+
t.Run("Low batch size", func(t *testing.T) {
78+
defer test.MockVariableValue(&setting.Database.IterateBufferSize, 1)()
79+
actualIDs := []int64{}
80+
81+
err := IterateLFSMetaObjectsForRepo(db.DefaultContext, 54, func(ctx context.Context, lo *LFSMetaObject) error {
82+
actualIDs = append(actualIDs, lo.ID)
83+
return nil
84+
}, &IterateLFSMetaObjectsForRepoOptions{})
85+
assert.NoError(t, err)
86+
assert.EqualValues(t, expectedIDs, actualIDs)
87+
88+
t.Run("Batch handles updates", func(t *testing.T) {
89+
actualIDs := []int64{}
90+
91+
err := IterateLFSMetaObjectsForRepo(db.DefaultContext, 54, func(ctx context.Context, lo *LFSMetaObject) error {
92+
actualIDs = append(actualIDs, lo.ID)
93+
_, err := db.DeleteByID[LFSMetaObject](ctx, lo.ID)
94+
assert.NoError(t, err)
95+
return nil
96+
}, &IterateLFSMetaObjectsForRepoOptions{})
97+
assert.NoError(t, err)
98+
assert.EqualValues(t, expectedIDs, actualIDs)
99+
})
100+
})
101+
}

services/doctor/lfs.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ func garbageCollectLFSCheck(ctx context.Context, logger log.Logger, autofix bool
4444
OlderThan: time.Now().Add(-24 * time.Hour * 7),
4545
// We don't set the UpdatedLessRecentlyThan because we want to do a full GC
4646
}); err != nil {
47+
logger.Error("Couldn't garabage collect LFS objects: %v", err)
4748
return err
4849
}
4950

services/repository/lfs.go

Lines changed: 8 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ package repository
55

66
import (
77
"context"
8-
"errors"
98
"fmt"
109
"time"
1110

@@ -21,12 +20,10 @@ import (
2120

2221
// GarbageCollectLFSMetaObjectsOptions provides options for GarbageCollectLFSMetaObjects function
2322
type GarbageCollectLFSMetaObjectsOptions struct {
24-
LogDetail func(format string, v ...any)
25-
AutoFix bool
26-
OlderThan time.Time
27-
UpdatedLessRecentlyThan time.Time
28-
NumberToCheckPerRepo int64
29-
ProportionToCheckPerRepo float64
23+
LogDetail func(format string, v ...any)
24+
AutoFix bool
25+
OlderThan time.Time
26+
UpdatedLessRecentlyThan time.Time
3027
}
3128

3229
// GarbageCollectLFSMetaObjects garbage collects LFS objects for all repositories
@@ -49,9 +46,6 @@ func GarbageCollectLFSMetaObjects(ctx context.Context, opts GarbageCollectLFSMet
4946
return err
5047
}
5148

52-
if newMinimum := int64(float64(count) * opts.ProportionToCheckPerRepo); newMinimum > opts.NumberToCheckPerRepo && opts.NumberToCheckPerRepo != 0 {
53-
opts.NumberToCheckPerRepo = newMinimum
54-
}
5549
return GarbageCollectLFSMetaObjectsForRepo(ctx, repo, opts)
5650
})
5751
}
@@ -78,13 +72,9 @@ func GarbageCollectLFSMetaObjectsForRepo(ctx context.Context, repo *repo_model.R
7872
defer gitRepo.Close()
7973

8074
store := lfs.NewContentStore()
81-
errStop := errors.New("STOPERR")
8275
objectFormat := git.ObjectFormatFromName(repo.ObjectFormatName)
8376

84-
err = git_model.IterateLFSMetaObjectsForRepo(ctx, repo.ID, func(ctx context.Context, metaObject *git_model.LFSMetaObject, count int64) error {
85-
if opts.NumberToCheckPerRepo > 0 && total > opts.NumberToCheckPerRepo {
86-
return errStop
87-
}
77+
err = git_model.IterateLFSMetaObjectsForRepo(ctx, repo.ID, func(ctx context.Context, metaObject *git_model.LFSMetaObject) error {
8878
total++
8979
pointerSha := git.ComputeBlobHash(objectFormat, []byte(metaObject.Pointer.StringContent()))
9080

@@ -123,16 +113,10 @@ func GarbageCollectLFSMetaObjectsForRepo(ctx context.Context, repo *repo_model.R
123113
//
124114
// It is likely that a week is potentially excessive but it should definitely be enough that any
125115
// unassociated LFS object is genuinely unassociated.
126-
OlderThan: timeutil.TimeStamp(opts.OlderThan.Unix()),
127-
UpdatedLessRecentlyThan: timeutil.TimeStamp(opts.UpdatedLessRecentlyThan.Unix()),
128-
OrderByUpdated: true,
129-
LoopFunctionAlwaysUpdates: true,
116+
OlderThan: timeutil.TimeStamp(opts.OlderThan.Unix()),
117+
UpdatedLessRecentlyThan: timeutil.TimeStamp(opts.UpdatedLessRecentlyThan.Unix()),
130118
})
131-
132-
if err == errStop {
133-
opts.LogDetail("Processing stopped at %d total LFSMetaObjects in %-v", total, repo)
134-
return nil
135-
} else if err != nil {
119+
if err != nil {
136120
return err
137121
}
138122
return nil

0 commit comments

Comments
 (0)