Skip to content

Commit b8fc604

Browse files
committed
kubelet: GC for image pull managers
1 parent 3793bec commit b8fc604

File tree

7 files changed

+185
-26
lines changed

7 files changed

+185
-26
lines changed

pkg/kubelet/images/image_gc_manager.go

Lines changed: 36 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,11 @@ const (
5757
ImageGarbageCollectedTotalReasonSpace = "space"
5858
)
5959

60+
// PostImageGCHook allows external sources to react to GC collect events.
61+
// `remainingImages` is a list of images that were left on the system after garbage
62+
// collection finished.
63+
type PostImageGCHook func(remainingImages []string, gcStart time.Time)
64+
6065
// StatsProvider is an interface for fetching stats used during image garbage
6166
// collection.
6267
type StatsProvider interface {
@@ -128,6 +133,8 @@ type realImageGCManager struct {
128133
// imageCache is the cache of latest image list.
129134
imageCache imageCache
130135

136+
postGCHooks []PostImageGCHook
137+
131138
// tracer for recording spans
132139
tracer trace.Tracer
133140
}
@@ -181,7 +188,7 @@ type imageRecord struct {
181188
}
182189

183190
// NewImageGCManager instantiates a new ImageGCManager object.
184-
func NewImageGCManager(runtime container.Runtime, statsProvider StatsProvider, recorder record.EventRecorder, nodeRef *v1.ObjectReference, policy ImageGCPolicy, tracerProvider trace.TracerProvider) (ImageGCManager, error) {
191+
func NewImageGCManager(runtime container.Runtime, statsProvider StatsProvider, postGCHooks []PostImageGCHook, recorder record.EventRecorder, nodeRef *v1.ObjectReference, policy ImageGCPolicy, tracerProvider trace.TracerProvider) (ImageGCManager, error) {
185192
// Validate policy.
186193
if policy.HighThresholdPercent < 0 || policy.HighThresholdPercent > 100 {
187194
return nil, fmt.Errorf("invalid HighThresholdPercent %d, must be in range [0-100]", policy.HighThresholdPercent)
@@ -200,6 +207,7 @@ func NewImageGCManager(runtime container.Runtime, statsProvider StatsProvider, r
200207
statsProvider: statsProvider,
201208
recorder: recorder,
202209
nodeRef: nodeRef,
210+
postGCHooks: postGCHooks,
203211
tracer: tracer,
204212
}
205213

@@ -381,11 +389,13 @@ func (im *realImageGCManager) GarbageCollect(ctx context.Context, beganGC time.T
381389
if usagePercent >= im.policy.HighThresholdPercent {
382390
amountToFree := capacity*int64(100-im.policy.LowThresholdPercent)/100 - available
383391
klog.InfoS("Disk usage on image filesystem is over the high threshold, trying to free bytes down to the low threshold", "usage", usagePercent, "highThreshold", im.policy.HighThresholdPercent, "amountToFree", amountToFree, "lowThreshold", im.policy.LowThresholdPercent)
384-
freed, err := im.freeSpace(ctx, amountToFree, freeTime, images)
392+
remainingImages, freed, err := im.freeSpace(ctx, amountToFree, freeTime, images)
385393
if err != nil {
386394
return err
387395
}
388396

397+
im.runPostGCHooks(remainingImages, freeTime)
398+
389399
if freed < amountToFree {
390400
err := fmt.Errorf("Failed to garbage collect required amount of images. Attempted to free %d bytes, but only found %d bytes eligible to free.", amountToFree, freed)
391401
im.recorder.Eventf(im.nodeRef, v1.EventTypeWarning, events.FreeDiskSpaceFailed, err.Error())
@@ -396,6 +406,12 @@ func (im *realImageGCManager) GarbageCollect(ctx context.Context, beganGC time.T
396406
return nil
397407
}
398408

409+
func (im *realImageGCManager) runPostGCHooks(remainingImages []string, gcStartTime time.Time) {
410+
for _, h := range im.postGCHooks {
411+
h(remainingImages, gcStartTime)
412+
}
413+
}
414+
399415
func (im *realImageGCManager) freeOldImages(ctx context.Context, images []evictionInfo, freeTime, beganGC time.Time) ([]evictionInfo, error) {
400416
if im.policy.MaxAge == 0 {
401417
return images, nil
@@ -430,41 +446,52 @@ func (im *realImageGCManager) freeOldImages(ctx context.Context, images []evicti
430446
func (im *realImageGCManager) DeleteUnusedImages(ctx context.Context) error {
431447
klog.InfoS("Attempting to delete unused images")
432448
freeTime := time.Now()
449+
433450
images, err := im.imagesInEvictionOrder(ctx, freeTime)
434451
if err != nil {
435452
return err
436453
}
437-
_, err = im.freeSpace(ctx, math.MaxInt64, freeTime, images)
438-
return err
454+
455+
remainingImages, _, err := im.freeSpace(ctx, math.MaxInt64, freeTime, images)
456+
if err != nil {
457+
return err
458+
}
459+
460+
im.runPostGCHooks(remainingImages, freeTime)
461+
return nil
439462
}
440463

441464
// Tries to free bytesToFree worth of images on the disk.
442465
//
443-
// Returns the number of bytes free and an error if any occurred. The number of
444-
// bytes freed is always returned.
466+
// Returns the images that are still available after the cleanup, the number of bytes freed
467+
// and an error if any occurred. The number of bytes freed is always returned.
445468
// Note that error may be nil and the number of bytes free may be less
446469
// than bytesToFree.
447-
func (im *realImageGCManager) freeSpace(ctx context.Context, bytesToFree int64, freeTime time.Time, images []evictionInfo) (int64, error) {
470+
func (im *realImageGCManager) freeSpace(ctx context.Context, bytesToFree int64, freeTime time.Time, images []evictionInfo) ([]string, int64, error) {
448471
// Delete unused images until we've freed up enough space.
449472
var deletionErrors []error
450473
spaceFreed := int64(0)
474+
var imagesLeft []string
451475
for _, image := range images {
452476
klog.V(5).InfoS("Evaluating image ID for possible garbage collection based on disk usage", "imageID", image.id, "runtimeHandler", image.imageRecord.runtimeHandlerUsedToPullImage)
453477
// Images that are currently in used were given a newer lastUsed.
454478
if image.lastUsed.Equal(freeTime) || image.lastUsed.After(freeTime) {
455479
klog.V(5).InfoS("Image ID was used too recently, not eligible for garbage collection", "imageID", image.id, "lastUsed", image.lastUsed, "freeTime", freeTime)
480+
imagesLeft = append(imagesLeft, image.id)
456481
continue
457482
}
458483

459484
// Avoid garbage collect the image if the image is not old enough.
460485
// In such a case, the image may have just been pulled down, and will be used by a container right away.
461486
if freeTime.Sub(image.firstDetected) < im.policy.MinAge {
462487
klog.V(5).InfoS("Image ID's age is less than the policy's minAge, not eligible for garbage collection", "imageID", image.id, "age", freeTime.Sub(image.firstDetected), "minAge", im.policy.MinAge)
488+
imagesLeft = append(imagesLeft, image.id)
463489
continue
464490
}
465491

466492
if err := im.freeImage(ctx, image, ImageGarbageCollectedTotalReasonSpace); err != nil {
467493
deletionErrors = append(deletionErrors, err)
494+
imagesLeft = append(imagesLeft, image.id)
468495
continue
469496
}
470497
spaceFreed += image.size
@@ -475,9 +502,9 @@ func (im *realImageGCManager) freeSpace(ctx context.Context, bytesToFree int64,
475502
}
476503

477504
if len(deletionErrors) > 0 {
478-
return spaceFreed, fmt.Errorf("wanted to free %d bytes, but freed %d bytes space with errors in image deletion: %v", bytesToFree, spaceFreed, errors.NewAggregate(deletionErrors))
505+
return nil, spaceFreed, fmt.Errorf("wanted to free %d bytes, but freed %d bytes space with errors in image deletion: %w", bytesToFree, spaceFreed, errors.NewAggregate(deletionErrors))
479506
}
480-
return spaceFreed, nil
507+
return imagesLeft, spaceFreed, nil
481508
}
482509

483510
func (im *realImageGCManager) freeImage(ctx context.Context, image evictionInfo, reason string) error {

pkg/kubelet/images/image_gc_manager_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -740,7 +740,7 @@ func TestGarbageCollectImageNotOldEnough(t *testing.T) {
740740
func getImagesAndFreeSpace(ctx context.Context, t *testing.T, assert *assert.Assertions, im *realImageGCManager, fakeRuntime *containertest.FakeRuntime, spaceToFree, expectedSpaceFreed int64, imagesLen int, freeTime time.Time) {
741741
images, err := im.imagesInEvictionOrder(ctx, freeTime)
742742
require.NoError(t, err)
743-
spaceFreed, err := im.freeSpace(ctx, spaceToFree, freeTime, images)
743+
_, spaceFreed, err := im.freeSpace(ctx, spaceToFree, freeTime, images)
744744
require.NoError(t, err)
745745
assert.EqualValues(expectedSpaceFreed, spaceFreed)
746746
assert.Len(fakeRuntime.ImageList, imagesLen)
@@ -910,7 +910,7 @@ func TestValidateImageGCPolicy(t *testing.T) {
910910
}
911911

912912
for _, tc := range testCases {
913-
if _, err := NewImageGCManager(nil, nil, nil, nil, tc.imageGCPolicy, noopoteltrace.NewTracerProvider()); err != nil {
913+
if _, err := NewImageGCManager(nil, nil, nil, nil, nil, tc.imageGCPolicy, noopoteltrace.NewTracerProvider()); err != nil {
914914
if err.Error() != tc.expectErr {
915915
t.Errorf("[%s:]Expected err:%v, but got:%v", tc.name, tc.expectErr, err.Error())
916916
}

pkg/kubelet/images/image_pull_manager.go

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -274,8 +274,30 @@ func (f *PullManager) MustAttemptImagePull(image, imageRef string, podSecrets []
274274
}
275275

276276
func (f *PullManager) PruneUnknownRecords(imageList []string, until time.Time) {
277-
// TODO: also cleanup the lock maps for intent/pull records?
278-
panic("implement me")
277+
f.pulledAccessors.GlobalLock()
278+
defer f.pulledAccessors.GlobalUnlock()
279+
280+
pulledRecords, err := f.recordsAccessor.ListImagePulledRecords()
281+
if err != nil {
282+
klog.ErrorS(err, "there were errors listing ImagePulledRecords, garbage collection will proceed with incomplete records list")
283+
}
284+
285+
imagesInUse := sets.New(imageList...)
286+
for _, imageRecord := range pulledRecords {
287+
if !imageRecord.LastUpdatedTime.Time.Before(until) {
288+
// the image record was only updated after the GC started
289+
continue
290+
}
291+
292+
if imagesInUse.Has(imageRecord.ImageRef) {
293+
continue
294+
}
295+
296+
if err := f.recordsAccessor.DeleteImagePulledRecord(imageRecord.ImageRef); err != nil {
297+
klog.ErrorS(err, "failed to remove an ImagePulledRecord", "imageRef", imageRecord.ImageRef)
298+
}
299+
}
300+
279301
}
280302

281303
// initialize gathers all the images from pull intent records that exist
@@ -288,8 +310,7 @@ func (f *PullManager) PruneUnknownRecords(imageList []string, until time.Time) {
288310
func (f *PullManager) initialize(ctx context.Context) {
289311
pullIntents, err := f.recordsAccessor.ListImagePullIntents()
290312
if err != nil {
291-
klog.ErrorS(err, "there was an error listing ImagePullIntents")
292-
return
313+
klog.ErrorS(err, "there were errors listing ImagePullIntents, continuing with an incomplete records list")
293314
}
294315

295316
if len(pullIntents) == 0 {

pkg/kubelet/images/image_pull_manager_test.go

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ limitations under the License.
1717
package images
1818

1919
import (
20+
"io/fs"
2021
"os"
2122
"path/filepath"
2223
"reflect"
@@ -873,6 +874,113 @@ func TestFileBasedImagePullManager_initialize(t *testing.T) {
873874
}
874875
}
875876

877+
func TestFileBasedImagePullManager_PruneUnknownRecords(t *testing.T) {
878+
tests := []struct {
879+
name string
880+
imageList []string
881+
gcStartTime time.Time
882+
pulledFiles []string
883+
wantFiles sets.Set[string]
884+
}{
885+
{
886+
name: "all images present",
887+
imageList: []string{"testimage-anonpull", "testimageref", "testemptycredmapping"},
888+
gcStartTime: time.Date(2024, 12, 25, 00, 01, 00, 00, time.UTC),
889+
pulledFiles: []string{
890+
"sha256-a2eace2182b24cdbbb730798e47b10709b9ef5e0f0c1624a3bc06c8ca987727a",
891+
"sha256-b3c0cc4278800b03a308ceb2611161430df571ca733122f0a40ac8b9792a9064",
892+
"sha256-f8778b6393eaf39315e767a58cbeacf2c4b270d94b4d6926ee993d9e49444991",
893+
},
894+
wantFiles: sets.New(
895+
"sha256-a2eace2182b24cdbbb730798e47b10709b9ef5e0f0c1624a3bc06c8ca987727a",
896+
"sha256-b3c0cc4278800b03a308ceb2611161430df571ca733122f0a40ac8b9792a9064",
897+
"sha256-f8778b6393eaf39315e767a58cbeacf2c4b270d94b4d6926ee993d9e49444991",
898+
),
899+
},
900+
{
901+
name: "remove all records on empty list from the GC",
902+
imageList: []string{},
903+
gcStartTime: time.Date(2024, 12, 25, 00, 01, 00, 00, time.UTC),
904+
pulledFiles: []string{
905+
"sha256-a2eace2182b24cdbbb730798e47b10709b9ef5e0f0c1624a3bc06c8ca987727a",
906+
"sha256-b3c0cc4278800b03a308ceb2611161430df571ca733122f0a40ac8b9792a9064",
907+
"sha256-f8778b6393eaf39315e767a58cbeacf2c4b270d94b4d6926ee993d9e49444991",
908+
},
909+
},
910+
{
911+
name: "remove all records on list of untracked images from the GC",
912+
imageList: []string{"untracked1", "different-untracked"},
913+
gcStartTime: time.Date(2024, 12, 25, 00, 01, 00, 00, time.UTC),
914+
pulledFiles: []string{
915+
"sha256-a2eace2182b24cdbbb730798e47b10709b9ef5e0f0c1624a3bc06c8ca987727a",
916+
"sha256-b3c0cc4278800b03a308ceb2611161430df571ca733122f0a40ac8b9792a9064",
917+
"sha256-f8778b6393eaf39315e767a58cbeacf2c4b270d94b4d6926ee993d9e49444991",
918+
},
919+
},
920+
{
921+
name: "remove records without a match in the image list from the GC",
922+
imageList: []string{"testimage-anonpull", "untracked1", "testimageref", "different-untracked"},
923+
gcStartTime: time.Date(2024, 12, 25, 00, 01, 00, 00, time.UTC),
924+
pulledFiles: []string{
925+
"sha256-a2eace2182b24cdbbb730798e47b10709b9ef5e0f0c1624a3bc06c8ca987727a",
926+
"sha256-b3c0cc4278800b03a308ceb2611161430df571ca733122f0a40ac8b9792a9064",
927+
"sha256-f8778b6393eaf39315e767a58cbeacf2c4b270d94b4d6926ee993d9e49444991",
928+
},
929+
wantFiles: sets.New(
930+
"sha256-a2eace2182b24cdbbb730798e47b10709b9ef5e0f0c1624a3bc06c8ca987727a",
931+
"sha256-b3c0cc4278800b03a308ceb2611161430df571ca733122f0a40ac8b9792a9064",
932+
),
933+
},
934+
}
935+
for _, tt := range tests {
936+
t.Run(tt.name, func(t *testing.T) {
937+
encoder, decoder, err := createKubeletConfigSchemeEncoderDecoder()
938+
require.NoError(t, err)
939+
940+
testDir := t.TempDir()
941+
pulledDir := filepath.Join(testDir, "pulled")
942+
if err := os.MkdirAll(pulledDir, 0700); err != nil {
943+
t.Fatalf("failed to create testing dir %q: %v", pulledDir, err)
944+
}
945+
946+
copyTestData(t, pulledDir, "pulled", tt.pulledFiles)
947+
948+
fsRecordAccessor := &fsPullRecordsAccessor{
949+
pulledDir: pulledDir,
950+
encoder: encoder,
951+
decoder: decoder,
952+
}
953+
954+
f := &PullManager{
955+
recordsAccessor: fsRecordAccessor,
956+
pulledAccessors: NewStripedLockSet(10),
957+
}
958+
f.PruneUnknownRecords(tt.imageList, tt.gcStartTime)
959+
960+
filesLeft := sets.New[string]()
961+
err = filepath.Walk(pulledDir, func(path string, info fs.FileInfo, err error) error {
962+
if err != nil {
963+
return err
964+
}
965+
966+
if path == pulledDir {
967+
return nil
968+
}
969+
970+
filesLeft.Insert(info.Name())
971+
return nil
972+
})
973+
if err != nil {
974+
t.Fatalf("failed to walk the pull dir after prune: %v", err)
975+
}
976+
977+
if !tt.wantFiles.Equal(filesLeft) {
978+
t.Errorf("expected equal sets, diff: %s", cmp.Diff(tt.wantFiles, filesLeft))
979+
}
980+
})
981+
}
982+
}
983+
876984
func copyTestData(t *testing.T, dstDir string, testdataDir string, src []string) {
877985
for _, f := range src {
878986
testBytes, err := os.ReadFile(filepath.Join("testdata", testdataDir, f))

pkg/kubelet/kubelet.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -759,7 +759,7 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
759759
}
760760
}
761761

762-
runtime, err := kuberuntime.NewKubeGenericRuntimeManager(
762+
runtime, postImageGCHooks, err := kuberuntime.NewKubeGenericRuntimeManager(
763763
kubecontainer.FilterEventRecorder(kubeDeps.Recorder),
764764
klet.livenessManager,
765765
klet.readinessManager,
@@ -883,7 +883,7 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
883883
klet.containerDeletor = newPodContainerDeletor(klet.containerRuntime, max(containerGCPolicy.MaxPerPodContainer, minDeadContainerInPod))
884884

885885
// setup imageManager
886-
imageManager, err := images.NewImageGCManager(klet.containerRuntime, klet.StatsProvider, kubeDeps.Recorder, nodeRef, imageGCPolicy, kubeDeps.TracerProvider)
886+
imageManager, err := images.NewImageGCManager(klet.containerRuntime, klet.StatsProvider, postImageGCHooks, kubeDeps.Recorder, nodeRef, imageGCPolicy, kubeDeps.TracerProvider)
887887
if err != nil {
888888
return nil, fmt.Errorf("failed to initialize image manager: %v", err)
889889
}

pkg/kubelet/kubelet_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -322,7 +322,7 @@ func newTestKubeletWithImageList(
322322
HighThresholdPercent: 90,
323323
LowThresholdPercent: 80,
324324
}
325-
imageGCManager, err := images.NewImageGCManager(fakeRuntime, kubelet.StatsProvider, fakeRecorder, fakeNodeRef, fakeImageGCPolicy, noopoteltrace.NewTracerProvider())
325+
imageGCManager, err := images.NewImageGCManager(fakeRuntime, kubelet.StatsProvider, nil, fakeRecorder, fakeNodeRef, fakeImageGCPolicy, noopoteltrace.NewTracerProvider())
326326
assert.NoError(t, err)
327327
kubelet.imageManager = &fakeImageGCManager{
328328
fakeImageService: fakeRuntime,
@@ -3394,7 +3394,7 @@ func TestSyncPodSpans(t *testing.T) {
33943394
imageSvc, err := remote.NewRemoteImageService(endpoint, 15*time.Second, tp, &logger)
33953395
assert.NoError(t, err)
33963396

3397-
kubelet.containerRuntime, err = kuberuntime.NewKubeGenericRuntimeManager(
3397+
kubelet.containerRuntime, _, err = kuberuntime.NewKubeGenericRuntimeManager(
33983398
kubelet.recorder,
33993399
kubelet.livenessManager,
34003400
kubelet.readinessManager,

0 commit comments

Comments
 (0)