
Commit b845968

Merge pull request kubernetes#128344 from kannon92/revert-128333-eviction-manager
Allow for container fs and image fs to be on the same drive but in a different partition
2 parents f8e64e1 + 48dc7d3 commit b845968

File tree: 9 files changed, +126 -146 lines changed


pkg/kubelet/container/container_gc.go

Lines changed: 0 additions & 21 deletions
@@ -45,8 +45,6 @@ type GC interface {
 	GarbageCollect(ctx context.Context) error
 	// Deletes all unused containers, including containers belonging to pods that are terminated but not deleted
 	DeleteAllUnusedContainers(ctx context.Context) error
-	// IsContainerFsSeparateFromImageFs tells if writeable layer and read-only layer are separate.
-	IsContainerFsSeparateFromImageFs(ctx context.Context) bool
 }
 
 // SourcesReadyProvider knows how to determine if configuration sources are ready
@@ -88,22 +86,3 @@ func (cgc *realContainerGC) DeleteAllUnusedContainers(ctx context.Context) error
 	klog.InfoS("Attempting to delete unused containers")
 	return cgc.runtime.GarbageCollect(ctx, cgc.policy, cgc.sourcesReadyProvider.AllReady(), true)
 }
-
-func (cgc *realContainerGC) IsContainerFsSeparateFromImageFs(ctx context.Context) bool {
-	resp, err := cgc.runtime.ImageFsInfo(ctx)
-	if err != nil {
-		return false
-	}
-	// These fields can be empty if CRI implementation didn't populate.
-	if resp.ContainerFilesystems == nil || resp.ImageFilesystems == nil || len(resp.ContainerFilesystems) == 0 || len(resp.ImageFilesystems) == 0 {
-		return false
-	}
-	// KEP 4191 explains that multiple filesystems for images and containers is not
-	// supported at the moment.
-	// See https://github.com/kubernetes/enhancements/tree/master/keps/sig-node/4191-split-image-filesystem#comment-on-future-extensions
-	// for work needed to support multiple filesystems.
-	if resp.ContainerFilesystems[0].FsId != nil && resp.ImageFilesystems[0].FsId != nil {
-		return resp.ContainerFilesystems[0].FsId.Mountpoint != resp.ImageFilesystems[0].FsId.Mountpoint
-	}
-	return false
-}
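Note: the removed helper treated any difference in CRI-reported mountpoints as a split filesystem, which also fires when the writeable container layer and the images merely live under different paths on the same device. The replacement (see pkg/kubelet/stats/provider.go below) compares backing devices instead. A minimal sketch of the two checks, using simplified stand-in types rather than the real CRI or cadvisor structs:

// Illustrative sketch only; fsInfo is a stand-in type, not the CRI or cadvisor API.
package main

import "fmt"

type fsInfo struct {
	Device     string // block device backing the filesystem
	Mountpoint string // path where the filesystem is mounted
}

// mountpointSplit mirrors the removed check: any mountpoint difference counts as separate.
func mountpointSplit(imageFs, containerFs fsInfo) bool {
	return imageFs.Mountpoint != containerFs.Mountpoint
}

// deviceSplit mirrors the new check: only a different backing device counts as separate.
func deviceSplit(imageFs, containerFs fsInfo) bool {
	return imageFs.Device != containerFs.Device
}

func main() {
	imageFs := fsInfo{Device: "/dev/sdb1", Mountpoint: "/var/lib/images"}
	containerFs := fsInfo{Device: "/dev/sdb1", Mountpoint: "/var/lib/containers"}
	fmt.Println(mountpointSplit(imageFs, containerFs)) // true: old check reports a split
	fmt.Println(deviceSplit(imageFs, containerFs))     // false: same device, so not a split
}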

pkg/kubelet/container/container_gc_test.go

Lines changed: 0 additions & 96 deletions
This file was deleted.

pkg/kubelet/eviction/eviction_manager.go

Lines changed: 12 additions & 5 deletions
@@ -252,13 +252,20 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
 	// build the ranking functions (if not yet known)
 	// TODO: have a function in cadvisor that lets us know if global housekeeping has completed
 	if m.dedicatedImageFs == nil {
-		hasImageFs, splitDiskError := diskInfoProvider.HasDedicatedImageFs(ctx)
-		if splitDiskError != nil {
-			klog.ErrorS(splitDiskError, "Eviction manager: failed to get HasDedicatedImageFs")
-			return nil, fmt.Errorf("eviction manager: failed to get HasDedicatedImageFs: %w", splitDiskError)
+		hasImageFs, imageFsErr := diskInfoProvider.HasDedicatedImageFs(ctx)
+		if imageFsErr != nil {
+			// TODO: This should be refactored to log an error and retry the HasDedicatedImageFs
+			// If we have a transient error this will never be retried and we will not set eviction signals
+			klog.ErrorS(imageFsErr, "Eviction manager: failed to get HasDedicatedImageFs")
+			return nil, fmt.Errorf("eviction manager: failed to get HasDedicatedImageFs: %w", imageFsErr)
 		}
 		m.dedicatedImageFs = &hasImageFs
-		splitContainerImageFs := m.containerGC.IsContainerFsSeparateFromImageFs(ctx)
+		splitContainerImageFs, splitErr := diskInfoProvider.HasDedicatedContainerFs(ctx)
+		if splitErr != nil {
+			// A common error case is when there is no split filesystem
+			// there is an error finding the split filesystem label and we want to ignore these errors
+			klog.ErrorS(splitErr, "eviction manager: failed to check if we have separate container filesystem. Ignoring.")
+		}
 
 		// If we are a split filesystem but the feature is turned off
 		// we should return an error.
pkg/kubelet/eviction/eviction_manager_test.go

Lines changed: 21 additions & 20 deletions
@@ -66,22 +66,27 @@ func (m *mockPodKiller) killPodNow(pod *v1.Pod, evict bool, gracePeriodOverride
 
 // mockDiskInfoProvider is used to simulate testing.
 type mockDiskInfoProvider struct {
-	dedicatedImageFs *bool
+	dedicatedImageFs     *bool
+	dedicatedContainerFs *bool
 }
 
 // HasDedicatedImageFs returns the mocked value
 func (m *mockDiskInfoProvider) HasDedicatedImageFs(_ context.Context) (bool, error) {
 	return ptr.Deref(m.dedicatedImageFs, false), nil
 }
 
+// HasDedicatedContainerFs returns the mocked value
+func (m *mockDiskInfoProvider) HasDedicatedContainerFs(_ context.Context) (bool, error) {
+	return ptr.Deref(m.dedicatedContainerFs, false), nil
+}
+
 // mockDiskGC is used to simulate invoking image and container garbage collection.
 type mockDiskGC struct {
-	err                  error
-	imageGCInvoked       bool
-	containerGCInvoked   bool
-	readAndWriteSeparate bool
-	fakeSummaryProvider  *fakeSummaryProvider
-	summaryAfterGC       *statsapi.Summary
+	err                 error
+	imageGCInvoked      bool
+	containerGCInvoked  bool
+	fakeSummaryProvider *fakeSummaryProvider
+	summaryAfterGC      *statsapi.Summary
 }
 
 // DeleteUnusedImages returns the mocked values.
@@ -102,10 +107,6 @@ func (m *mockDiskGC) DeleteAllUnusedContainers(_ context.Context) error {
 	return m.err
 }
 
-func (m *mockDiskGC) IsContainerFsSeparateFromImageFs(_ context.Context) bool {
-	return m.readAndWriteSeparate
-}
-
 func makePodWithMemoryStats(name string, priority int32, requests v1.ResourceList, limits v1.ResourceList, memoryWorkingSet string) (*v1.Pod, statsapi.PodStats) {
 	pod := newPod(name, priority, []v1.Container{
 		newContainer(name, requests, limits),
@@ -384,7 +385,7 @@ func TestPIDPressure_VerifyPodStatus(t *testing.T) {
 
 	fakeClock := testingclock.NewFakeClock(time.Now())
 	podKiller := &mockPodKiller{}
-	diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: ptr.To(false)}
+	diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: ptr.To(false), dedicatedContainerFs: ptr.To(false)}
 	diskGC := &mockDiskGC{err: nil}
 	nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
 
@@ -562,8 +563,8 @@ func TestDiskPressureNodeFs_VerifyPodStatus(t *testing.T) {
 
 	fakeClock := testingclock.NewFakeClock(time.Now())
 	podKiller := &mockPodKiller{}
-	diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: tc.dedicatedImageFs}
-	diskGC := &mockDiskGC{err: nil, readAndWriteSeparate: tc.writeableSeparateFromReadOnly}
+	diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: tc.dedicatedImageFs, dedicatedContainerFs: &tc.writeableSeparateFromReadOnly}
+	diskGC := &mockDiskGC{err: nil}
 	nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
 
 	config := Config{
@@ -1307,8 +1308,8 @@ func TestDiskPressureNodeFs(t *testing.T) {
 
 	fakeClock := testingclock.NewFakeClock(time.Now())
 	podKiller := &mockPodKiller{}
-	diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: tc.dedicatedImageFs}
-	diskGC := &mockDiskGC{err: nil, readAndWriteSeparate: tc.writeableSeparateFromReadOnly}
+	diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: tc.dedicatedImageFs, dedicatedContainerFs: &tc.writeableSeparateFromReadOnly}
+	diskGC := &mockDiskGC{err: nil}
 	nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
 
 	config := Config{
@@ -1829,7 +1830,7 @@ func TestNodeReclaimFuncs(t *testing.T) {
 
 	fakeClock := testingclock.NewFakeClock(time.Now())
 	podKiller := &mockPodKiller{}
-	diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: tc.dedicatedImageFs}
+	diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: tc.dedicatedImageFs, dedicatedContainerFs: &tc.writeableSeparateFromReadOnly}
 	nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
 
 	config := Config{
@@ -1846,7 +1847,7 @@ func TestNodeReclaimFuncs(t *testing.T) {
 	// This is a constant that we use to test that disk pressure is over. Don't change!
 	diskStatConst := diskStatStart
 	summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker(diskStatStart)}
-	diskGC := &mockDiskGC{fakeSummaryProvider: summaryProvider, err: nil, readAndWriteSeparate: tc.writeableSeparateFromReadOnly}
+	diskGC := &mockDiskGC{fakeSummaryProvider: summaryProvider, err: nil}
 	manager := &managerImpl{
 		clock:       fakeClock,
 		killPodFunc: podKiller.killPodNow,
@@ -2292,8 +2293,8 @@ func TestInodePressureFsInodes(t *testing.T) {
 
 	fakeClock := testingclock.NewFakeClock(time.Now())
 	podKiller := &mockPodKiller{}
-	diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: tc.dedicatedImageFs}
-	diskGC := &mockDiskGC{err: nil, readAndWriteSeparate: tc.writeableSeparateFromReadOnly}
+	diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: tc.dedicatedImageFs, dedicatedContainerFs: &tc.writeableSeparateFromReadOnly}
+	diskGC := &mockDiskGC{err: nil}
 	nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
 
 	config := Config{

pkg/kubelet/eviction/types.go

Lines changed: 2 additions & 2 deletions
@@ -75,6 +75,8 @@ type Manager interface {
 type DiskInfoProvider interface {
 	// HasDedicatedImageFs returns true if the imagefs is on a separate device from the rootfs.
 	HasDedicatedImageFs(ctx context.Context) (bool, error)
+	// HasDedicatedContainerFs returns true if the container fs is on a separate device from the rootfs.
+	HasDedicatedContainerFs(ctx context.Context) (bool, error)
 }
 
 // ImageGC is responsible for performing garbage collection of unused images.
@@ -87,8 +89,6 @@ type ImageGC interface {
 type ContainerGC interface {
 	// DeleteAllUnusedContainers deletes all unused containers, even those that belong to pods that are terminated, but not deleted.
 	DeleteAllUnusedContainers(ctx context.Context) error
-	// IsContainerFsSeparateFromImageFs checks if container filesystem is split from image filesystem.
-	IsContainerFsSeparateFromImageFs(ctx context.Context) bool
 }
 
 // KillPodFunc kills a pod.
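Note: after this change any DiskInfoProvider must answer both questions, and eviction no longer asks the ContainerGC about filesystem layout. A minimal conforming stub, mirroring the mock added in eviction_manager_test.go above (stubProvider and its fixed answers are illustrative only, not kubelet code):

// Illustrative stub satisfying the updated DiskInfoProvider interface.
package main

import (
	"context"
	"fmt"
)

// DiskInfoProvider matches the updated interface in pkg/kubelet/eviction/types.go.
type DiskInfoProvider interface {
	HasDedicatedImageFs(ctx context.Context) (bool, error)
	HasDedicatedContainerFs(ctx context.Context) (bool, error)
}

// stubProvider returns fixed answers, much like the test mock does.
type stubProvider struct {
	dedicatedImageFs     bool
	dedicatedContainerFs bool
}

func (s stubProvider) HasDedicatedImageFs(_ context.Context) (bool, error) {
	return s.dedicatedImageFs, nil
}

func (s stubProvider) HasDedicatedContainerFs(_ context.Context) (bool, error) {
	return s.dedicatedContainerFs, nil
}

func main() {
	var p DiskInfoProvider = stubProvider{dedicatedImageFs: true, dedicatedContainerFs: false}
	imageFs, _ := p.HasDedicatedImageFs(context.Background())
	containerFs, _ := p.HasDedicatedContainerFs(context.Background())
	fmt.Println(imageFs, containerFs) // true false: dedicated image fs, shared container fs
}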

pkg/kubelet/stats/cadvisor_stats_provider.go

Lines changed: 7 additions & 1 deletion
@@ -276,11 +276,11 @@ func (p *cadvisorStatsProvider) ImageFsStats(ctx context.Context) (imageFsRet *s
 	if imageStats == nil || len(imageStats.ImageFilesystems) == 0 || len(imageStats.ContainerFilesystems) == 0 {
 		return nil, nil, fmt.Errorf("missing image stats: %+v", imageStats)
 	}
+
 	splitFileSystem := false
 	imageFs := imageStats.ImageFilesystems[0]
 	containerFs := imageStats.ContainerFilesystems[0]
 	if imageFs.FsId != nil && containerFs.FsId != nil && imageFs.FsId.Mountpoint != containerFs.FsId.Mountpoint {
-		klog.InfoS("Detect Split Filesystem", "ImageFilesystems", imageFs, "ContainerFilesystems", containerFs)
 		splitFileSystem = true
 	}
 	var imageFsInodesUsed *uint64
@@ -312,6 +312,12 @@ func (p *cadvisorStatsProvider) ImageFsStats(ctx context.Context) (imageFsRet *s
 	if err != nil {
 		return nil, nil, fmt.Errorf("failed to get container fs info: %w", err)
 	}
+	// ImageFs and ContainerFs could be on different paths on the same device.
+	if containerFsInfo.Device == imageFsInfo.Device {
+		return fsStats, fsStats, nil
+	}
+
+	klog.InfoS("Detect Split Filesystem", "ImageFilesystems", imageStats.ImageFilesystems[0], "ContainerFilesystems", imageStats.ContainerFilesystems[0])
 
 	var containerFsInodesUsed *uint64
 	if containerFsInfo.Inodes != nil && containerFsInfo.InodesFree != nil {
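Note: with the early return above, ImageFsStats reports the image filesystem stats for both return values whenever cadvisor sees the same backing device, even if the CRI reports different mountpoints; the new TestCadvisorSameDiskDifferentLocations below asserts exactly that. A minimal sketch of the decision, using simplified stand-in types rather than the real cadvisor and stats API structs:

// Illustrative sketch only; fsInfo and fsStats are stand-ins, not kubelet types.
package main

import "fmt"

type fsInfo struct {
	Device string
	Usage  uint64
}

type fsStats struct {
	UsedBytes uint64
}

// imageFsStats mirrors the control flow: when image and container filesystems share a
// device, the image filesystem stats are returned for both; otherwise each gets its own.
func imageFsStats(imageFsInfo, containerFsInfo fsInfo) (imageStats, containerStats fsStats) {
	imageStats = fsStats{UsedBytes: imageFsInfo.Usage}
	if containerFsInfo.Device == imageFsInfo.Device {
		// same device, possibly different paths: report a single filesystem
		return imageStats, imageStats
	}
	containerStats = fsStats{UsedBytes: containerFsInfo.Usage}
	return imageStats, containerStats
}

func main() {
	img := fsInfo{Device: "/dev/sdb1", Usage: 100}
	ctr := fsInfo{Device: "/dev/sdb1", Usage: 40}
	a, b := imageFsStats(img, ctr)
	fmt.Println(a.UsedBytes, b.UsedBytes) // 100 100: both values report the image filesystem
}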

pkg/kubelet/stats/cadvisor_stats_provider_test.go

Lines changed: 53 additions & 1 deletion
@@ -680,7 +680,7 @@ func TestCadvisorSplitImagesFsStats(t *testing.T) {
 		mockRuntime  = containertest.NewMockRuntime(t)
 
 		seed            = 1000
-		imageFsInfo     = getTestFsInfo(seed)
+		imageFsInfo     = getTestFsInfoWithDifferentMount(seed, "image")
 		containerSeed   = 1001
 		containerFsInfo = getTestFsInfo(containerSeed)
 	)
@@ -723,7 +723,59 @@ func TestCadvisorSplitImagesFsStats(t *testing.T) {
 	assert.Equal(containerFsInfo.InodesFree, containerfs.InodesFree)
 	assert.Equal(containerFsInfo.Inodes, containerfs.Inodes)
 	assert.Equal(*containerFsInfo.Inodes-*containerFsInfo.InodesFree, *containerfs.InodesUsed)
+}
 
+func TestCadvisorSameDiskDifferentLocations(t *testing.T) {
+	ctx := context.Background()
+	var (
+		assert       = assert.New(t)
+		mockCadvisor = cadvisortest.NewMockInterface(t)
+		mockRuntime  = containertest.NewMockRuntime(t)
+
+		seed            = 1000
+		imageFsInfo     = getTestFsInfo(seed)
+		containerSeed   = 1001
+		containerFsInfo = getTestFsInfo(containerSeed)
+	)
+	imageFsInfoCRI := &runtimeapi.FilesystemUsage{
+		Timestamp:  imageFsInfo.Timestamp.Unix(),
+		FsId:       &runtimeapi.FilesystemIdentifier{Mountpoint: "images"},
+		UsedBytes:  &runtimeapi.UInt64Value{Value: imageFsInfo.Usage},
+		InodesUsed: &runtimeapi.UInt64Value{Value: *imageFsInfo.Inodes},
+	}
+	containerFsInfoCRI := &runtimeapi.FilesystemUsage{
+		Timestamp:  containerFsInfo.Timestamp.Unix(),
+		FsId:       &runtimeapi.FilesystemIdentifier{Mountpoint: "containers"},
+		UsedBytes:  &runtimeapi.UInt64Value{Value: containerFsInfo.Usage},
+		InodesUsed: &runtimeapi.UInt64Value{Value: *containerFsInfo.Inodes},
+	}
+	imageFsInfoResponse := &runtimeapi.ImageFsInfoResponse{
+		ImageFilesystems:     []*runtimeapi.FilesystemUsage{imageFsInfoCRI},
+		ContainerFilesystems: []*runtimeapi.FilesystemUsage{containerFsInfoCRI},
+	}
+
+	mockCadvisor.EXPECT().ImagesFsInfo().Return(imageFsInfo, nil)
+	mockCadvisor.EXPECT().ContainerFsInfo().Return(containerFsInfo, nil)
+	mockRuntime.EXPECT().ImageFsInfo(ctx).Return(imageFsInfoResponse, nil)
+	featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.KubeletSeparateDiskGC, true)
+
+	provider := newCadvisorStatsProvider(mockCadvisor, &fakeResourceAnalyzer{}, mockRuntime, nil, NewFakeHostStatsProvider())
+	stats, containerfs, err := provider.ImageFsStats(ctx)
+	require.NoError(t, err, "imageFsStats should have no error")
+
+	assert.Equal(imageFsInfo.Timestamp, stats.Time.Time)
+	assert.Equal(imageFsInfo.Available, *stats.AvailableBytes)
+	assert.Equal(imageFsInfo.Capacity, *stats.CapacityBytes)
+	assert.Equal(imageFsInfo.InodesFree, stats.InodesFree)
+	assert.Equal(imageFsInfo.Inodes, stats.Inodes)
+	assert.Equal(*imageFsInfo.Inodes-*imageFsInfo.InodesFree, *stats.InodesUsed)
+
+	assert.Equal(imageFsInfo.Timestamp, containerfs.Time.Time)
+	assert.Equal(imageFsInfo.Available, *containerfs.AvailableBytes)
+	assert.Equal(imageFsInfo.Capacity, *containerfs.CapacityBytes)
+	assert.Equal(imageFsInfo.InodesFree, containerfs.InodesFree)
+	assert.Equal(imageFsInfo.Inodes, containerfs.Inodes)
+	assert.Equal(*imageFsInfo.Inodes-*imageFsInfo.InodesFree, *containerfs.InodesUsed)
 }
 
 func TestCadvisorListPodStatsWhenContainerLogFound(t *testing.T) {

pkg/kubelet/stats/provider.go

Lines changed: 14 additions & 0 deletions
@@ -197,6 +197,20 @@ func (p *Provider) HasDedicatedImageFs(ctx context.Context) (bool, error) {
 	return device != rootFsInfo.Device, nil
 }
 
+// HasDedicatedContainerFs returns true if a dedicated filesystem exists for the writeable layer of containers.
+// KEP Issue Number 4191: Enhanced this to allow for the containers to be separate from images.
+func (p *Provider) HasDedicatedContainerFs(ctx context.Context) (bool, error) {
+	imageFs, err := p.cadvisor.ImagesFsInfo()
+	if err != nil {
+		return false, err
+	}
+	containerFs, err := p.cadvisor.ContainerFsInfo()
+	if err != nil {
+		return false, err
+	}
+	return imageFs.Device != containerFs.Device, nil
+}
+
 func equalFileSystems(a, b *statsapi.FsStats) bool {
 	if a == nil || b == nil {
 		return false
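Note: HasDedicatedImageFs compares the image filesystem's device against the rootfs device, while the new HasDedicatedContainerFs compares the image and container filesystem devices. A minimal sketch of those comparisons across two layouts, assuming a hypothetical fakeCadvisor instead of the real cadvisor.Interface:

// Illustrative sketch only; fakeCadvisor and fsInfo are stand-ins, not cadvisor types.
package main

import "fmt"

type fsInfo struct {
	Device string
}

type fakeCadvisor struct {
	rootFs, imageFs, containerFs fsInfo
}

func (c fakeCadvisor) RootFsInfo() fsInfo      { return c.rootFs }
func (c fakeCadvisor) ImagesFsInfo() fsInfo    { return c.imageFs }
func (c fakeCadvisor) ContainerFsInfo() fsInfo { return c.containerFs }

func main() {
	// Split layout: images and writeable container layers on different devices.
	split := fakeCadvisor{
		rootFs:      fsInfo{Device: "/dev/sda1"},
		imageFs:     fsInfo{Device: "/dev/sdb1"},
		containerFs: fsInfo{Device: "/dev/sdc1"},
	}
	// Shared layout: images and containers on the same device, different paths.
	shared := fakeCadvisor{
		rootFs:      fsInfo{Device: "/dev/sda1"},
		imageFs:     fsInfo{Device: "/dev/sdb1"},
		containerFs: fsInfo{Device: "/dev/sdb1"},
	}
	for _, c := range []fakeCadvisor{split, shared} {
		hasDedicatedImageFs := c.ImagesFsInfo().Device != c.RootFsInfo().Device
		hasDedicatedContainerFs := c.ImagesFsInfo().Device != c.ContainerFsInfo().Device
		fmt.Println(hasDedicatedImageFs, hasDedicatedContainerFs)
	}
	// Output: "true true" for the split layout, then "true false" for the shared layout.
}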
