Commit 8ce1b53

Merge pull request kubernetes#80831 from odinuge/hugetlb-pagesizes-cleanup
Add support for removing unsupported huge page sizes
2 parents 4f5ce1f + a233b9a commit 8ce1b53

4 files changed: +365 -5 lines

pkg/kubelet/kubelet_node_status.go

Lines changed: 49 additions & 0 deletions
@@ -31,6 +31,7 @@ import (
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/types"
+	"k8s.io/apimachinery/pkg/util/sets"
 	cloudprovider "k8s.io/cloud-provider"
 	cloudproviderapi "k8s.io/cloud-provider/api"
 	"k8s.io/klog/v2"
@@ -116,6 +117,7 @@ func (kl *Kubelet) tryRegisterWithAPIServer(node *v1.Node) bool {
 	requiresUpdate := kl.reconcileCMADAnnotationWithExistingNode(node, existingNode)
 	requiresUpdate = kl.updateDefaultLabels(node, existingNode) || requiresUpdate
 	requiresUpdate = kl.reconcileExtendedResource(node, existingNode) || requiresUpdate
+	requiresUpdate = kl.reconcileHugePageResource(node, existingNode) || requiresUpdate
 	if requiresUpdate {
 		if _, _, err := nodeutil.PatchNodeStatus(kl.kubeClient.CoreV1(), types.NodeName(kl.nodeName), originalNode, existingNode); err != nil {
 			klog.Errorf("Unable to reconcile node %q with API server: error updating node: %v", kl.nodeName, err)
@@ -126,6 +128,53 @@ func (kl *Kubelet) tryRegisterWithAPIServer(node *v1.Node) bool {
 	return true
 }
 
+// reconcileHugePageResource updates the huge page capacity for each page size and removes huge page sizes that are no longer supported.
+func (kl *Kubelet) reconcileHugePageResource(initialNode, existingNode *v1.Node) bool {
+	requiresUpdate := false
+	supportedHugePageResources := sets.String{}
+
+	for resourceName := range initialNode.Status.Capacity {
+		if !v1helper.IsHugePageResourceName(resourceName) {
+			continue
+		}
+		supportedHugePageResources.Insert(string(resourceName))
+
+		initialCapacity := initialNode.Status.Capacity[resourceName]
+		initialAllocatable := initialNode.Status.Allocatable[resourceName]
+
+		capacity, resourceIsSupported := existingNode.Status.Capacity[resourceName]
+		allocatable := existingNode.Status.Allocatable[resourceName]
+
+		// Add or update capacity if the size was previously unsupported or has changed
+		if !resourceIsSupported || capacity.Cmp(initialCapacity) != 0 {
+			existingNode.Status.Capacity[resourceName] = initialCapacity.DeepCopy()
+			requiresUpdate = true
+		}
+
+		// Add or update allocatable if the size was previously unsupported or has changed
+		if !resourceIsSupported || allocatable.Cmp(initialAllocatable) != 0 {
+			existingNode.Status.Allocatable[resourceName] = initialAllocatable.DeepCopy()
+			requiresUpdate = true
+		}
+
+	}
+
+	for resourceName := range existingNode.Status.Capacity {
+		if !v1helper.IsHugePageResourceName(resourceName) {
+			continue
+		}
+
+		// If the huge page size is no longer supported, remove it from the node
+		if !supportedHugePageResources.Has(string(resourceName)) {
+			delete(existingNode.Status.Capacity, resourceName)
+			delete(existingNode.Status.Allocatable, resourceName)
+			klog.Infof("Removing now unsupported huge page resource named: %s", resourceName)
+			requiresUpdate = true
+		}
+	}
+	return requiresUpdate
+}
+
 // Zeros out extended resource capacity during reconciliation.
 func (kl *Kubelet) reconcileExtendedResource(initialNode, node *v1.Node) bool {
 	requiresUpdate := false
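
The reconciliation above relies on two building blocks: recognizing huge page resources by name and comparing quantities with Quantity.Cmp. Below is a minimal sketch of both, assuming the upstream v1helper.IsHugePageResourceName check amounts to a "hugepages-" prefix test; the local isHugePageResourceName helper is an illustrative stand-in, not the kubelet's own code.

package main

import (
	"fmt"
	"strings"

	"k8s.io/apimachinery/pkg/api/resource"
)

// isHugePageResourceName is a local stand-in for v1helper.IsHugePageResourceName:
// huge page resources are named "hugepages-<pagesize>", e.g. "hugepages-2Mi".
func isHugePageResourceName(name string) bool {
	return strings.HasPrefix(name, "hugepages-")
}

func main() {
	fmt.Println(isHugePageResourceName("hugepages-2Mi")) // true
	fmt.Println(isHugePageResourceName("memory"))        // false

	// The reconcile loops compare quantities with Cmp, so two quantities that
	// encode the same value are considered equal regardless of how they were built.
	fromString := resource.MustParse("100Mi")
	fromBytes := *resource.NewQuantity(100*1024*1024, resource.BinarySI)
	fmt.Println(fromString.Cmp(fromBytes) == 0) // true: no node status patch needed
}

Because Cmp compares values rather than string representations, a capacity parsed from "100Mi" and one built from the equivalent byte count do not count as a change that requires a node status update.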

pkg/kubelet/kubelet_node_status_test.go

Lines changed: 249 additions & 0 deletions
@@ -1694,6 +1694,255 @@ func TestUpdateDefaultLabels(t *testing.T) {
 	}
 }
 
+func TestReconcileHugePageResource(t *testing.T) {
+	testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */)
+	hugePageResourceName64Ki := v1.ResourceName("hugepages-64Ki")
+	hugePageResourceName2Mi := v1.ResourceName("hugepages-2Mi")
+	hugePageResourceName1Gi := v1.ResourceName("hugepages-1Gi")
+
+	cases := []struct {
+		name         string
+		testKubelet  *TestKubelet
+		initialNode  *v1.Node
+		existingNode *v1.Node
+		expectedNode *v1.Node
+		needsUpdate  bool
+	}{
+		{
+			name:        "no update needed when all huge page resources are similar",
+			testKubelet: testKubelet,
+			needsUpdate: false,
+			initialNode: &v1.Node{
+				Status: v1.NodeStatus{
+					Capacity: v1.ResourceList{
+						v1.ResourceCPU:              *resource.NewMilliQuantity(2000, resource.DecimalSI),
+						v1.ResourceMemory:           *resource.NewQuantity(10e9, resource.BinarySI),
+						v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
+						hugePageResourceName2Mi:     resource.MustParse("100Mi"),
+						hugePageResourceName64Ki:    *resource.NewQuantity(0, resource.BinarySI),
+					},
+					Allocatable: v1.ResourceList{
+						v1.ResourceCPU:              *resource.NewMilliQuantity(2000, resource.DecimalSI),
+						v1.ResourceMemory:           *resource.NewQuantity(10e9, resource.BinarySI),
+						v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
+						hugePageResourceName2Mi:     resource.MustParse("100Mi"),
+						hugePageResourceName64Ki:    *resource.NewQuantity(0, resource.BinarySI),
+					},
+				},
+			},
+			existingNode: &v1.Node{
+				Status: v1.NodeStatus{
+					Capacity: v1.ResourceList{
+						v1.ResourceCPU:              *resource.NewMilliQuantity(2000, resource.DecimalSI),
+						v1.ResourceMemory:           *resource.NewQuantity(10e9, resource.BinarySI),
+						v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
+						hugePageResourceName2Mi:     resource.MustParse("100Mi"),
+						hugePageResourceName64Ki:    *resource.NewQuantity(0, resource.BinarySI),
+					},
+					Allocatable: v1.ResourceList{
+						v1.ResourceCPU:              *resource.NewMilliQuantity(2000, resource.DecimalSI),
+						v1.ResourceMemory:           *resource.NewQuantity(10e9, resource.BinarySI),
+						v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
+						hugePageResourceName2Mi:     resource.MustParse("100Mi"),
+						hugePageResourceName64Ki:    *resource.NewQuantity(0, resource.BinarySI),
+					},
+				},
+			},
+			expectedNode: &v1.Node{
+				Status: v1.NodeStatus{
+					Capacity: v1.ResourceList{
+						v1.ResourceCPU:              *resource.NewMilliQuantity(2000, resource.DecimalSI),
+						v1.ResourceMemory:           *resource.NewQuantity(10e9, resource.BinarySI),
+						v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
+						hugePageResourceName2Mi:     resource.MustParse("100Mi"),
+						hugePageResourceName64Ki:    *resource.NewQuantity(0, resource.BinarySI),
+					},
+					Allocatable: v1.ResourceList{
+						v1.ResourceCPU:              *resource.NewMilliQuantity(2000, resource.DecimalSI),
+						v1.ResourceMemory:           *resource.NewQuantity(10e9, resource.BinarySI),
+						v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
+						hugePageResourceName2Mi:     resource.MustParse("100Mi"),
+						hugePageResourceName64Ki:    *resource.NewQuantity(0, resource.BinarySI),
+					},
+				},
+			},
+		}, {
+			name:        "update needed when a new huge page resource is supported",
+			testKubelet: testKubelet,
+			needsUpdate: true,
+			initialNode: &v1.Node{
+				Status: v1.NodeStatus{
+					Capacity: v1.ResourceList{
+						v1.ResourceCPU:              *resource.NewMilliQuantity(2000, resource.DecimalSI),
+						v1.ResourceMemory:           *resource.NewQuantity(10e9, resource.BinarySI),
+						v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
+						hugePageResourceName2Mi:     *resource.NewQuantity(0, resource.BinarySI),
+						hugePageResourceName1Gi:     resource.MustParse("2Gi"),
+					},
+					Allocatable: v1.ResourceList{
+						v1.ResourceCPU:              *resource.NewMilliQuantity(2000, resource.DecimalSI),
+						v1.ResourceMemory:           *resource.NewQuantity(10e9, resource.BinarySI),
+						v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
+						hugePageResourceName2Mi:     *resource.NewQuantity(0, resource.BinarySI),
+						hugePageResourceName1Gi:     resource.MustParse("2Gi"),
+					},
+				},
+			},
+			existingNode: &v1.Node{
+				Status: v1.NodeStatus{
+					Capacity: v1.ResourceList{
+						v1.ResourceCPU:              *resource.NewMilliQuantity(2000, resource.DecimalSI),
+						v1.ResourceMemory:           *resource.NewQuantity(10e9, resource.BinarySI),
+						v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
+						hugePageResourceName2Mi:     resource.MustParse("100Mi"),
+					},
+					Allocatable: v1.ResourceList{
+						v1.ResourceCPU:              *resource.NewMilliQuantity(2000, resource.DecimalSI),
+						v1.ResourceMemory:           *resource.NewQuantity(10e9, resource.BinarySI),
+						v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
+						hugePageResourceName2Mi:     resource.MustParse("100Mi"),
+					},
+				},
+			},
+			expectedNode: &v1.Node{
+				Status: v1.NodeStatus{
+					Capacity: v1.ResourceList{
+						v1.ResourceCPU:              *resource.NewMilliQuantity(2000, resource.DecimalSI),
+						v1.ResourceMemory:           *resource.NewQuantity(10e9, resource.BinarySI),
+						v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
+						hugePageResourceName2Mi:     *resource.NewQuantity(0, resource.BinarySI),
+						hugePageResourceName1Gi:     resource.MustParse("2Gi"),
+					},
+					Allocatable: v1.ResourceList{
+						v1.ResourceCPU:              *resource.NewMilliQuantity(2000, resource.DecimalSI),
+						v1.ResourceMemory:           *resource.NewQuantity(10e9, resource.BinarySI),
+						v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
+						hugePageResourceName2Mi:     *resource.NewQuantity(0, resource.BinarySI),
+						hugePageResourceName1Gi:     resource.MustParse("2Gi"),
+					},
+				},
+			},
+		}, {
+			name:        "update needed when huge page resource quantity has changed",
+			testKubelet: testKubelet,
+			needsUpdate: true,
+			initialNode: &v1.Node{
+				Status: v1.NodeStatus{
+					Capacity: v1.ResourceList{
+						v1.ResourceCPU:              *resource.NewMilliQuantity(2000, resource.DecimalSI),
+						v1.ResourceMemory:           *resource.NewQuantity(10e9, resource.BinarySI),
+						v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
+						hugePageResourceName1Gi:     resource.MustParse("4Gi"),
+					},
+					Allocatable: v1.ResourceList{
+						v1.ResourceCPU:              *resource.NewMilliQuantity(2000, resource.DecimalSI),
+						v1.ResourceMemory:           *resource.NewQuantity(10e9, resource.BinarySI),
+						v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
+						hugePageResourceName1Gi:     resource.MustParse("4Gi"),
+					},
+				},
+			},
+			existingNode: &v1.Node{
+				Status: v1.NodeStatus{
+					Capacity: v1.ResourceList{
+						v1.ResourceCPU:              *resource.NewMilliQuantity(2000, resource.DecimalSI),
+						v1.ResourceMemory:           *resource.NewQuantity(10e9, resource.BinarySI),
+						v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
+						hugePageResourceName1Gi:     resource.MustParse("2Gi"),
+					},
+					Allocatable: v1.ResourceList{
+						v1.ResourceCPU:              *resource.NewMilliQuantity(2000, resource.DecimalSI),
+						v1.ResourceMemory:           *resource.NewQuantity(10e9, resource.BinarySI),
+						v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
+						hugePageResourceName1Gi:     resource.MustParse("2Gi"),
+					},
+				},
+			},
+			expectedNode: &v1.Node{
+				Status: v1.NodeStatus{
+					Capacity: v1.ResourceList{
+						v1.ResourceCPU:              *resource.NewMilliQuantity(2000, resource.DecimalSI),
+						v1.ResourceMemory:           *resource.NewQuantity(10e9, resource.BinarySI),
+						v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
+						hugePageResourceName1Gi:     resource.MustParse("4Gi"),
+					},
+					Allocatable: v1.ResourceList{
+						v1.ResourceCPU:              *resource.NewMilliQuantity(2000, resource.DecimalSI),
+						v1.ResourceMemory:           *resource.NewQuantity(10e9, resource.BinarySI),
+						v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
+						hugePageResourceName1Gi:     resource.MustParse("4Gi"),
+					},
+				},
+			},
+		}, {
+			name:        "update needed when a huge page resource is no longer supported",
+			testKubelet: testKubelet,
+			needsUpdate: true,
+			initialNode: &v1.Node{
+				Status: v1.NodeStatus{
+					Capacity: v1.ResourceList{
+						v1.ResourceCPU:              *resource.NewMilliQuantity(2000, resource.DecimalSI),
+						v1.ResourceMemory:           *resource.NewQuantity(10e9, resource.BinarySI),
+						v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
+						hugePageResourceName1Gi:     resource.MustParse("2Gi"),
+					},
+					Allocatable: v1.ResourceList{
+						v1.ResourceCPU:              *resource.NewMilliQuantity(2000, resource.DecimalSI),
+						v1.ResourceMemory:           *resource.NewQuantity(10e9, resource.BinarySI),
+						v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
+						hugePageResourceName1Gi:     resource.MustParse("2Gi"),
+					},
+				},
+			},
+			existingNode: &v1.Node{
+				Status: v1.NodeStatus{
+					Capacity: v1.ResourceList{
+						v1.ResourceCPU:              *resource.NewMilliQuantity(2000, resource.DecimalSI),
+						v1.ResourceMemory:           *resource.NewQuantity(10e9, resource.BinarySI),
+						v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
+						hugePageResourceName2Mi:     *resource.NewQuantity(0, resource.BinarySI),
+						hugePageResourceName1Gi:     resource.MustParse("2Gi"),
+					},
+					Allocatable: v1.ResourceList{
+						v1.ResourceCPU:              *resource.NewMilliQuantity(2000, resource.DecimalSI),
+						v1.ResourceMemory:           *resource.NewQuantity(10e9, resource.BinarySI),
+						v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
+						hugePageResourceName2Mi:     *resource.NewQuantity(0, resource.BinarySI),
+						hugePageResourceName1Gi:     resource.MustParse("2Gi"),
+					},
+				},
+			},
+			expectedNode: &v1.Node{
+				Status: v1.NodeStatus{
+					Capacity: v1.ResourceList{
+						v1.ResourceCPU:              *resource.NewMilliQuantity(2000, resource.DecimalSI),
+						v1.ResourceMemory:           *resource.NewQuantity(10e9, resource.BinarySI),
+						v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
+						hugePageResourceName1Gi:     resource.MustParse("2Gi"),
+					},
+					Allocatable: v1.ResourceList{
+						v1.ResourceCPU:              *resource.NewMilliQuantity(2000, resource.DecimalSI),
+						v1.ResourceMemory:           *resource.NewQuantity(10e9, resource.BinarySI),
+						v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
+						hugePageResourceName1Gi:     resource.MustParse("2Gi"),
+					},
+				},
+			},
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(T *testing.T) {
+			defer testKubelet.Cleanup()
+			kubelet := testKubelet.kubelet
+
+			needsUpdate := kubelet.reconcileHugePageResource(tc.initialNode, tc.existingNode)
+			assert.Equal(t, tc.needsUpdate, needsUpdate, tc.name)
+			assert.Equal(t, tc.expectedNode, tc.existingNode, tc.name)
+		})
+	}
+}
 func TestReconcileExtendedResource(t *testing.T) {
 	testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */)
 	testKubelet.kubelet.kubeClient = nil // ensure only the heartbeat client is used
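
Each table case above spells out its ResourceList fixtures in full. Purely to illustrate what those fixtures encode, here is a hypothetical helper (makeNode is not part of the test file) that builds an equivalent node: the fixed CPU, memory, and ephemeral-storage values plus whatever huge page sizes a case needs, mirrored into both capacity and allocatable.

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

// makeNode builds a node whose capacity and allocatable carry the same fixed
// base resources as the test fixtures, plus the given huge page quantities.
func makeNode(hugePages map[v1.ResourceName]resource.Quantity) *v1.Node {
	rl := v1.ResourceList{
		v1.ResourceCPU:              *resource.NewMilliQuantity(2000, resource.DecimalSI),
		v1.ResourceMemory:           *resource.NewQuantity(10e9, resource.BinarySI),
		v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
	}
	for name, q := range hugePages {
		rl[name] = q
	}
	return &v1.Node{
		Status: v1.NodeStatus{
			Capacity:    rl,
			Allocatable: rl.DeepCopy(),
		},
	}
}

func main() {
	node := makeNode(map[v1.ResourceName]resource.Quantity{
		"hugepages-2Mi": resource.MustParse("100Mi"),
	})
	q := node.Status.Capacity["hugepages-2Mi"]
	fmt.Println(q.String()) // 100Mi
}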

test/e2e_node/hugepages_test.go

Lines changed: 46 additions & 0 deletions
@@ -28,6 +28,7 @@ import (
 	v1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/apimachinery/pkg/util/uuid"
 	"k8s.io/kubernetes/pkg/kubelet/cm"
 	"k8s.io/kubernetes/test/e2e/framework"
@@ -183,12 +184,57 @@ func runHugePagesTests(f *framework.Framework) {
 		err := e2epod.WaitForPodSuccessInNamespace(f.ClientSet, verifyPod.Name, f.Namespace.Name)
 		framework.ExpectNoError(err)
 	})
+
+	ginkgo.It("should add resources for new huge page sizes on kubelet restart", func() {
+		ginkgo.By("Stopping kubelet")
+		startKubelet := stopKubelet()
+		ginkgo.By(`Patching away support for hugepage resource "hugepages-2Mi"`)
+		patch := []byte(`[{"op": "remove", "path": "/status/capacity/hugepages-2Mi"}, {"op": "remove", "path": "/status/allocatable/hugepages-2Mi"}]`)
+		result := f.ClientSet.CoreV1().RESTClient().Patch(types.JSONPatchType).Resource("nodes").Name(framework.TestContext.NodeName).SubResource("status").Body(patch).Do(context.TODO())
+		framework.ExpectNoError(result.Error(), "while patching")
+
+		ginkgo.By("Starting kubelet again")
+		startKubelet()
+
+		ginkgo.By("verifying that the hugepages-2Mi resource is present")
+		gomega.Eventually(func() bool {
+			node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), framework.TestContext.NodeName, metav1.GetOptions{})
+			framework.ExpectNoError(err, "while getting node status")
+			_, isPresent := node.Status.Capacity["hugepages-2Mi"]
+			return isPresent
+		}, 30*time.Second, framework.Poll).Should(gomega.Equal(true))
+	})
 }
 
 // Serial because the test updates kubelet configuration.
 var _ = SIGDescribe("HugePages [Serial] [Feature:HugePages][NodeFeature:HugePages]", func() {
 	f := framework.NewDefaultFramework("hugepages-test")
 
+	ginkgo.It("should remove resources for huge page sizes no longer supported", func() {
+		ginkgo.By("mimicking support for 9Mi of 3Mi huge page memory by patching the node status")
+		patch := []byte(`[{"op": "add", "path": "/status/capacity/hugepages-3Mi", "value": "9Mi"}, {"op": "add", "path": "/status/allocatable/hugepages-3Mi", "value": "9Mi"}]`)
+		result := f.ClientSet.CoreV1().RESTClient().Patch(types.JSONPatchType).Resource("nodes").Name(framework.TestContext.NodeName).SubResource("status").Body(patch).Do(context.TODO())
+		framework.ExpectNoError(result.Error(), "while patching")
+
+		node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), framework.TestContext.NodeName, metav1.GetOptions{})
+		framework.ExpectNoError(err, "while getting node status")
+
+		ginkgo.By("Verifying that the node now supports huge pages with size 3Mi")
+		value, ok := node.Status.Capacity["hugepages-3Mi"]
+		framework.ExpectEqual(ok, true, "capacity should contain resource hugepages-3Mi")
+		framework.ExpectEqual(value.String(), "9Mi", "huge pages with size 3Mi should be supported")
+
+		ginkgo.By("restarting the kubelet and verifying that huge pages with size 3Mi are no longer supported")
+		restartKubelet()
+
+		ginkgo.By("verifying that the hugepages-3Mi resource is no longer present")
+		gomega.Eventually(func() bool {
+			node, err = f.ClientSet.CoreV1().Nodes().Get(context.TODO(), framework.TestContext.NodeName, metav1.GetOptions{})
+			framework.ExpectNoError(err, "while getting node status")
+			_, isPresent := node.Status.Capacity["hugepages-3Mi"]
+			return isPresent
+		}, 30*time.Second, framework.Poll).Should(gomega.Equal(false))
+	})
 	ginkgo.Context("With config updated with hugepages feature enabled", func() {
 		ginkgo.BeforeEach(func() {
 			ginkgo.By("verifying hugepages are supported")
