Skip to content

Commit 5993cc3

Browse files
authored
Merge pull request #1797 from Sneha-at/data-cache
Restrict data caching setup to labelled node pool
2 parents 36554c2 + b2843f6 commit 5993cc3

File tree

3 files changed

+51
-5
lines changed

3 files changed

+51
-5
lines changed

cmd/gce-pd-csi-driver/main.go

Lines changed: 41 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,11 @@ import (
2727
"strings"
2828
"time"
2929

30+
"k8s.io/client-go/kubernetes"
31+
"k8s.io/client-go/rest"
3032
"k8s.io/klog/v2"
3133

34+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3235
"sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/pkg/common"
3336
"sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/pkg/deviceutils"
3437
gce "sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/pkg/gce-cloud-provider/compute"
@@ -73,6 +76,7 @@ var (
7376
enableStoragePoolsFlag = flag.Bool("enable-storage-pools", false, "If set to true, the CSI Driver will allow volumes to be provisioned in Storage Pools")
7477
enableControllerDataCacheFlag = flag.Bool("enable-controller-data-cache", false, "If set to true, the CSI Driver will allow volumes to be provisioned with data cache configuration")
7578
enableNodeDataCacheFlag = flag.Bool("enable-node-data-cache", false, "If set to true, the CSI Driver will allow volumes to be provisioned with data cache configuration")
79+
nodeName = flag.String("node-name", "", "The node this driver is running on")
7680

7781
multiZoneVolumeHandleDiskTypesFlag = flag.String("multi-zone-volume-handle-disk-types", "", "Comma separated list of allowed disk types that can use the multi-zone volumeHandle. Used only if --multi-zone-volume-handle-enable")
7882
multiZoneVolumeHandleEnableFlag = flag.Bool("multi-zone-volume-handle-enable", false, "If set to true, the multi-zone volumeHandle feature will be enabled")
@@ -91,7 +95,9 @@ var (
9195
)
9296

9397
const (
94-
driverName = "pd.csi.storage.gke.io"
98+
driverName = "pd.csi.storage.gke.io"
99+
dataCacheLabel = "datacache-storage-gke-io"
100+
dataCacheLabelValue = "enabled"
95101
)
96102

97103
func init() {
@@ -235,12 +241,14 @@ func handle() {
235241
}
236242

237243
if *enableNodeDataCacheFlag {
238-
klog.V(2).Info("Raiding local ssds to setup data cache")
239-
err := driver.RaidLocalSsds()
240-
if err != nil {
241-
klog.Fatalf("Failed to Raid local SSDs, unable to setup data caching, got error %v", err)
244+
if nodeName == nil || *nodeName == "" {
245+
klog.Fatalf("Data cache enabled, but --node-name not passed")
246+
}
247+
if err := setupDataCache(ctx, *nodeName); err != nil {
248+
klog.Fatalf("DataCache setup failed: %v", err)
242249
}
243250
}
251+
244252
err = gceDriver.SetupGCEDriver(driverName, version, extraVolumeLabels, extraTags, identityServer, controllerServer, nodeServer)
245253
if err != nil {
246254
klog.Fatalf("Failed to initialize GCE CSI Driver: %v", err.Error())
@@ -289,3 +297,31 @@ func urlFlag(target **url.URL, name string, usage string) {
289297
return err
290298
})
291299
}
300+
301+
func setupDataCache(ctx context.Context, nodeName string) error {
302+
cfg, err := rest.InClusterConfig()
303+
if err != nil {
304+
return err
305+
}
306+
kubeClient, err := kubernetes.NewForConfig(cfg)
307+
if err != nil {
308+
return err
309+
}
310+
node, err := kubeClient.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{})
311+
if err != nil {
312+
// We could retry, but this error will also crashloop the driver which may be as good a way to retry as any.
313+
return err
314+
}
315+
if val, found := node.GetLabels()[dataCacheLabel]; !found || val != dataCacheLabelValue {
316+
klog.V(2).Infof("Datacache not enabled for node %s; node label %s=%s and not %s", nodeName, dataCacheLabel, val, dataCacheLabelValue)
317+
return nil
318+
}
319+
// Setup data cache only if enabled fro nodes
320+
klog.V(2).Info("Raiding local ssds to setup data cache")
321+
if err := driver.RaidLocalSsds(); err != nil {
322+
return fmt.Errorf("Failed to Raid local SSDs, unable to setup data caching, got error %v", err)
323+
}
324+
325+
klog.V(2).Infof("Datacache enabled for node %s", nodeName)
326+
return nil
327+
}

deploy/kubernetes/base/controller/cluster_setup.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,9 @@ rules:
199199
verbs: ['use']
200200
resourceNames:
201201
- csi-gce-pd-node-psp
202+
- apiGroups: [""]
203+
resources: ["nodes"]
204+
verbs: ["get", "list"]
202205
---
203206

204207
kind: ClusterRole

deploy/kubernetes/base/node_linux/node.yaml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,15 @@ spec:
4646
- "--v=5"
4747
- "--endpoint=unix:/csi/csi.sock"
4848
- "--run-controller-service=false"
49+
- "--enable-node-data-cache"
50+
- "--node-name=$(KUBE_NODE_NAME)"
4951
securityContext:
5052
privileged: true
53+
env:
54+
- name: KUBE_NODE_NAME
55+
valueFrom:
56+
fieldRef:
57+
fieldPath: spec.nodeName
5158
volumeMounts:
5259
- name: kubelet-dir
5360
mountPath: /var/lib/kubelet

0 commit comments

Comments
 (0)