Skip to content

Commit cb563a1

Browse files
authored
Merge pull request #1711 from Sneha-at/data-cache
Add data caching feature with Local SSDs for PD
2 parents 586f6c8 + a27526b commit cb563a1

File tree

27 files changed

+1220
-114
lines changed

27 files changed

+1220
-114
lines changed

Dockerfile

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ RUN GOARCH=$(echo $TARGETPLATFORM | cut -f2 -d '/') GCE_PD_CSI_STAGING_VERSION=$
2525
FROM gke.gcr.io/debian-base:bullseye-v1.4.3-gke.5 as debian
2626
# Install necessary dependencies
2727
# google_nvme_id script depends on the following packages: nvme-cli, xxd, bash
28-
RUN clean-install util-linux e2fsprogs mount ca-certificates udev xfsprogs nvme-cli xxd bash
28+
RUN clean-install util-linux e2fsprogs mount ca-certificates udev xfsprogs nvme-cli xxd bash kmod lvm2 mdadm
2929

3030
# Since we're leveraging apt to pull in dependencies, we use `gcr.io/distroless/base` because it includes glibc.
3131
FROM gcr.io/distroless/base-debian11 as distroless-base
@@ -54,6 +54,35 @@ COPY --from=debian /sbin/e2fsck /sbin/e2fsck
5454
COPY --from=debian /sbin/fsck /sbin/fsck
5555
COPY --from=debian /sbin/fsck* /sbin/
5656
COPY --from=debian /sbin/fsck.xfs /sbin/fsck.xfs
57+
# Add dependencies for LVM
58+
COPY --from=debian /etc/lvm /etc/lvm
59+
COPY --from=debian /etc/lvm* /etc/
60+
COPY --from=debian /lib/systemd/system/blk-availability.service /lib/systemd/system/blk-availability.service
61+
COPY --from=debian /lib/systemd/system/lvm2-lvmpolld.service /lib/systemd/system/lvm2-lvmpolld.service
62+
COPY --from=debian /lib/systemd/system/lvm2-lvmpolld.socket /lib/systemd/system/lvm2-lvmpolld.socket
63+
COPY --from=debian /lib/systemd/system/lvm2-monitor.service /lib/systemd/system/lvm2-monitor.service
64+
COPY --from=debian /lib/udev/rules.d/56-lvm.rules /lib/udev/rules.d/56-lvm.rules
65+
COPY --from=debian /sbin/fsadm /sbin/fsadm
66+
COPY --from=debian /sbin/lvm /sbin/lvm
67+
COPY --from=debian /sbin/lvmdump /sbin/lvmdump
68+
COPY --from=debian /sbin/lvmpolld /sbin/lvmpolld
69+
COPY --from=debian /usr/lib/tmpfiles.d /usr/lib/tmpfiles.d
70+
COPY --from=debian /usr/lib/tmpfiles.d/lvm2.conf /usr/lib/tmpfiles.d/lvm2.conf
71+
COPY --from=debian /sbin/lv* /sbin/
72+
COPY --from=debian /sbin/pv* /sbin/
73+
COPY --from=debian /sbin/vg* /sbin/
74+
COPY --from=debian /sbin/modprobe /sbin/modprobe
75+
COPY --from=debian /lib/udev /lib/udev
76+
COPY --from=debian /lib/udev/rules.d /lib/udev/rules.d
77+
COPY --from=debian /lib/udev/rules.d/55-dm.rules /lib/udev/rules.d/55-dm.rules
78+
COPY --from=debian /lib/udev/rules.d/60-persistent-storage-dm.rules /lib/udev/rules.d/60-persistent-storage-dm.rules
79+
COPY --from=debian /lib/udev/rules.d/95-dm-notify.rules /lib/udev/rules.d/95-dm-notify.rules
80+
COPY --from=debian /sbin/blkdeactivate /sbin/blkdeactivate
81+
COPY --from=debian /sbin/dmsetup /sbin/dmsetup
82+
COPY --from=debian /sbin/dmstats /sbin/dmstats
83+
COPY --from=debian /bin/ls /bin/ls
84+
# End of dependencies for LVM
85+
COPY --from=debian /sbin/mdadm /sbin/mdadm
5786
COPY --from=debian /sbin/mke2fs /sbin/mke2fs
5887
COPY --from=debian /sbin/mkfs* /sbin/
5988
COPY --from=debian /sbin/resize2fs /sbin/resize2fs
@@ -76,12 +105,18 @@ COPY --from=debian /lib/${LIB_DIR_PREFIX}-linux-gnu/libpcre.so.3 \
76105
/lib/${LIB_DIR_PREFIX}-linux-gnu/libselinux.so.1 \
77106
/lib/${LIB_DIR_PREFIX}-linux-gnu/libtinfo.so.6 \
78107
/lib/${LIB_DIR_PREFIX}-linux-gnu/libe2p.so.2 \
108+
# The following does not exist in either lib or usr/lib
109+
# /lib/${LIB_DIR_PREFIX}-linux-gnu/libcap.so.2 \
79110
/lib/${LIB_DIR_PREFIX}-linux-gnu/libcom_err.so.2 \
80111
/lib/${LIB_DIR_PREFIX}-linux-gnu/libdevmapper.so.1.02.1 \
112+
/lib/${LIB_DIR_PREFIX}-linux-gnu/libm.so.6 \
113+
/lib/${LIB_DIR_PREFIX}-linux-gnu/libc.so.6 \
114+
/lib/${LIB_DIR_PREFIX}-linux-gnu/libdevmapper-event.so.1.02.1 \
81115
/lib/${LIB_DIR_PREFIX}-linux-gnu/libext2fs.so.2 \
82116
/lib/${LIB_DIR_PREFIX}-linux-gnu/libgcc_s.so.1 \
83117
/lib/${LIB_DIR_PREFIX}-linux-gnu/liblzma.so.5 \
84118
/lib/${LIB_DIR_PREFIX}-linux-gnu/libreadline.so.8 \
119+
/lib/${LIB_DIR_PREFIX}-linux-gnu/libgpg-error.so.0 \
85120
/lib/${LIB_DIR_PREFIX}-linux-gnu/libz.so.1 /lib/${LIB_DIR_PREFIX}-linux-gnu/
86121

87122
COPY --from=debian /usr/lib/${LIB_DIR_PREFIX}-linux-gnu/libblkid.so.1 \
@@ -90,6 +125,11 @@ COPY --from=debian /usr/lib/${LIB_DIR_PREFIX}-linux-gnu/libblkid.so.1 \
90125
/usr/lib/${LIB_DIR_PREFIX}-linux-gnu/libmount.so.1 \
91126
/usr/lib/${LIB_DIR_PREFIX}-linux-gnu/libudev.so.1 \
92127
/usr/lib/${LIB_DIR_PREFIX}-linux-gnu/libuuid.so.1 \
128+
/usr/lib/${LIB_DIR_PREFIX}-linux-gnu/libzstd.so.1 \
129+
/usr/lib/${LIB_DIR_PREFIX}-linux-gnu/libaio.so.1 \
130+
/usr/lib/${LIB_DIR_PREFIX}-linux-gnu/libgcrypt.so.20 \
131+
/usr/lib/${LIB_DIR_PREFIX}-linux-gnu/libsystemd.so.0 \
132+
/usr/lib/${LIB_DIR_PREFIX}-linux-gnu/liblz4.so.1 \
93133
/usr/lib/${LIB_DIR_PREFIX}-linux-gnu/libacl.so.1 \
94134
/usr/lib/${LIB_DIR_PREFIX}-linux-gnu/libattr.so.1 \
95135
/usr/lib/${LIB_DIR_PREFIX}-linux-gnu/libedit.so.2 \
@@ -104,6 +144,11 @@ COPY --from=debian /usr/lib/${LIB_DIR_PREFIX}-linux-gnu/libblkid.so.1 \
104144
# Copy NVME support required script and rules into distroless base.
105145
COPY deploy/kubernetes/udev/google_nvme_id /lib/udev_containerized/google_nvme_id
106146

147+
SHELL ["/bin/bash", "-c"]
148+
RUN /bin/sed -i -e "s/.*allow_mixed_block_sizes = 0.*/ allow_mixed_block_sizes = 1/" /etc/lvm/lvm.conf
149+
RUN /bin/sed -i -e "s/.*udev_sync = 1.*/ udev_sync = 0/" /etc/lvm/lvm.conf
150+
RUN /bin/sed -i -e "s/.*udev_rules = 1.*/ udev_rules = 0/" /etc/lvm/lvm.conf
151+
107152
# Build stage used for validation of the output-image
108153
# See validate-container-linux-* targets in Makefile
109154
FROM output-image as validation-image

cmd/gce-pd-csi-driver/main.go

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ var (
7171
formatAndMountTimeout = flag.Duration("format-and-mount-timeout", 1*time.Minute, "The maximum duration of a format and mount operation before another such operation will be started. Used only if --serialize-format-and-mount")
7272
fallbackRequisiteZonesFlag = flag.String("fallback-requisite-zones", "", "Comma separated list of requisite zones that will be used if there are not sufficient zones present in requisite topologies when provisioning a disk")
7373
enableStoragePoolsFlag = flag.Bool("enable-storage-pools", false, "If set to true, the CSI Driver will allow volumes to be provisioned in Storage Pools")
74+
enableDataCacheFlag = flag.Bool("enable-data-cache", false, "If set to true, the CSI Driver will allow volumes to be provisioned with data cache configuration")
7475

7576
multiZoneVolumeHandleDiskTypesFlag = flag.String("multi-zone-volume-handle-disk-types", "", "Comma separated list of allowed disk types that can use the multi-zone volumeHandle. Used only if --multi-zone-volume-handle-enable")
7677
multiZoneVolumeHandleEnableFlag = flag.Bool("multi-zone-volume-handle-enable", false, "If set to true, the multi-zone volumeHandle feature will be enabled")
@@ -208,7 +209,7 @@ func handle() {
208209
}
209210
initialBackoffDuration := time.Duration(*errorBackoffInitialDurationMs) * time.Millisecond
210211
maxBackoffDuration := time.Duration(*errorBackoffMaxDurationMs) * time.Millisecond
211-
controllerServer = driver.NewControllerServer(gceDriver, cloudProvider, initialBackoffDuration, maxBackoffDuration, fallbackRequisiteZones, *enableStoragePoolsFlag, multiZoneVolumeHandleConfig, listVolumesConfig)
212+
controllerServer = driver.NewControllerServer(gceDriver, cloudProvider, initialBackoffDuration, maxBackoffDuration, fallbackRequisiteZones, *enableStoragePoolsFlag, *enableDataCacheFlag, multiZoneVolumeHandleConfig, listVolumesConfig)
212213
} else if *cloudConfigFilePath != "" {
213214
klog.Warningf("controller service is disabled but cloud config given - it has no effect")
214215
}
@@ -226,12 +227,19 @@ func handle() {
226227
if err != nil {
227228
klog.Fatalf("Failed to set up metadata service: %v", err.Error())
228229
}
229-
nodeServer = driver.NewNodeServer(gceDriver, mounter, deviceUtils, meta, statter)
230+
nodeServer = driver.NewNodeServer(gceDriver, mounter, deviceUtils, meta, statter, *enableDataCacheFlag)
230231
if *maxConcurrentFormatAndMount > 0 {
231232
nodeServer = nodeServer.WithSerializedFormatAndMount(*formatAndMountTimeout, *maxConcurrentFormatAndMount)
232233
}
233234
}
234235

236+
if *enableDataCacheFlag {
237+
klog.V(2).Info("Raiding local ssds to setup data cache")
238+
err := driver.RaidLocalSsds()
239+
if err != nil {
240+
klog.Fatalf("Failed to Raid local SSDs, unable to setup data caching, got error %v", err)
241+
}
242+
}
235243
err = gceDriver.SetupGCEDriver(driverName, version, extraVolumeLabels, extraTags, identityServer, controllerServer, nodeServer)
236244
if err != nil {
237245
klog.Fatalf("Failed to initialize GCE CSI Driver: %v", err.Error())

deploy/kubernetes/base/node_linux/node.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,8 @@ spec:
6666
mountPath: /run/udev
6767
- name: sys
6868
mountPath: /sys
69+
- name: lib-modules
70+
mountPath: /lib/modules
6971
volumes:
7072
- name: registration-dir
7173
hostPath:
@@ -101,6 +103,10 @@ spec:
101103
hostPath:
102104
path: /sys
103105
type: Directory
106+
- name: lib-modules
107+
hostPath:
108+
path: /lib/modules
109+
type: Directory
104110
# https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/
105111
# See "special case". This will tolerate everything. Node component should
106112
# be scheduled on all nodes.

pkg/common/constants.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,4 +32,14 @@ const (
3232

3333
// Label that is set on a disk when it is used by a 'multi-zone' VolumeHandle
3434
MultiZoneLabel = "goog-gke-multi-zone"
35+
36+
// Data cache mode
37+
DataCacheModeWriteBack = "writeback"
38+
DataCacheModeWriteThrough = "writethrough"
39+
40+
ContextDataCacheSize = "data-cache-size"
41+
ContextDataCacheMode = "data-cache-mode"
42+
43+
// Keys in the publish context
44+
ContexLocalSsdCacheSize = "local-ssd-cache-size"
3545
)

pkg/common/parameters.go

Lines changed: 56 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,10 @@ package common
1818

1919
import (
2020
"fmt"
21+
"strconv"
2122
"strings"
23+
24+
"k8s.io/klog/v2"
2225
)
2326

2427
const (
@@ -32,7 +35,11 @@ const (
3235
ParameterAvailabilityClass = "availability-class"
3336
ParameterKeyEnableConfidentialCompute = "enable-confidential-storage"
3437
ParameterKeyStoragePools = "storage-pools"
35-
ParameterKeyResourceTags = "resource-tags"
38+
39+
// Parameters for Data Cache
40+
ParameterKeyDataCacheSize = "data-cache-size"
41+
ParameterKeyDataCacheMode = "data-cache-mode"
42+
ParameterKeyResourceTags = "resource-tags"
3643

3744
// Parameters for VolumeSnapshotClass
3845
ParameterKeyStorageLocations = "storage-locations"
@@ -68,6 +75,15 @@ const (
6875
tagKeyCreatedForSnapshotContentName = "kubernetes.io/created-for/volumesnapshotcontent/name"
6976
)
7077

78+
type DataCacheParameters struct {
79+
// Values: base-10 string form of the int64 size parsed from the data-cache-size parameter
80+
// Default: ""
81+
DataCacheSize string
82+
// Values: writethrough, writeback
83+
// Default: writethrough
84+
DataCacheMode string
85+
}
86+
7187
// DiskParameters contains normalized and defaulted disk parameters
7288
type DiskParameters struct {
7389
// Values: pd-standard, pd-balanced, pd-ssd, or any other PD disk type. Not validated.
@@ -125,7 +141,7 @@ type StoragePool struct {
125141
// put them into a well defined struct making sure to default unspecified fields.
126142
// extraVolumeLabels are added as labels; if there are also labels specified in
127143
// parameters, any matching extraVolumeLabels will be overridden.
128-
func ExtractAndDefaultParameters(parameters map[string]string, driverName string, extraVolumeLabels map[string]string, enableStoragePools bool, extraTags map[string]string) (DiskParameters, error) {
144+
func ExtractAndDefaultParameters(parameters map[string]string, driverName string, extraVolumeLabels map[string]string, enableStoragePools bool, enableDataCache bool, extraTags map[string]string) (DiskParameters, DataCacheParameters, error) {
129145
p := DiskParameters{
130146
DiskType: "pd-standard", // Default
131147
ReplicationType: replicationTypeNone, // Default
@@ -135,6 +151,12 @@ func ExtractAndDefaultParameters(parameters map[string]string, driverName string
135151
ResourceTags: make(map[string]string), // Default
136152
}
137153

154+
// Set data cache mode default
155+
d := DataCacheParameters{}
156+
if enableDataCache && parameters[ParameterKeyDataCacheSize] != "" {
157+
d.DataCacheMode = DataCacheModeWriteThrough
158+
}
159+
138160
for k, v := range extraVolumeLabels {
139161
p.Labels[k] = v
140162
}
@@ -169,7 +191,7 @@ func ExtractAndDefaultParameters(parameters map[string]string, driverName string
169191
case ParameterKeyLabels:
170192
paramLabels, err := ConvertLabelsStringToMap(v)
171193
if err != nil {
172-
return p, fmt.Errorf("parameters contain invalid labels parameter: %w", err)
194+
return p, d, fmt.Errorf("parameters contain invalid labels parameter: %w", err)
173195
}
174196
// Override any existing labels with those from this parameter.
175197
for labelKey, labelValue := range paramLabels {
@@ -178,58 +200,79 @@ func ExtractAndDefaultParameters(parameters map[string]string, driverName string
178200
case ParameterKeyProvisionedIOPSOnCreate:
179201
paramProvisionedIOPSOnCreate, err := ConvertStringToInt64(v)
180202
if err != nil {
181-
return p, fmt.Errorf("parameters contain invalid provisionedIOPSOnCreate parameter: %w", err)
203+
return p, d, fmt.Errorf("parameters contain invalid provisionedIOPSOnCreate parameter: %w", err)
182204
}
183205
p.ProvisionedIOPSOnCreate = paramProvisionedIOPSOnCreate
184206
case ParameterKeyProvisionedThroughputOnCreate:
185207
paramProvisionedThroughputOnCreate, err := ConvertMiStringToInt64(v)
186208
if err != nil {
187-
return p, fmt.Errorf("parameters contain invalid provisionedThroughputOnCreate parameter: %w", err)
209+
return p, d, fmt.Errorf("parameters contain invalid provisionedThroughputOnCreate parameter: %w", err)
188210
}
189211
p.ProvisionedThroughputOnCreate = paramProvisionedThroughputOnCreate
190212
case ParameterAvailabilityClass:
191213
paramAvailabilityClass, err := ConvertStringToAvailabilityClass(v)
192214
if err != nil {
193-
return p, fmt.Errorf("parameters contain invalid availability class parameter: %w", err)
215+
return p, d, fmt.Errorf("parameters contain invalid availability class parameter: %w", err)
194216
}
195217
if paramAvailabilityClass == ParameterRegionalHardFailoverClass {
196218
p.ForceAttach = true
197219
}
198220
case ParameterKeyEnableConfidentialCompute:
199221
paramEnableConfidentialCompute, err := ConvertStringToBool(v)
200222
if err != nil {
201-
return p, fmt.Errorf("parameters contain invalid value for enable-confidential-storage parameter: %w", err)
223+
return p, d, fmt.Errorf("parameters contain invalid value for enable-confidential-storage parameter: %w", err)
202224
}
203225

204226
if paramEnableConfidentialCompute {
205227
// DiskEncryptionKmsKey is needed to enable confidentialStorage
206228
if val, ok := parameters[ParameterKeyDiskEncryptionKmsKey]; !ok || !isValidDiskEncryptionKmsKey(val) {
207-
return p, fmt.Errorf("Valid %v is required to enable ConfidentialStorage", ParameterKeyDiskEncryptionKmsKey)
229+
return p, d, fmt.Errorf("Valid %v is required to enable ConfidentialStorage", ParameterKeyDiskEncryptionKmsKey)
208230
}
209231
}
210232

211233
p.EnableConfidentialCompute = paramEnableConfidentialCompute
212234
case ParameterKeyStoragePools:
213235
if !enableStoragePools {
214-
return p, fmt.Errorf("parameters contains invalid option %q", ParameterKeyStoragePools)
236+
return p, d, fmt.Errorf("parameters contains invalid option %q", ParameterKeyStoragePools)
215237
}
216238
storagePools, err := ParseStoragePools(v)
217239
if err != nil {
218-
return p, fmt.Errorf("parameters contain invalid value for %s parameter: %w", ParameterKeyStoragePools, err)
240+
return p, d, fmt.Errorf("parameters contain invalid value for %s parameter: %w", ParameterKeyStoragePools, err)
219241
}
220242
p.StoragePools = storagePools
243+
case ParameterKeyDataCacheSize:
244+
if !enableDataCache {
245+
return p, d, fmt.Errorf("data caching enabled: %v; parameters contains invalid option %q", enableDataCache, ParameterKeyDataCacheSize)
246+
}
247+
// TODO: need to parse or validate the string
248+
249+
paramDataCacheSize, err := ConvertGiStringToInt64(v)
250+
if err != nil {
251+
return p, d, fmt.Errorf("parameters contain invalid dataCacheSize parameter: %w", err)
252+
}
253+
d.DataCacheSize = strconv.FormatInt(paramDataCacheSize, 10)
254+
klog.V(2).Infof("====== Data cache size is %v ======", v)
255+
case ParameterKeyDataCacheMode:
256+
if !enableDataCache {
257+
return p, d, fmt.Errorf("data caching enabled %v; parameters contains invalid option %q", enableDataCache, ParameterKeyDataCacheMode)
258+
}
259+
if err := ValidateDataCacheMode(v); err != nil {
260+
return p, d, fmt.Errorf("parameters contains invalid option: %w", err)
261+
}
262+
d.DataCacheMode = v
263+
klog.V(2).Infof("====== Data cache mode is %v ======", v)
221264
case ParameterKeyResourceTags:
222265
if err := extractResourceTagsParameter(v, p.ResourceTags); err != nil {
223-
return p, err
266+
return p, d, err
224267
}
225268
default:
226-
return p, fmt.Errorf("parameters contains invalid option %q", k)
269+
return p, d, fmt.Errorf("parameters contains invalid option %q", k)
227270
}
228271
}
229272
if len(p.Tags) > 0 {
230273
p.Tags[tagKeyCreatedBy] = driverName
231274
}
232-
return p, nil
275+
return p, d, nil
233276
}
234277

235278
func ExtractAndDefaultSnapshotParameters(parameters map[string]string, driverName string, extraTags map[string]string) (SnapshotParameters, error) {

0 commit comments

Comments
 (0)