
Commit 8b9242f

Address PR comments and enable data cache flag
1 parent 99e80bb commit 8b9242f

6 files changed (+91, -65 lines)

cmd/gce-pd-csi-driver/main.go

Lines changed: 6 additions & 4 deletions
@@ -71,8 +71,7 @@ var (
     formatAndMountTimeout = flag.Duration("format-and-mount-timeout", 1*time.Minute, "The maximum duration of a format and mount operation before another such operation will be started. Used only if --serialize-format-and-mount")
     fallbackRequisiteZonesFlag = flag.String("fallback-requisite-zones", "", "Comma separated list of requisite zones that will be used if there are not sufficient zones present in requisite topologies when provisioning a disk")
     enableStoragePoolsFlag = flag.Bool("enable-storage-pools", false, "If set to true, the CSI Driver will allow volumes to be provisioned in Storage Pools")
-    // TODO: set enableDataCacheFlag default to false after testing
-    enableDataCacheFlag = flag.Bool("enable-data-cache", true, "If set to true, the CSI Driver will allow volumes to be provisioned with data cache configuration")
+    enableDataCacheFlag = flag.Bool("enable-data-cache", false, "If set to true, the CSI Driver will allow volumes to be provisioned with data cache configuration")
 
     multiZoneVolumeHandleDiskTypesFlag = flag.String("multi-zone-volume-handle-disk-types", "", "Comma separated list of allowed disk types that can use the multi-zone volumeHandle. Used only if --multi-zone-volume-handle-enable")
     multiZoneVolumeHandleEnableFlag = flag.Bool("multi-zone-volume-handle-enable", false, "If set to true, the multi-zone volumeHandle feature will be enabled")
@@ -235,8 +234,11 @@ func handle() {
     }
 
     if *enableDataCacheFlag {
-        klog.V(2).Info("Raiding local ssd")
-        driver.RaidLocalSsds()
+        klog.V(2).Info("Raiding local ssds to setup data cache")
+        err := driver.RaidLocalSsds()
+        if err != nil {
+            klog.Fatalf("Failed to Raid local SSDs, unable to setup data caching, got error %v", err)
+        }
     }
     err = gceDriver.SetupGCEDriver(driverName, version, extraVolumeLabels, extraTags, identityServer, controllerServer, nodeServer)
     if err != nil {
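
The behavioral change above is that data caching is now opt-in: RaidLocalSsds only runs when the operator passes --enable-data-cache, and a RAID failure aborts startup instead of being silently ignored. A minimal, self-contained sketch of that gating pattern (setupDataCache and the logging are illustrative stand-ins, not the driver's actual wiring):

package main

import (
    "flag"
    "log"
)

// enableDataCache mirrors the flag above: it defaults to off, so the feature
// only activates when explicitly requested.
var enableDataCache = flag.Bool("enable-data-cache", false,
    "If set to true, allow volumes to be provisioned with data cache configuration")

// setupDataCache is a stand-in for driver.RaidLocalSsds().
func setupDataCache() error { return nil }

func main() {
    flag.Parse()
    if *enableDataCache {
        // Failure is fatal: a half-configured cache is worse than no cache.
        if err := setupDataCache(); err != nil {
            log.Fatalf("failed to set up data caching: %v", err)
        }
    }
    log.Println("driver startup continues")
}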

pkg/gce-pd-csi-driver/cache.go

Lines changed: 76 additions & 59 deletions
@@ -26,19 +26,19 @@ func setupCaching(devicePath string, req *csi.NodeStageVolumeRequest, nodeId str
     volumeGroupName := getVolumeGroupName(nodeId)
     mainDevicePath := "/dev/" + volumeGroupName + "/" + getLvName(mainLvSuffix, volumeId)
     mainLvName := getLvName(mainLvSuffix, volumeId)
-    klog.V(2).Infof("====== Start LVM PoC NodeStageVolume Steps ======")
-    klog.V(2).Infof("====== volumeGroupName is %v ======", volumeGroupName)
+    klog.V(2).Infof("============================== Start LVM PoC NodeStageVolume Steps ==============================")
+    klog.V(2).Infof("============================== volumeGroupName is %v ==============================", volumeGroupName)
 
-    klog.V(2).Infof("====== vgscan before vgcreate ======")
+    klog.V(2).Infof("============================== vgscan before vgcreate ==============================")
     args := []string{}
     info, err := common.RunCommand("vgscan", args...)
     if err != nil {
         klog.Errorf("vgscan error %v: %s", err, info)
     }
-    klog.V(2).Infof("====== vgscan info contains volumeGroupName or not %v ======", strings.Contains(string(info), volumeGroupName))
+    klog.V(2).Infof("============================== vgscan info contains volumeGroupName or not %v ==============================", strings.Contains(string(info), volumeGroupName))
     // Check if the required volume group already exists
     if strings.Contains(string(info), volumeGroupName) {
-        klog.V(2).Infof("============= VG exists, now check if PD is part of VG============")
+        klog.V(2).Infof("============================== VG exists, now check if PD is part of VG ==============================")
 
         // Clean up Volume Group before adding the PD
         reduceVolumeGroup(volumeGroupName, true)
@@ -60,48 +60,51 @@ func setupCaching(devicePath string, req *csi.NodeStageVolumeRequest, nodeId str
         if err != nil {
             klog.Errorf("errored while checking physical volume details %v: %s", err, info)
             // On error info contains the error message which we cannot use for further steps
+            info = nil
         }
 
-        klog.V(2).Infof("==========Got Volume group details from PV %s=======", info)
-
         infoString := strings.TrimSpace(strings.ReplaceAll(string(info), "\n", " "))
         infoString = strings.ReplaceAll(infoString, ".", "")
         infoString = strings.ReplaceAll(infoString, "\"", "")
         infoSlice := strings.Split(strings.TrimSpace(infoString), " ")
         vgNameForPv := strings.TrimSpace(infoSlice[(len(infoSlice) - 1)])
-        klog.V(2).Infof("============ Physical volume is part of Volume group: %v=======", vgNameForPv)
+        klog.V(2).Infof("============================== Physical volume is part of Volume group: %v ==============================", vgNameForPv)
         if vgNameForPv == volumeGroupName {
-            klog.V(2).Infof("====Physical Volume(PV) already exists in the Volume Group=====")
+            klog.V(2).Infof("============================== Physical Volume(PV) already exists in the Volume Group ==============================")
         } else if vgNameForPv != "VG" && vgNameForPv != "" {
 
-            klog.V(2).Infof("=========Deactivate VG %s========", vgNameForPv)
+            klog.V(2).Infof("============================== Deactivate VG %s ==============================", vgNameForPv)
             info, err = common.RunCommand("vgchange", []string{"-an", vgNameForPv}...)
             if err != nil {
                 klog.Errorf("Errored while deactivating VG %v: err: %v: %s", vgNameForPv, err, info)
             }
 
             reduceVolumeGroup(vgNameForPv, false)
-            // Uncache LV
-            args = []string{
-                "--uncache",
-                vgNameForPv + "/" + mainLvName,
-                "--force",
-            }
-            info, err = common.RunCommand("lvconvert", args...)
-            if err != nil {
-                klog.Errorf("errored while uncaching main LV %v: %s", err, info)
-                // On error info contains the error message which we cannot use for further steps
-            }
+            _, isCached := isCachingSetup(raidedLocalSsdPath, mainLvName)
+            // We will continue to uncache even if it errors to check caching as it is not a terminal issue.
 
-            reduceVolumeGroup(vgNameForPv, false)
-            klog.V(2).Infof("==========Merge VG %v to Node VG %v==========", vgNameForPv, volumeGroupName)
+            if !isCached {
+                klog.Infof("============================== Uncaching the LV %v==============================", mainLvName)
+                // Uncache LV
+                args = []string{
+                    "--uncache",
+                    vgNameForPv + "/" + mainLvName,
+                }
+                info, err = common.RunCommand("lvconvert", args...)
+                if err != nil {
+                    klog.Errorf("errored while uncaching main LV. %v: %s", err, info)
+                }
+
+                reduceVolumeGroup(vgNameForPv, false)
+            }
+            klog.V(2).Infof("============================== Merge VG %v to Node VG %v ==============================", vgNameForPv, volumeGroupName)
             info, err = common.RunCommand("vgmerge", []string{volumeGroupName, vgNameForPv}...)
             if err != nil {
                 klog.Errorf("Errored while merging Volume group %s into %s %v: %s", vgNameForPv, volumeGroupName, err, info)
             }
 
         } else {
-            klog.V(2).Infof("==========Extend Node VG %v for PV %v==========", volumeGroupName, devicePath)
+            klog.V(2).Infof("============================== Extend Node VG %v for PV %v ==============================", volumeGroupName, devicePath)
             info, err := common.RunCommand("vgextend", []string{volumeGroupName, devicePath}...)
             if err != nil {
                 klog.Errorf("Errored while extending VGs %v: %s", err, info)
@@ -119,10 +122,10 @@ func setupCaching(devicePath string, req *csi.NodeStageVolumeRequest, nodeId str
     if err != nil {
         return mainDevicePath, fmt.Errorf("lv list error %w: %s", err, info)
     }
-    klog.Infof("=============== Got LVs %s on Volume group %s ============", string(lvList), volumeGroupName)
+    klog.Infof("==============================Got LVs %s on Volume group %s ==============================", string(lvList), volumeGroupName)
     if !strings.Contains(string(lvList), mainLvName) {
         // lvcreate -n main -l 100%PVS cachegroup /dev/sdb
-        klog.V(2).Infof("====== lvcreate main cache layer ======")
+        klog.V(2).Infof("============================== lvcreate main cache layer ==============================")
         args = []string{
             "--yes",
             "-n",
@@ -138,44 +141,36 @@ func setupCaching(devicePath string, req *csi.NodeStageVolumeRequest, nodeId str
         }
 
     }
-    cachePoolName := raidedLocalSsdPath
-    // Verify caching is setup for PD
-    args = []string{
-        "--select",
-        "lv_name=" + mainLvName,
-        "-o",
-        "pool_lv",
-    }
-    poolName, err := common.RunCommand("lvs", args...)
+    err, isCached := isCachingSetup(raidedLocalSsdPath, mainLvName)
     if err != nil {
-        return mainDevicePath, fmt.Errorf("lvcreate error %w: %s", err, info)
+        klog.Errorf("failed to check if caching is setup for LV. Continuing to setup caching.")
     }
     cacheLvName := getLvName(cacheSuffix, volumeId)
-    if strings.Contains(string(poolName), "csi-fast") {
+    if isCached {
         // Validate that cache is setup for required size
-        klog.V(2).Infof("================Validate Cache is setup for correct size and mode===============")
+        klog.V(2).Infof("==============================Assuming valid data cache size and mode, resizing is not supported==============================")
     } else {
         fastCacheSize := req.GetPublishContext()[common.ContexLocalSsdCacheSize]
         chunkSize := "960" // Cannot use default chunk size(64KiB) as it errors on maxChunksAllowed. Unit - KiB
-        klog.V(2).Infof("====== fastCacheSize is %v ======", fastCacheSize)
-        klog.V(2).Infof("====== lvcreate fast cache layer again with the VolumeGroup %v======", volumeGroupName)
+        klog.V(2).Infof("============================== fastCacheSize is %v ==============================", fastCacheSize)
+        klog.V(2).Infof("============================== lvcreate fast cache layer again with the VolumeGroup %v==============================", volumeGroupName)
         args = []string{
             "--yes",
             "-n",
             cacheLvName,
             "-L",
             fastCacheSize,
             volumeGroupName,
-            cachePoolName,
+            raidedLocalSsdPath,
         }
         info, err = common.RunCommand("lvcreate", args...)
         if err != nil {
-            klog.V(2).Infof("====== lvcreate error %v: %s ======", err, info)
+            klog.V(2).Infof("============================== lvcreate error %v: %s ==============================", err, info)
             return mainDevicePath, fmt.Errorf("lvcreate error %w: %s", err, info)
         }
 
         // Once caching is setup, link the PD to cache
-        klog.V(2).Infof("====== lvconvert fast and main to cache ======")
+        klog.V(2).Infof("============================== lvconvert fast and main to cache ==============================")
         args = []string{
             "--type",
             "cache",
@@ -193,13 +188,13 @@ func setupCaching(devicePath string, req *csi.NodeStageVolumeRequest, nodeId str
         }
         info, err = common.RunCommand("lvconvert", args...)
         if err != nil {
-            klog.V(2).Infof("====== lvconvert error %v: %s ======", err, info)
+            klog.V(2).Infof("============================== lvconvert error %v: %s ==============================", err, info)
             return mainDevicePath, fmt.Errorf("lvconvert error %w: %s", err, info)
         }
     }
 
     // activate all the LVs in the Volume group
-    klog.V(2).Infof("====== Activate Volume group %s ======", volumeGroupName)
+    klog.V(2).Infof("============================== Activate Volume group %s ==============================", volumeGroupName)
     info, err = common.RunCommand("vgchange", []string{"-ay", volumeGroupName}...)
     if err != nil {
         klog.Errorf("Failed to activate VG %v %v:%s", volumeGroupName, err, info)
@@ -211,18 +206,19 @@ func setupCaching(devicePath string, req *csi.NodeStageVolumeRequest, nodeId str
 func cleanupCache(volumeId string, nodeId string) error {
 
     volumeGroupName := getVolumeGroupName(nodeId)
-    klog.V(2).Infof("=============Deactivating volume %s/%s=====", volumeGroupName, volumeId)
+    mainLvName := getLvName(mainLvSuffix, volumeId)
+    klog.V(2).Infof("============================== Deactivating volume %s/%s ==============================", volumeGroupName, mainLvName)
     args := []string{
         "-an",
-        "/dev/" + volumeGroupName + "/" + getLvName(mainLvSuffix, volumeId),
+        "/dev/" + volumeGroupName + "/" + mainLvName,
     }
     info, err := common.RunCommand("lvchange", args...)
     if err != nil {
         klog.Errorf("Errored while deactivating the disk %v: %s", err, info)
     }
     args = []string{
         "--uncache",
-        volumeGroupName + "/" + getLvName(mainLvSuffix, volumeId),
+        volumeGroupName + "/" + mainLvName,
     }
     info, err = common.RunCommand("lvconvert", args...)
     if err != nil {
@@ -244,8 +240,7 @@ func getLvName(suffix string, volumeId string) string {
 }
 
 func createVg(volumeGroupName string, devicePath string, raidedLocalSsds string) error {
-    // No existing volume group
-    klog.V(2).Infof("====== vgcreate ======")
+    klog.V(2).Infof("============================== vgcreate ==============================")
     args := []string{
         "--zero",
         "y",
@@ -258,19 +253,19 @@ func createVg(volumeGroupName string, devicePath string, raidedLocalSsds string)
         return fmt.Errorf("vgcreate error %w: %s", err, info)
     }
 
-    klog.V(2).Infof("====== vgscan after vgcreate ======")
+    klog.V(2).Infof("============================== vgscan after vgcreate ==============================")
     args = []string{}
     info, err = common.RunCommand("vgscan", args...)
     if err != nil {
         klog.Errorf("vgscan error %v: %s", err, info)
     } else {
-        klog.V(2).Infof("====== vgscan info %s ======", info)
+        klog.V(2).Infof("============================== vgscan info %s ==============================", info)
     }
     return nil
 }
 
 func reduceVolumeGroup(volumeGroupName string, force bool) {
-    klog.V(2).Infof("=========Cleanup VG========")
+    klog.V(2).Infof("============================== Cleanup VG %s ==============================", volumeGroupName)
     args := []string{
         "--removemissing",
         volumeGroupName,
@@ -287,17 +282,17 @@ func reduceVolumeGroup(volumeGroupName string, force bool) {
 func RaidLocalSsds() error {
     isAlreadyRaided, err := isRaided()
     if err != nil {
-        klog.V(2).Infof("======Errored while scanning for available LocalSSDs err:%v; continuing Raiding=======", err)
+        klog.V(2).Infof("============================== Errored while scanning for available LocalSSDs err:%v; continuing Raiding ==============================", err)
     } else if isAlreadyRaided {
-        klog.V(2).Infof("===============Local SSDs are already RAIDed==============")
+        klog.V(2).Infof("============================== Local SSDs are already RAIDed ==============================")
         return nil
     }
     info, err := common.RunCommand("nvme", []string{"list", "-o", "json"}...)
     if err != nil {
         return fmt.Errorf("errored while scanning available NVME disks info: %v; err:%v", info, err)
     }
     infoString := strings.TrimSpace(strings.ReplaceAll(string(info), "\n", " "))
-    klog.V(2).Infof("=============NVME list %v ============", infoString)
+    klog.V(2).Infof("============================== NVME list %v ==============================", infoString)
     infoString = strings.ReplaceAll(infoString, "\"", "")
     infoString = strings.ReplaceAll(infoString, " :", ":")
     infoString = strings.ReplaceAll(infoString, ": ", ":")
@@ -313,7 +308,11 @@ func RaidLocalSsds() error {
     }
     nvmeDiskCount := len(diskList)
     nvmeDiskList := strings.Join(diskList, " ")
-    klog.V(2).Infof("========= nvmeDiskCount %v; nvmeDiskList: %v; diskList %v================", nvmeDiskCount, nvmeDiskList, diskList)
+    if nvmeDiskCount == 0 {
+        klog.Infof("No NVME disks found for RAIDing")
+        return nil
+    }
+    klog.V(2).Infof("============================== nvmeDiskCount %v; nvmeDiskList: %v; diskList %v ==============================", nvmeDiskCount, nvmeDiskList, diskList)
     args := []string{
         "--create",
         raidedLssdPrefix + raidedLocalSsdName,
@@ -331,7 +330,7 @@ func RaidLocalSsds() error {
     // Validate if Raided successfully
     isAlreadyRaided, err = isRaided()
     if err != nil {
-        klog.V(2).Infof("======Errored while scanning for available raided LocalSSDs err:%v=======", err)
+        klog.V(2).Infof("============================== Errored while scanning for available raided LocalSSDs err:%v ==============================", err)
     }
     if !isAlreadyRaided {
         return fmt.Errorf("failed raiding, raided device not found on scanning")
@@ -348,9 +347,27 @@ func isRaided() (bool, error) {
     if err != nil {
         return false, fmt.Errorf("errored while scanning for raided LSSD %v: %s", err, info)
     }
-    klog.V(2).Infof("=========== Got LSSDs %v===========", string(info))
+    klog.V(2).Infof("============================== Got LSSDs %v ==============================", string(info))
     if info != nil && strings.Contains(string(info), raidedLocalSsdName) {
         return true, nil
     }
     return false, nil
 }
+
+func isCachingSetup(cachePoolName, mainLvName string) (error, bool) {
+    // Verify caching is setup for PD
+    args := []string{
+        "--select",
+        "lv_name=" + mainLvName,
+        "-o",
+        "pool_lv",
+    }
+    poolName, err := common.RunCommand("lvs", args...)
+    if err != nil {
+        return fmt.Errorf("lvs error %w", err), false
+    }
+    if strings.Contains(string(poolName), "csi-fast") {
+        return nil, true
+    }
+    return nil, false
+}
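
For reference, the new isCachingSetup helper boils down to a single lvs query: ask LVM which pool LV backs the main logical volume and check whether it is the "csi-fast" cache pool. A self-contained sketch of that check, using os/exec directly rather than the driver's common.RunCommand, and returning (bool, error) instead of the helper's (error, bool) order (the LV name in main is hypothetical):

package main

import (
    "fmt"
    "os/exec"
    "strings"
)

// isCached reports whether mainLvName already has the "csi-fast" cache pool
// attached, by listing the pool LV that backs it.
func isCached(mainLvName string) (bool, error) {
    // Equivalent to: lvs --select lv_name=<name> -o pool_lv
    out, err := exec.Command("lvs", "--select", "lv_name="+mainLvName, "-o", "pool_lv").CombinedOutput()
    if err != nil {
        return false, fmt.Errorf("lvs error %w: %s", err, out)
    }
    return strings.Contains(string(out), "csi-fast"), nil
}

func main() {
    cached, err := isCached("csi-main-example")
    fmt.Println(cached, err)
}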

test/e2e/tests/single_zone_e2e_test.go

Lines changed: 1 addition & 1 deletion
@@ -1288,7 +1288,7 @@ var _ = Describe("GCE PD CSI Driver", func() {
     Expect(err).To(BeNil(), "no error expected when passed valid compute url")
 
     zone := "us-central1-c"
-    nodeID := fmt.Sprintf("gce-pd-csi-e2e-%s-0", zone)
+    nodeID := fmt.Sprintf("gce-pd-csi-e2e-%s", zone)
     i, err := remote.SetupInstance(getRemoteInstanceConfig(), zone, nodeID, computeService, 0 /* localSSDCount */)
 
     if err != nil {

test/e2e/utils/utils.go

Lines changed: 1 addition & 0 deletions
@@ -60,6 +60,7 @@ func GCEClientAndDriverSetup(instance *remote.InstanceInfo, computeEndpoint stri
         "--multi-zone-volume-handle-disk-types=pd-standard",
         "--use-instance-api-to-poll-attachment-disk-types=pd-ssd",
         "--use-instance-api-to-list-volumes-published-nodes",
+        "--enable-data-cache",
     }
     extra_flags = append(extra_flags, fmt.Sprintf("--compute-endpoint=%s", computeEndpoint))

test/remote/setup-teardown.go

Lines changed: 6 additions & 0 deletions
@@ -84,6 +84,12 @@ func SetupNewDriverAndClient(instance *InstanceInfo, config *ClientConfig) (*Tes
         }
     }()
 
+    // Install dependencies
+    output, err := instance.SSH("apt-get", "install", "-y", "mdadm", "lvm2")
+    if err != nil {
+        return nil, fmt.Errorf("failed to install dependencies. Output: %v, error: %v", output, err.Error())
+    }
+
     // Upload archive to instance and run binaries
     driverPID, err := instance.UploadAndRun(archivePath, config.WorkspaceDir, config.RunDriverCmd)
     if err != nil {
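
The apt-get step above exists because the data-cache path shells out to mdadm (to RAID the local SSDs) and to the LVM tools (vgcreate, lvcreate, lvconvert, lvs), so the test instance must have both packages. A small, hypothetical preflight check (not part of this commit) that surfaces missing binaries early:

package main

import (
    "fmt"
    "os/exec"
)

func main() {
    // Binaries the data-cache code path invokes via common.RunCommand.
    for _, bin := range []string{"mdadm", "vgcreate", "lvcreate", "lvconvert", "lvs", "nvme"} {
        if _, err := exec.LookPath(bin); err != nil {
            fmt.Printf("missing dependency %q: %v\n", bin, err)
        }
    }
}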

test/run-e2e-local.sh

Lines changed: 1 addition & 1 deletion
@@ -16,4 +16,4 @@ if hostname | grep -q c.googlers.com ; then
     CLOUDTOP_HOST=--cloudtop-host
 fi
 
-ginkgo --v "test/e2e/tests" -- --project "${PROJECT}" --service-account "${IAM_NAME}" "${CLOUDTOP_HOST}" --v=6 --logtostderr
+ginkgo --v --focus "Should create->attach->setup caching->write->detach->attach to different node->mount->read" "test/e2e/tests" -- --project "${PROJECT}" --service-account "${IAM_NAME}" "${CLOUDTOP_HOST}" --v=6 --logtostderr
