Skip to content

Commit 21a8ece

Browse files
committed
Fix typo and use strings.Fields for whitespace
splitting to fix issues with strings.Split in case of multiple consecutive spaces. Check nodeName for an empty string before the feature check. Remove a log line that was also removed on later release branches. Minor changes to bring cache.go into the same state as the release-1.17 branch.
1 parent daa3ff5 commit 21a8ece

File tree

2 files changed

+43
-21
lines changed

2 files changed

+43
-21
lines changed

cmd/gce-pd-csi-driver/main.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -333,7 +333,7 @@ func urlFlag(target **url.URL, name string, usage string) {
333333
}
334334

335335
func isDataCacheEnabledNodePool(ctx context.Context, nodeName string) (bool, error) {
336-
if !*enableDataCacheFlag {
336+
if !*enableDataCacheFlag || nodeName == "" {
337337
return false, nil
338338
}
339339
if nodeName != common.TestNode { // disregard logic below when E2E testing.
@@ -356,7 +356,7 @@ func fetchLssdsForRaiding(lssdCount int) ([]string, error) {
356356
return nil, fmt.Errorf("Error listing RAIDed LSSDs %v", err)
357357
}
358358

359-
LSSDsWithEmptyMountPoint, err := driver.FetchLSSDsWihtEmptyMountPoint()
359+
LSSDsWithEmptyMountPoint, err := driver.FetchLSSDsWithEmptyMountPoint()
360360
if err != nil {
361361
return nil, fmt.Errorf("Error listing LSSDs with empty mountpoint: %v", err)
362362
}

pkg/gce-pd-csi-driver/cache.go

Lines changed: 41 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,15 @@ import (
77
"regexp"
88
"strconv"
99
"strings"
10+
"time"
1011

1112
csi "github.com/container-storage-interface/spec/lib/go/csi"
1213
fsnotify "github.com/fsnotify/fsnotify"
14+
"google.golang.org/grpc/codes"
15+
"google.golang.org/grpc/status"
16+
v1 "k8s.io/api/core/v1"
1317
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
18+
"k8s.io/apimachinery/pkg/util/wait"
1419
"k8s.io/client-go/kubernetes"
1520
"k8s.io/client-go/rest"
1621
"k8s.io/klog/v2"
@@ -42,7 +47,7 @@ func fetchRAIDedLocalSsdPath() (string, error) {
4247
return "", fmt.Errorf("Error getting RAIDed device path for Data Cache %v, output:%v", err, string(info))
4348
}
4449
infoString := strings.TrimSpace(string(info))
45-
infoSlice := strings.Split(infoString, " ")
50+
infoSlice := strings.Fields(infoString)
4651

4752
// We want to get the second element in the array (sample: ARRAY /dev/md126 metadata=1.2 name=csi-driver-data-cache UUID=*),
4853
// which is the path to the RAIDed device
@@ -162,7 +167,7 @@ func setupCaching(devicePath string, req *csi.NodeStageVolumeRequest, nodeId str
162167
}
163168
err, isCached := isCachingSetup(mainLvName)
164169
if err != nil {
165-
klog.Errorf("faild to check if caching ius setup for LV, continuing to setup caching.")
170+
klog.Errorf("failed to check if caching is setup for LV, continuing to setup caching.")
166171
}
167172
cacheLvName := getLvName(cacheSuffix, volumeId)
168173
if isCached {
@@ -199,6 +204,9 @@ func setupCaching(devicePath string, req *csi.NodeStageVolumeRequest, nodeId str
199204
}
200205
info, err = common.RunCommand("" /* pipedCmd */, nil /* pipedCmdArg */, "lvcreate", args...)
201206
if err != nil {
207+
if strings.Contains(err.Error(), "insufficient free space") {
208+
return mainDevicePath, status.Error(codes.InvalidArgument, fmt.Sprintf("Error setting up cache: %v", err.Error()))
209+
}
202210
return mainDevicePath, fmt.Errorf("Errored while creating cache %w: %s", err, info)
203211
}
204212
}
@@ -215,7 +223,7 @@ func setupCaching(devicePath string, req *csi.NodeStageVolumeRequest, nodeId str
215223
req.GetPublishContext()[common.ContextDataCacheMode],
216224
volumeGroupName + "/" + mainLvName,
217225
"--chunksize",
218-
chunkSize, // default unit is KiB
226+
chunkSize,
219227
"--force",
220228
"-y",
221229
}
@@ -250,18 +258,15 @@ func ValidateDataCacheConfig(dataCacheMode string, dataCacheSize string, ctx con
250258

251259
func GetDataCacheCountFromNodeLabel(ctx context.Context, nodeName string) (int, error) {
252260
cfg, err := rest.InClusterConfig()
253-
// We want to capture API errors with node label fetching, so return -1
254-
// in those cases instead of 0.
255261
if err != nil {
256262
return 0, err
257263
}
258264
kubeClient, err := kubernetes.NewForConfig(cfg)
259265
if err != nil {
260266
return 0, err
261267
}
262-
node, err := kubeClient.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{})
268+
node, err := getNodeWithRetry(ctx, kubeClient, nodeName)
263269
if err != nil {
264-
// We could retry, but this error will also crashloop the driver which may be as good a way to retry as any.
265270
return 0, err
266271
}
267272
if val, found := node.GetLabels()[fmt.Sprintf(common.NodeLabelPrefix, common.DataCacheLssdCountLabel)]; found {
@@ -272,10 +277,33 @@ func GetDataCacheCountFromNodeLabel(ctx context.Context, nodeName string) (int,
272277
klog.V(4).Infof("Number of local SSDs requested for Data Cache: %v", dataCacheCount)
273278
return dataCacheCount, nil
274279
}
275-
// This will be returned for a non-Data-Cache node pool
276280
return 0, nil
277281
}
278282

283+
func getNodeWithRetry(ctx context.Context, kubeClient *kubernetes.Clientset, nodeName string) (*v1.Node, error) {
284+
var nodeObj *v1.Node
285+
backoff := wait.Backoff{
286+
Duration: 1 * time.Second,
287+
Factor: 2.0,
288+
Steps: 5,
289+
}
290+
err := wait.ExponentialBackoffWithContext(ctx, backoff, func() (bool, error) {
291+
node, err := kubeClient.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{})
292+
if err != nil {
293+
klog.Warningf("Error getting node %s: %v, retrying...\n", nodeName, err)
294+
return false, nil
295+
}
296+
nodeObj = node
297+
klog.V(4).Infof("Successfully retrieved node info %s\n", nodeName)
298+
return true, nil
299+
})
300+
301+
if err != nil {
302+
klog.Errorf("Failed to get node %s after retries: %v\n", nodeName, err)
303+
}
304+
return nodeObj, err
305+
}
306+
279307
func FetchRaidedLssdCountForDatacache() (int, error) {
280308
raidedPath, err := fetchRAIDedLocalSsdPath()
281309
if err != nil {
@@ -342,7 +370,7 @@ func FetchAllLssds() ([]string, error) {
342370
for _, ssd := range infoList {
343371
ssd = strings.TrimSpace(ssd)
344372
if strings.HasPrefix(ssd, "/dev/nvme") {
345-
ssdDetails := strings.Split(ssd, " ")
373+
ssdDetails := strings.Fields(ssd)
346374
lssd := re.MatchString(ssdDetails[1])
347375
if lssd {
348376
diskList = append(diskList, strings.TrimSpace(ssdDetails[0]))
@@ -355,7 +383,7 @@ func FetchAllLssds() ([]string, error) {
355383
return diskList, nil
356384
}
357385

358-
func FetchLSSDsWihtEmptyMountPoint() ([]string, error) {
386+
func FetchLSSDsWithEmptyMountPoint() ([]string, error) {
359387
info, err := common.RunCommand("grep", []string{"-E", `^\S+\s*$`} /* pipeCmdArg */, "lsblk", []string{"-o", "NAME,MOUNTPOINT", "-pdn"}...)
360388
if err != nil {
361389
return nil, fmt.Errorf("Error while fetching disks with no mount point: %v; err:%v", info, err)
@@ -376,6 +404,7 @@ func checkVgExists(volumeGroupName string) bool {
376404
return false
377405
}
378406
// Check if the required volume group already exists
407+
klog.Infof("check vg exists output: %v, volumeGroupName: %v", string(info), volumeGroupName)
379408
return strings.Contains(string(info), volumeGroupName)
380409
}
381410

@@ -462,7 +491,6 @@ func createVg(volumeGroupName string, raidedLocalSsds string) error {
462491

463492
func reduceVolumeGroup(volumeGroupName string, force bool) {
464493
if !checkVgExists(volumeGroupName) {
465-
klog.V(2).Infof("Volume group %v not found, no further action needed", volumeGroupName)
466494
return
467495
}
468496
args := []string{
@@ -618,23 +646,17 @@ func watchDiskDetaches(watcher *fsnotify.Watcher, nodeName string, errorCh chan
618646
errorCh <- fmt.Errorf("disk update event errored: %v", err)
619647
// watch for events
620648
case <-watcher.Events:
621-
vgName := getVolumeGroupName(nodeName)
622-
if !checkVgExists(vgName) {
623-
// If the volume group doesn't exist, there's nothing to update.
624-
// Continue to the next event.
625-
continue
626-
}
627649
// In case of an event i.e. creation or deletion of any new PV, we update the VG metadata.
628650
// This might include some non-LVM changes, no harm in updating metadata multiple times.
629651
args := []string{
630652
"--updatemetadata",
631-
vgName,
653+
getVolumeGroupName(nodeName),
632654
}
633655
_, err := common.RunCommand("" /* pipedCmd */, nil /* pipedCmdArg */, "vgck", args...)
634656
if err != nil {
635657
klog.Errorf("Error updating volume group's metadata: %v", err)
636658
}
637-
reduceVolumeGroup(vgName, true)
659+
reduceVolumeGroup(getVolumeGroupName(nodeName), true)
638660
}
639661
}
640662
}

0 commit comments

Comments
 (0)