Skip to content

Commit d9d489c

Browse files
Sneha-atsunnylovestiramisu
authored andcommitted
removed hyperdisk driver override, updated error messages and other pr suggestions
1 parent f07547f commit d9d489c

File tree

3 files changed

+59
-71
lines changed

3 files changed

+59
-71
lines changed

cmd/gce-pd-csi-driver/main.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ func handle() {
9494
if *runControllerService && *httpEndpoint != "" {
9595
mm := metrics.NewMetricsManager()
9696
mm.InitializeHttpHandler(*httpEndpoint, *metricsPath)
97-
mm.RegisterHyperdiskMetric()
97+
mm.RegisterPDCSIMetric()
9898

9999
if metrics.IsGKEComponentVersionAvailable() {
100100
mm.EmitGKEComponentVersion()

pkg/gce-pd-csi-driver/controller.go

Lines changed: 39 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ const (
130130

131131
replicationTypeNone = "none"
132132
replicationTypeRegionalPD = "regional-pd"
133-
133+
diskNotFound = ""
134134
// The maximum number of entries that we can include in the
135135
// ListVolumesResposne
136136
// In reality, the limit here is 4MB (based on gRPC client response limits),
@@ -285,7 +285,7 @@ func (gceCS *GCEControllerServer) CreateVolume(ctx context.Context, req *csi.Cre
285285
existingDisk, err := gceCS.CloudProvider.GetDisk(ctx, gceCS.CloudProvider.GetDefaultProject(), volKey, gceAPIVersion)
286286
if err != nil {
287287
if !gce.IsGCEError(err, "notFound") {
288-
return nil, common.LoggedError("CreateVolume unknown get disk error when validating: ", err)
288+
return nil, common.LoggedError("CreateVolume, failed to getDisk when validating: ", err)
289289
}
290290
}
291291
if err == nil {
@@ -341,7 +341,7 @@ func (gceCS *GCEControllerServer) CreateVolume(ctx context.Context, req *csi.Cre
341341
if gce.IsGCEError(err, "notFound") {
342342
return nil, status.Errorf(codes.NotFound, "CreateVolume source volume %s does not exist", volumeContentSourceVolumeID)
343343
} else {
344-
return nil, common.LoggedError("CreateVolume unknown get disk error when validating: ", err)
344+
return nil, common.LoggedError("CreateVolume, getDisk error when validating: ", err)
345345
}
346346
}
347347

@@ -401,9 +401,7 @@ func (gceCS *GCEControllerServer) CreateVolume(ctx context.Context, req *csi.Cre
401401
disk, err = createSingleZoneDisk(ctx, gceCS.CloudProvider, name, zones, params, capacityRange, capBytes, snapshotID, volumeContentSourceVolumeID, multiWriter)
402402
if err != nil {
403403
// Emit metric for expected disk type from storage class
404-
if params.DiskType != "" {
405-
gceCS.Metrics.RecordOperationErrorMetrics("CreateVolume", err, params.DiskType)
406-
}
404+
defer gceCS.Metrics.RecordOperationErrorMetrics("CreateVolume", err, params.DiskType)
407405
return nil, common.LoggedError("CreateVolume failed to create single zonal disk "+name+": ", err)
408406
}
409407
case replicationTypeRegionalPD:
@@ -413,9 +411,7 @@ func (gceCS *GCEControllerServer) CreateVolume(ctx context.Context, req *csi.Cre
413411
disk, err = createRegionalDisk(ctx, gceCS.CloudProvider, name, zones, params, capacityRange, capBytes, snapshotID, volumeContentSourceVolumeID, multiWriter)
414412
if err != nil {
415413
// Emit metric for expected disk type from storage class
416-
if params.DiskType != "" {
417-
gceCS.Metrics.RecordOperationErrorMetrics("CreateVolume", err, params.DiskType)
418-
}
414+
defer gceCS.Metrics.RecordOperationErrorMetrics("CreateVolume", err, params.DiskType)
419415
return nil, common.LoggedError("CreateVolume failed to create regional disk "+name+": ", err)
420416
}
421417
default:
@@ -425,9 +421,7 @@ func (gceCS *GCEControllerServer) CreateVolume(ctx context.Context, req *csi.Cre
425421
ready, err := isDiskReady(disk)
426422
if err != nil {
427423
// Emit metric for expected disk type from storage class as the disk is not ready and might not have PD type populated
428-
if params.DiskType != "" {
429-
gceCS.Metrics.RecordOperationErrorMetrics("CreateVolume", err, params.DiskType)
430-
}
424+
defer gceCS.Metrics.RecordOperationErrorMetrics("CreateVolume", err, params.DiskType)
431425
return nil, status.Errorf(codes.Internal, "CreateVolume disk %v had error checking ready status: %v", volKey, err.Error())
432426
}
433427
if !ready {
@@ -470,7 +464,7 @@ func (gceCS *GCEControllerServer) DeleteVolume(ctx context.Context, req *csi.Del
470464

471465
err = gceCS.CloudProvider.DeleteDisk(ctx, project, volKey)
472466
if err != nil {
473-
return nil, common.LoggedError("unknown Delete disk error: ", err)
467+
return nil, common.LoggedError("Failed to delete disk: ", err)
474468
}
475469

476470
klog.V(4).Infof("DeleteVolume succeeded for disk %v", volKey)
@@ -570,10 +564,11 @@ func (gceCS *GCEControllerServer) executeControllerPublishVolume(ctx context.Con
570564
defer gceCS.volumeLocks.Release(lockingVolumeID)
571565
diskToPublish, err := gceCS.CloudProvider.GetDisk(ctx, project, volKey, gce.GCEAPIVersionV1)
572566
if err != nil {
567+
defer gceCS.Metrics.RecordOperationErrorMetrics("ControllerPublishVolume", err, diskNotFound)
573568
if gce.IsGCENotFoundError(err) {
574569
return nil, status.Errorf(codes.NotFound, "Could not find disk %v: %v", volKey.String(), err.Error())
575570
}
576-
return nil, status.Errorf(codes.Internal, "Unknown get disk error: %v", err.Error())
571+
return nil, status.Errorf(codes.Internal, "Failed to getDisk: %v", err.Error())
577572
}
578573
instanceZone, instanceName, err := common.NodeIDToZoneAndName(nodeID)
579574
if err != nil {
@@ -584,7 +579,7 @@ func (gceCS *GCEControllerServer) executeControllerPublishVolume(ctx context.Con
584579
if gce.IsGCENotFoundError(err) {
585580
return nil, status.Errorf(codes.NotFound, "Could not find instance %v: %v", nodeID, err.Error())
586581
}
587-
return nil, status.Errorf(codes.Internal, "Unknown get instance error: %v", err.Error())
582+
return nil, status.Errorf(codes.Internal, "Failed to get instance: %v", err.Error())
588583
}
589584

590585
readWrite := "READ_WRITE"
@@ -620,16 +615,17 @@ func (gceCS *GCEControllerServer) executeControllerPublishVolume(ctx context.Con
620615
return nil, status.Errorf(codes.InvalidArgument, "'%s' is not a compatible disk type with the machine type %s, please review the GCP online documentation for available persistent disk options", udErr.DiskType, machineType)
621616
}
622617
// Emit metric for error
623-
gceCS.Metrics.RecordOperationErrorMetrics("ControllerPublishVolume", err, diskToPublish.GetPDType())
624-
return nil, status.Errorf(codes.Internal, "unknown Attach error: %v", err.Error())
618+
defer gceCS.Metrics.RecordOperationErrorMetrics("ControllerPublishVolume", err, metrics.GetDiskType(diskToPublish))
619+
return nil, status.Errorf(codes.Internal, "Failed to Attach: %v", err.Error())
625620
}
626621

627622
err = gceCS.CloudProvider.WaitForAttach(ctx, project, volKey, instanceZone, instanceName)
628623
if err != nil {
629624
// Emit metric for error
630-
gceCS.Metrics.RecordOperationErrorMetrics("ControllerPublishVolume", err, diskToPublish.GetPDType())
631-
return nil, status.Errorf(codes.Internal, "unknown WaitForAttach error: %v", err.Error())
625+
defer gceCS.Metrics.RecordOperationErrorMetrics("ControllerPublishVolume", err, metrics.GetDiskType(diskToPublish))
626+
return nil, status.Errorf(codes.Internal, "Errored during WaitForAttach: %v", err.Error())
632627
}
628+
633629
klog.V(4).Infof("ControllerPublishVolume succeeded for disk %v to instance %v", volKey, nodeID)
634630
return pubVolResp, nil
635631
}
@@ -729,15 +725,13 @@ func (gceCS *GCEControllerServer) executeControllerUnpublishVolume(ctx context.C
729725
}
730726
diskToUnpublish, _ := gceCS.CloudProvider.GetDisk(ctx, gceCS.CloudProvider.GetDefaultProject(), volKey, gce.GCEAPIVersionV1)
731727
if err != nil {
732-
common.LoggedError("Unknown get disk error: ", err)
728+
defer gceCS.Metrics.RecordOperationErrorMetrics("ControllerUnpublishVolume", err, diskNotFound)
729+
common.LoggedError("Failed to getDisk: ", err)
733730
}
734731
err = gceCS.CloudProvider.DetachDisk(ctx, project, deviceName, instanceZone, instanceName)
735732
if err != nil {
736-
//Do not emit metric if disk is unknown
737-
if diskToUnpublish != nil {
738-
gceCS.Metrics.RecordOperationErrorMetrics("ControllerUnpublishVolume", err, diskToUnpublish.GetPDType())
739-
}
740-
return nil, common.LoggedError("unknown detach error: ", err)
733+
defer gceCS.Metrics.RecordOperationErrorMetrics("ControllerUnpublishVolume", err, metrics.GetDiskType(diskToUnpublish))
734+
return nil, common.LoggedError("Failed to detach: ", err)
741735
}
742736

743737
klog.V(4).Infof("ControllerUnpublishVolume succeeded for disk %v from node %v", volKey, nodeID)
@@ -775,7 +769,7 @@ func (gceCS *GCEControllerServer) ValidateVolumeCapabilities(ctx context.Context
775769
if gce.IsGCENotFoundError(err) {
776770
return nil, status.Errorf(codes.NotFound, "Could not find disk %v: %v", volKey.Name, err.Error())
777771
}
778-
return nil, common.LoggedError("Unknown get disk error: ", err)
772+
return nil, common.LoggedError("Failed to getDisk: ", err)
779773
}
780774

781775
// Check Volume Context is Empty
@@ -838,7 +832,7 @@ func (gceCS *GCEControllerServer) ListVolumes(ctx context.Context, req *csi.List
838832
if gce.IsGCEInvalidError(err) {
839833
return nil, status.Errorf(codes.Aborted, "ListVolumes error with invalid request: %v", err.Error())
840834
}
841-
return nil, common.LoggedError("Unknown list disk error: ", err)
835+
return nil, common.LoggedError("Failed to list disk: ", err)
842836
}
843837
gceCS.disks = diskList
844838
gceCS.seen = map[string]int{}
@@ -921,7 +915,7 @@ func (gceCS *GCEControllerServer) CreateSnapshot(ctx context.Context, req *csi.C
921915
if gce.IsGCENotFoundError(err) {
922916
return nil, status.Errorf(codes.NotFound, "CreateSnapshot could not find disk %v: %v", volKey.String(), err.Error())
923917
}
924-
return nil, common.LoggedError("CreateSnapshot unknown get disk error: ", err)
918+
return nil, common.LoggedError("CreateSnapshot, failed to getDisk: ", err)
925919
}
926920

927921
snapshotParams, err := common.ExtractAndDefaultSnapshotParameters(req.GetParameters(), gceCS.Driver.name)
@@ -956,35 +950,30 @@ func (gceCS *GCEControllerServer) createPDSnapshot(ctx context.Context, project
956950
}
957951
sourceDisk, err := gceCS.CloudProvider.GetDisk(ctx, gceCS.CloudProvider.GetDefaultProject(), volKey, gce.GCEAPIVersionV1)
958952
if err != nil {
959-
common.LoggedError("Unknown get disk error: ", err)
953+
common.LoggedError("Failed to getDisk: ", err)
960954
}
961955
// Check if PD snapshot already exists
962956
var snapshot *compute.Snapshot
963957
snapshot, err = gceCS.CloudProvider.GetSnapshot(ctx, project, snapshotName)
964958
if err != nil {
965959
if !gce.IsGCEError(err, "notFound") {
966-
return nil, status.Errorf(codes.Internal, "Unknown get snapshot error: %v", err.Error())
960+
return nil, status.Errorf(codes.Internal, "Failed to get snapshot: %v", err.Error())
967961
}
968962
// If we could not find the snapshot, we create a new one
969963
snapshot, err = gceCS.CloudProvider.CreateSnapshot(ctx, project, volKey, snapshotName, snapshotParams)
970964
if err != nil {
971965
if gce.IsGCEError(err, "notFound") {
966+
defer gceCS.Metrics.RecordOperationErrorMetrics("CreateSnapshot", err, diskNotFound)
972967
return nil, status.Errorf(codes.NotFound, "Could not find volume with ID %v: %v", volKey.String(), err.Error())
973968
}
974-
//Do not emit metric if disk is unknown
975-
if sourceDisk != nil {
976-
gceCS.Metrics.RecordOperationErrorMetrics("CreateSnapshot", err, sourceDisk.GetPDType())
977-
}
978-
return nil, common.LoggedError("Unknown create snapshot error: ", err)
969+
defer gceCS.Metrics.RecordOperationErrorMetrics("CreateSnapshot", err, metrics.GetDiskType(sourceDisk))
970+
return nil, common.LoggedError("Failed to create snapshot: ", err)
979971
}
980972
}
981973

982974
err = gceCS.validateExistingSnapshot(snapshot, volKey)
983975
if err != nil {
984-
//Do not emit metric if disk is unknown
985-
if sourceDisk != nil {
986-
gceCS.Metrics.RecordOperationErrorMetrics("CreateSnapshot", err, sourceDisk.GetPDType())
987-
}
976+
defer gceCS.Metrics.RecordOperationErrorMetrics("CreateSnapshot", err, metrics.GetDiskType(sourceDisk))
988977
return nil, status.Errorf(codes.AlreadyExists, "Error in creating snapshot: %v", err.Error())
989978
}
990979

@@ -1018,15 +1007,15 @@ func (gceCS *GCEControllerServer) createImage(ctx context.Context, project strin
10181007
image, err = gceCS.CloudProvider.GetImage(ctx, project, imageName)
10191008
if err != nil {
10201009
if !gce.IsGCEError(err, "notFound") {
1021-
return nil, common.LoggedError("Unknown get image error: ", err)
1010+
return nil, common.LoggedError("Failed to get image: ", err)
10221011
}
10231012
// create a new image
10241013
image, err = gceCS.CloudProvider.CreateImage(ctx, project, volKey, imageName, snapshotParams)
10251014
if err != nil {
10261015
if gce.IsGCEError(err, "notFound") {
10271016
return nil, status.Errorf(codes.NotFound, "Could not find volume with ID %v: %v", volKey.String(), err.Error())
10281017
}
1029-
return nil, common.LoggedError("Unknown create image error: ", err)
1018+
return nil, common.LoggedError("Failed to create image: ", err)
10301019
}
10311020
}
10321021

@@ -1156,12 +1145,12 @@ func (gceCS *GCEControllerServer) DeleteSnapshot(ctx context.Context, req *csi.D
11561145
case common.DiskSnapshotType:
11571146
err = gceCS.CloudProvider.DeleteSnapshot(ctx, project, key)
11581147
if err != nil {
1159-
return nil, common.LoggedError("unknown Delete snapshot error: ", err)
1148+
return nil, common.LoggedError("Failed to DeleteSnapshot: ", err)
11601149
}
11611150
case common.DiskImageType:
11621151
err = gceCS.CloudProvider.DeleteImage(ctx, project, key)
11631152
if err != nil {
1164-
return nil, common.LoggedError("unknown Delete image error: ", err)
1153+
return nil, common.LoggedError("Failed to DeleteImage error: ", err)
11651154
}
11661155
default:
11671156
return nil, status.Errorf(codes.InvalidArgument, "unknown snapshot type %s", snapshotType)
@@ -1193,7 +1182,7 @@ func (gceCS *GCEControllerServer) ListSnapshots(ctx context.Context, req *csi.Li
11931182
if gce.IsGCEInvalidError(err) {
11941183
return nil, status.Errorf(codes.Aborted, "ListSnapshots error with invalid request: %v", err.Error())
11951184
}
1196-
return nil, common.LoggedError("Unknown list snapshots error: ", err)
1185+
return nil, common.LoggedError("Failed to list snapshots: ", err)
11971186
}
11981187
gceCS.snapshots = snapshotList
11991188
gceCS.snapshotTokens = map[string]int{}
@@ -1239,21 +1228,19 @@ func (gceCS *GCEControllerServer) ControllerExpandVolume(ctx context.Context, re
12391228

12401229
project, volKey, err = gceCS.CloudProvider.RepairUnderspecifiedVolumeKey(ctx, project, volKey)
12411230
if err != nil {
1231+
defer gceCS.Metrics.RecordOperationErrorMetrics("ControllerExpandVolume", err, diskNotFound)
12421232
if gce.IsGCENotFoundError(err) {
12431233
return nil, status.Errorf(codes.NotFound, "ControllerExpandVolume could not find volume with ID %v: %v", volumeID, err.Error())
12441234
}
12451235
return nil, common.LoggedError("ControllerExpandVolume error repairing underspecified volume key: ", err)
12461236
}
12471237
sourceDisk, err := gceCS.CloudProvider.GetDisk(ctx, project, volKey, gce.GCEAPIVersionV1)
12481238
if err != nil {
1249-
common.LoggedError("Unknown get disk error: ", err)
1239+
common.LoggedError("Failed to getDisk: ", err)
12501240
}
12511241
resizedGb, err := gceCS.CloudProvider.ResizeDisk(ctx, project, volKey, reqBytes)
12521242
if err != nil {
1253-
//Do not emit metric if disk is unknown
1254-
if sourceDisk != nil {
1255-
gceCS.Metrics.RecordOperationErrorMetrics("ControllerExpandVolume", err, sourceDisk.GetPDType())
1256-
}
1243+
defer gceCS.Metrics.RecordOperationErrorMetrics("ControllerExpandVolume", err, metrics.GetDiskType(sourceDisk))
12571244
return nil, common.LoggedError("ControllerExpandVolume failed to resize disk: ", err)
12581245
}
12591246

@@ -1277,15 +1264,15 @@ func (gceCS *GCEControllerServer) getSnapshots(ctx context.Context, req *csi.Lis
12771264
if gce.IsGCEError(err, "invalid") {
12781265
return nil, status.Errorf(codes.Aborted, "Invalid error: %v", err.Error())
12791266
}
1280-
return nil, common.LoggedError("Unknown list snapshot error: ", err)
1267+
return nil, common.LoggedError("Failed to list snapshot: ", err)
12811268
}
12821269

12831270
images, _, err = gceCS.CloudProvider.ListImages(ctx, filter)
12841271
if err != nil {
12851272
if gce.IsGCEError(err, "invalid") {
12861273
return nil, status.Errorf(codes.Aborted, "Invalid error: %v", err.Error())
12871274
}
1288-
return nil, common.LoggedError("Unknown list image error: ", err)
1275+
return nil, common.LoggedError("Failed to list image: ", err)
12891276
}
12901277

12911278
entries := []*csi.ListSnapshotsResponse_Entry{}
@@ -1326,7 +1313,7 @@ func (gceCS *GCEControllerServer) getSnapshotByID(ctx context.Context, snapshotI
13261313
// return empty list if no snapshot is found
13271314
return &csi.ListSnapshotsResponse{}, nil
13281315
}
1329-
return nil, common.LoggedError("Unknown list snapshot error: ", err)
1316+
return nil, common.LoggedError("Failed to list snapshot: ", err)
13301317
}
13311318
e, err := generateDiskSnapshotEntry(snapshot)
13321319
if err != nil {

pkg/metrics/metrics.go

Lines changed: 19 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -20,19 +20,18 @@ import (
2020
"fmt"
2121
"net/http"
2222
"os"
23-
"strings"
2423

2524
"k8s.io/component-base/metrics"
2625
"k8s.io/klog/v2"
2726
"sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/pkg/common"
27+
gce "sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/pkg/gce-cloud-provider/compute"
2828
)
2929

3030
const (
3131
// envGKEPDCSIVersion is an environment variable set in the PDCSI controller manifest
3232
// with the current version of the GKE component.
33-
envGKEPDCSIVersion = "GKE_PDCSI_VERSION"
34-
hyperdiskDriverName = "hyperdisk.csi.storage.gke.io"
35-
pdcsiDriverName = "pd.csi.storage.gke.io"
33+
envGKEPDCSIVersion = "GKE_PDCSI_VERSION"
34+
pdcsiDriverName = "pd.csi.storage.gke.io"
3635
)
3736

3837
var (
@@ -42,15 +41,14 @@ var (
4241
Help: "Metric to expose the version of the PDCSI GKE component.",
4342
}, []string{"component_version"})
4443

45-
pdcsiOperationErrorsMetric = metrics.NewGaugeVec(
46-
&metrics.GaugeOpts{
44+
pdcsiOperationErrorsMetric = metrics.NewCounterVec(
45+
&metrics.CounterOpts{
4746
Subsystem: "csidriver",
48-
Name: "pdcsi_operation_errors",
47+
Name: "operation_errors",
4948
Help: "CSI server side error metrics",
5049
StabilityLevel: metrics.ALPHA,
5150
},
52-
[]string{"driver_name", "method_name", "grpc_status_code", "disk_type"},
53-
)
51+
[]string{"driver_name", "method_name", "grpc_status_code", "disk_type"})
5452
)
5553

5654
type MetricsManager struct {
@@ -72,7 +70,7 @@ func (mm *MetricsManager) registerComponentVersionMetric() {
7270
mm.registry.MustRegister(gkeComponentVersion)
7371
}
7472

75-
func (mm *MetricsManager) RegisterHyperdiskMetric() {
73+
func (mm *MetricsManager) RegisterPDCSIMetric() {
7674
mm.registry.MustRegister(pdcsiOperationErrorsMetric)
7775
}
7876

@@ -92,14 +90,7 @@ func (mm *MetricsManager) RecordOperationErrorMetrics(
9290
operationName string,
9391
operationErr error,
9492
diskType string) {
95-
var driverName string
96-
if strings.Contains(diskType, "hyperdisk") {
97-
driverName = hyperdiskDriverName
98-
}
99-
if strings.Contains(diskType, "pd") {
100-
driverName = pdcsiDriverName
101-
}
102-
pdcsiOperationErrorsMetric.WithLabelValues(driverName, "/csi.v1.Controller/"+operationName, common.CodeForError(operationErr).String(), diskType).Set(1.0)
93+
pdcsiOperationErrorsMetric.WithLabelValues(pdcsiDriverName, "/csi.v1.Controller/"+operationName, common.CodeForError(operationErr).String(), diskType).Inc()
10394
}
10495

10596
func (mm *MetricsManager) EmitGKEComponentVersion() error {
@@ -155,3 +146,13 @@ func IsGKEComponentVersionAvailable() bool {
155146

156147
return true
157148
}
149+
150+
func GetDiskType(disk *gce.CloudDisk) string {
151+
var diskType string
152+
if disk != nil {
153+
diskType = disk.GetPDType()
154+
} else {
155+
diskType = ""
156+
}
157+
return diskType
158+
}

0 commit comments

Comments
 (0)