Skip to content

Commit db70a82

Browse files
Sneha-atsunnylovestiramisu
authored andcommitted
removed hyperdisk driver override, updated error messages and other pr suggestions
1 parent f19d757 commit db70a82

File tree

3 files changed

+59
-71
lines changed

3 files changed

+59
-71
lines changed

cmd/gce-pd-csi-driver/main.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ func handle() {
9797
if *runControllerService && *httpEndpoint != "" {
9898
mm := metrics.NewMetricsManager()
9999
mm.InitializeHttpHandler(*httpEndpoint, *metricsPath)
100-
mm.RegisterHyperdiskMetric()
100+
mm.RegisterPDCSIMetric()
101101

102102
if metrics.IsGKEComponentVersionAvailable() {
103103
mm.EmitGKEComponentVersion()

pkg/gce-pd-csi-driver/controller.go

Lines changed: 39 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ const (
122122

123123
replicationTypeNone = "none"
124124
replicationTypeRegionalPD = "regional-pd"
125-
125+
diskNotFound = ""
126126
// The maximum number of entries that we can include in the
127127
// ListVolumesResposne
128128
// In reality, the limit here is 4MB (based on gRPC client response limits),
@@ -277,7 +277,7 @@ func (gceCS *GCEControllerServer) CreateVolume(ctx context.Context, req *csi.Cre
277277
existingDisk, err := gceCS.CloudProvider.GetDisk(ctx, gceCS.CloudProvider.GetDefaultProject(), volKey, gceAPIVersion)
278278
if err != nil {
279279
if !gce.IsGCEError(err, "notFound") {
280-
return nil, common.LoggedError("CreateVolume unknown get disk error when validating: ", err)
280+
return nil, common.LoggedError("CreateVolume, failed to getDisk when validating: ", err)
281281
}
282282
}
283283
if err == nil {
@@ -333,7 +333,7 @@ func (gceCS *GCEControllerServer) CreateVolume(ctx context.Context, req *csi.Cre
333333
if gce.IsGCEError(err, "notFound") {
334334
return nil, status.Errorf(codes.NotFound, "CreateVolume source volume %s does not exist", volumeContentSourceVolumeID)
335335
} else {
336-
return nil, common.LoggedError("CreateVolume unknown get disk error when validating: ", err)
336+
return nil, common.LoggedError("CreateVolume, getDisk error when validating: ", err)
337337
}
338338
}
339339

@@ -393,9 +393,7 @@ func (gceCS *GCEControllerServer) CreateVolume(ctx context.Context, req *csi.Cre
393393
disk, err = createSingleZoneDisk(ctx, gceCS.CloudProvider, name, zones, params, capacityRange, capBytes, snapshotID, volumeContentSourceVolumeID, multiWriter)
394394
if err != nil {
395395
// Emit metric for expected disk type from storage class
396-
if params.DiskType != "" {
397-
gceCS.Metrics.RecordOperationErrorMetrics("CreateVolume", err, params.DiskType)
398-
}
396+
defer gceCS.Metrics.RecordOperationErrorMetrics("CreateVolume", err, params.DiskType)
399397
return nil, common.LoggedError("CreateVolume failed to create single zonal disk "+name+": ", err)
400398
}
401399
case replicationTypeRegionalPD:
@@ -405,9 +403,7 @@ func (gceCS *GCEControllerServer) CreateVolume(ctx context.Context, req *csi.Cre
405403
disk, err = createRegionalDisk(ctx, gceCS.CloudProvider, name, zones, params, capacityRange, capBytes, snapshotID, volumeContentSourceVolumeID, multiWriter)
406404
if err != nil {
407405
// Emit metric for expected disk type from storage class
408-
if params.DiskType != "" {
409-
gceCS.Metrics.RecordOperationErrorMetrics("CreateVolume", err, params.DiskType)
410-
}
406+
defer gceCS.Metrics.RecordOperationErrorMetrics("CreateVolume", err, params.DiskType)
411407
return nil, common.LoggedError("CreateVolume failed to create regional disk "+name+": ", err)
412408
}
413409
default:
@@ -417,9 +413,7 @@ func (gceCS *GCEControllerServer) CreateVolume(ctx context.Context, req *csi.Cre
417413
ready, err := isDiskReady(disk)
418414
if err != nil {
419415
// Emit metric for expected disk type from storage class as the disk is not ready and might not have PD type populated
420-
if params.DiskType != "" {
421-
gceCS.Metrics.RecordOperationErrorMetrics("CreateVolume", err, params.DiskType)
422-
}
416+
defer gceCS.Metrics.RecordOperationErrorMetrics("CreateVolume", err, params.DiskType)
423417
return nil, status.Errorf(codes.Internal, "CreateVolume disk %v had error checking ready status: %v", volKey, err.Error())
424418
}
425419
if !ready {
@@ -462,7 +456,7 @@ func (gceCS *GCEControllerServer) DeleteVolume(ctx context.Context, req *csi.Del
462456

463457
err = gceCS.CloudProvider.DeleteDisk(ctx, project, volKey)
464458
if err != nil {
465-
return nil, common.LoggedError("unknown Delete disk error: ", err)
459+
return nil, common.LoggedError("Failed to delete disk: ", err)
466460
}
467461

468462
klog.V(4).Infof("DeleteVolume succeeded for disk %v", volKey)
@@ -562,10 +556,11 @@ func (gceCS *GCEControllerServer) executeControllerPublishVolume(ctx context.Con
562556
defer gceCS.volumeLocks.Release(lockingVolumeID)
563557
diskToPublish, err := gceCS.CloudProvider.GetDisk(ctx, project, volKey, gce.GCEAPIVersionV1)
564558
if err != nil {
559+
defer gceCS.Metrics.RecordOperationErrorMetrics("ControllerPublishVolume", err, diskNotFound)
565560
if gce.IsGCENotFoundError(err) {
566561
return nil, status.Errorf(codes.NotFound, "Could not find disk %v: %v", volKey.String(), err.Error())
567562
}
568-
return nil, status.Errorf(codes.Internal, "Unknown get disk error: %v", err.Error())
563+
return nil, status.Errorf(codes.Internal, "Failed to getDisk: %v", err.Error())
569564
}
570565
instanceZone, instanceName, err := common.NodeIDToZoneAndName(nodeID)
571566
if err != nil {
@@ -576,7 +571,7 @@ func (gceCS *GCEControllerServer) executeControllerPublishVolume(ctx context.Con
576571
if gce.IsGCENotFoundError(err) {
577572
return nil, status.Errorf(codes.NotFound, "Could not find instance %v: %v", nodeID, err.Error())
578573
}
579-
return nil, status.Errorf(codes.Internal, "Unknown get instance error: %v", err.Error())
574+
return nil, status.Errorf(codes.Internal, "Failed to get instance: %v", err.Error())
580575
}
581576

582577
readWrite := "READ_WRITE"
@@ -612,16 +607,17 @@ func (gceCS *GCEControllerServer) executeControllerPublishVolume(ctx context.Con
612607
return nil, status.Errorf(codes.InvalidArgument, "'%s' is not a compatible disk type with the machine type %s, please review the GCP online documentation for available persistent disk options", udErr.DiskType, machineType)
613608
}
614609
// Emit metric for error
615-
gceCS.Metrics.RecordOperationErrorMetrics("ControllerPublishVolume", err, diskToPublish.GetPDType())
616-
return nil, status.Errorf(codes.Internal, "unknown Attach error: %v", err.Error())
610+
defer gceCS.Metrics.RecordOperationErrorMetrics("ControllerPublishVolume", err, metrics.GetDiskType(diskToPublish))
611+
return nil, status.Errorf(codes.Internal, "Failed to Attach: %v", err.Error())
617612
}
618613

619614
err = gceCS.CloudProvider.WaitForAttach(ctx, project, volKey, instanceZone, instanceName)
620615
if err != nil {
621616
// Emit metric for error
622-
gceCS.Metrics.RecordOperationErrorMetrics("ControllerPublishVolume", err, diskToPublish.GetPDType())
623-
return nil, status.Errorf(codes.Internal, "unknown WaitForAttach error: %v", err.Error())
617+
defer gceCS.Metrics.RecordOperationErrorMetrics("ControllerPublishVolume", err, metrics.GetDiskType(diskToPublish))
618+
return nil, status.Errorf(codes.Internal, "Errored during WaitForAttach: %v", err.Error())
624619
}
620+
625621
klog.V(4).Infof("ControllerPublishVolume succeeded for disk %v to instance %v", volKey, nodeID)
626622
return pubVolResp, nil
627623
}
@@ -721,15 +717,13 @@ func (gceCS *GCEControllerServer) executeControllerUnpublishVolume(ctx context.C
721717
}
722718
diskToUnpublish, _ := gceCS.CloudProvider.GetDisk(ctx, gceCS.CloudProvider.GetDefaultProject(), volKey, gce.GCEAPIVersionV1)
723719
if err != nil {
724-
common.LoggedError("Unknown get disk error: ", err)
720+
defer gceCS.Metrics.RecordOperationErrorMetrics("ControllerUnpublishVolume", err, diskNotFound)
721+
common.LoggedError("Failed to getDisk: ", err)
725722
}
726723
err = gceCS.CloudProvider.DetachDisk(ctx, project, deviceName, instanceZone, instanceName)
727724
if err != nil {
728-
//Do not emit metric if disk is unknown
729-
if diskToUnpublish != nil {
730-
gceCS.Metrics.RecordOperationErrorMetrics("ControllerUnpublishVolume", err, diskToUnpublish.GetPDType())
731-
}
732-
return nil, common.LoggedError("unknown detach error: ", err)
725+
defer gceCS.Metrics.RecordOperationErrorMetrics("ControllerUnpublishVolume", err, metrics.GetDiskType(diskToUnpublish))
726+
return nil, common.LoggedError("Failed to detach: ", err)
733727
}
734728

735729
klog.V(4).Infof("ControllerUnpublishVolume succeeded for disk %v from node %v", volKey, nodeID)
@@ -767,7 +761,7 @@ func (gceCS *GCEControllerServer) ValidateVolumeCapabilities(ctx context.Context
767761
if gce.IsGCENotFoundError(err) {
768762
return nil, status.Errorf(codes.NotFound, "Could not find disk %v: %v", volKey.Name, err.Error())
769763
}
770-
return nil, common.LoggedError("Unknown get disk error: ", err)
764+
return nil, common.LoggedError("Failed to getDisk: ", err)
771765
}
772766

773767
// Check Volume Context is Empty
@@ -830,7 +824,7 @@ func (gceCS *GCEControllerServer) ListVolumes(ctx context.Context, req *csi.List
830824
if gce.IsGCEInvalidError(err) {
831825
return nil, status.Errorf(codes.Aborted, "ListVolumes error with invalid request: %v", err.Error())
832826
}
833-
return nil, common.LoggedError("Unknown list disk error: ", err)
827+
return nil, common.LoggedError("Failed to list disk: ", err)
834828
}
835829
gceCS.disks = diskList
836830
gceCS.seen = map[string]int{}
@@ -913,7 +907,7 @@ func (gceCS *GCEControllerServer) CreateSnapshot(ctx context.Context, req *csi.C
913907
if gce.IsGCENotFoundError(err) {
914908
return nil, status.Errorf(codes.NotFound, "CreateSnapshot could not find disk %v: %v", volKey.String(), err.Error())
915909
}
916-
return nil, common.LoggedError("CreateSnapshot unknown get disk error: ", err)
910+
return nil, common.LoggedError("CreateSnapshot, failed to getDisk: ", err)
917911
}
918912

919913
snapshotParams, err := common.ExtractAndDefaultSnapshotParameters(req.GetParameters(), gceCS.Driver.name)
@@ -948,35 +942,30 @@ func (gceCS *GCEControllerServer) createPDSnapshot(ctx context.Context, project
948942
}
949943
sourceDisk, err := gceCS.CloudProvider.GetDisk(ctx, gceCS.CloudProvider.GetDefaultProject(), volKey, gce.GCEAPIVersionV1)
950944
if err != nil {
951-
common.LoggedError("Unknown get disk error: ", err)
945+
common.LoggedError("Failed to getDisk: ", err)
952946
}
953947
// Check if PD snapshot already exists
954948
var snapshot *compute.Snapshot
955949
snapshot, err = gceCS.CloudProvider.GetSnapshot(ctx, project, snapshotName)
956950
if err != nil {
957951
if !gce.IsGCEError(err, "notFound") {
958-
return nil, status.Errorf(codes.Internal, "Unknown get snapshot error: %v", err.Error())
952+
return nil, status.Errorf(codes.Internal, "Failed to get snapshot: %v", err.Error())
959953
}
960954
// If we could not find the snapshot, we create a new one
961955
snapshot, err = gceCS.CloudProvider.CreateSnapshot(ctx, project, volKey, snapshotName, snapshotParams)
962956
if err != nil {
963957
if gce.IsGCEError(err, "notFound") {
958+
defer gceCS.Metrics.RecordOperationErrorMetrics("CreateSnapshot", err, diskNotFound)
964959
return nil, status.Errorf(codes.NotFound, "Could not find volume with ID %v: %v", volKey.String(), err.Error())
965960
}
966-
//Do not emit metric if disk is unknown
967-
if sourceDisk != nil {
968-
gceCS.Metrics.RecordOperationErrorMetrics("CreateSnapshot", err, sourceDisk.GetPDType())
969-
}
970-
return nil, common.LoggedError("Unknown create snapshot error: ", err)
961+
defer gceCS.Metrics.RecordOperationErrorMetrics("CreateSnapshot", err, metrics.GetDiskType(sourceDisk))
962+
return nil, common.LoggedError("Failed to create snapshot: ", err)
971963
}
972964
}
973965

974966
err = gceCS.validateExistingSnapshot(snapshot, volKey)
975967
if err != nil {
976-
//Do not emit metric if disk is unknown
977-
if sourceDisk != nil {
978-
gceCS.Metrics.RecordOperationErrorMetrics("CreateSnapshot", err, sourceDisk.GetPDType())
979-
}
968+
defer gceCS.Metrics.RecordOperationErrorMetrics("CreateSnapshot", err, metrics.GetDiskType(sourceDisk))
980969
return nil, status.Errorf(codes.AlreadyExists, "Error in creating snapshot: %v", err.Error())
981970
}
982971

@@ -1010,15 +999,15 @@ func (gceCS *GCEControllerServer) createImage(ctx context.Context, project strin
1010999
image, err = gceCS.CloudProvider.GetImage(ctx, project, imageName)
10111000
if err != nil {
10121001
if !gce.IsGCEError(err, "notFound") {
1013-
return nil, common.LoggedError("Unknown get image error: ", err)
1002+
return nil, common.LoggedError("Failed to get image: ", err)
10141003
}
10151004
// create a new image
10161005
image, err = gceCS.CloudProvider.CreateImage(ctx, project, volKey, imageName, snapshotParams)
10171006
if err != nil {
10181007
if gce.IsGCEError(err, "notFound") {
10191008
return nil, status.Errorf(codes.NotFound, "Could not find volume with ID %v: %v", volKey.String(), err.Error())
10201009
}
1021-
return nil, common.LoggedError("Unknown create image error: ", err)
1010+
return nil, common.LoggedError("Failed to create image: ", err)
10221011
}
10231012
}
10241013

@@ -1148,12 +1137,12 @@ func (gceCS *GCEControllerServer) DeleteSnapshot(ctx context.Context, req *csi.D
11481137
case common.DiskSnapshotType:
11491138
err = gceCS.CloudProvider.DeleteSnapshot(ctx, project, key)
11501139
if err != nil {
1151-
return nil, common.LoggedError("unknown Delete snapshot error: ", err)
1140+
return nil, common.LoggedError("Failed to DeleteSnapshot: ", err)
11521141
}
11531142
case common.DiskImageType:
11541143
err = gceCS.CloudProvider.DeleteImage(ctx, project, key)
11551144
if err != nil {
1156-
return nil, common.LoggedError("unknown Delete image error: ", err)
1145+
return nil, common.LoggedError("Failed to DeleteImage error: ", err)
11571146
}
11581147
default:
11591148
return nil, status.Errorf(codes.InvalidArgument, "unknown snapshot type %s", snapshotType)
@@ -1185,7 +1174,7 @@ func (gceCS *GCEControllerServer) ListSnapshots(ctx context.Context, req *csi.Li
11851174
if gce.IsGCEInvalidError(err) {
11861175
return nil, status.Errorf(codes.Aborted, "ListSnapshots error with invalid request: %v", err.Error())
11871176
}
1188-
return nil, common.LoggedError("Unknown list snapshots error: ", err)
1177+
return nil, common.LoggedError("Failed to list snapshots: ", err)
11891178
}
11901179
gceCS.snapshots = snapshotList
11911180
gceCS.snapshotTokens = map[string]int{}
@@ -1231,21 +1220,19 @@ func (gceCS *GCEControllerServer) ControllerExpandVolume(ctx context.Context, re
12311220

12321221
project, volKey, err = gceCS.CloudProvider.RepairUnderspecifiedVolumeKey(ctx, project, volKey)
12331222
if err != nil {
1223+
defer gceCS.Metrics.RecordOperationErrorMetrics("ControllerExpandVolume", err, diskNotFound)
12341224
if gce.IsGCENotFoundError(err) {
12351225
return nil, status.Errorf(codes.NotFound, "ControllerExpandVolume could not find volume with ID %v: %v", volumeID, err.Error())
12361226
}
12371227
return nil, common.LoggedError("ControllerExpandVolume error repairing underspecified volume key: ", err)
12381228
}
12391229
sourceDisk, err := gceCS.CloudProvider.GetDisk(ctx, project, volKey, gce.GCEAPIVersionV1)
12401230
if err != nil {
1241-
common.LoggedError("Unknown get disk error: ", err)
1231+
common.LoggedError("Failed to getDisk: ", err)
12421232
}
12431233
resizedGb, err := gceCS.CloudProvider.ResizeDisk(ctx, project, volKey, reqBytes)
12441234
if err != nil {
1245-
//Do not emit metric if disk is unknown
1246-
if sourceDisk != nil {
1247-
gceCS.Metrics.RecordOperationErrorMetrics("ControllerExpandVolume", err, sourceDisk.GetPDType())
1248-
}
1235+
defer gceCS.Metrics.RecordOperationErrorMetrics("ControllerExpandVolume", err, metrics.GetDiskType(sourceDisk))
12491236
return nil, common.LoggedError("ControllerExpandVolume failed to resize disk: ", err)
12501237
}
12511238

@@ -1269,15 +1256,15 @@ func (gceCS *GCEControllerServer) getSnapshots(ctx context.Context, req *csi.Lis
12691256
if gce.IsGCEError(err, "invalid") {
12701257
return nil, status.Errorf(codes.Aborted, "Invalid error: %v", err.Error())
12711258
}
1272-
return nil, common.LoggedError("Unknown list snapshot error: ", err)
1259+
return nil, common.LoggedError("Failed to list snapshot: ", err)
12731260
}
12741261

12751262
images, _, err = gceCS.CloudProvider.ListImages(ctx, filter)
12761263
if err != nil {
12771264
if gce.IsGCEError(err, "invalid") {
12781265
return nil, status.Errorf(codes.Aborted, "Invalid error: %v", err.Error())
12791266
}
1280-
return nil, common.LoggedError("Unknown list image error: ", err)
1267+
return nil, common.LoggedError("Failed to list image: ", err)
12811268
}
12821269

12831270
entries := []*csi.ListSnapshotsResponse_Entry{}
@@ -1318,7 +1305,7 @@ func (gceCS *GCEControllerServer) getSnapshotByID(ctx context.Context, snapshotI
13181305
// return empty list if no snapshot is found
13191306
return &csi.ListSnapshotsResponse{}, nil
13201307
}
1321-
return nil, common.LoggedError("Unknown list snapshot error: ", err)
1308+
return nil, common.LoggedError("Failed to list snapshot: ", err)
13221309
}
13231310
e, err := generateDiskSnapshotEntry(snapshot)
13241311
if err != nil {

pkg/metrics/metrics.go

Lines changed: 19 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -20,19 +20,18 @@ import (
2020
"fmt"
2121
"net/http"
2222
"os"
23-
"strings"
2423

2524
"k8s.io/component-base/metrics"
2625
"k8s.io/klog/v2"
2726
"sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/pkg/common"
27+
gce "sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/pkg/gce-cloud-provider/compute"
2828
)
2929

3030
const (
3131
// envGKEPDCSIVersion is an environment variable set in the PDCSI controller manifest
3232
// with the current version of the GKE component.
33-
envGKEPDCSIVersion = "GKE_PDCSI_VERSION"
34-
hyperdiskDriverName = "hyperdisk.csi.storage.gke.io"
35-
pdcsiDriverName = "pd.csi.storage.gke.io"
33+
envGKEPDCSIVersion = "GKE_PDCSI_VERSION"
34+
pdcsiDriverName = "pd.csi.storage.gke.io"
3635
)
3736

3837
var (
@@ -42,15 +41,14 @@ var (
4241
Help: "Metric to expose the version of the PDCSI GKE component.",
4342
}, []string{"component_version"})
4443

45-
pdcsiOperationErrorsMetric = metrics.NewGaugeVec(
46-
&metrics.GaugeOpts{
44+
pdcsiOperationErrorsMetric = metrics.NewCounterVec(
45+
&metrics.CounterOpts{
4746
Subsystem: "csidriver",
48-
Name: "pdcsi_operation_errors",
47+
Name: "operation_errors",
4948
Help: "CSI server side error metrics",
5049
StabilityLevel: metrics.ALPHA,
5150
},
52-
[]string{"driver_name", "method_name", "grpc_status_code", "disk_type"},
53-
)
51+
[]string{"driver_name", "method_name", "grpc_status_code", "disk_type"})
5452
)
5553

5654
type MetricsManager struct {
@@ -72,7 +70,7 @@ func (mm *MetricsManager) registerComponentVersionMetric() {
7270
mm.registry.MustRegister(gkeComponentVersion)
7371
}
7472

75-
func (mm *MetricsManager) RegisterHyperdiskMetric() {
73+
func (mm *MetricsManager) RegisterPDCSIMetric() {
7674
mm.registry.MustRegister(pdcsiOperationErrorsMetric)
7775
}
7876

@@ -92,14 +90,7 @@ func (mm *MetricsManager) RecordOperationErrorMetrics(
9290
operationName string,
9391
operationErr error,
9492
diskType string) {
95-
var driverName string
96-
if strings.Contains(diskType, "hyperdisk") {
97-
driverName = hyperdiskDriverName
98-
}
99-
if strings.Contains(diskType, "pd") {
100-
driverName = pdcsiDriverName
101-
}
102-
pdcsiOperationErrorsMetric.WithLabelValues(driverName, "/csi.v1.Controller/"+operationName, common.CodeForError(operationErr).String(), diskType).Set(1.0)
93+
pdcsiOperationErrorsMetric.WithLabelValues(pdcsiDriverName, "/csi.v1.Controller/"+operationName, common.CodeForError(operationErr).String(), diskType).Inc()
10394
}
10495

10596
func (mm *MetricsManager) EmitGKEComponentVersion() error {
@@ -155,3 +146,13 @@ func IsGKEComponentVersionAvailable() bool {
155146

156147
return true
157148
}
149+
150+
func GetDiskType(disk *gce.CloudDisk) string {
151+
var diskType string
152+
if disk != nil {
153+
diskType = disk.GetPDType()
154+
} else {
155+
diskType = ""
156+
}
157+
return diskType
158+
}

0 commit comments

Comments
 (0)