Skip to content

Commit b51143f

Browse files
Sneha-atsunnylovestiramisu
authored andcommitted
removed hyperdisk driver override, updated error messages and other pr suggestions
1 parent 01052ac commit b51143f

File tree

3 files changed

+59
-71
lines changed

3 files changed

+59
-71
lines changed

cmd/gce-pd-csi-driver/main.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ func handle() {
9999
if *runControllerService && *httpEndpoint != "" {
100100
mm := metrics.NewMetricsManager()
101101
mm.InitializeHttpHandler(*httpEndpoint, *metricsPath)
102-
mm.RegisterHyperdiskMetric()
102+
mm.RegisterPDCSIMetric()
103103

104104
if metrics.IsGKEComponentVersionAvailable() {
105105
mm.EmitGKEComponentVersion()

pkg/gce-pd-csi-driver/controller.go

Lines changed: 39 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ const (
113113

114114
replicationTypeNone = "none"
115115
replicationTypeRegionalPD = "regional-pd"
116-
116+
diskNotFound = ""
117117
// The maximum number of entries that we can include in the
118118
// ListVolumesResposne
119119
// In reality, the limit here is 4MB (based on gRPC client response limits),
@@ -221,7 +221,7 @@ func (gceCS *GCEControllerServer) CreateVolume(ctx context.Context, req *csi.Cre
221221
existingDisk, err := gceCS.CloudProvider.GetDisk(ctx, gceCS.CloudProvider.GetDefaultProject(), volKey, gceAPIVersion)
222222
if err != nil {
223223
if !gce.IsGCEError(err, "notFound") {
224-
return nil, common.LoggedError("CreateVolume unknown get disk error when validating: ", err)
224+
return nil, common.LoggedError("CreateVolume, failed to getDisk when validating: ", err)
225225
}
226226
}
227227
if err == nil {
@@ -277,7 +277,7 @@ func (gceCS *GCEControllerServer) CreateVolume(ctx context.Context, req *csi.Cre
277277
if gce.IsGCEError(err, "notFound") {
278278
return nil, status.Errorf(codes.NotFound, "CreateVolume source volume %s does not exist", volumeContentSourceVolumeID)
279279
} else {
280-
return nil, common.LoggedError("CreateVolume unknown get disk error when validating: ", err)
280+
return nil, common.LoggedError("CreateVolume, getDisk error when validating: ", err)
281281
}
282282
}
283283

@@ -337,9 +337,7 @@ func (gceCS *GCEControllerServer) CreateVolume(ctx context.Context, req *csi.Cre
337337
disk, err = createSingleZoneDisk(ctx, gceCS.CloudProvider, name, zones, params, capacityRange, capBytes, snapshotID, volumeContentSourceVolumeID, multiWriter)
338338
if err != nil {
339339
// Emit metric for expected disk type from storage class
340-
if params.DiskType != "" {
341-
gceCS.Metrics.RecordOperationErrorMetrics("CreateVolume", err, params.DiskType)
342-
}
340+
defer gceCS.Metrics.RecordOperationErrorMetrics("CreateVolume", err, params.DiskType)
343341
return nil, common.LoggedError("CreateVolume failed to create single zonal disk "+name+": ", err)
344342
}
345343
case replicationTypeRegionalPD:
@@ -349,9 +347,7 @@ func (gceCS *GCEControllerServer) CreateVolume(ctx context.Context, req *csi.Cre
349347
disk, err = createRegionalDisk(ctx, gceCS.CloudProvider, name, zones, params, capacityRange, capBytes, snapshotID, volumeContentSourceVolumeID, multiWriter)
350348
if err != nil {
351349
// Emit metric for expected disk type from storage class
352-
if params.DiskType != "" {
353-
gceCS.Metrics.RecordOperationErrorMetrics("CreateVolume", err, params.DiskType)
354-
}
350+
defer gceCS.Metrics.RecordOperationErrorMetrics("CreateVolume", err, params.DiskType)
355351
return nil, common.LoggedError("CreateVolume failed to create regional disk "+name+": ", err)
356352
}
357353
default:
@@ -361,9 +357,7 @@ func (gceCS *GCEControllerServer) CreateVolume(ctx context.Context, req *csi.Cre
361357
ready, err := isDiskReady(disk)
362358
if err != nil {
363359
// Emit metric for expected disk type from storage class as the disk is not ready and might not have PD type populated
364-
if params.DiskType != "" {
365-
gceCS.Metrics.RecordOperationErrorMetrics("CreateVolume", err, params.DiskType)
366-
}
360+
defer gceCS.Metrics.RecordOperationErrorMetrics("CreateVolume", err, params.DiskType)
367361
return nil, status.Errorf(codes.Internal, "CreateVolume disk %v had error checking ready status: %v", volKey, err.Error())
368362
}
369363
if !ready {
@@ -406,7 +400,7 @@ func (gceCS *GCEControllerServer) DeleteVolume(ctx context.Context, req *csi.Del
406400

407401
err = gceCS.CloudProvider.DeleteDisk(ctx, project, volKey)
408402
if err != nil {
409-
return nil, common.LoggedError("unknown Delete disk error: ", err)
403+
return nil, common.LoggedError("Failed to delete disk: ", err)
410404
}
411405

412406
klog.V(4).Infof("DeleteVolume succeeded for disk %v", volKey)
@@ -506,10 +500,11 @@ func (gceCS *GCEControllerServer) executeControllerPublishVolume(ctx context.Con
506500
defer gceCS.volumeLocks.Release(lockingVolumeID)
507501
diskToPublish, err := gceCS.CloudProvider.GetDisk(ctx, project, volKey, gce.GCEAPIVersionV1)
508502
if err != nil {
503+
defer gceCS.Metrics.RecordOperationErrorMetrics("ControllerPublishVolume", err, diskNotFound)
509504
if gce.IsGCENotFoundError(err) {
510505
return nil, status.Errorf(codes.NotFound, "Could not find disk %v: %v", volKey.String(), err.Error())
511506
}
512-
return nil, status.Errorf(codes.Internal, "Unknown get disk error: %v", err.Error())
507+
return nil, status.Errorf(codes.Internal, "Failed to getDisk: %v", err.Error())
513508
}
514509
instanceZone, instanceName, err := common.NodeIDToZoneAndName(nodeID)
515510
if err != nil {
@@ -520,7 +515,7 @@ func (gceCS *GCEControllerServer) executeControllerPublishVolume(ctx context.Con
520515
if gce.IsGCENotFoundError(err) {
521516
return nil, status.Errorf(codes.NotFound, "Could not find instance %v: %v", nodeID, err.Error())
522517
}
523-
return nil, status.Errorf(codes.Internal, "Unknown get instance error: %v", err.Error())
518+
return nil, status.Errorf(codes.Internal, "Failed to get instance: %v", err.Error())
524519
}
525520

526521
readWrite := "READ_WRITE"
@@ -556,16 +551,17 @@ func (gceCS *GCEControllerServer) executeControllerPublishVolume(ctx context.Con
556551
return nil, status.Errorf(codes.InvalidArgument, "'%s' is not a compatible disk type with the machine type %s, please review the GCP online documentation for available persistent disk options", udErr.DiskType, machineType)
557552
}
558553
// Emit metric for error
559-
gceCS.Metrics.RecordOperationErrorMetrics("ControllerPublishVolume", err, diskToPublish.GetPDType())
560-
return nil, status.Errorf(codes.Internal, "unknown Attach error: %v", err.Error())
554+
defer gceCS.Metrics.RecordOperationErrorMetrics("ControllerPublishVolume", err, metrics.GetDiskType(diskToPublish))
555+
return nil, status.Errorf(codes.Internal, "Failed to Attach: %v", err.Error())
561556
}
562557

563558
err = gceCS.CloudProvider.WaitForAttach(ctx, project, volKey, instanceZone, instanceName)
564559
if err != nil {
565560
// Emit metric for error
566-
gceCS.Metrics.RecordOperationErrorMetrics("ControllerPublishVolume", err, diskToPublish.GetPDType())
567-
return nil, status.Errorf(codes.Internal, "unknown WaitForAttach error: %v", err.Error())
561+
defer gceCS.Metrics.RecordOperationErrorMetrics("ControllerPublishVolume", err, metrics.GetDiskType(diskToPublish))
562+
return nil, status.Errorf(codes.Internal, "Errored during WaitForAttach: %v", err.Error())
568563
}
564+
569565
klog.V(4).Infof("ControllerPublishVolume succeeded for disk %v to instance %v", volKey, nodeID)
570566
return pubVolResp, nil
571567
}
@@ -666,15 +662,13 @@ func (gceCS *GCEControllerServer) executeControllerUnpublishVolume(ctx context.C
666662
}
667663
diskToUnpublish, _ := gceCS.CloudProvider.GetDisk(ctx, gceCS.CloudProvider.GetDefaultProject(), volKey, gce.GCEAPIVersionV1)
668664
if err != nil {
669-
common.LoggedError("Unknown get disk error: ", err)
665+
defer gceCS.Metrics.RecordOperationErrorMetrics("ControllerUnpublishVolume", err, diskNotFound)
666+
common.LoggedError("Failed to getDisk: ", err)
670667
}
671668
err = gceCS.CloudProvider.DetachDisk(ctx, project, deviceName, instanceZone, instanceName)
672669
if err != nil {
673-
//Do not emit metric if disk is unknown
674-
if diskToUnpublish != nil {
675-
gceCS.Metrics.RecordOperationErrorMetrics("ControllerUnpublishVolume", err, diskToUnpublish.GetPDType())
676-
}
677-
return nil, common.LoggedError("unknown detach error: ", err)
670+
defer gceCS.Metrics.RecordOperationErrorMetrics("ControllerUnpublishVolume", err, metrics.GetDiskType(diskToUnpublish))
671+
return nil, common.LoggedError("Failed to detach: ", err)
678672
}
679673

680674
klog.V(4).Infof("ControllerUnpublishVolume succeeded for disk %v from node %v", volKey, nodeID)
@@ -712,7 +706,7 @@ func (gceCS *GCEControllerServer) ValidateVolumeCapabilities(ctx context.Context
712706
if gce.IsGCENotFoundError(err) {
713707
return nil, status.Errorf(codes.NotFound, "Could not find disk %v: %v", volKey.Name, err.Error())
714708
}
715-
return nil, common.LoggedError("Unknown get disk error: ", err)
709+
return nil, common.LoggedError("Failed to getDisk: ", err)
716710
}
717711

718712
// Check Volume Context is Empty
@@ -775,7 +769,7 @@ func (gceCS *GCEControllerServer) ListVolumes(ctx context.Context, req *csi.List
775769
if gce.IsGCEInvalidError(err) {
776770
return nil, status.Errorf(codes.Aborted, "ListVolumes error with invalid request: %v", err.Error())
777771
}
778-
return nil, common.LoggedError("Unknown list disk error: ", err)
772+
return nil, common.LoggedError("Failed to list disk: ", err)
779773
}
780774
gceCS.disks = diskList
781775
gceCS.seen = map[string]int{}
@@ -858,7 +852,7 @@ func (gceCS *GCEControllerServer) CreateSnapshot(ctx context.Context, req *csi.C
858852
if gce.IsGCENotFoundError(err) {
859853
return nil, status.Errorf(codes.NotFound, "CreateSnapshot could not find disk %v: %v", volKey.String(), err.Error())
860854
}
861-
return nil, common.LoggedError("CreateSnapshot unknown get disk error: ", err)
855+
return nil, common.LoggedError("CreateSnapshot, failed to getDisk: ", err)
862856
}
863857

864858
snapshotParams, err := common.ExtractAndDefaultSnapshotParameters(req.GetParameters(), gceCS.Driver.name)
@@ -893,35 +887,30 @@ func (gceCS *GCEControllerServer) createPDSnapshot(ctx context.Context, project
893887
}
894888
sourceDisk, err := gceCS.CloudProvider.GetDisk(ctx, gceCS.CloudProvider.GetDefaultProject(), volKey, gce.GCEAPIVersionV1)
895889
if err != nil {
896-
common.LoggedError("Unknown get disk error: ", err)
890+
common.LoggedError("Failed to getDisk: ", err)
897891
}
898892
// Check if PD snapshot already exists
899893
var snapshot *compute.Snapshot
900894
snapshot, err = gceCS.CloudProvider.GetSnapshot(ctx, project, snapshotName)
901895
if err != nil {
902896
if !gce.IsGCEError(err, "notFound") {
903-
return nil, status.Errorf(codes.Internal, "Unknown get snapshot error: %v", err.Error())
897+
return nil, status.Errorf(codes.Internal, "Failed to get snapshot: %v", err.Error())
904898
}
905899
// If we could not find the snapshot, we create a new one
906900
snapshot, err = gceCS.CloudProvider.CreateSnapshot(ctx, project, volKey, snapshotName, snapshotParams)
907901
if err != nil {
908902
if gce.IsGCEError(err, "notFound") {
903+
defer gceCS.Metrics.RecordOperationErrorMetrics("CreateSnapshot", err, diskNotFound)
909904
return nil, status.Errorf(codes.NotFound, "Could not find volume with ID %v: %v", volKey.String(), err.Error())
910905
}
911-
//Do not emit metric if disk is unknown
912-
if sourceDisk != nil {
913-
gceCS.Metrics.RecordOperationErrorMetrics("CreateSnapshot", err, sourceDisk.GetPDType())
914-
}
915-
return nil, common.LoggedError("Unknown create snapshot error: ", err)
906+
defer gceCS.Metrics.RecordOperationErrorMetrics("CreateSnapshot", err, metrics.GetDiskType(sourceDisk))
907+
return nil, common.LoggedError("Failed to create snapshot: ", err)
916908
}
917909
}
918910

919911
err = gceCS.validateExistingSnapshot(snapshot, volKey)
920912
if err != nil {
921-
//Do not emit metric if disk is unknown
922-
if sourceDisk != nil {
923-
gceCS.Metrics.RecordOperationErrorMetrics("CreateSnapshot", err, sourceDisk.GetPDType())
924-
}
913+
defer gceCS.Metrics.RecordOperationErrorMetrics("CreateSnapshot", err, metrics.GetDiskType(sourceDisk))
925914
return nil, status.Errorf(codes.AlreadyExists, "Error in creating snapshot: %v", err.Error())
926915
}
927916

@@ -955,15 +944,15 @@ func (gceCS *GCEControllerServer) createImage(ctx context.Context, project strin
955944
image, err = gceCS.CloudProvider.GetImage(ctx, project, imageName)
956945
if err != nil {
957946
if !gce.IsGCEError(err, "notFound") {
958-
return nil, common.LoggedError("Unknown get image error: ", err)
947+
return nil, common.LoggedError("Failed to get image: ", err)
959948
}
960949
// create a new image
961950
image, err = gceCS.CloudProvider.CreateImage(ctx, project, volKey, imageName, snapshotParams)
962951
if err != nil {
963952
if gce.IsGCEError(err, "notFound") {
964953
return nil, status.Errorf(codes.NotFound, "Could not find volume with ID %v: %v", volKey.String(), err.Error())
965954
}
966-
return nil, common.LoggedError("Unknown create image error: ", err)
955+
return nil, common.LoggedError("Failed to create image: ", err)
967956
}
968957
}
969958

@@ -1093,12 +1082,12 @@ func (gceCS *GCEControllerServer) DeleteSnapshot(ctx context.Context, req *csi.D
10931082
case common.DiskSnapshotType:
10941083
err = gceCS.CloudProvider.DeleteSnapshot(ctx, project, key)
10951084
if err != nil {
1096-
return nil, common.LoggedError("unknown Delete snapshot error: ", err)
1085+
return nil, common.LoggedError("Failed to DeleteSnapshot: ", err)
10971086
}
10981087
case common.DiskImageType:
10991088
err = gceCS.CloudProvider.DeleteImage(ctx, project, key)
11001089
if err != nil {
1101-
return nil, common.LoggedError("unknown Delete image error: ", err)
1090+
return nil, common.LoggedError("Failed to DeleteImage error: ", err)
11021091
}
11031092
default:
11041093
return nil, status.Errorf(codes.InvalidArgument, "unknown snapshot type %s", snapshotType)
@@ -1130,7 +1119,7 @@ func (gceCS *GCEControllerServer) ListSnapshots(ctx context.Context, req *csi.Li
11301119
if gce.IsGCEInvalidError(err) {
11311120
return nil, status.Errorf(codes.Aborted, "ListSnapshots error with invalid request: %v", err.Error())
11321121
}
1133-
return nil, common.LoggedError("Unknown list snapshots error: ", err)
1122+
return nil, common.LoggedError("Failed to list snapshots: ", err)
11341123
}
11351124
gceCS.snapshots = snapshotList
11361125
gceCS.snapshotTokens = map[string]int{}
@@ -1176,21 +1165,19 @@ func (gceCS *GCEControllerServer) ControllerExpandVolume(ctx context.Context, re
11761165

11771166
project, volKey, err = gceCS.CloudProvider.RepairUnderspecifiedVolumeKey(ctx, project, volKey)
11781167
if err != nil {
1168+
defer gceCS.Metrics.RecordOperationErrorMetrics("ControllerExpandVolume", err, diskNotFound)
11791169
if gce.IsGCENotFoundError(err) {
11801170
return nil, status.Errorf(codes.NotFound, "ControllerExpandVolume could not find volume with ID %v: %v", volumeID, err.Error())
11811171
}
11821172
return nil, common.LoggedError("ControllerExpandVolume error repairing underspecified volume key: ", err)
11831173
}
11841174
sourceDisk, err := gceCS.CloudProvider.GetDisk(ctx, project, volKey, gce.GCEAPIVersionV1)
11851175
if err != nil {
1186-
common.LoggedError("Unknown get disk error: ", err)
1176+
common.LoggedError("Failed to getDisk: ", err)
11871177
}
11881178
resizedGb, err := gceCS.CloudProvider.ResizeDisk(ctx, project, volKey, reqBytes)
11891179
if err != nil {
1190-
//Do not emit metric if disk is unknown
1191-
if sourceDisk != nil {
1192-
gceCS.Metrics.RecordOperationErrorMetrics("ControllerExpandVolume", err, sourceDisk.GetPDType())
1193-
}
1180+
defer gceCS.Metrics.RecordOperationErrorMetrics("ControllerExpandVolume", err, metrics.GetDiskType(sourceDisk))
11941181
return nil, common.LoggedError("ControllerExpandVolume failed to resize disk: ", err)
11951182
}
11961183

@@ -1214,15 +1201,15 @@ func (gceCS *GCEControllerServer) getSnapshots(ctx context.Context, req *csi.Lis
12141201
if gce.IsGCEError(err, "invalid") {
12151202
return nil, status.Errorf(codes.Aborted, "Invalid error: %v", err.Error())
12161203
}
1217-
return nil, common.LoggedError("Unknown list snapshot error: ", err)
1204+
return nil, common.LoggedError("Failed to list snapshot: ", err)
12181205
}
12191206

12201207
images, _, err = gceCS.CloudProvider.ListImages(ctx, filter)
12211208
if err != nil {
12221209
if gce.IsGCEError(err, "invalid") {
12231210
return nil, status.Errorf(codes.Aborted, "Invalid error: %v", err.Error())
12241211
}
1225-
return nil, common.LoggedError("Unknown list image error: ", err)
1212+
return nil, common.LoggedError("Failed to list image: ", err)
12261213
}
12271214

12281215
entries := []*csi.ListSnapshotsResponse_Entry{}
@@ -1263,7 +1250,7 @@ func (gceCS *GCEControllerServer) getSnapshotByID(ctx context.Context, snapshotI
12631250
// return empty list if no snapshot is found
12641251
return &csi.ListSnapshotsResponse{}, nil
12651252
}
1266-
return nil, common.LoggedError("Unknown list snapshot error: ", err)
1253+
return nil, common.LoggedError("Failed to list snapshot: ", err)
12671254
}
12681255
e, err := generateDiskSnapshotEntry(snapshot)
12691256
if err != nil {

pkg/metrics/metrics.go

Lines changed: 19 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -20,19 +20,18 @@ import (
2020
"fmt"
2121
"net/http"
2222
"os"
23-
"strings"
2423

2524
"k8s.io/component-base/metrics"
2625
"k8s.io/klog/v2"
2726
"sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/pkg/common"
27+
gce "sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/pkg/gce-cloud-provider/compute"
2828
)
2929

3030
const (
3131
// envGKEPDCSIVersion is an environment variable set in the PDCSI controller manifest
3232
// with the current version of the GKE component.
33-
envGKEPDCSIVersion = "GKE_PDCSI_VERSION"
34-
hyperdiskDriverName = "hyperdisk.csi.storage.gke.io"
35-
pdcsiDriverName = "pd.csi.storage.gke.io"
33+
envGKEPDCSIVersion = "GKE_PDCSI_VERSION"
34+
pdcsiDriverName = "pd.csi.storage.gke.io"
3635
)
3736

3837
var (
@@ -42,15 +41,14 @@ var (
4241
Help: "Metric to expose the version of the PDCSI GKE component.",
4342
}, []string{"component_version"})
4443

45-
pdcsiOperationErrorsMetric = metrics.NewGaugeVec(
46-
&metrics.GaugeOpts{
44+
pdcsiOperationErrorsMetric = metrics.NewCounterVec(
45+
&metrics.CounterOpts{
4746
Subsystem: "csidriver",
48-
Name: "pdcsi_operation_errors",
47+
Name: "operation_errors",
4948
Help: "CSI server side error metrics",
5049
StabilityLevel: metrics.ALPHA,
5150
},
52-
[]string{"driver_name", "method_name", "grpc_status_code", "disk_type"},
53-
)
51+
[]string{"driver_name", "method_name", "grpc_status_code", "disk_type"})
5452
)
5553

5654
type MetricsManager struct {
@@ -72,7 +70,7 @@ func (mm *MetricsManager) registerComponentVersionMetric() {
7270
mm.registry.MustRegister(gkeComponentVersion)
7371
}
7472

75-
func (mm *MetricsManager) RegisterHyperdiskMetric() {
73+
func (mm *MetricsManager) RegisterPDCSIMetric() {
7674
mm.registry.MustRegister(pdcsiOperationErrorsMetric)
7775
}
7876

@@ -92,14 +90,7 @@ func (mm *MetricsManager) RecordOperationErrorMetrics(
9290
operationName string,
9391
operationErr error,
9492
diskType string) {
95-
var driverName string
96-
if strings.Contains(diskType, "hyperdisk") {
97-
driverName = hyperdiskDriverName
98-
}
99-
if strings.Contains(diskType, "pd") {
100-
driverName = pdcsiDriverName
101-
}
102-
pdcsiOperationErrorsMetric.WithLabelValues(driverName, "/csi.v1.Controller/"+operationName, common.CodeForError(operationErr).String(), diskType).Set(1.0)
93+
pdcsiOperationErrorsMetric.WithLabelValues(pdcsiDriverName, "/csi.v1.Controller/"+operationName, common.CodeForError(operationErr).String(), diskType).Inc()
10394
}
10495

10596
func (mm *MetricsManager) EmitGKEComponentVersion() error {
@@ -155,3 +146,13 @@ func IsGKEComponentVersionAvailable() bool {
155146

156147
return true
157148
}
149+
150+
func GetDiskType(disk *gce.CloudDisk) string {
151+
var diskType string
152+
if disk != nil {
153+
diskType = disk.GetPDType()
154+
} else {
155+
diskType = ""
156+
}
157+
return diskType
158+
}

0 commit comments

Comments
 (0)