Skip to content

Commit f19d757

Browse files
Sneha-at authored and sunnylovestiramisu committed
Adding new metric pdcsi_operation_errors to fetch error count
1 parent 9870f33 commit f19d757

File tree

7 files changed

+389
-189
lines changed

7 files changed

+389
-189
lines changed

cmd/gce-pd-csi-driver/main.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,10 +94,14 @@ func handle() {
9494
}
9595
klog.V(2).Infof("Driver vendor version %v", version)
9696

97-
if *runControllerService && *httpEndpoint != "" && metrics.IsGKEComponentVersionAvailable() {
97+
if *runControllerService && *httpEndpoint != "" {
9898
mm := metrics.NewMetricsManager()
9999
mm.InitializeHttpHandler(*httpEndpoint, *metricsPath)
100-
mm.EmitGKEComponentVersion()
100+
mm.RegisterHyperdiskMetric()
101+
102+
if metrics.IsGKEComponentVersionAvailable() {
103+
mm.EmitGKEComponentVersion()
104+
}
101105
}
102106

103107
if len(*extraVolumeLabelsStr) > 0 && !*runControllerService {

pkg/common/utils.go

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,21 @@ limitations under the License.
1717
package common
1818

1919
import (
20+
"context"
21+
"errors"
2022
"fmt"
23+
"net/http"
2124
"regexp"
2225
"strings"
2326

2427
"github.com/GoogleCloudPlatform/k8s-cloud-provider/pkg/cloud/meta"
28+
"google.golang.org/api/googleapi"
29+
"google.golang.org/grpc/codes"
30+
"google.golang.org/grpc/status"
2531
"k8s.io/apimachinery/pkg/api/resource"
2632
"k8s.io/apimachinery/pkg/util/sets"
2733
volumehelpers "k8s.io/cloud-provider/volume/helpers"
34+
"k8s.io/klog/v2"
2835
)
2936

3037
const (
@@ -288,3 +295,95 @@ func ConvertMiStringToInt64(str string) (int64, error) {
288295
}
289296
return volumehelpers.RoundUpToMiB(quantity)
290297
}
298+
299+
// ParseMachineType returns an extracted machineType from a URL, or empty if not found.
300+
// machineTypeUrl: Full or partial URL of the machine type resource, in the format:
301+
//
302+
// zones/zone/machineTypes/machine-type
303+
func ParseMachineType(machineTypeUrl string) (string, error) {
304+
machineType := machineTypeRegex.FindStringSubmatch(machineTypeUrl)
305+
if machineType == nil {
306+
return "", fmt.Errorf("failed to parse machineTypeUrl. Expected suffix: zones/{zone}/machineTypes/{machine-type}. Got: %s", machineTypeUrl)
307+
}
308+
return machineType[1], nil
309+
}
310+
311+
// CodeForError returns a pointer to the grpc error code that maps to the http
312+
// error code for the passed in user googleapi error or context error. Returns
313+
// codes.Internal if the given error is not a googleapi error caused by the user.
314+
// The following http error codes are considered user errors:
315+
// (1) http 400 Bad Request, returns grpc InvalidArgument,
316+
// (2) http 403 Forbidden, returns grpc PermissionDenied,
317+
// (3) http 404 Not Found, returns grpc NotFound
318+
// (4) http 429 Too Many Requests, returns grpc ResourceExhausted
319+
// The following errors are considered context errors:
320+
// (1) "context deadline exceeded", returns grpc DeadlineExceeded,
321+
// (2) "context canceled", returns grpc Canceled
322+
func CodeForError(err error) *codes.Code {
323+
if err == nil {
324+
return nil
325+
}
326+
327+
if errCode := existingErrorCode(err); errCode != nil {
328+
return errCode
329+
}
330+
if code := isContextError(err); code != nil {
331+
return code
332+
}
333+
334+
internalErrorCode := codes.Internal
335+
// Upwrap the error
336+
var apiErr *googleapi.Error
337+
if !errors.As(err, &apiErr) {
338+
return &internalErrorCode
339+
}
340+
341+
userErrors := map[int]codes.Code{
342+
http.StatusForbidden: codes.PermissionDenied,
343+
http.StatusBadRequest: codes.InvalidArgument,
344+
http.StatusTooManyRequests: codes.ResourceExhausted,
345+
http.StatusNotFound: codes.NotFound,
346+
}
347+
if code, ok := userErrors[apiErr.Code]; ok {
348+
return &code
349+
}
350+
351+
return &internalErrorCode
352+
}
353+
354+
// isContextError returns a pointer to the grpc error code DeadlineExceeded
355+
// if the passed in error contains the "context deadline exceeded" string and returns
356+
// the grpc error code Canceled if the error contains the "context canceled" string.
357+
func isContextError(err error) *codes.Code {
358+
if err == nil {
359+
return nil
360+
}
361+
362+
errStr := err.Error()
363+
if strings.Contains(errStr, context.DeadlineExceeded.Error()) {
364+
return errCodePtr(codes.DeadlineExceeded)
365+
}
366+
if strings.Contains(errStr, context.Canceled.Error()) {
367+
return errCodePtr(codes.Canceled)
368+
}
369+
return nil
370+
}
371+
372+
func existingErrorCode(err error) *codes.Code {
373+
if err == nil {
374+
return nil
375+
}
376+
if status, ok := status.FromError(err); ok {
377+
return errCodePtr(status.Code())
378+
}
379+
return nil
380+
}
381+
382+
func errCodePtr(code codes.Code) *codes.Code {
383+
return &code
384+
}
385+
386+
func LoggedError(msg string, err error) error {
387+
klog.Errorf(msg+"%v", err.Error())
388+
return status.Errorf(*CodeForError(err), msg+"%v", err.Error())
389+
}

pkg/common/utils_test.go

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,17 @@ limitations under the License.
1717
package common
1818

1919
import (
20+
"context"
21+
"errors"
2022
"fmt"
23+
"net/http"
2124
"reflect"
2225
"testing"
2326

2427
"github.com/GoogleCloudPlatform/k8s-cloud-provider/pkg/cloud/meta"
28+
"google.golang.org/api/googleapi"
29+
"google.golang.org/grpc/codes"
30+
"google.golang.org/grpc/status"
2531
)
2632

2733
const (
@@ -853,3 +859,163 @@ func TestConvertMiStringToInt64(t *testing.T) {
853859
})
854860
}
855861
}
862+
863+
func TestParseMachineType(t *testing.T) {
864+
tests := []struct {
865+
desc string
866+
inputMachineTypeUrl string
867+
expectedMachineType string
868+
expectError bool
869+
}{
870+
{
871+
desc: "full URL machine type",
872+
inputMachineTypeUrl: "https://www.googleapis.com/compute/v1/projects/my-project/zones/us-central1-c/machineTypes/c3-highcpu-4",
873+
expectedMachineType: "c3-highcpu-4",
874+
},
875+
{
876+
desc: "partial URL machine type",
877+
inputMachineTypeUrl: "zones/us-central1-c/machineTypes/n2-standard-4",
878+
expectedMachineType: "n2-standard-4",
879+
},
880+
{
881+
desc: "custom partial URL machine type",
882+
inputMachineTypeUrl: "zones/us-central1-c/machineTypes/e2-custom-2-4096",
883+
expectedMachineType: "e2-custom-2-4096",
884+
},
885+
{
886+
desc: "incorrect URL",
887+
inputMachineTypeUrl: "https://www.googleapis.com/compute/v1/projects/psch-gke-dev/zones/us-central1-c",
888+
expectError: true,
889+
},
890+
{
891+
desc: "incorrect partial URL",
892+
inputMachineTypeUrl: "zones/us-central1-c/machineTypes/",
893+
expectError: true,
894+
},
895+
{
896+
desc: "missing zone",
897+
inputMachineTypeUrl: "zones//machineTypes/n2-standard-4",
898+
expectError: true,
899+
},
900+
}
901+
for _, tc := range tests {
902+
t.Run(tc.desc, func(t *testing.T) {
903+
actualMachineFamily, err := ParseMachineType(tc.inputMachineTypeUrl)
904+
if err != nil && !tc.expectError {
905+
t.Errorf("Got error %v parsing machine type %s; expect no error", err, tc.inputMachineTypeUrl)
906+
}
907+
if err == nil && tc.expectError {
908+
t.Errorf("Got no error parsing machine type %s; expect an error", tc.inputMachineTypeUrl)
909+
}
910+
if err == nil && actualMachineFamily != tc.expectedMachineType {
911+
t.Errorf("Got %s parsing machine type; expect %s", actualMachineFamily, tc.expectedMachineType)
912+
}
913+
})
914+
}
915+
}
916+
917+
func TestCodeForError(t *testing.T) {
918+
internalErrorCode := codes.Internal
919+
userErrorCode := codes.InvalidArgument
920+
testCases := []struct {
921+
name string
922+
inputErr error
923+
expCode *codes.Code
924+
}{
925+
{
926+
name: "Not googleapi.Error",
927+
inputErr: errors.New("I am not a googleapi.Error"),
928+
expCode: &internalErrorCode,
929+
},
930+
{
931+
name: "User error",
932+
inputErr: &googleapi.Error{Code: http.StatusBadRequest, Message: "User error with bad request"},
933+
expCode: &userErrorCode,
934+
},
935+
{
936+
name: "googleapi.Error but not a user error",
937+
inputErr: &googleapi.Error{Code: http.StatusInternalServerError, Message: "Internal error"},
938+
expCode: &internalErrorCode,
939+
},
940+
{
941+
name: "context canceled error",
942+
inputErr: context.Canceled,
943+
expCode: errCodePtr(codes.Canceled),
944+
},
945+
{
946+
name: "context deadline exceeded error",
947+
inputErr: context.DeadlineExceeded,
948+
expCode: errCodePtr(codes.DeadlineExceeded),
949+
},
950+
{
951+
name: "status error with Aborted error code",
952+
inputErr: status.Error(codes.Aborted, "aborted error"),
953+
expCode: errCodePtr(codes.Aborted),
954+
},
955+
{
956+
name: "nil error",
957+
inputErr: nil,
958+
expCode: nil,
959+
},
960+
}
961+
962+
for _, tc := range testCases {
963+
t.Logf("Running test: %v", tc.name)
964+
errCode := CodeForError(tc.inputErr)
965+
if (tc.expCode == nil) != (errCode == nil) {
966+
t.Errorf("test %v failed: got %v, expected %v", tc.name, errCode, tc.expCode)
967+
}
968+
if tc.expCode != nil && *errCode != *tc.expCode {
969+
t.Errorf("test %v failed: got %v, expected %v", tc.name, errCode, tc.expCode)
970+
}
971+
}
972+
}
973+
974+
func TestIsContextError(t *testing.T) {
975+
cases := []struct {
976+
name string
977+
err error
978+
expectedErrCode *codes.Code
979+
}{
980+
{
981+
name: "deadline exceeded error",
982+
err: context.DeadlineExceeded,
983+
expectedErrCode: errCodePtr(codes.DeadlineExceeded),
984+
},
985+
{
986+
name: "contains 'context deadline exceeded'",
987+
err: fmt.Errorf("got error: %w", context.DeadlineExceeded),
988+
expectedErrCode: errCodePtr(codes.DeadlineExceeded),
989+
},
990+
{
991+
name: "context canceled error",
992+
err: context.Canceled,
993+
expectedErrCode: errCodePtr(codes.Canceled),
994+
},
995+
{
996+
name: "contains 'context canceled'",
997+
err: fmt.Errorf("got error: %w", context.Canceled),
998+
expectedErrCode: errCodePtr(codes.Canceled),
999+
},
1000+
{
1001+
name: "does not contain 'context canceled' or 'context deadline exceeded'",
1002+
err: fmt.Errorf("unknown error"),
1003+
expectedErrCode: nil,
1004+
},
1005+
{
1006+
name: "nil error",
1007+
err: nil,
1008+
expectedErrCode: nil,
1009+
},
1010+
}
1011+
1012+
for _, test := range cases {
1013+
errCode := isContextError(test.err)
1014+
if (test.expectedErrCode == nil) != (errCode == nil) {
1015+
t.Errorf("test %v failed: got %v, expected %v", test.name, errCode, test.expectedErrCode)
1016+
}
1017+
if test.expectedErrCode != nil && *errCode != *test.expectedErrCode {
1018+
t.Errorf("test %v failed: got %v, expected %v", test.name, errCode, test.expectedErrCode)
1019+
}
1020+
}
1021+
}

0 commit comments

Comments
 (0)