Skip to content

Commit f07547f

Browse files
Sneha-atsunnylovestiramisu
authored and committed
Adding new metric pdcsi_operation_errors to fetch error count
1 parent bf1aafc commit f07547f

File tree

7 files changed

+401
-118
lines changed

7 files changed

+401
-118
lines changed

cmd/gce-pd-csi-driver/main.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,10 +91,14 @@ func handle() {
9191
}
9292
klog.V(2).Infof("Driver vendor version %v", version)
9393

94-
if *runControllerService && *httpEndpoint != "" && metrics.IsGKEComponentVersionAvailable() {
94+
if *runControllerService && *httpEndpoint != "" {
9595
mm := metrics.NewMetricsManager()
9696
mm.InitializeHttpHandler(*httpEndpoint, *metricsPath)
97-
mm.EmitGKEComponentVersion()
97+
mm.RegisterHyperdiskMetric()
98+
99+
if metrics.IsGKEComponentVersionAvailable() {
100+
mm.EmitGKEComponentVersion()
101+
}
98102
}
99103

100104
if len(*extraVolumeLabelsStr) > 0 && !*runControllerService {

pkg/common/utils.go

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,21 @@ limitations under the License.
1717
package common
1818

1919
import (
20+
"context"
21+
"errors"
2022
"fmt"
23+
"net/http"
2124
"regexp"
2225
"strings"
2326

2427
"github.com/GoogleCloudPlatform/k8s-cloud-provider/pkg/cloud/meta"
28+
"google.golang.org/api/googleapi"
29+
"google.golang.org/grpc/codes"
30+
"google.golang.org/grpc/status"
2531
"k8s.io/apimachinery/pkg/api/resource"
2632
"k8s.io/apimachinery/pkg/util/sets"
2733
volumehelpers "k8s.io/cloud-provider/volume/helpers"
34+
"k8s.io/klog/v2"
2835
)
2936

3037
const (
@@ -290,3 +297,98 @@ func ConvertMiBStringToInt64(str string) (int64, error) {
290297
quantity := resource.MustParse(str)
291298
return volumehelpers.RoundUpToMiB(quantity)
292299
}
300+
<<<<<<< HEAD
301+
=======
302+
303+
// ParseMachineType returns an extracted machineType from a URL, or empty if not found.
304+
// machineTypeUrl: Full or partial URL of the machine type resource, in the format:
305+
//
306+
// zones/zone/machineTypes/machine-type
307+
func ParseMachineType(machineTypeUrl string) (string, error) {
308+
machineType := machineTypeRegex.FindStringSubmatch(machineTypeUrl)
309+
if machineType == nil {
310+
return "", fmt.Errorf("failed to parse machineTypeUrl. Expected suffix: zones/{zone}/machineTypes/{machine-type}. Got: %s", machineTypeUrl)
311+
}
312+
return machineType[1], nil
313+
}
314+
315+
// CodeForError returns a pointer to the grpc error code that maps to the http
316+
// error code for the passed in user googleapi error or context error. Returns
317+
// codes.Internal if the given error is not a googleapi error caused by the user.
318+
// The following http error codes are considered user errors:
319+
// (1) http 400 Bad Request, returns grpc InvalidArgument,
320+
// (2) http 403 Forbidden, returns grpc PermissionDenied,
321+
// (3) http 404 Not Found, returns grpc NotFound
322+
// (4) http 429 Too Many Requests, returns grpc ResourceExhausted
323+
// The following errors are considered context errors:
324+
// (1) "context deadline exceeded", returns grpc DeadlineExceeded,
325+
// (2) "context canceled", returns grpc Canceled
326+
func CodeForError(err error) *codes.Code {
327+
if err == nil {
328+
return nil
329+
}
330+
331+
if errCode := existingErrorCode(err); errCode != nil {
332+
return errCode
333+
}
334+
if code := isContextError(err); code != nil {
335+
return code
336+
}
337+
338+
internalErrorCode := codes.Internal
339+
// Upwrap the error
340+
var apiErr *googleapi.Error
341+
if !errors.As(err, &apiErr) {
342+
return &internalErrorCode
343+
}
344+
345+
userErrors := map[int]codes.Code{
346+
http.StatusForbidden: codes.PermissionDenied,
347+
http.StatusBadRequest: codes.InvalidArgument,
348+
http.StatusTooManyRequests: codes.ResourceExhausted,
349+
http.StatusNotFound: codes.NotFound,
350+
}
351+
if code, ok := userErrors[apiErr.Code]; ok {
352+
return &code
353+
}
354+
355+
return &internalErrorCode
356+
}
357+
358+
// isContextError returns a pointer to the grpc error code DeadlineExceeded
359+
// if the passed in error contains the "context deadline exceeded" string and returns
360+
// the grpc error code Canceled if the error contains the "context canceled" string.
361+
func isContextError(err error) *codes.Code {
362+
if err == nil {
363+
return nil
364+
}
365+
366+
errStr := err.Error()
367+
if strings.Contains(errStr, context.DeadlineExceeded.Error()) {
368+
return errCodePtr(codes.DeadlineExceeded)
369+
}
370+
if strings.Contains(errStr, context.Canceled.Error()) {
371+
return errCodePtr(codes.Canceled)
372+
}
373+
return nil
374+
}
375+
376+
func existingErrorCode(err error) *codes.Code {
377+
if err == nil {
378+
return nil
379+
}
380+
if status, ok := status.FromError(err); ok {
381+
return errCodePtr(status.Code())
382+
}
383+
return nil
384+
}
385+
386+
func errCodePtr(code codes.Code) *codes.Code {
387+
return &code
388+
}
389+
390+
func LoggedError(msg string, err error) error {
391+
klog.Errorf(msg+"%v", err.Error())
392+
return status.Errorf(*CodeForError(err), msg+"%v", err.Error())
393+
}
394+
>>>>>>> Adding new metric pdcsi_operation_errors to fetch error count

pkg/common/utils_test.go

Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,17 @@ limitations under the License.
1717
package common
1818

1919
import (
20+
"context"
21+
"errors"
2022
"fmt"
23+
"net/http"
2124
"reflect"
2225
"testing"
2326

2427
"github.com/GoogleCloudPlatform/k8s-cloud-provider/pkg/cloud/meta"
28+
"google.golang.org/api/googleapi"
29+
"google.golang.org/grpc/codes"
30+
"google.golang.org/grpc/status"
2531
)
2632

2733
const (
@@ -847,3 +853,166 @@ func TestConvertMiBStringToInt64(t *testing.T) {
847853
})
848854
}
849855
}
856+
<<<<<<< HEAD
857+
=======
858+
859+
func TestParseMachineType(t *testing.T) {
860+
tests := []struct {
861+
desc string
862+
inputMachineTypeUrl string
863+
expectedMachineType string
864+
expectError bool
865+
}{
866+
{
867+
desc: "full URL machine type",
868+
inputMachineTypeUrl: "https://www.googleapis.com/compute/v1/projects/my-project/zones/us-central1-c/machineTypes/c3-highcpu-4",
869+
expectedMachineType: "c3-highcpu-4",
870+
},
871+
{
872+
desc: "partial URL machine type",
873+
inputMachineTypeUrl: "zones/us-central1-c/machineTypes/n2-standard-4",
874+
expectedMachineType: "n2-standard-4",
875+
},
876+
{
877+
desc: "custom partial URL machine type",
878+
inputMachineTypeUrl: "zones/us-central1-c/machineTypes/e2-custom-2-4096",
879+
expectedMachineType: "e2-custom-2-4096",
880+
},
881+
{
882+
desc: "incorrect URL",
883+
inputMachineTypeUrl: "https://www.googleapis.com/compute/v1/projects/psch-gke-dev/zones/us-central1-c",
884+
expectError: true,
885+
},
886+
{
887+
desc: "incorrect partial URL",
888+
inputMachineTypeUrl: "zones/us-central1-c/machineTypes/",
889+
expectError: true,
890+
},
891+
{
892+
desc: "missing zone",
893+
inputMachineTypeUrl: "zones//machineTypes/n2-standard-4",
894+
expectError: true,
895+
},
896+
}
897+
for _, tc := range tests {
898+
t.Run(tc.desc, func(t *testing.T) {
899+
actualMachineFamily, err := ParseMachineType(tc.inputMachineTypeUrl)
900+
if err != nil && !tc.expectError {
901+
t.Errorf("Got error %v parsing machine type %s; expect no error", err, tc.inputMachineTypeUrl)
902+
}
903+
if err == nil && tc.expectError {
904+
t.Errorf("Got no error parsing machine type %s; expect an error", tc.inputMachineTypeUrl)
905+
}
906+
if err == nil && actualMachineFamily != tc.expectedMachineType {
907+
t.Errorf("Got %s parsing machine type; expect %s", actualMachineFamily, tc.expectedMachineType)
908+
}
909+
})
910+
}
911+
}
912+
913+
func TestCodeForError(t *testing.T) {
914+
internalErrorCode := codes.Internal
915+
userErrorCode := codes.InvalidArgument
916+
testCases := []struct {
917+
name string
918+
inputErr error
919+
expCode *codes.Code
920+
}{
921+
{
922+
name: "Not googleapi.Error",
923+
inputErr: errors.New("I am not a googleapi.Error"),
924+
expCode: &internalErrorCode,
925+
},
926+
{
927+
name: "User error",
928+
inputErr: &googleapi.Error{Code: http.StatusBadRequest, Message: "User error with bad request"},
929+
expCode: &userErrorCode,
930+
},
931+
{
932+
name: "googleapi.Error but not a user error",
933+
inputErr: &googleapi.Error{Code: http.StatusInternalServerError, Message: "Internal error"},
934+
expCode: &internalErrorCode,
935+
},
936+
{
937+
name: "context canceled error",
938+
inputErr: context.Canceled,
939+
expCode: errCodePtr(codes.Canceled),
940+
},
941+
{
942+
name: "context deadline exceeded error",
943+
inputErr: context.DeadlineExceeded,
944+
expCode: errCodePtr(codes.DeadlineExceeded),
945+
},
946+
{
947+
name: "status error with Aborted error code",
948+
inputErr: status.Error(codes.Aborted, "aborted error"),
949+
expCode: errCodePtr(codes.Aborted),
950+
},
951+
{
952+
name: "nil error",
953+
inputErr: nil,
954+
expCode: nil,
955+
},
956+
}
957+
958+
for _, tc := range testCases {
959+
t.Logf("Running test: %v", tc.name)
960+
errCode := CodeForError(tc.inputErr)
961+
if (tc.expCode == nil) != (errCode == nil) {
962+
t.Errorf("test %v failed: got %v, expected %v", tc.name, errCode, tc.expCode)
963+
}
964+
if tc.expCode != nil && *errCode != *tc.expCode {
965+
t.Errorf("test %v failed: got %v, expected %v", tc.name, errCode, tc.expCode)
966+
}
967+
}
968+
}
969+
970+
func TestIsContextError(t *testing.T) {
971+
cases := []struct {
972+
name string
973+
err error
974+
expectedErrCode *codes.Code
975+
}{
976+
{
977+
name: "deadline exceeded error",
978+
err: context.DeadlineExceeded,
979+
expectedErrCode: errCodePtr(codes.DeadlineExceeded),
980+
},
981+
{
982+
name: "contains 'context deadline exceeded'",
983+
err: fmt.Errorf("got error: %w", context.DeadlineExceeded),
984+
expectedErrCode: errCodePtr(codes.DeadlineExceeded),
985+
},
986+
{
987+
name: "context canceled error",
988+
err: context.Canceled,
989+
expectedErrCode: errCodePtr(codes.Canceled),
990+
},
991+
{
992+
name: "contains 'context canceled'",
993+
err: fmt.Errorf("got error: %w", context.Canceled),
994+
expectedErrCode: errCodePtr(codes.Canceled),
995+
},
996+
{
997+
name: "does not contain 'context canceled' or 'context deadline exceeded'",
998+
err: fmt.Errorf("unknown error"),
999+
expectedErrCode: nil,
1000+
},
1001+
{
1002+
name: "nil error",
1003+
err: nil,
1004+
expectedErrCode: nil,
1005+
},
1006+
}
1007+
1008+
for _, test := range cases {
1009+
errCode := isContextError(test.err)
1010+
if (test.expectedErrCode == nil) != (errCode == nil) {
1011+
t.Errorf("test %v failed: got %v, expected %v", test.name, errCode, test.expectedErrCode)
1012+
}
1013+
if test.expectedErrCode != nil && *errCode != *test.expectedErrCode {
1014+
t.Errorf("test %v failed: got %v, expected %v", test.name, errCode, test.expectedErrCode)
1015+
}
1016+
}
1017+
}
1018+
>>>>>>> Adding new metric pdcsi_operation_errors to fetch error count

0 commit comments

Comments
 (0)