Skip to content

Commit 01052ac

Browse files
Sneha-atsunnylovestiramisu
authored andcommitted
Adding new metric pdcsi_operation_errors to fetch error count
1 parent 38af7cd commit 01052ac

File tree

7 files changed

+325
-191
lines changed

7 files changed

+325
-191
lines changed

cmd/gce-pd-csi-driver/main.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,10 +96,14 @@ func handle() {
9696
}
9797
klog.V(2).Infof("Driver vendor version %v", version)
9898

99-
if *runControllerService && *httpEndpoint != "" && metrics.IsGKEComponentVersionAvailable() {
99+
if *runControllerService && *httpEndpoint != "" {
100100
mm := metrics.NewMetricsManager()
101101
mm.InitializeHttpHandler(*httpEndpoint, *metricsPath)
102-
mm.EmitGKEComponentVersion()
102+
mm.RegisterHyperdiskMetric()
103+
104+
if metrics.IsGKEComponentVersionAvailable() {
105+
mm.EmitGKEComponentVersion()
106+
}
103107
}
104108

105109
if len(*extraVolumeLabelsStr) > 0 && !*runControllerService {

pkg/common/utils.go

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,21 @@ limitations under the License.
1717
package common
1818

1919
import (
20+
"context"
21+
"errors"
2022
"fmt"
23+
"net/http"
2124
"regexp"
2225
"strings"
2326

2427
"github.com/GoogleCloudPlatform/k8s-cloud-provider/pkg/cloud/meta"
28+
"google.golang.org/api/googleapi"
29+
"google.golang.org/grpc/codes"
30+
"google.golang.org/grpc/status"
2531
"k8s.io/apimachinery/pkg/api/resource"
2632
"k8s.io/apimachinery/pkg/util/sets"
2733
volumehelpers "k8s.io/cloud-provider/volume/helpers"
34+
"k8s.io/klog/v2"
2835
)
2936

3037
const (
@@ -288,3 +295,83 @@ func ParseMachineType(machineTypeUrl string) (string, error) {
288295
}
289296
return machineType[1], nil
290297
}
298+
299+
// CodeForError returns a pointer to the grpc error code that maps to the http
300+
// error code for the passed in user googleapi error or context error. Returns
301+
// codes.Internal if the given error is not a googleapi error caused by the user.
302+
// The following http error codes are considered user errors:
303+
// (1) http 400 Bad Request, returns grpc InvalidArgument,
304+
// (2) http 403 Forbidden, returns grpc PermissionDenied,
305+
// (3) http 404 Not Found, returns grpc NotFound
306+
// (4) http 429 Too Many Requests, returns grpc ResourceExhausted
307+
// The following errors are considered context errors:
308+
// (1) "context deadline exceeded", returns grpc DeadlineExceeded,
309+
// (2) "context canceled", returns grpc Canceled
310+
func CodeForError(err error) *codes.Code {
311+
if err == nil {
312+
return nil
313+
}
314+
315+
if errCode := existingErrorCode(err); errCode != nil {
316+
return errCode
317+
}
318+
if code := isContextError(err); code != nil {
319+
return code
320+
}
321+
322+
internalErrorCode := codes.Internal
323+
// Upwrap the error
324+
var apiErr *googleapi.Error
325+
if !errors.As(err, &apiErr) {
326+
return &internalErrorCode
327+
}
328+
329+
userErrors := map[int]codes.Code{
330+
http.StatusForbidden: codes.PermissionDenied,
331+
http.StatusBadRequest: codes.InvalidArgument,
332+
http.StatusTooManyRequests: codes.ResourceExhausted,
333+
http.StatusNotFound: codes.NotFound,
334+
}
335+
if code, ok := userErrors[apiErr.Code]; ok {
336+
return &code
337+
}
338+
339+
return &internalErrorCode
340+
}
341+
342+
// isContextError returns a pointer to the grpc error code DeadlineExceeded
343+
// if the passed in error contains the "context deadline exceeded" string and returns
344+
// the grpc error code Canceled if the error contains the "context canceled" string.
345+
func isContextError(err error) *codes.Code {
346+
if err == nil {
347+
return nil
348+
}
349+
350+
errStr := err.Error()
351+
if strings.Contains(errStr, context.DeadlineExceeded.Error()) {
352+
return errCodePtr(codes.DeadlineExceeded)
353+
}
354+
if strings.Contains(errStr, context.Canceled.Error()) {
355+
return errCodePtr(codes.Canceled)
356+
}
357+
return nil
358+
}
359+
360+
func existingErrorCode(err error) *codes.Code {
361+
if err == nil {
362+
return nil
363+
}
364+
if status, ok := status.FromError(err); ok {
365+
return errCodePtr(status.Code())
366+
}
367+
return nil
368+
}
369+
370+
func errCodePtr(code codes.Code) *codes.Code {
371+
return &code
372+
}
373+
374+
func LoggedError(msg string, err error) error {
375+
klog.Errorf(msg+"%v", err.Error())
376+
return status.Errorf(*CodeForError(err), msg+"%v", err.Error())
377+
}

pkg/common/utils_test.go

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,17 @@ limitations under the License.
1717
package common
1818

1919
import (
20+
"context"
21+
"errors"
2022
"fmt"
23+
"net/http"
2124
"reflect"
2225
"testing"
2326

2427
"github.com/GoogleCloudPlatform/k8s-cloud-provider/pkg/cloud/meta"
28+
"google.golang.org/api/googleapi"
29+
"google.golang.org/grpc/codes"
30+
"google.golang.org/grpc/status"
2531
)
2632

2733
const (
@@ -841,3 +847,109 @@ func TestParseMachineType(t *testing.T) {
841847
})
842848
}
843849
}
850+
851+
func TestCodeForError(t *testing.T) {
852+
internalErrorCode := codes.Internal
853+
userErrorCode := codes.InvalidArgument
854+
testCases := []struct {
855+
name string
856+
inputErr error
857+
expCode *codes.Code
858+
}{
859+
{
860+
name: "Not googleapi.Error",
861+
inputErr: errors.New("I am not a googleapi.Error"),
862+
expCode: &internalErrorCode,
863+
},
864+
{
865+
name: "User error",
866+
inputErr: &googleapi.Error{Code: http.StatusBadRequest, Message: "User error with bad request"},
867+
expCode: &userErrorCode,
868+
},
869+
{
870+
name: "googleapi.Error but not a user error",
871+
inputErr: &googleapi.Error{Code: http.StatusInternalServerError, Message: "Internal error"},
872+
expCode: &internalErrorCode,
873+
},
874+
{
875+
name: "context canceled error",
876+
inputErr: context.Canceled,
877+
expCode: errCodePtr(codes.Canceled),
878+
},
879+
{
880+
name: "context deadline exceeded error",
881+
inputErr: context.DeadlineExceeded,
882+
expCode: errCodePtr(codes.DeadlineExceeded),
883+
},
884+
{
885+
name: "status error with Aborted error code",
886+
inputErr: status.Error(codes.Aborted, "aborted error"),
887+
expCode: errCodePtr(codes.Aborted),
888+
},
889+
{
890+
name: "nil error",
891+
inputErr: nil,
892+
expCode: nil,
893+
},
894+
}
895+
896+
for _, tc := range testCases {
897+
t.Logf("Running test: %v", tc.name)
898+
errCode := CodeForError(tc.inputErr)
899+
if (tc.expCode == nil) != (errCode == nil) {
900+
t.Errorf("test %v failed: got %v, expected %v", tc.name, errCode, tc.expCode)
901+
}
902+
if tc.expCode != nil && *errCode != *tc.expCode {
903+
t.Errorf("test %v failed: got %v, expected %v", tc.name, errCode, tc.expCode)
904+
}
905+
}
906+
}
907+
908+
func TestIsContextError(t *testing.T) {
909+
cases := []struct {
910+
name string
911+
err error
912+
expectedErrCode *codes.Code
913+
}{
914+
{
915+
name: "deadline exceeded error",
916+
err: context.DeadlineExceeded,
917+
expectedErrCode: errCodePtr(codes.DeadlineExceeded),
918+
},
919+
{
920+
name: "contains 'context deadline exceeded'",
921+
err: fmt.Errorf("got error: %w", context.DeadlineExceeded),
922+
expectedErrCode: errCodePtr(codes.DeadlineExceeded),
923+
},
924+
{
925+
name: "context canceled error",
926+
err: context.Canceled,
927+
expectedErrCode: errCodePtr(codes.Canceled),
928+
},
929+
{
930+
name: "contains 'context canceled'",
931+
err: fmt.Errorf("got error: %w", context.Canceled),
932+
expectedErrCode: errCodePtr(codes.Canceled),
933+
},
934+
{
935+
name: "does not contain 'context canceled' or 'context deadline exceeded'",
936+
err: fmt.Errorf("unknown error"),
937+
expectedErrCode: nil,
938+
},
939+
{
940+
name: "nil error",
941+
err: nil,
942+
expectedErrCode: nil,
943+
},
944+
}
945+
946+
for _, test := range cases {
947+
errCode := isContextError(test.err)
948+
if (test.expectedErrCode == nil) != (errCode == nil) {
949+
t.Errorf("test %v failed: got %v, expected %v", test.name, errCode, test.expectedErrCode)
950+
}
951+
if test.expectedErrCode != nil && *errCode != *test.expectedErrCode {
952+
t.Errorf("test %v failed: got %v, expected %v", test.name, errCode, test.expectedErrCode)
953+
}
954+
}
955+
}

0 commit comments

Comments
 (0)