Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
173 changes: 173 additions & 0 deletions credentials/google/gcp_service_account_identity_credentials.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
/*
*
* Copyright 2026 gRPC authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/

package google

import (
"context"
"fmt"
"sync"
"time"

"cloud.google.com/go/auth"
"cloud.google.com/go/auth/credentials/idtoken"
"google.golang.org/grpc/credentials"
"google.golang.org/grpc/internal/backoff"
)

// earlyExpiry matches the hardcoded 5-minute early expiry used by the
// cloud.google.com/go/auth/credentials/idtoken package.
var earlyExpiry = 5 * time.Minute

type gcpServiceAccountIdentityCallCreds struct {
audience string
ts *auth.Credentials
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should the field be named something like "creds" instead of ts?

backoff backoff.Strategy

mu sync.Mutex
token *auth.Token

fetching chan struct{}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add a trailing comment for this chan

nextRetryTime time.Time // When we can try next (backoff)
retryAttempt int // consecutive failures
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

how does the client know how many retry attempts are pending?

lastErr error // error from last attempt
}

// NewGcpServiceAccountIdentity creates a PerRPCCredentials that authenticates
// using a GCP Service Account Identity JWT token for the given audience.
//
// It uses the cloud.google.com/go/auth/credentials/idtoken package to
// automatically fetch ID token from the GCE metadata server. This credential
// is only valid to use in an environment running on GCP.
func NewGcpServiceAccountIdentity(audience string) (credentials.PerRPCCredentials, error) {
if audience == "" {
return nil, fmt.Errorf("audience cannot be empty")
}

creds, err := idtoken.NewCredentials(&idtoken.Options{
Audience: audience,
})
if err != nil {
return nil, fmt.Errorf("failed to create auth.Credentials for idtoken: %v", err)
}

return &gcpServiceAccountIdentityCallCreds{
audience: audience,
ts: creds,
backoff: backoff.DefaultExponential,
}, nil
}

// GetRequestMetadata gets the current request metadata, refreshing tokens if
// required. This implementation follows the PerRPCCredentials interface.
//
// It guarantees that only one underlying token fetch will be executed
// concurrently. If a valid token is cached, it is returned immediately. If
// a fetch recently failed, the cached error is returned until the backoff
// interval expires. Otherwise, it initiates a new token fetch or blocks
// waiting for an already-in-progress fetch to complete.
func (c *gcpServiceAccountIdentityCallCreds) GetRequestMetadata(ctx context.Context, _ ...string) (map[string]string, error) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The gRFC states : If the returned HTTP status maps to UNAVAILABLE in HTTP to gRPC Status Code Mapping, then the data plane RPCs will be failed with status UNAVAILABLE; otherwise, they will be failed with status UNAUTHENTICATED. If the request fails without an HTTP status (e.g., an I/O error), all queued data plane RPCs will be failed with UNAVAILABLE status.

Where exactly are these status code mappings handled?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ri, _ := credentials.RequestInfoFromContext(ctx)
if err := credentials.CheckSecurityLevel(ri.AuthInfo, credentials.PrivacyAndIntegrity); err != nil {
return nil, fmt.Errorf("cannot send secure credentials on an insecure connection: %v", err)
}

c.mu.Lock()

if c.isTokenValid() {
c.mu.Unlock()
return map[string]string{
"authorization": "Bearer " + c.token.Value,
Copy link
Contributor

@mbissa mbissa Mar 23, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we are reading the token value outside the lock - can lead to data race. The unlock should be defer?

}, nil
}

if c.lastErr != nil && time.Now().Before(c.nextRetryTime) {
c.mu.Unlock()
return nil, c.lastErr
}

if c.fetching == nil {
c.fetching = make(chan struct{})
c.mu.Unlock()

token, err := c.ts.TokenProvider.Token(context.Background())
Copy link
Contributor

@mbissa mbissa Mar 23, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what happens if this gets stuck or hangs indefinitely? Can we configure a timeout for this context? Else all other go routines will simply keep waiting for this to come back.


c.mu.Lock()

if err != nil {
c.setBackoff(err)
close(c.fetching)
c.fetching = nil
c.mu.Unlock()
return nil, err
}

c.setBackoff(nil)
c.token = token
close(c.fetching)
c.fetching = nil
c.mu.Unlock()
return map[string]string{
"authorization": "Bearer " + c.token.Value,
}, nil
Comment on lines +110 to +127
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The logic for handling the token fetch result can be simplified to reduce code duplication and improve readability. By using defer c.mu.Unlock() and consolidating the setBackoff call, you can make the code cleaner.

The setBackoff function already handles both error and non-error cases correctly, so you can call it once with the err from the token fetch.

Suggested change
c.mu.Lock()
if err != nil {
c.setBackoff(err)
close(c.fetching)
c.fetching = nil
c.mu.Unlock()
return nil, err
}
c.setBackoff(nil)
c.token = token
close(c.fetching)
c.fetching = nil
c.mu.Unlock()
return map[string]string{
"authorization": "Bearer " + c.token.Value,
}, nil
c.mu.Lock()
defer c.mu.Unlock()
close(c.fetching)
c.fetching = nil
c.setBackoff(err)
if err != nil {
return nil, err
}
c.token = token
return map[string]string{
"authorization": "Bearer " + c.token.Value,
}, nil

}
wait := c.fetching
c.mu.Unlock()
select {
case <-wait:
return func() (map[string]string, error) {
c.mu.Lock()
defer c.mu.Unlock()
if c.isTokenValid() {
return map[string]string{
"authorization": "Bearer " + c.token.Value,
}, nil
}
return nil, c.lastErr
}()
Comment on lines +132 to +142
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The use of an immediately-invoked function expression here makes the code slightly harder to read. You can simplify this by moving the logic out of the anonymous function directly into the case block. This makes the control flow more straightforward.

	case <-wait:
		c.mu.Lock()
		defer c.mu.Unlock()
		if c.isTokenValid() {
			return map[string]string{
				"authorization": "Bearer " + c.token.Value,
			}, nil
		}
		return nil, c.lastErr

case <-ctx.Done():
return nil, ctx.Err()
}
}

// RequireTransportSecurity indicates whether the credentials requires
// transport security.
func (c *gcpServiceAccountIdentityCallCreds) RequireTransportSecurity() bool {
return true
}

// isTokenValid checks if the cached token is still valid.
func (c *gcpServiceAccountIdentityCallCreds) isTokenValid() bool {
if c.token == nil {
return false
}
return !c.token.Expiry.Round(0).Add(-earlyExpiry).Before(time.Now())
}

func (c *gcpServiceAccountIdentityCallCreds) setBackoff(err error) {
if err == nil {
c.lastErr = nil
c.retryAttempt = 0
c.nextRetryTime = time.Time{}
return
}
c.lastErr = err
backoffDelay := c.backoff.Backoff(c.retryAttempt)
c.retryAttempt++
c.nextRetryTime = time.Now().Add(backoffDelay)
}
Loading
Loading