Skip to content

Commit 74d58a6

Browse files
authored
Merge pull request #152341 from souravcrl/blathers/backport-release-25.3-151105
release-25.3: pgwire: publish authentication latency internal metrics
2 parents 6aa47eb + 39b4a39 commit 74d58a6

File tree

6 files changed

+126
-1
lines changed

6 files changed

+126
-1
lines changed

docs/generated/metrics/metrics.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -864,6 +864,14 @@ layers:
864864
unit: NANOSECONDS
865865
aggregation: AVG
866866
derivative: NONE
867+
- name: auth.ldap.conn.latency.internal
868+
exported_name: auth_ldap_conn_latency_internal
869+
description: Internal Auth Latency to establish and authenticate a SQL connection using LDAP(excludes external LDAP calls)
870+
y_axis_label: Nanoseconds
871+
type: HISTOGRAM
872+
unit: NANOSECONDS
873+
aggregation: AVG
874+
derivative: NONE
867875
- name: auth.password.conn.latency
868876
exported_name: auth_password_conn_latency
869877
description: Latency to establish and authenticate a SQL connection using password

pkg/sql/pgwire/auth.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,17 @@ func (c *conn) handleAuthentication(
281281
duration := timeutil.Since(authStartTime).Nanoseconds()
282282
c.publishConnLatencyMetric(duration, hbaEntry.Method.String())
283283

284+
// Get the external auth duration which is time spent on external service by
285+
// the AuthMethod.
286+
externalDuration := behaviors.GetTotalExternalAuthTime().Nanoseconds()
287+
288+
// Compute the internal authentication latency needed to serve a SQL query.
289+
// The internal latency is the time spent on the connection for the actual
290+
// auth time (authentication +provisioning + authorization) time. We will get
291+
// the RTT times to the IDP server and subtract it from the total auth time.
292+
// The metric published is based on the authentication type.
293+
c.publishConnLatencyInternalMetric(duration-externalDuration, hbaEntry.Method.String())
294+
284295
server.lastLoginUpdater.updateLastLoginTime(ctx, dbUser)
285296

286297
return connClose, nil
@@ -305,6 +316,15 @@ func (c *conn) publishConnLatencyMetric(duration int64, authMethod string) {
305316
}
306317
}
307318

319+
// publishConnLatencyInternalMetric publishes the internal latency of the
320+
// connection based on the authentication method.
321+
func (c *conn) publishConnLatencyInternalMetric(internalDuration int64, authMethod string) {
322+
switch authMethod {
323+
case ldapHBAEntry.string():
324+
c.metrics.AuthLDAPConnLatencyInternal.RecordValue(internalDuration)
325+
}
326+
}
327+
308328
func (c *conn) authOKMessage() error {
309329
c.msgBuilder.initMsg(pgwirebase.ServerMsgAuth)
310330
c.msgBuilder.putInt32(authOK)

pkg/sql/pgwire/auth_behaviors.go

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ package pgwire
88
import (
99
"context"
1010
"sync"
11+
"time"
1112

1213
"github.com/cockroachdb/cockroach/pkg/security/provisioning"
1314
"github.com/cockroachdb/cockroach/pkg/security/username"
@@ -33,7 +34,8 @@ type AuthBehaviors struct {
3334
sync.Once
3435
pc provisioning.UserProvisioningConfig
3536
}
36-
provisioner Provisioner
37+
provisioner Provisioner
38+
externalAuthTime time.Duration
3739
}
3840

3941
// Ensure that an AuthBehaviors is easily composable with itself.
@@ -178,3 +180,17 @@ func (b *AuthBehaviors) MaybeProvisionUser(
178180
}
179181
return nil
180182
}
183+
184+
// AddExternalAuthTime adds a time duration to the externalAuthTime during auth.
185+
// The AuthMethod implementation adds new time durations which track time spent
186+
// where auth is waiting on a response from an external service like making an
187+
// API call or validating a response from an external service.
188+
func (b *AuthBehaviors) AddExternalAuthTime(t time.Duration) {
189+
b.externalAuthTime += t
190+
}
191+
192+
// GetTotalExternalAuthTime returns the total aggregated time spent on external
193+
// service during authentication.
194+
func (b *AuthBehaviors) GetTotalExternalAuthTime() time.Duration {
195+
return b.externalAuthTime
196+
}

pkg/sql/pgwire/auth_methods.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ import (
3232
"github.com/cockroachdb/cockroach/pkg/util/errorutil/unimplemented"
3333
"github.com/cockroachdb/cockroach/pkg/util/log"
3434
"github.com/cockroachdb/cockroach/pkg/util/log/eventpb"
35+
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
3536
"github.com/cockroachdb/cockroach/pkg/util/uuid"
3637
"github.com/cockroachdb/errors"
3738
"github.com/cockroachdb/redact"
@@ -987,6 +988,9 @@ func AuthLDAP(
987988
b := &AuthBehaviors{}
988989
b.SetRoleMapper(UseSpecifiedIdentity(sessionUser))
989990

991+
// Audit the lookup start time.
992+
ldapLookupStartTime := timeutil.Now()
993+
990994
ldapUserDN, detailedErrors, authError := ldapManager.m.FetchLDAPUserDN(sCtx, execCfg.Settings, sessionUser, entry, identMap)
991995
if authError != nil {
992996
errForLog := authError
@@ -1001,6 +1005,9 @@ func AuthLDAP(
10011005
b.SetReplacementIdentity(ldapUserDN.String())
10021006
}
10031007

1008+
// Add the total lookup time to the external auth time.
1009+
b.AddExternalAuthTime(timeutil.Since(ldapLookupStartTime))
1010+
10041011
b.SetAuthenticator(func(
10051012
ctx context.Context, systemIdentity string, clientConnection bool, _ PasswordRetrievalFn, _ *ldap.DN,
10061013
) error {
@@ -1041,6 +1048,10 @@ func AuthLDAP(
10411048
if len(ldapPwd) == 0 {
10421049
return security.NewErrPasswordUserAuthFailed(sessionUser)
10431050
}
1051+
1052+
// Audit the authN start time.
1053+
ldapAuthNStartTime := timeutil.Now()
1054+
10441055
if detailedErrors, authError := ldapManager.m.ValidateLDAPLogin(
10451056
ctx, execCfg.Settings, ldapUserDN, sessionUser, ldapPwd, entry, identMap,
10461057
); authError != nil {
@@ -1051,6 +1062,9 @@ func AuthLDAP(
10511062
c.LogAuthFailed(ctx, eventpb.AuthFailReason_CREDENTIALS_INVALID, errForLog)
10521063
return authError
10531064
}
1065+
1066+
// Add the total authN time to the external auth time.
1067+
b.AddExternalAuthTime(timeutil.Since(ldapAuthNStartTime))
10541068
return nil
10551069
})
10561070

@@ -1087,6 +1101,9 @@ func AuthLDAP(
10871101
return err
10881102
}
10891103

1104+
// Audit the authZ start time.
1105+
ldapAuthZStartTime := timeutil.Now()
1106+
10901107
if ldapGroups, detailedErrors, authError := ldapManager.m.FetchLDAPGroups(
10911108
ctx, execCfg.Settings, ldapUserDN, sessionUser, entry, identMap,
10921109
); authError != nil {
@@ -1097,6 +1114,9 @@ func AuthLDAP(
10971114
c.LogAuthFailed(ctx, eventpb.AuthFailReason_AUTHORIZATION_ERROR, errForLog)
10981115
return authError
10991116
} else {
1117+
// Add the total authZ time to the external auth time.
1118+
b.AddExternalAuthTime(timeutil.Since(ldapAuthZStartTime))
1119+
11001120
c.LogAuthInfof(ctx, redact.Sprintf("LDAP authorization sync succeeded; attempting to assign roles for LDAP groups: %s", ldapGroups))
11011121
// Parse and apply transformation to LDAP group DNs for roles granter.
11021122
sqlRoles := make([]username.SQLUsername, 0, len(ldapGroups))

pkg/sql/pgwire/conn_test.go

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2303,6 +2303,8 @@ func TestPublishConnLatencyMetric(t *testing.T) {
23032303
getHistogramOptionsForIOLatency(AuthGSSConnLatency, time.Hour)),
23042304
AuthScramConnLatency: metric.NewHistogram(
23052305
getHistogramOptionsForIOLatency(AuthScramConnLatency, time.Hour)),
2306+
AuthLDAPConnLatencyInternal: metric.NewHistogram(
2307+
getHistogramOptionsForIOLatency(AuthLDAPConnLatencyInternal, time.Hour)),
23062308
},
23072309
}
23082310

@@ -2402,3 +2404,53 @@ func TestPublishConnLatencyMetric(t *testing.T) {
24022404
require.Equal(t, int64(2), count)
24032405
require.Equal(t, float64(9), sum)
24042406
}
2407+
2408+
// write unit tests for the function publishConnLatencyInternalMetric
2409+
func TestPublishConnLatencyInternalMetric(t *testing.T) {
2410+
defer leaktest.AfterTest(t)()
2411+
2412+
c := conn{
2413+
metrics: &tenantSpecificMetrics{
2414+
AuthLDAPConnLatency: metric.NewHistogram(
2415+
getHistogramOptionsForIOLatency(AuthLDAPConnLatency, time.Hour)),
2416+
AuthLDAPConnLatencyInternal: metric.NewHistogram(
2417+
getHistogramOptionsForIOLatency(AuthLDAPConnLatencyInternal, time.Hour)),
2418+
},
2419+
}
2420+
2421+
// LDAP
2422+
ldapDuration := int64(5)
2423+
ldapInternalDuration := int64(4)
2424+
c.publishConnLatencyMetric(ldapDuration, ldapHBAEntry.string())
2425+
c.publishConnLatencyInternalMetric(ldapInternalDuration, ldapHBAEntry.string())
2426+
w := c.metrics.AuthLDAPConnLatency.WindowedSnapshot()
2427+
count, sum := w.Total()
2428+
require.Equal(t, int64(1), count)
2429+
require.Equal(t, float64(5), sum)
2430+
2431+
wI := c.metrics.AuthLDAPConnLatencyInternal.WindowedSnapshot()
2432+
countI, sumI := wI.Total()
2433+
require.Equal(t, int64(1), countI)
2434+
require.Equal(t, float64(4), sumI)
2435+
2436+
// internal latency must be less than total latency.
2437+
require.Less(t, sumI, sum)
2438+
2439+
// republish on LDAP
2440+
ldapDuration = int64(2)
2441+
ldapInternalDuration = int64(1)
2442+
c.publishConnLatencyMetric(ldapDuration, ldapHBAEntry.string())
2443+
c.publishConnLatencyInternalMetric(ldapInternalDuration, ldapHBAEntry.string())
2444+
w = c.metrics.AuthLDAPConnLatency.WindowedSnapshot()
2445+
count, sum = w.Total()
2446+
require.Equal(t, int64(2), count)
2447+
require.Equal(t, float64(7), sum)
2448+
2449+
wI = c.metrics.AuthLDAPConnLatencyInternal.WindowedSnapshot()
2450+
countI, sumI = wI.Total()
2451+
require.Equal(t, int64(2), countI)
2452+
require.Equal(t, float64(5), sumI)
2453+
2454+
// internal latency must be less than total latency.
2455+
require.Less(t, sumI, sum)
2456+
}

pkg/sql/pgwire/server.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,12 @@ var (
224224
Measurement: "Nanoseconds",
225225
Unit: metric.Unit_NANOSECONDS,
226226
}
227+
AuthLDAPConnLatencyInternal = metric.Metadata{
228+
Name: "auth.ldap.conn.latency.internal",
229+
Help: "Internal Auth Latency to establish and authenticate a SQL connection using LDAP(excludes external LDAP calls)",
230+
Measurement: "Nanoseconds",
231+
Unit: metric.Unit_NANOSECONDS,
232+
}
227233
)
228234

229235
const (
@@ -377,6 +383,7 @@ type tenantSpecificMetrics struct {
377383
AuthLDAPConnLatency metric.IHistogram
378384
AuthGSSConnLatency metric.IHistogram
379385
AuthScramConnLatency metric.IHistogram
386+
AuthLDAPConnLatencyInternal metric.IHistogram
380387
}
381388

382389
func newTenantSpecificMetrics(
@@ -411,6 +418,8 @@ func newTenantSpecificMetrics(
411418
getHistogramOptionsForIOLatency(AuthGSSConnLatency, histogramWindow)),
412419
AuthScramConnLatency: metric.NewHistogram(
413420
getHistogramOptionsForIOLatency(AuthScramConnLatency, histogramWindow)),
421+
AuthLDAPConnLatencyInternal: metric.NewHistogram(
422+
getHistogramOptionsForIOLatency(AuthLDAPConnLatencyInternal, histogramWindow)),
414423
}
415424
}
416425

0 commit comments

Comments
 (0)