Skip to content

Commit 39b4a39

Browse files
committed
pgwire: publish authentication latency internal metrics
Currently, we don't have a way of estimating how much time external authentication methods like ldap take for an auth attempt. With provisioning and authorization enabled, we will be adding additional latency for the auth flow. Hence, we need to gather additional metrics for enabling user auth: - Introduce a logging metric to complement auth_ldap_conn_latency cockroachdbMetric. This will be called auth.ldap.conn.latency.internal - This will be the actual auth time (authentication +provisioning + authorization) time. We will get the RTT times to the ldap server (bind service plus search user plus bind user plus search groups) and subtract it from the total auth time. fixes #148874 Epic CRDB-21590 Release note(ops change): `auth.ldap.conn.latency.internal` has been added to denote the internal authentication time for ldap auth method.
1 parent 6aa47eb commit 39b4a39

File tree

6 files changed

+126
-1
lines changed

6 files changed

+126
-1
lines changed

docs/generated/metrics/metrics.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -864,6 +864,14 @@ layers:
864864
unit: NANOSECONDS
865865
aggregation: AVG
866866
derivative: NONE
867+
- name: auth.ldap.conn.latency.internal
868+
exported_name: auth_ldap_conn_latency_internal
869+
description: Internal Auth Latency to establish and authenticate a SQL connection using LDAP(excludes external LDAP calls)
870+
y_axis_label: Nanoseconds
871+
type: HISTOGRAM
872+
unit: NANOSECONDS
873+
aggregation: AVG
874+
derivative: NONE
867875
- name: auth.password.conn.latency
868876
exported_name: auth_password_conn_latency
869877
description: Latency to establish and authenticate a SQL connection using password

pkg/sql/pgwire/auth.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,17 @@ func (c *conn) handleAuthentication(
281281
duration := timeutil.Since(authStartTime).Nanoseconds()
282282
c.publishConnLatencyMetric(duration, hbaEntry.Method.String())
283283

284+
// Get the external auth duration which is time spent on external service by
285+
// the AuthMethod.
286+
externalDuration := behaviors.GetTotalExternalAuthTime().Nanoseconds()
287+
288+
// Compute the internal authentication latency needed to serve a SQL query.
289+
// The internal latency is the time spent on the connection for the actual
290+
// auth time (authentication +provisioning + authorization) time. We will get
291+
// the RTT times to the IDP server and subtract it from the total auth time.
292+
// The metric published is based on the authentication type.
293+
c.publishConnLatencyInternalMetric(duration-externalDuration, hbaEntry.Method.String())
294+
284295
server.lastLoginUpdater.updateLastLoginTime(ctx, dbUser)
285296

286297
return connClose, nil
@@ -305,6 +316,15 @@ func (c *conn) publishConnLatencyMetric(duration int64, authMethod string) {
305316
}
306317
}
307318

319+
// publishConnLatencyInternalMetric publishes the internal latency of the
320+
// connection based on the authentication method.
321+
func (c *conn) publishConnLatencyInternalMetric(internalDuration int64, authMethod string) {
322+
switch authMethod {
323+
case ldapHBAEntry.string():
324+
c.metrics.AuthLDAPConnLatencyInternal.RecordValue(internalDuration)
325+
}
326+
}
327+
308328
func (c *conn) authOKMessage() error {
309329
c.msgBuilder.initMsg(pgwirebase.ServerMsgAuth)
310330
c.msgBuilder.putInt32(authOK)

pkg/sql/pgwire/auth_behaviors.go

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ package pgwire
88
import (
99
"context"
1010
"sync"
11+
"time"
1112

1213
"github.com/cockroachdb/cockroach/pkg/security/provisioning"
1314
"github.com/cockroachdb/cockroach/pkg/security/username"
@@ -33,7 +34,8 @@ type AuthBehaviors struct {
3334
sync.Once
3435
pc provisioning.UserProvisioningConfig
3536
}
36-
provisioner Provisioner
37+
provisioner Provisioner
38+
externalAuthTime time.Duration
3739
}
3840

3941
// Ensure that an AuthBehaviors is easily composable with itself.
@@ -178,3 +180,17 @@ func (b *AuthBehaviors) MaybeProvisionUser(
178180
}
179181
return nil
180182
}
183+
184+
// AddExternalAuthTime adds a time duration to the externalAuthTime during auth.
185+
// The AuthMethod implementation adds new time durations which track time spent
186+
// where auth is waiting on a response from an external service like making an
187+
// API call or validating a response from an external service.
188+
func (b *AuthBehaviors) AddExternalAuthTime(t time.Duration) {
189+
b.externalAuthTime += t
190+
}
191+
192+
// GetTotalExternalAuthTime returns the total aggregated time spent on external
193+
// service during authentication.
194+
func (b *AuthBehaviors) GetTotalExternalAuthTime() time.Duration {
195+
return b.externalAuthTime
196+
}

pkg/sql/pgwire/auth_methods.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ import (
3232
"github.com/cockroachdb/cockroach/pkg/util/errorutil/unimplemented"
3333
"github.com/cockroachdb/cockroach/pkg/util/log"
3434
"github.com/cockroachdb/cockroach/pkg/util/log/eventpb"
35+
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
3536
"github.com/cockroachdb/cockroach/pkg/util/uuid"
3637
"github.com/cockroachdb/errors"
3738
"github.com/cockroachdb/redact"
@@ -987,6 +988,9 @@ func AuthLDAP(
987988
b := &AuthBehaviors{}
988989
b.SetRoleMapper(UseSpecifiedIdentity(sessionUser))
989990

991+
// Audit the lookup start time.
992+
ldapLookupStartTime := timeutil.Now()
993+
990994
ldapUserDN, detailedErrors, authError := ldapManager.m.FetchLDAPUserDN(sCtx, execCfg.Settings, sessionUser, entry, identMap)
991995
if authError != nil {
992996
errForLog := authError
@@ -1001,6 +1005,9 @@ func AuthLDAP(
10011005
b.SetReplacementIdentity(ldapUserDN.String())
10021006
}
10031007

1008+
// Add the total lookup time to the external auth time.
1009+
b.AddExternalAuthTime(timeutil.Since(ldapLookupStartTime))
1010+
10041011
b.SetAuthenticator(func(
10051012
ctx context.Context, systemIdentity string, clientConnection bool, _ PasswordRetrievalFn, _ *ldap.DN,
10061013
) error {
@@ -1041,6 +1048,10 @@ func AuthLDAP(
10411048
if len(ldapPwd) == 0 {
10421049
return security.NewErrPasswordUserAuthFailed(sessionUser)
10431050
}
1051+
1052+
// Audit the authN start time.
1053+
ldapAuthNStartTime := timeutil.Now()
1054+
10441055
if detailedErrors, authError := ldapManager.m.ValidateLDAPLogin(
10451056
ctx, execCfg.Settings, ldapUserDN, sessionUser, ldapPwd, entry, identMap,
10461057
); authError != nil {
@@ -1051,6 +1062,9 @@ func AuthLDAP(
10511062
c.LogAuthFailed(ctx, eventpb.AuthFailReason_CREDENTIALS_INVALID, errForLog)
10521063
return authError
10531064
}
1065+
1066+
// Add the total authN time to the external auth time.
1067+
b.AddExternalAuthTime(timeutil.Since(ldapAuthNStartTime))
10541068
return nil
10551069
})
10561070

@@ -1087,6 +1101,9 @@ func AuthLDAP(
10871101
return err
10881102
}
10891103

1104+
// Audit the authZ start time.
1105+
ldapAuthZStartTime := timeutil.Now()
1106+
10901107
if ldapGroups, detailedErrors, authError := ldapManager.m.FetchLDAPGroups(
10911108
ctx, execCfg.Settings, ldapUserDN, sessionUser, entry, identMap,
10921109
); authError != nil {
@@ -1097,6 +1114,9 @@ func AuthLDAP(
10971114
c.LogAuthFailed(ctx, eventpb.AuthFailReason_AUTHORIZATION_ERROR, errForLog)
10981115
return authError
10991116
} else {
1117+
// Add the total authZ time to the external auth time.
1118+
b.AddExternalAuthTime(timeutil.Since(ldapAuthZStartTime))
1119+
11001120
c.LogAuthInfof(ctx, redact.Sprintf("LDAP authorization sync succeeded; attempting to assign roles for LDAP groups: %s", ldapGroups))
11011121
// Parse and apply transformation to LDAP group DNs for roles granter.
11021122
sqlRoles := make([]username.SQLUsername, 0, len(ldapGroups))

pkg/sql/pgwire/conn_test.go

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2303,6 +2303,8 @@ func TestPublishConnLatencyMetric(t *testing.T) {
23032303
getHistogramOptionsForIOLatency(AuthGSSConnLatency, time.Hour)),
23042304
AuthScramConnLatency: metric.NewHistogram(
23052305
getHistogramOptionsForIOLatency(AuthScramConnLatency, time.Hour)),
2306+
AuthLDAPConnLatencyInternal: metric.NewHistogram(
2307+
getHistogramOptionsForIOLatency(AuthLDAPConnLatencyInternal, time.Hour)),
23062308
},
23072309
}
23082310

@@ -2402,3 +2404,53 @@ func TestPublishConnLatencyMetric(t *testing.T) {
24022404
require.Equal(t, int64(2), count)
24032405
require.Equal(t, float64(9), sum)
24042406
}
2407+
2408+
// write unit tests for the function publishConnLatencyInternalMetric
2409+
func TestPublishConnLatencyInternalMetric(t *testing.T) {
2410+
defer leaktest.AfterTest(t)()
2411+
2412+
c := conn{
2413+
metrics: &tenantSpecificMetrics{
2414+
AuthLDAPConnLatency: metric.NewHistogram(
2415+
getHistogramOptionsForIOLatency(AuthLDAPConnLatency, time.Hour)),
2416+
AuthLDAPConnLatencyInternal: metric.NewHistogram(
2417+
getHistogramOptionsForIOLatency(AuthLDAPConnLatencyInternal, time.Hour)),
2418+
},
2419+
}
2420+
2421+
// LDAP
2422+
ldapDuration := int64(5)
2423+
ldapInternalDuration := int64(4)
2424+
c.publishConnLatencyMetric(ldapDuration, ldapHBAEntry.string())
2425+
c.publishConnLatencyInternalMetric(ldapInternalDuration, ldapHBAEntry.string())
2426+
w := c.metrics.AuthLDAPConnLatency.WindowedSnapshot()
2427+
count, sum := w.Total()
2428+
require.Equal(t, int64(1), count)
2429+
require.Equal(t, float64(5), sum)
2430+
2431+
wI := c.metrics.AuthLDAPConnLatencyInternal.WindowedSnapshot()
2432+
countI, sumI := wI.Total()
2433+
require.Equal(t, int64(1), countI)
2434+
require.Equal(t, float64(4), sumI)
2435+
2436+
// internal latency must be less than total latency.
2437+
require.Less(t, sumI, sum)
2438+
2439+
// republish on LDAP
2440+
ldapDuration = int64(2)
2441+
ldapInternalDuration = int64(1)
2442+
c.publishConnLatencyMetric(ldapDuration, ldapHBAEntry.string())
2443+
c.publishConnLatencyInternalMetric(ldapInternalDuration, ldapHBAEntry.string())
2444+
w = c.metrics.AuthLDAPConnLatency.WindowedSnapshot()
2445+
count, sum = w.Total()
2446+
require.Equal(t, int64(2), count)
2447+
require.Equal(t, float64(7), sum)
2448+
2449+
wI = c.metrics.AuthLDAPConnLatencyInternal.WindowedSnapshot()
2450+
countI, sumI = wI.Total()
2451+
require.Equal(t, int64(2), countI)
2452+
require.Equal(t, float64(5), sumI)
2453+
2454+
// internal latency must be less than total latency.
2455+
require.Less(t, sumI, sum)
2456+
}

pkg/sql/pgwire/server.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,12 @@ var (
224224
Measurement: "Nanoseconds",
225225
Unit: metric.Unit_NANOSECONDS,
226226
}
227+
AuthLDAPConnLatencyInternal = metric.Metadata{
228+
Name: "auth.ldap.conn.latency.internal",
229+
Help: "Internal Auth Latency to establish and authenticate a SQL connection using LDAP(excludes external LDAP calls)",
230+
Measurement: "Nanoseconds",
231+
Unit: metric.Unit_NANOSECONDS,
232+
}
227233
)
228234

229235
const (
@@ -377,6 +383,7 @@ type tenantSpecificMetrics struct {
377383
AuthLDAPConnLatency metric.IHistogram
378384
AuthGSSConnLatency metric.IHistogram
379385
AuthScramConnLatency metric.IHistogram
386+
AuthLDAPConnLatencyInternal metric.IHistogram
380387
}
381388

382389
func newTenantSpecificMetrics(
@@ -411,6 +418,8 @@ func newTenantSpecificMetrics(
411418
getHistogramOptionsForIOLatency(AuthGSSConnLatency, histogramWindow)),
412419
AuthScramConnLatency: metric.NewHistogram(
413420
getHistogramOptionsForIOLatency(AuthScramConnLatency, histogramWindow)),
421+
AuthLDAPConnLatencyInternal: metric.NewHistogram(
422+
getHistogramOptionsForIOLatency(AuthLDAPConnLatencyInternal, histogramWindow)),
414423
}
415424
}
416425

0 commit comments

Comments
 (0)