Skip to content

Commit 1aabeeb

Browse files
craig[bot]souravcrl
andcommitted
Merge #151105
151105: pgwire: publish authentication latency internal metrics r=pritesh-lahoti a=souravcrl Currently, we don't have a way of estimating how much time external authentication methods like ldap take for an auth attempt. With provisioning and authorization enabled, we will be adding additional latency for the auth flow. Hence, we need to gather additional metrics for enabling user auth: - Introduce a logging metric to complement auth_ldap_conn_latency cockroachdbMetric. This will be called auth.ldap.conn.latency.internal - This will be the actual auth time (authentication +provisioning + authorization) time. We will get the RTT times to the ldap server (bind service plus search user plus bind user plus search groups) and subtract it from the total auth time. fixes #148874 Epic CRDB-21590 Release note(ops change): `auth.ldap.conn.latency.internal` has been added to denote the internal authentication time for ldap auth method. Co-authored-by: souravcrl <[email protected]>
2 parents 8275999 + eb40564 commit 1aabeeb

File tree

6 files changed

+126
-1
lines changed

6 files changed

+126
-1
lines changed

docs/generated/metrics/metrics.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1136,6 +1136,14 @@ layers:
11361136
unit: NANOSECONDS
11371137
aggregation: AVG
11381138
derivative: NONE
1139+
- name: auth.ldap.conn.latency.internal
1140+
exported_name: auth_ldap_conn_latency_internal
1141+
description: Internal Auth Latency to establish and authenticate a SQL connection using LDAP(excludes external LDAP calls)
1142+
y_axis_label: Nanoseconds
1143+
type: HISTOGRAM
1144+
unit: NANOSECONDS
1145+
aggregation: AVG
1146+
derivative: NONE
11391147
- name: auth.password.conn.latency
11401148
exported_name: auth_password_conn_latency
11411149
description: Latency to establish and authenticate a SQL connection using password

pkg/sql/pgwire/auth.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,17 @@ func (c *conn) handleAuthentication(
281281
duration := timeutil.Since(authStartTime).Nanoseconds()
282282
c.publishConnLatencyMetric(duration, hbaEntry.Method.String())
283283

284+
// Get the external auth duration which is time spent on external service by
285+
// the AuthMethod.
286+
externalDuration := behaviors.GetTotalExternalAuthTime().Nanoseconds()
287+
288+
// Compute the internal authentication latency needed to serve a SQL query.
289+
// The internal latency is the time spent on the connection for the actual
290+
// auth time (authentication +provisioning + authorization) time. We will get
291+
// the RTT times to the IDP server and subtract it from the total auth time.
292+
// The metric published is based on the authentication type.
293+
c.publishConnLatencyInternalMetric(duration-externalDuration, hbaEntry.Method.String())
294+
284295
server.lastLoginUpdater.updateLastLoginTime(ctx, dbUser)
285296

286297
return connClose, nil
@@ -305,6 +316,15 @@ func (c *conn) publishConnLatencyMetric(duration int64, authMethod string) {
305316
}
306317
}
307318

319+
// publishConnLatencyInternalMetric publishes the internal latency of the
320+
// connection based on the authentication method.
321+
func (c *conn) publishConnLatencyInternalMetric(internalDuration int64, authMethod string) {
322+
switch authMethod {
323+
case ldapHBAEntry.string():
324+
c.metrics.AuthLDAPConnLatencyInternal.RecordValue(internalDuration)
325+
}
326+
}
327+
308328
func (c *conn) authOKMessage() error {
309329
c.msgBuilder.initMsg(pgwirebase.ServerMsgAuth)
310330
c.msgBuilder.putInt32(authOK)

pkg/sql/pgwire/auth_behaviors.go

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ package pgwire
88
import (
99
"context"
1010
"sync"
11+
"time"
1112

1213
"github.com/cockroachdb/cockroach/pkg/security/provisioning"
1314
"github.com/cockroachdb/cockroach/pkg/security/username"
@@ -33,7 +34,8 @@ type AuthBehaviors struct {
3334
sync.Once
3435
pc provisioning.UserProvisioningConfig
3536
}
36-
provisioner Provisioner
37+
provisioner Provisioner
38+
externalAuthTime time.Duration
3739
}
3840

3941
// Ensure that an AuthBehaviors is easily composable with itself.
@@ -178,3 +180,17 @@ func (b *AuthBehaviors) MaybeProvisionUser(
178180
}
179181
return nil
180182
}
183+
184+
// AddExternalAuthTime adds a time duration to the externalAuthTime during auth.
185+
// The AuthMethod implementation adds new time durations which track time spent
186+
// where auth is waiting on a response from an external service like making an
187+
// API call or validating a response from an external service.
188+
func (b *AuthBehaviors) AddExternalAuthTime(t time.Duration) {
189+
b.externalAuthTime += t
190+
}
191+
192+
// GetTotalExternalAuthTime returns the total aggregated time spent on external
193+
// service during authentication.
194+
func (b *AuthBehaviors) GetTotalExternalAuthTime() time.Duration {
195+
return b.externalAuthTime
196+
}

pkg/sql/pgwire/auth_methods.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ import (
3232
"github.com/cockroachdb/cockroach/pkg/util/errorutil/unimplemented"
3333
"github.com/cockroachdb/cockroach/pkg/util/log"
3434
"github.com/cockroachdb/cockroach/pkg/util/log/eventpb"
35+
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
3536
"github.com/cockroachdb/cockroach/pkg/util/uuid"
3637
"github.com/cockroachdb/errors"
3738
"github.com/cockroachdb/redact"
@@ -1084,6 +1085,9 @@ func AuthLDAP(
10841085
b := &AuthBehaviors{}
10851086
b.SetRoleMapper(UseSpecifiedIdentity(sessionUser))
10861087

1088+
// Audit the lookup start time.
1089+
ldapLookupStartTime := timeutil.Now()
1090+
10871091
ldapUserDN, detailedErrors, authError := ldapManager.m.FetchLDAPUserDN(sCtx, execCfg.Settings, sessionUser, entry, identMap)
10881092
if authError != nil {
10891093
errForLog := authError
@@ -1098,6 +1102,9 @@ func AuthLDAP(
10981102
b.SetReplacementIdentity(ldapUserDN.String())
10991103
}
11001104

1105+
// Add the total lookup time to the external auth time.
1106+
b.AddExternalAuthTime(timeutil.Since(ldapLookupStartTime))
1107+
11011108
b.SetAuthenticator(func(
11021109
ctx context.Context, systemIdentity string, clientConnection bool, _ PasswordRetrievalFn, _ *ldap.DN,
11031110
) error {
@@ -1138,6 +1145,10 @@ func AuthLDAP(
11381145
if len(ldapPwd) == 0 {
11391146
return security.NewErrPasswordUserAuthFailed(sessionUser)
11401147
}
1148+
1149+
// Audit the authN start time.
1150+
ldapAuthNStartTime := timeutil.Now()
1151+
11411152
if detailedErrors, authError := ldapManager.m.ValidateLDAPLogin(
11421153
ctx, execCfg.Settings, ldapUserDN, sessionUser, ldapPwd, entry, identMap,
11431154
); authError != nil {
@@ -1148,6 +1159,9 @@ func AuthLDAP(
11481159
c.LogAuthFailed(ctx, eventpb.AuthFailReason_CREDENTIALS_INVALID, errForLog)
11491160
return authError
11501161
}
1162+
1163+
// Add the total authN time to the external auth time.
1164+
b.AddExternalAuthTime(timeutil.Since(ldapAuthNStartTime))
11511165
return nil
11521166
})
11531167

@@ -1184,6 +1198,9 @@ func AuthLDAP(
11841198
return err
11851199
}
11861200

1201+
// Audit the authZ start time.
1202+
ldapAuthZStartTime := timeutil.Now()
1203+
11871204
if ldapGroups, detailedErrors, authError := ldapManager.m.FetchLDAPGroups(
11881205
ctx, execCfg.Settings, ldapUserDN, sessionUser, entry, identMap,
11891206
); authError != nil {
@@ -1194,6 +1211,9 @@ func AuthLDAP(
11941211
c.LogAuthFailed(ctx, eventpb.AuthFailReason_AUTHORIZATION_ERROR, errForLog)
11951212
return authError
11961213
} else {
1214+
// Add the total authZ time to the external auth time.
1215+
b.AddExternalAuthTime(timeutil.Since(ldapAuthZStartTime))
1216+
11971217
c.LogAuthInfof(ctx, redact.Sprintf("LDAP authorization sync succeeded; attempting to assign roles for LDAP groups: %s", ldapGroups))
11981218
// Parse and apply transformation to LDAP group DNs for roles granter.
11991219
sqlRoles := make([]username.SQLUsername, 0, len(ldapGroups))

pkg/sql/pgwire/conn_test.go

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2303,6 +2303,8 @@ func TestPublishConnLatencyMetric(t *testing.T) {
23032303
getHistogramOptionsForIOLatency(AuthGSSConnLatency, time.Hour)),
23042304
AuthScramConnLatency: metric.NewHistogram(
23052305
getHistogramOptionsForIOLatency(AuthScramConnLatency, time.Hour)),
2306+
AuthLDAPConnLatencyInternal: metric.NewHistogram(
2307+
getHistogramOptionsForIOLatency(AuthLDAPConnLatencyInternal, time.Hour)),
23062308
},
23072309
}
23082310

@@ -2402,3 +2404,53 @@ func TestPublishConnLatencyMetric(t *testing.T) {
24022404
require.Equal(t, int64(2), count)
24032405
require.Equal(t, float64(9), sum)
24042406
}
2407+
2408+
// write unit tests for the function publishConnLatencyInternalMetric
2409+
func TestPublishConnLatencyInternalMetric(t *testing.T) {
2410+
defer leaktest.AfterTest(t)()
2411+
2412+
c := conn{
2413+
metrics: &tenantSpecificMetrics{
2414+
AuthLDAPConnLatency: metric.NewHistogram(
2415+
getHistogramOptionsForIOLatency(AuthLDAPConnLatency, time.Hour)),
2416+
AuthLDAPConnLatencyInternal: metric.NewHistogram(
2417+
getHistogramOptionsForIOLatency(AuthLDAPConnLatencyInternal, time.Hour)),
2418+
},
2419+
}
2420+
2421+
// LDAP
2422+
ldapDuration := int64(5)
2423+
ldapInternalDuration := int64(4)
2424+
c.publishConnLatencyMetric(ldapDuration, ldapHBAEntry.string())
2425+
c.publishConnLatencyInternalMetric(ldapInternalDuration, ldapHBAEntry.string())
2426+
w := c.metrics.AuthLDAPConnLatency.WindowedSnapshot()
2427+
count, sum := w.Total()
2428+
require.Equal(t, int64(1), count)
2429+
require.Equal(t, float64(5), sum)
2430+
2431+
wI := c.metrics.AuthLDAPConnLatencyInternal.WindowedSnapshot()
2432+
countI, sumI := wI.Total()
2433+
require.Equal(t, int64(1), countI)
2434+
require.Equal(t, float64(4), sumI)
2435+
2436+
// internal latency must be less than total latency.
2437+
require.Less(t, sumI, sum)
2438+
2439+
// republish on LDAP
2440+
ldapDuration = int64(2)
2441+
ldapInternalDuration = int64(1)
2442+
c.publishConnLatencyMetric(ldapDuration, ldapHBAEntry.string())
2443+
c.publishConnLatencyInternalMetric(ldapInternalDuration, ldapHBAEntry.string())
2444+
w = c.metrics.AuthLDAPConnLatency.WindowedSnapshot()
2445+
count, sum = w.Total()
2446+
require.Equal(t, int64(2), count)
2447+
require.Equal(t, float64(7), sum)
2448+
2449+
wI = c.metrics.AuthLDAPConnLatencyInternal.WindowedSnapshot()
2450+
countI, sumI = wI.Total()
2451+
require.Equal(t, int64(2), countI)
2452+
require.Equal(t, float64(5), sumI)
2453+
2454+
// internal latency must be less than total latency.
2455+
require.Less(t, sumI, sum)
2456+
}

pkg/sql/pgwire/server.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,12 @@ var (
224224
Measurement: "Nanoseconds",
225225
Unit: metric.Unit_NANOSECONDS,
226226
}
227+
AuthLDAPConnLatencyInternal = metric.Metadata{
228+
Name: "auth.ldap.conn.latency.internal",
229+
Help: "Internal Auth Latency to establish and authenticate a SQL connection using LDAP(excludes external LDAP calls)",
230+
Measurement: "Nanoseconds",
231+
Unit: metric.Unit_NANOSECONDS,
232+
}
227233
)
228234

229235
const (
@@ -377,6 +383,7 @@ type tenantSpecificMetrics struct {
377383
AuthLDAPConnLatency metric.IHistogram
378384
AuthGSSConnLatency metric.IHistogram
379385
AuthScramConnLatency metric.IHistogram
386+
AuthLDAPConnLatencyInternal metric.IHistogram
380387
}
381388

382389
func newTenantSpecificMetrics(
@@ -411,6 +418,8 @@ func newTenantSpecificMetrics(
411418
getHistogramOptionsForIOLatency(AuthGSSConnLatency, histogramWindow)),
412419
AuthScramConnLatency: metric.NewHistogram(
413420
getHistogramOptionsForIOLatency(AuthScramConnLatency, histogramWindow)),
421+
AuthLDAPConnLatencyInternal: metric.NewHistogram(
422+
getHistogramOptionsForIOLatency(AuthLDAPConnLatencyInternal, histogramWindow)),
414423
}
415424
}
416425

0 commit comments

Comments
 (0)