Skip to content

Commit a66bd42

Browse files
appleboy and claude authored
feat(cache): add configurable token verification cache layer (#132)
* feat(cache): add configurable token verification cache layer

  Add an optional cache layer (memory/Redis/Redis-aside) for token verification to reduce database queries on the ValidateToken hot path. The cache reuses the existing generic Cache[T] infrastructure and is disabled by default (TOKEN_CACHE_ENABLED=false).

  Key design decisions:
  - Only ValidateToken uses the cache; IntrospectToken (RFC 7662) always hits the database for authoritative real-time state
  - Single-token revocations immediately invalidate the cache
  - Bulk revocations (by user, by token family) collect hashes before deletion and invalidate each cached entry
  - Cache invalidation happens after transaction commit, not inside, to avoid premature eviction on rollback

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* test(cache): add cache invalidation tests and fix silent error handling

  - Add TestRevokeTokenByID_CacheInvalidated for ID-based revocation
  - Add TestEnableToken_CacheInvalidated for disable/enable cycle
  - Add TestRevokeAllUserTokens_CacheInvalidated for bulk eviction
  - Add TestRefreshAccessToken_RotationMode_CacheInvalidated for rotation
  - Add TestRevokeTokenFamily_CacheInvalidated for replay detection
  - Log errors from GetActiveTokenHashesByFamilyID instead of discarding
  - Log errors from GetTokensByUserID in RevokeAllUserTokens
  - Add safe hash truncation in invalidateTokenCache error log

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix(cache): only invalidate cache after successful revocation

  - Restructure RevokeAllUserTokens to skip cache invalidation when DB revocation fails, preventing stale cache entries on error
  - Clarify .env.example comment about cache TTL behavior during bulk revocation to reflect actual per-token invalidation logic

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix(cache): graceful fallback on cache errors and strict token lookup

  - Fall back to direct DB lookup when cache backend fails (e.g. Redis unavailable) instead of rejecting valid tokens
  - Return error from GetAccessTokenByID in RevokeTokenByStatus to prevent cache entries from becoming stale when token lookup fails

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* style(cache): fix golines and testifylint issues

  - Break long log.Printf lines to satisfy golines max line length
  - Use require.Error instead of assert.Error for error assertions

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* test(cache): add tokeninfo integration tests and validation benchmarks

  - Add end-to-end handler tests verifying cache population, revoke/disable invalidation, and nil-cache regression for /oauth/tokeninfo
  - Add benchmarks comparing ValidateToken with and without memory cache (4.4x throughput improvement on cache hit path)

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* refactor(test): simplify token cache tests

  - Remove redundant WHAT-comments, keep only WHY-comments
  - Deduplicate NewTokenService calls using core.Cache interface variable

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 0d8fc6d commit a66bd42

23 files changed

Lines changed: 1128 additions & 13 deletions

.env.example

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -194,6 +194,27 @@ EXPIRED_TOKEN_CLEANUP_INTERVAL=1h # How often to run the cleanup (default:
194194
# Client-side cache size per connection in MB for redis-aside mode only (default: 32MB)
195195
# CLIENT_COUNT_CACHE_SIZE_PER_CONN=32
196196

197+
# ============================================================
198+
# Token Cache Settings
199+
# ============================================================
200+
# Cache layer for token verification to reduce DB queries.
201+
# Disabled by default; enable for high-traffic or multi-node deployments.
202+
# TOKEN_CACHE_ENABLED=false
203+
204+
# Cache backend: memory (single instance), redis, or redis-aside (default: memory)
205+
# TOKEN_CACHE_TYPE=memory
206+
207+
# Cache lifetime (default: 5m). Bulk revocation attempts per-token cache invalidation; in rare
208+
# cases where cache invalidation is incomplete or fails, revoked tokens may remain valid in cache
209+
# for up to this duration. Single-token revocation invalidates cache immediately.
210+
# TOKEN_CACHE_TTL=5m
211+
212+
# Client-side cache TTL for redis-aside mode only (default: 30s)
213+
# TOKEN_CACHE_CLIENT_TTL=30s
214+
215+
# Client-side cache size per connection in MB for redis-aside mode only (default: 32MB)
216+
# TOKEN_CACHE_SIZE_PER_CONN=32
217+
197218
# ============================================================
198219
# Bootstrap and Shutdown Timeout Settings
199220
# ============================================================

internal/bootstrap/bootstrap.go

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,8 @@ type Application struct {
3333
UserCacheCloser func() error
3434
ClientCountCache core.Cache[int64]
3535
ClientCountCacheCloser func() error
36+
TokenCache core.Cache[models.AccessToken]
37+
TokenCacheCloser func() error
3638
RateLimitRedisClient *redis.Client
3739

3840
// Services
@@ -114,6 +116,12 @@ func (app *Application) initializeInfrastructure(ctx context.Context) error {
114116
return err
115117
}
116118

119+
// Token Cache
120+
app.TokenCache, app.TokenCacheCloser, err = initializeTokenCache(ctx, app.Config)
121+
if err != nil {
122+
return err
123+
}
124+
117125
// Redis (for rate limiting)
118126
app.RateLimitRedisClient, err = initializeRateLimitRedisClient(ctx, app.Config)
119127
if err != nil {
@@ -144,6 +152,7 @@ func (app *Application) initializeBusinessLayer() {
144152
app.UserCache,
145153
app.ClientCountCache,
146154
app.TokenProvider,
155+
app.TokenCache,
147156
)
148157
}
149158

@@ -194,6 +203,7 @@ func (app *Application) startWithGracefulShutdown() {
194203
addCacheCleanupJob(m, app.MetricsCache, app.Config)
195204
addUserCacheCleanupJob(m, app.UserCache, app.Config)
196205
addClientCountCacheCleanupJob(m, app.ClientCountCache, app.Config)
206+
addTokenCacheCleanupJob(m, app.TokenCache, app.Config)
197207
addDatabaseShutdownJob(m, app.DB, app.Config)
198208
addAuditLogCleanupJob(m, app.Config, app.AuditService)
199209
addExpiredTokenCleanupJob(m, app.DB, app.Config)

internal/bootstrap/cache.go

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -113,6 +113,23 @@ func initializeClientCountCache(
113113
})
114114
}
115115

116+
// initializeTokenCache builds the token verification cache via initializeCache; it returns (nil, nil, nil) when cfg.TokenCacheEnabled is false.
117+
func initializeTokenCache(
118+
ctx context.Context,
119+
cfg *config.Config,
120+
) (core.Cache[models.AccessToken], func() error, error) {
121+
if !cfg.TokenCacheEnabled {
122+
return nil, nil, nil
123+
}
124+
return initializeCache[models.AccessToken](ctx, cfg, cacheOpts{
125+
cacheType: cfg.TokenCacheType,
126+
keyPrefix: "authgate:tokens:",
127+
clientTTL: cfg.TokenCacheClientTTL,
128+
sizePerConn: cfg.TokenCacheSizePerConn,
129+
label: "Token",
130+
})
131+
}
132+
116133
// initializeUserCache initializes the user cache (always enabled, defaults to memory)
117134
func initializeUserCache(
118135
ctx context.Context,

internal/bootstrap/server.go

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -262,6 +262,18 @@ func addClientCountCacheCleanupJob(
262262
addNamedCacheShutdownJob(m, "client count cache", clientCountCache.Close, cfg.CacheCloseTimeout)
263263
}
264264

265+
// addTokenCacheCleanupJob passes tokenCache.Close to addNamedCacheShutdownJob as "token cache" with cfg.CacheCloseTimeout; it does nothing when tokenCache is nil.
266+
func addTokenCacheCleanupJob(
267+
m *graceful.Manager,
268+
tokenCache core.Cache[models.AccessToken],
269+
cfg *config.Config,
270+
) {
271+
if tokenCache == nil {
272+
return
273+
}
274+
addNamedCacheShutdownJob(m, "token cache", tokenCache.Close, cfg.CacheCloseTimeout)
275+
}
276+
265277
// addExpiredTokenCleanupJob adds a periodic job that purges expired access tokens
266278
// and device codes from the database to prevent unbounded table growth.
267279
func addExpiredTokenCleanupJob(m *graceful.Manager, db *store.Store, cfg *config.Config) {

internal/bootstrap/services.go

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,7 @@ func initializeServices(
2727
userCache core.Cache[models.User],
2828
clientCountCache core.Cache[int64],
2929
tokenProvider core.TokenProvider,
30+
tokenCache core.Cache[models.AccessToken],
3031
) serviceSet {
3132
// Initialize authentication providers
3233
localProvider := auth.NewLocalAuthProvider(db)
@@ -51,6 +52,7 @@ func initializeServices(
5152
tokenProvider,
5253
auditService,
5354
prometheusMetrics,
55+
tokenCache,
5456
)
5557
clientService := services.NewClientService(
5658
db, auditService, clientCountCache, cfg.ClientCountCacheTTL,

internal/config/config.go

Lines changed: 34 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -176,6 +176,13 @@ type Config struct {
176176
ClientCountCacheClientTTL time.Duration // CLIENT_COUNT_CACHE_CLIENT_TTL for redis-aside (default: 10m)
177177
ClientCountCacheSizePerConn int // CLIENT_COUNT_CACHE_SIZE_PER_CONN for redis-aside in MB (default: 32MB)
178178

179+
// Token Cache settings (reduces DB queries for token verification)
180+
TokenCacheEnabled bool // TOKEN_CACHE_ENABLED: enable token verification cache (default: false)
181+
TokenCacheType string // TOKEN_CACHE_TYPE: memory|redis|redis-aside (default: memory)
182+
TokenCacheTTL time.Duration // TOKEN_CACHE_TTL: cache lifetime (default: 5m)
183+
TokenCacheClientTTL time.Duration // TOKEN_CACHE_CLIENT_TTL: redis-aside client-side TTL (default: 30s)
184+
TokenCacheSizePerConn int // TOKEN_CACHE_SIZE_PER_CONN: redis-aside size in MB (default: 32MB)
185+
179186
// Dynamic Client Registration (RFC 7591)
180187
EnableDynamicClientRegistration bool // Enable POST /oauth/register endpoint (default: false)
181188
DynamicClientRegistrationRateLimit int // Requests per minute for /oauth/register (default: 5)
@@ -371,6 +378,13 @@ func Load() *Config {
371378
32,
372379
), // 32MB default
373380

381+
// Token Cache settings
382+
TokenCacheEnabled: getEnvBool("TOKEN_CACHE_ENABLED", false),
383+
TokenCacheType: getEnv("TOKEN_CACHE_TYPE", CacheTypeMemory),
384+
TokenCacheTTL: getEnvDuration("TOKEN_CACHE_TTL", 5*time.Minute),
385+
TokenCacheClientTTL: getEnvDuration("TOKEN_CACHE_CLIENT_TTL", 30*time.Second),
386+
TokenCacheSizePerConn: getEnvInt("TOKEN_CACHE_SIZE_PER_CONN", 32), // 32MB default
387+
374388
// Dynamic Client Registration (RFC 7591)
375389
EnableDynamicClientRegistration: getEnvBool("ENABLE_DYNAMIC_CLIENT_REGISTRATION", false),
376390
DynamicClientRegistrationRateLimit: getEnvInt("DYNAMIC_CLIENT_REGISTRATION_RATE_LIMIT", 5),
@@ -546,6 +560,26 @@ func (c *Config) Validate() error {
546560
)
547561
}
548562

563+
// Token cache validation (only when enabled)
564+
if c.TokenCacheEnabled {
565+
if err := validateCacheType("TOKEN_CACHE_TYPE", c.TokenCacheType, c.RedisAddr); err != nil {
566+
return err
567+
}
568+
if c.TokenCacheTTL <= 0 {
569+
return fmt.Errorf(
570+
"TOKEN_CACHE_TTL must be a positive duration when TOKEN_CACHE_ENABLED=true (got %s)",
571+
c.TokenCacheTTL,
572+
)
573+
}
574+
if c.TokenCacheType == CacheTypeRedisAside && c.TokenCacheClientTTL <= 0 {
575+
return fmt.Errorf(
576+
"TOKEN_CACHE_CLIENT_TTL must be a positive duration when TOKEN_CACHE_TYPE=%q (got %s)",
577+
CacheTypeRedisAside,
578+
c.TokenCacheClientTTL,
579+
)
580+
}
581+
}
582+
549583
// SESSION_REMEMBER_ME_MAX_AGE must be positive when remember-me is enabled.
550584
// The gorilla/sessions cookie store codec has a default max-age of 30 days;
551585
// values above 2592000 (30 days) may cause cookie decode failures.

internal/core/store.go

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -77,6 +77,7 @@ type TokenReader interface {
7777
params types.PaginationParams,
7878
) ([]models.AccessToken, types.PaginationResult, error)
7979
GetTokensByCategoryAndStatus(userID, category, status string) ([]models.AccessToken, error)
80+
GetActiveTokenHashesByFamilyID(familyID string) ([]string, error)
8081
}
8182

8283
// TokenWriter groups token mutation operations.

internal/handlers/session_test.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,7 @@ func setupSessionServices(t *testing.T) (*store.Store, *services.TokenService) {
4141
auditSvc := services.NewAuditService(s, false, 0)
4242
deviceSvc := services.NewDeviceService(s, cfg, auditSvc, metrics.NewNoopMetrics())
4343
tokenSvc := services.NewTokenService(
44-
s, cfg, deviceSvc, localProvider, auditSvc, metrics.NewNoopMetrics(),
44+
s, cfg, deviceSvc, localProvider, auditSvc, metrics.NewNoopMetrics(), nil,
4545
)
4646

4747
return s, tokenSvc

0 commit comments

Comments
 (0)