Skip to content

Commit 67ce864

Browse files
authored
feat: filter by user uid (#26)
## Summary

This PR further improves platform-wide query performance, adjusts the ClickHouse audit log schema to use the correct timestamp for the request, adds support for querying by the user's UID, and adjusts the user-scoped projection to use the user's UID value instead of the username.

## Details

- **Filter by User's UID** - Filtering by UID narrows results to a specific user through a stable identifier rather than an email address, which the user can change. UIDs exist only for users of the platform; internal components that authenticate with certificates do not have UIDs. This gives us a clean way of filtering internal components out of audit logs.
- **Request Received Timestamp** - I swapped to using the `.requestReceivedTimestamp` field of the audit log to represent the audit log's timestamp, since it's the timestamp when the request was received by the apiserver. The `.stageTimestamp` is used by the collection pipeline to calculate delays in the pipeline because that timestamp indicates when the audit log was generated by the apiserver.
- **User UID for user scope** - I swapped to using the user's UID as the filtering / sorting column when querying the audit log system through the user scope, since the UID is the stable identifier for the user and is the value that's provided in the user's extra information.
- **Hourly timestamp buckets** - Updated all projections to use the same hourly time bucketing introduced in #23.

---

Relates to datum-cloud/enhancements#536
2 parents b0ad85e + ca841cd commit 67ce864

File tree

12 files changed

+154
-41
lines changed

12 files changed

+154
-41
lines changed

config/components/clickhouse-migrations/configmap.yaml

Lines changed: 41 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -328,9 +328,10 @@ data:
328328
event_json String CODEC(ZSTD(3)),
329329
330330
-- Core timestamp (always queried)
331+
-- Uses requestReceivedTimestamp, which represents when the API server received the request.
331332
timestamp DateTime64(3) MATERIALIZED
332333
coalesce(
333-
parseDateTime64BestEffortOrNull(JSONExtractString(event_json, 'stageTimestamp')),
334+
parseDateTime64BestEffortOrNull(JSONExtractString(event_json, 'requestReceivedTimestamp')),
334335
now64(3)
335336
),
336337
@@ -354,6 +355,12 @@ data:
354355
''
355356
),
356357
358+
user_uid String MATERIALIZED
359+
coalesce(
360+
JSONExtractString(event_json, 'user', 'uid'),
361+
''
362+
),
363+
357364
-- Request identity
358365
audit_id UUID MATERIALIZED
359366
toUUIDOrZero(coalesce(JSONExtractString(event_json, 'auditID'), '')),
@@ -390,6 +397,7 @@ data:
390397
INDEX bf_api_resource (api_group, resource) TYPE bloom_filter(0.01) GRANULARITY 1,
391398
INDEX idx_verb_resource_bloom (verb, resource) TYPE bloom_filter(0.01) GRANULARITY 1,
392399
INDEX idx_user_bloom user TYPE bloom_filter(0.001) GRANULARITY 1,
400+
INDEX idx_user_uid_bloom user_uid TYPE bloom_filter(0.001) GRANULARITY 1,
393401
394402
-- Set indexes for low-cardinality columns
395403
INDEX idx_status_code_set status_code TYPE set(100) GRANULARITY 4,
@@ -418,38 +426,64 @@ data:
418426
-- This projection is optimized for platform-wide queries that filter by
419427
-- timestamp, api_group, and resource (common for cross-tenant analytics).
420428
--
421-
-- Sort order: (timestamp, api_group, resource, audit_id)
429+
-- Sort order: (toStartOfHour(timestamp), api_group, resource, audit_id, timestamp)
422430
-- Use cases:
423431
-- - "All events for 'apps' API group and 'deployments' resource in last 24 hours"
424432
-- - "All events for core API 'pods' resource"
425433
-- - Platform-wide verb/resource filtering
426434
--
435+
-- Hour bucketing improves compression, data locality, and deduplication efficiency.
427436
428437
ALTER TABLE audit.events
429438
ADD PROJECTION platform_query_projection
430439
(
431440
SELECT *
432-
ORDER BY (timestamp, api_group, resource, audit_id)
441+
ORDER BY (toStartOfHour(timestamp), api_group, resource, audit_id, timestamp)
433442
);
434443
435444
-- ============================================================================
436445
-- Step 4: Add User Query Projection
437446
-- ============================================================================
438-
-- This projection is optimized for user-specific queries within time ranges.
447+
-- This projection is optimized for username-based queries within time ranges.
439448
--
440-
-- Sort order: (timestamp, user, api_group, resource)
449+
-- Sort order: (toStartOfHour(timestamp), user, api_group, resource, audit_id, timestamp)
441450
-- Use cases:
442451
-- - "What did user@example.com do in the last 24 hours?"
443452
-- - "All events by system:serviceaccount:kube-system:default"
444-
-- - User-specific verb/resource filtering
453+
-- - Platform admin filtering by username in CEL expressions
445454
--
455+
-- Hour bucketing improves compression, data locality, and deduplication efficiency.
446456
-- ClickHouse automatically chooses the best projection for each query based
447457
-- on the WHERE clause filters.
448458
449459
ALTER TABLE audit.events
450460
ADD PROJECTION user_query_projection
451461
(
452462
SELECT *
453-
ORDER BY (timestamp, user, api_group, resource)
463+
ORDER BY (toStartOfHour(timestamp), user, api_group, resource, audit_id, timestamp)
464+
);
465+
466+
-- ============================================================================
467+
-- Step 5: Add User UID Query Projection
468+
-- ============================================================================
469+
-- This projection is optimized for user-scoped queries by UID.
470+
--
471+
-- Sort order: (toStartOfHour(timestamp), user_uid, api_group, resource, audit_id, timestamp)
472+
-- Use cases:
473+
-- - User-scoped queries: "Show all activity by user with UID abc-123"
474+
-- - Cross-organization user activity tracking
475+
-- - User-specific audit trail regardless of username changes
476+
--
477+
-- This projection is used when scope.type == "user" to filter by user_uid
478+
-- instead of scope_name, enabling queries for a user's activity across all
479+
-- organizations and projects on the platform.
480+
--
481+
-- Hour bucketing improves compression, data locality, and deduplication efficiency.
482+
483+
ALTER TABLE audit.events
484+
ADD PROJECTION user_uid_query_projection
485+
(
486+
SELECT *
487+
ORDER BY (toStartOfHour(timestamp), user_uid, api_group, resource, audit_id, timestamp)
454488
);
455489

docs/api.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ _Appears in:_
7878
| --- | --- | --- | --- |
7979
| `startTime` _string_ | StartTime is the beginning of your search window (inclusive).<br /><br />Format Options:<br />- Relative: "now-30d", "now-2h", "now-30m" (units: s, m, h, d, w)<br /> Use for dashboards and recurring queries - they adjust automatically.<br />- Absolute: "2024-01-01T00:00:00Z" (RFC3339 with timezone)<br /> Use for historical analysis of specific time periods.<br /><br />Examples:<br /> "now-30d" → 30 days ago<br /> "2024-06-15T14:30:00-05:00" → specific time with timezone offset | | |
8080
| `endTime` _string_ | EndTime is the end of your search window (exclusive).<br /><br />Uses the same formats as StartTime. Commonly "now" for current moment.<br />Must be greater than StartTime.<br /><br />Examples:<br /> "now" → current time<br /> "2024-01-02T00:00:00Z" → specific end point | | |
81-
| `filter` _string_ | Filter narrows results using CEL (Common Expression Language). Leave empty to get all events.<br /><br />Available Fields:<br /> verb - API action: get, list, create, update, patch, delete, watch<br /> auditID - unique event identifier<br /> stageTimestamp - when this stage occurred (RFC3339 timestamp)<br /> user.username - who made the request (user or service account)<br /> responseStatus.code - HTTP response code (200, 201, 404, 500, etc.)<br /> objectRef.namespace - target resource namespace<br /> objectRef.resource - resource type (pods, deployments, secrets, configmaps, etc.)<br /> objectRef.name - specific resource name<br /><br />Operators: ==, !=, <, >, <=, >=, &&, \|\|, in<br />String Functions: startsWith(), endsWith(), contains()<br /><br />Common Patterns:<br /> "verb == 'delete'" - All deletions<br /> "objectRef.namespace == 'production'" - Activity in production namespace<br /> "verb in ['create', 'update', 'delete', 'patch']" - All write operations<br /> "responseStatus.code >= 400" - Failed requests<br /> "user.username.startsWith('system:serviceaccount:')" - Service account activity<br /> "objectRef.resource == 'secrets'" - Secret access<br /> "verb == 'delete' && objectRef.namespace == 'production'" - Production deletions<br /><br />Note: Use single quotes for strings. Field names are case-sensitive.<br />CEL reference: https://cel.dev | | |
81+
| `filter` _string_ | Filter narrows results using CEL (Common Expression Language). Leave empty to get all events.<br /><br />Available Fields:<br /> verb - API action: get, list, create, update, patch, delete, watch<br /> auditID - unique event identifier<br /> requestReceivedTimestamp - when the API server received the request (RFC3339 timestamp)<br /> user.username - who made the request (user or service account)<br /> user.uid - unique user identifier (stable across username changes)<br /> responseStatus.code - HTTP response code (200, 201, 404, 500, etc.)<br /> objectRef.namespace - target resource namespace<br /> objectRef.resource - resource type (pods, deployments, secrets, configmaps, etc.)<br /> objectRef.name - specific resource name<br /><br />Operators: ==, !=, <, >, <=, >=, &&, \|\|, in<br />String Functions: startsWith(), endsWith(), contains()<br /><br />Common Patterns:<br /> "verb == 'delete'" - All deletions<br /> "objectRef.namespace == 'production'" - Activity in production namespace<br /> "verb in ['create', 'update', 'delete', 'patch']" - All write operations<br /> "responseStatus.code >= 400" - Failed requests<br /> "user.username.startsWith('system:serviceaccount:')" - Service account activity<br /> "user.uid == '550e8400-e29b-41d4-a716-446655440000'" - Specific user by UID<br /> "objectRef.resource == 'secrets'" - Secret access<br /> "verb == 'delete' && objectRef.namespace == 'production'" - Production deletions<br /><br />Note: Use single quotes for strings. Field names are case-sensitive.<br />CEL reference: https://cel.dev | | |
8282
| `limit` _integer_ | Limit sets the maximum number of results per page.<br />Default: 100, Maximum: 1000.<br /><br />Use smaller values (10-50) for exploration, larger (500-1000) for data collection.<br />Use continue to fetch additional pages. | | |
8383
| `continue` _string_ | Continue is the pagination cursor for fetching additional pages.<br /><br />Leave empty for the first page. If status.continue is non-empty after a query,<br />copy that value here in a new query with identical parameters to get the next page.<br />Repeat until status.continue is empty.<br /><br />Important: Keep all other parameters (startTime, endTime, filter, limit) identical<br />across paginated requests. The cursor is opaque - copy it exactly without modification. | | |
8484

@@ -96,7 +96,7 @@ _Appears in:_
9696

9797
| Field | Description | Default | Validation |
9898
| --- | --- | --- | --- |
99-
| `results` _Event array_ | Results contains matching audit events, sorted newest-first.<br /><br />Each event follows the Kubernetes audit.Event format with fields like:<br /> verb, user.username, objectRef.\{namespace,resource,name\}, stageTimestamp,<br /> responseStatus.code, requestObject, responseObject<br /><br />Empty results? Try broadening your filter or time range.<br />Full documentation: https://kubernetes.io/docs/reference/config-api/apiserver-audit.v1/ | | |
99+
| `results` _Event array_ | Results contains matching audit events, sorted newest-first.<br /><br />Each event follows the Kubernetes audit.Event format with fields like:<br /> verb, user.username, objectRef.\{namespace,resource,name\}, requestReceivedTimestamp,<br /> stageTimestamp, responseStatus.code, requestObject, responseObject<br /><br />Empty results? Try broadening your filter or time range.<br />Full documentation: https://kubernetes.io/docs/reference/config-api/apiserver-audit.v1/ | | |
100100
| `continue` _string_ | Continue is the pagination cursor.<br />Non-empty means more results are available - copy this to spec.continue for the next page.<br />Empty means you have all results. | | |
101101
| `effectiveStartTime` _string_ | EffectiveStartTime is the actual start time used for this query (RFC3339 format).<br /><br />When you use relative times like "now-7d", this shows the exact timestamp that was<br />calculated. Useful for understanding exactly what time range was queried, especially<br />for auditing, debugging, or recreating queries with absolute timestamps.<br /><br />Example: If you query with startTime="now-7d" at 2025-12-17T12:00:00Z,<br />this will be "2025-12-10T12:00:00Z". | | |
102102
| `effectiveEndTime` _string_ | EffectiveEndTime is the actual end time used for this query (RFC3339 format).<br /><br />When you use relative times like "now", this shows the exact timestamp that was<br />calculated. Useful for understanding exactly what time range was queried.<br /><br />Example: If you query with endTime="now" at 2025-12-17T12:00:00Z,<br />this will be "2025-12-17T12:00:00Z". | | |

internal/cel/cel_test.go

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,20 @@ func TestCELFilterWorkflow(t *testing.T) {
100100
wantArgCount: 1,
101101
wantErr: false,
102102
},
103+
{
104+
name: "user.uid filter",
105+
filter: "user.uid == '550e8400-e29b-41d4-a716-446655440000'",
106+
wantSQL: "user_uid = {arg1}",
107+
wantArgCount: 1,
108+
wantErr: false,
109+
},
110+
{
111+
name: "combined user.username and user.uid",
112+
filter: "user.username == '[email protected]' || user.uid == '550e8400-e29b-41d4-a716-446655440000'",
113+
wantSQL: "(user = {arg1} OR user_uid = {arg2})",
114+
wantArgCount: 2,
115+
wantErr: false,
116+
},
103117
}
104118

105119
for _, tt := range tests {
@@ -146,7 +160,7 @@ func TestCELFilterCompilation(t *testing.T) {
146160
},
147161
{
148162
name: "valid timestamp comparison",
149-
filter: "stageTimestamp >= timestamp('2024-01-01T00:00:00Z')",
163+
filter: "requestReceivedTimestamp >= timestamp('2024-01-01T00:00:00Z')",
150164
wantErr: false,
151165
},
152166
{
@@ -207,7 +221,7 @@ func TestSQLConversionEdgeCases(t *testing.T) {
207221
}{
208222
{
209223
name: "timestamp parameter is correctly formatted",
210-
filter: "stageTimestamp >= timestamp('2024-01-01T00:00:00Z')",
224+
filter: "requestReceivedTimestamp >= timestamp('2024-01-01T00:00:00Z')",
211225
validate: func(t *testing.T, sql string, args []interface{}) {
212226
if len(args) != 1 {
213227
t.Errorf("Expected 1 arg, got %d", len(args))
@@ -266,7 +280,7 @@ func TestEnvironment(t *testing.T) {
266280
validExpressions := []string{
267281
"auditID == 'test'",
268282
"verb == 'delete'",
269-
"stageTimestamp > timestamp('2024-01-01T00:00:00Z')",
283+
"requestReceivedTimestamp > timestamp('2024-01-01T00:00:00Z')",
270284
"objectRef.namespace == 'default'",
271285
"objectRef.resource == 'pods'",
272286
"objectRef.name == 'my-pod'",

internal/cel/errors.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ func formatFilterError(err error) string {
5050
msg.WriteString(fmt.Sprintf("Invalid filter: %s", errMsg))
5151
}
5252

53-
msg.WriteString(". Available fields: auditID, verb, stageTimestamp, objectRef.namespace, objectRef.resource, objectRef.name, user.username, user.groups, responseStatus.code")
53+
msg.WriteString(". Available fields: auditID, verb, requestReceivedTimestamp, objectRef.namespace, objectRef.resource, objectRef.name, user.username, user.groups, responseStatus.code")
5454
msg.WriteString(". See https://cel.dev for CEL syntax")
5555

5656
return msg.String()

internal/cel/filter.go

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,11 @@ var tracer = otel.Tracer("activity-cel-filter")
2020

2121
// Environment creates a CEL environment for audit event filtering.
2222
//
23-
// Available fields: auditID, verb, stageTimestamp,
24-
// objectRef.{namespace,resource,name}, user.username, responseStatus.code
23+
// Available fields: auditID, verb, requestReceivedTimestamp,
24+
// objectRef.{namespace,resource,name}, user.username, user.uid, responseStatus.code
25+
//
26+
// Note: stageTimestamp is intentionally NOT available for filtering as it should
27+
// only be used for internal pipeline delay calculations, not for querying events.
2528
//
2629
// Supports standard CEL operators (==, &&, ||, in) and string methods
2730
// (startsWith, endsWith, contains).
@@ -33,7 +36,7 @@ func Environment() (*cel.Env, error) {
3336
return cel.NewEnv(
3437
cel.Variable("auditID", cel.StringType),
3538
cel.Variable("verb", cel.StringType),
36-
cel.Variable("stageTimestamp", cel.TimestampType),
39+
cel.Variable("requestReceivedTimestamp", cel.TimestampType),
3740

3841
cel.Variable("objectRef", objectRefType),
3942
cel.Variable("user", userType),
@@ -320,7 +323,7 @@ func (c *sqlConverter) convertIdentExpr(ident *expr.Expr_Ident) (string, error)
320323
return "audit_id", nil
321324
case "verb":
322325
return "verb", nil
323-
case "stageTimestamp":
326+
case "requestReceivedTimestamp":
324327
return "timestamp", nil
325328

326329
case "objectRef", "user", "responseStatus":
@@ -381,13 +384,15 @@ func (c *sqlConverter) convertSelectExpr(sel *expr.Expr_Select) (string, error)
381384

382385
case baseObject == "user" && field == "username":
383386
return "user", nil
387+
case baseObject == "user" && field == "uid":
388+
return "user_uid", nil
384389

385390
case baseObject == "responseStatus" && field == "code":
386391
return "status_code", nil
387392

388393
default:
389394
// Provide helpful suggestions for common fields that aren't filterable
390-
return "", fmt.Errorf("field '%s.%s' is not available for filtering. Available fields: auditID, verb, stageTimestamp, objectRef.apiGroup, objectRef.namespace, objectRef.resource, objectRef.name, user.username, user.groups, responseStatus.code", baseObject, field)
395+
return "", fmt.Errorf("field '%s.%s' is not available for filtering. Available fields: auditID, verb, requestReceivedTimestamp, objectRef.apiGroup, objectRef.namespace, objectRef.resource, objectRef.name, user.username, user.uid, user.groups, responseStatus.code", baseObject, field)
391396
}
392397
}
393398

internal/registry/activity/auditlog/scope.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,14 @@ const (
1515
// Used to restrict query results to the appropriate organizational boundary.
1616
type ScopeInfo struct {
1717
Type string // "platform", "organization", "project", "user"
18-
Name string // scope identifier (org name, project name, etc.)
18+
Name string // scope identifier (org name, project name, user UID, etc.)
1919
}
2020

2121
// ExtractScopeFromUser determines the audit log query scope from user authentication metadata.
2222
// Defaults to platform-wide scope when no parent resource is specified.
23+
//
24+
// For user scope, the Name field contains the user's UID (not username), which enables
25+
// querying all activity performed by that user across all organizations and projects.
2326
func ExtractScopeFromUser(u user.Info) ScopeInfo {
2427
if u.GetExtra() == nil {
2528
return ScopeInfo{Type: "platform", Name: ""}

internal/registry/activity/auditlog/scope_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,10 +37,10 @@ func TestExtractScopeFromUser(t *testing.T) {
3737
user: &user.DefaultInfo{
3838
Extra: map[string][]string{
3939
ParentKindExtraKey: {"User"},
40-
ParentNameExtraKey: {"john.doe"},
40+
ParentNameExtraKey: {"550e8400-e29b-41d4-a716-446655440000"},
4141
},
4242
},
43-
expected: ScopeInfo{Type: "user", Name: "john.doe"},
43+
expected: ScopeInfo{Type: "user", Name: "550e8400-e29b-41d4-a716-446655440000"},
4444
},
4545
{
4646
name: "no scope (platform)",

internal/registry/activity/auditlog/storage_test.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,18 @@ func TestQueryStorage_Create_ScopeExtraction(t *testing.T) {
217217
wantType: "project",
218218
wantName: "backend-api",
219219
},
220+
{
221+
name: "user scope",
222+
user: &user.DefaultInfo{
223+
Name: "user-scoped",
224+
Extra: map[string][]string{
225+
ParentKindExtraKey: {"User"},
226+
ParentNameExtraKey: {"550e8400-e29b-41d4-a716-446655440000"},
227+
},
228+
},
229+
wantType: "user",
230+
wantName: "550e8400-e29b-41d4-a716-446655440000",
231+
},
220232
{
221233
name: "platform scope (no extra)",
222234
user: &user.DefaultInfo{

0 commit comments

Comments
 (0)