diff --git a/config/components/clickhouse-migrations/configmap.yaml b/config/components/clickhouse-migrations/configmap.yaml index b56fe64..5f0d1cc 100644 --- a/config/components/clickhouse-migrations/configmap.yaml +++ b/config/components/clickhouse-migrations/configmap.yaml @@ -328,9 +328,10 @@ data: event_json String CODEC(ZSTD(3)), -- Core timestamp (always queried) + -- Uses requestReceivedTimestamp which represents when the API server received the request. timestamp DateTime64(3) MATERIALIZED coalesce( - parseDateTime64BestEffortOrNull(JSONExtractString(event_json, 'stageTimestamp')), + parseDateTime64BestEffortOrNull(JSONExtractString(event_json, 'requestReceivedTimestamp')), now64(3) ), @@ -354,6 +355,12 @@ data: '' ), + user_uid String MATERIALIZED + coalesce( + JSONExtractString(event_json, 'user', 'uid'), + '' + ), + -- Request identity audit_id UUID MATERIALIZED toUUIDOrZero(coalesce(JSONExtractString(event_json, 'auditID'), '')), @@ -390,6 +397,7 @@ data: INDEX bf_api_resource (api_group, resource) TYPE bloom_filter(0.01) GRANULARITY 1, INDEX idx_verb_resource_bloom (verb, resource) TYPE bloom_filter(0.01) GRANULARITY 1, INDEX idx_user_bloom user TYPE bloom_filter(0.001) GRANULARITY 1, + INDEX idx_user_uid_bloom user_uid TYPE bloom_filter(0.001) GRANULARITY 1, -- Set indexes for low-cardinality columns INDEX idx_status_code_set status_code TYPE set(100) GRANULARITY 4, @@ -418,31 +426,33 @@ data: -- This projection is optimized for platform-wide queries that filter by -- timestamp, api_group, and resource (common for cross-tenant analytics). 
-- - -- Sort order: (timestamp, api_group, resource, audit_id) + -- Sort order: (toStartOfHour(timestamp), api_group, resource, audit_id, timestamp) -- Use cases: -- - "All events for 'apps' API group and 'deployments' resource in last 24 hours" -- - "All events for core API 'pods' resource" -- - Platform-wide verb/resource filtering -- + -- Hour bucketing improves compression, data locality, and deduplication efficiency. ALTER TABLE audit.events ADD PROJECTION platform_query_projection ( SELECT * - ORDER BY (timestamp, api_group, resource, audit_id) + ORDER BY (toStartOfHour(timestamp), api_group, resource, audit_id, timestamp) ); -- ============================================================================ -- Step 4: Add User Query Projection -- ============================================================================ - -- This projection is optimized for user-specific queries within time ranges. + -- This projection is optimized for username-based queries within time ranges. -- - -- Sort order: (timestamp, user, api_group, resource) + -- Sort order: (toStartOfHour(timestamp), user, api_group, resource, audit_id, timestamp) -- Use cases: -- - "What did alice@example.com do in the last 24 hours?" -- - "All events by system:serviceaccount:kube-system:default" - -- - User-specific verb/resource filtering + -- - Platform admin filtering by username in CEL expressions -- + -- Hour bucketing improves compression, data locality, and deduplication efficiency. -- ClickHouse automatically chooses the best projection for each query based -- on the WHERE clause filters. 
@@ -450,6 +460,30 @@ data: ADD PROJECTION user_query_projection ( SELECT * - ORDER BY (timestamp, user, api_group, resource) + ORDER BY (toStartOfHour(timestamp), user, api_group, resource, audit_id, timestamp) + ); + + -- ============================================================================ + -- Step 5: Add User UID Query Projection + -- ============================================================================ + -- This projection is optimized for user-scoped queries by UID. + -- + -- Sort order: (toStartOfHour(timestamp), user_uid, api_group, resource, audit_id, timestamp) + -- Use cases: + -- - User-scoped queries: "Show all activity by user with UID abc-123" + -- - Cross-organization user activity tracking + -- - User-specific audit trail regardless of username changes + -- + -- This projection is used when scope.type == "user" to filter by user_uid + -- instead of scope_name, enabling queries for a user's activity across all + -- organizations and projects on the platform. + -- + -- Hour bucketing improves compression, data locality, and deduplication efficiency. + + ALTER TABLE audit.events + ADD PROJECTION user_uid_query_projection + ( + SELECT * + ORDER BY (toStartOfHour(timestamp), user_uid, api_group, resource, audit_id, timestamp) ); diff --git a/docs/api.md b/docs/api.md index f12e80d..8c7619b 100644 --- a/docs/api.md +++ b/docs/api.md @@ -78,7 +78,7 @@ _Appears in:_ | --- | --- | --- | --- | | `startTime` _string_ | StartTime is the beginning of your search window (inclusive).

Format Options:
- Relative: "now-30d", "now-2h", "now-30m" (units: s, m, h, d, w)
Use for dashboards and recurring queries - they adjust automatically.
- Absolute: "2024-01-01T00:00:00Z" (RFC3339 with timezone)
Use for historical analysis of specific time periods.

Examples:
"now-30d" → 30 days ago
"2024-06-15T14:30:00-05:00" → specific time with timezone offset | | | | `endTime` _string_ | EndTime is the end of your search window (exclusive).

Uses the same formats as StartTime. Commonly "now" for current moment.
Must be greater than StartTime.

Examples:
"now" → current time
"2024-01-02T00:00:00Z" → specific end point | | | -| `filter` _string_ | Filter narrows results using CEL (Common Expression Language). Leave empty to get all events.

Available Fields:
verb - API action: get, list, create, update, patch, delete, watch
auditID - unique event identifier
stageTimestamp - when this stage occurred (RFC3339 timestamp)
user.username - who made the request (user or service account)
responseStatus.code - HTTP response code (200, 201, 404, 500, etc.)
objectRef.namespace - target resource namespace
objectRef.resource - resource type (pods, deployments, secrets, configmaps, etc.)
objectRef.name - specific resource name

Operators: ==, !=, <, >, <=, >=, &&, \|\|, in
String Functions: startsWith(), endsWith(), contains()

Common Patterns:
"verb == 'delete'" - All deletions
"objectRef.namespace == 'production'" - Activity in production namespace
"verb in ['create', 'update', 'delete', 'patch']" - All write operations
"responseStatus.code >= 400" - Failed requests
"user.username.startsWith('system:serviceaccount:')" - Service account activity
"objectRef.resource == 'secrets'" - Secret access
"verb == 'delete' && objectRef.namespace == 'production'" - Production deletions

Note: Use single quotes for strings. Field names are case-sensitive.
CEL reference: https://cel.dev | | | +| `filter` _string_ | Filter narrows results using CEL (Common Expression Language). Leave empty to get all events.

Available Fields:
verb - API action: get, list, create, update, patch, delete, watch
auditID - unique event identifier
requestReceivedTimestamp - when the API server received the request (RFC3339 timestamp)
user.username - who made the request (user or service account)
user.uid - unique user identifier (stable across username changes)
responseStatus.code - HTTP response code (200, 201, 404, 500, etc.)
objectRef.namespace - target resource namespace
objectRef.resource - resource type (pods, deployments, secrets, configmaps, etc.)
objectRef.name - specific resource name

Operators: ==, !=, <, >, <=, >=, &&, \|\|, in
String Functions: startsWith(), endsWith(), contains()

Common Patterns:
"verb == 'delete'" - All deletions
"objectRef.namespace == 'production'" - Activity in production namespace
"verb in ['create', 'update', 'delete', 'patch']" - All write operations
"responseStatus.code >= 400" - Failed requests
"user.username.startsWith('system:serviceaccount:')" - Service account activity
"user.uid == '550e8400-e29b-41d4-a716-446655440000'" - Specific user by UID
"objectRef.resource == 'secrets'" - Secret access
"verb == 'delete' && objectRef.namespace == 'production'" - Production deletions

Note: Use single quotes for strings. Field names are case-sensitive.
CEL reference: https://cel.dev | | | | `limit` _integer_ | Limit sets the maximum number of results per page.
Default: 100, Maximum: 1000.

Use smaller values (10-50) for exploration, larger (500-1000) for data collection.
Use continue to fetch additional pages. | | | | `continue` _string_ | Continue is the pagination cursor for fetching additional pages.

Leave empty for the first page. If status.continue is non-empty after a query,
copy that value here in a new query with identical parameters to get the next page.
Repeat until status.continue is empty.

Important: Keep all other parameters (startTime, endTime, filter, limit) identical
across paginated requests. The cursor is opaque - copy it exactly without modification. | | | @@ -96,7 +96,7 @@ _Appears in:_ | Field | Description | Default | Validation | | --- | --- | --- | --- | -| `results` _Event array_ | Results contains matching audit events, sorted newest-first.

Each event follows the Kubernetes audit.Event format with fields like:
verb, user.username, objectRef.\{namespace,resource,name\}, stageTimestamp,
responseStatus.code, requestObject, responseObject

Empty results? Try broadening your filter or time range.
Full documentation: https://kubernetes.io/docs/reference/config-api/apiserver-audit.v1/ | | | +| `results` _Event array_ | Results contains matching audit events, sorted newest-first.

Each event follows the Kubernetes audit.Event format with fields like:
verb, user.username, objectRef.\{namespace,resource,name\}, requestReceivedTimestamp,
stageTimestamp, responseStatus.code, requestObject, responseObject

Empty results? Try broadening your filter or time range.
Full documentation: https://kubernetes.io/docs/reference/config-api/apiserver-audit.v1/ | | | | `continue` _string_ | Continue is the pagination cursor.
Non-empty means more results are available - copy this to spec.continue for the next page.
Empty means you have all results. | | | | `effectiveStartTime` _string_ | EffectiveStartTime is the actual start time used for this query (RFC3339 format).

When you use relative times like "now-7d", this shows the exact timestamp that was
calculated. Useful for understanding exactly what time range was queried, especially
for auditing, debugging, or recreating queries with absolute timestamps.

Example: If you query with startTime="now-7d" at 2025-12-17T12:00:00Z,
this will be "2025-12-10T12:00:00Z". | | | | `effectiveEndTime` _string_ | EffectiveEndTime is the actual end time used for this query (RFC3339 format).

When you use relative times like "now", this shows the exact timestamp that was
calculated. Useful for understanding exactly what time range was queried.

Example: If you query with endTime="now" at 2025-12-17T12:00:00Z,
this will be "2025-12-17T12:00:00Z". | | | diff --git a/internal/cel/cel_test.go b/internal/cel/cel_test.go index 71eac4f..f6e8476 100644 --- a/internal/cel/cel_test.go +++ b/internal/cel/cel_test.go @@ -100,6 +100,20 @@ func TestCELFilterWorkflow(t *testing.T) { wantArgCount: 1, wantErr: false, }, + { + name: "user.uid filter", + filter: "user.uid == '550e8400-e29b-41d4-a716-446655440000'", + wantSQL: "user_uid = {arg1}", + wantArgCount: 1, + wantErr: false, + }, + { + name: "combined user.username and user.uid", + filter: "user.username == 'alice@example.com' || user.uid == '550e8400-e29b-41d4-a716-446655440000'", + wantSQL: "(user = {arg1} OR user_uid = {arg2})", + wantArgCount: 2, + wantErr: false, + }, } for _, tt := range tests { @@ -146,7 +160,7 @@ func TestCELFilterCompilation(t *testing.T) { }, { name: "valid timestamp comparison", - filter: "stageTimestamp >= timestamp('2024-01-01T00:00:00Z')", + filter: "requestReceivedTimestamp >= timestamp('2024-01-01T00:00:00Z')", wantErr: false, }, { @@ -207,7 +221,7 @@ func TestSQLConversionEdgeCases(t *testing.T) { }{ { name: "timestamp parameter is correctly formatted", - filter: "stageTimestamp >= timestamp('2024-01-01T00:00:00Z')", + filter: "requestReceivedTimestamp >= timestamp('2024-01-01T00:00:00Z')", validate: func(t *testing.T, sql string, args []interface{}) { if len(args) != 1 { t.Errorf("Expected 1 arg, got %d", len(args)) @@ -266,7 +280,7 @@ func TestEnvironment(t *testing.T) { validExpressions := []string{ "auditID == 'test'", "verb == 'delete'", - "stageTimestamp > timestamp('2024-01-01T00:00:00Z')", + "requestReceivedTimestamp > timestamp('2024-01-01T00:00:00Z')", "objectRef.namespace == 'default'", "objectRef.resource == 'pods'", "objectRef.name == 'my-pod'", diff --git a/internal/cel/errors.go b/internal/cel/errors.go index da0dacf..46ac5f2 100644 --- a/internal/cel/errors.go +++ b/internal/cel/errors.go @@ -50,7 +50,7 @@ func formatFilterError(err error) string { 
msg.WriteString(fmt.Sprintf("Invalid filter: %s", errMsg)) } - msg.WriteString(". Available fields: auditID, verb, stageTimestamp, objectRef.namespace, objectRef.resource, objectRef.name, user.username, user.groups, responseStatus.code") + msg.WriteString(". Available fields: auditID, verb, requestReceivedTimestamp, objectRef.namespace, objectRef.resource, objectRef.name, user.username, user.uid, user.groups, responseStatus.code") msg.WriteString(". See https://cel.dev for CEL syntax") return msg.String() diff --git a/internal/cel/filter.go b/internal/cel/filter.go index 528daaa..0377b6e 100644 --- a/internal/cel/filter.go +++ b/internal/cel/filter.go @@ -20,8 +20,11 @@ var tracer = otel.Tracer("activity-cel-filter") // Environment creates a CEL environment for audit event filtering. // -// Available fields: auditID, verb, stageTimestamp, -// objectRef.{namespace,resource,name}, user.username, responseStatus.code +// Available fields: auditID, verb, requestReceivedTimestamp, +// objectRef.{namespace,resource,name}, user.username, user.uid, responseStatus.code +// +// Note: stageTimestamp is intentionally NOT available for filtering as it should +// only be used for internal pipeline delay calculations, not for querying events. // // Supports standard CEL operators (==, &&, ||, in) and string methods // (startsWith, endsWith, contains).
@@ -33,7 +36,7 @@ func Environment() (*cel.Env, error) { return cel.NewEnv( cel.Variable("auditID", cel.StringType), cel.Variable("verb", cel.StringType), - cel.Variable("stageTimestamp", cel.TimestampType), + cel.Variable("requestReceivedTimestamp", cel.TimestampType), cel.Variable("objectRef", objectRefType), cel.Variable("user", userType), @@ -320,7 +323,7 @@ func (c *sqlConverter) convertIdentExpr(ident *expr.Expr_Ident) (string, error) return "audit_id", nil case "verb": return "verb", nil - case "stageTimestamp": + case "requestReceivedTimestamp": return "timestamp", nil case "objectRef", "user", "responseStatus": @@ -381,13 +384,15 @@ func (c *sqlConverter) convertSelectExpr(sel *expr.Expr_Select) (string, error) case baseObject == "user" && field == "username": return "user", nil + case baseObject == "user" && field == "uid": + return "user_uid", nil case baseObject == "responseStatus" && field == "code": return "status_code", nil default: // Provide helpful suggestions for common fields that aren't filterable - return "", fmt.Errorf("field '%s.%s' is not available for filtering. Available fields: auditID, verb, stageTimestamp, objectRef.apiGroup, objectRef.namespace, objectRef.resource, objectRef.name, user.username, user.groups, responseStatus.code", baseObject, field) + return "", fmt.Errorf("field '%s.%s' is not available for filtering. Available fields: auditID, verb, requestReceivedTimestamp, objectRef.apiGroup, objectRef.namespace, objectRef.resource, objectRef.name, user.username, user.uid, user.groups, responseStatus.code", baseObject, field) } } diff --git a/internal/registry/activity/auditlog/scope.go b/internal/registry/activity/auditlog/scope.go index 42e74fc..35b49d0 100644 --- a/internal/registry/activity/auditlog/scope.go +++ b/internal/registry/activity/auditlog/scope.go @@ -15,11 +15,14 @@ const ( // Used to restrict query results to the appropriate organizational boundary. 
type ScopeInfo struct { Type string // "platform", "organization", "project", "user" - Name string // scope identifier (org name, project name, etc.) + Name string // scope identifier (org name, project name, user UID, etc.) } // ExtractScopeFromUser determines the audit log query scope from user authentication metadata. // Defaults to platform-wide scope when no parent resource is specified. +// +// For user scope, the Name field contains the user's UID (not username), which enables +// querying all activity performed by that user across all organizations and projects. func ExtractScopeFromUser(u user.Info) ScopeInfo { if u.GetExtra() == nil { return ScopeInfo{Type: "platform", Name: ""} diff --git a/internal/registry/activity/auditlog/scope_test.go b/internal/registry/activity/auditlog/scope_test.go index b9a550b..0f72e05 100644 --- a/internal/registry/activity/auditlog/scope_test.go +++ b/internal/registry/activity/auditlog/scope_test.go @@ -37,10 +37,10 @@ func TestExtractScopeFromUser(t *testing.T) { user: &user.DefaultInfo{ Extra: map[string][]string{ ParentKindExtraKey: {"User"}, - ParentNameExtraKey: {"john.doe"}, + ParentNameExtraKey: {"550e8400-e29b-41d4-a716-446655440000"}, }, }, - expected: ScopeInfo{Type: "user", Name: "john.doe"}, + expected: ScopeInfo{Type: "user", Name: "550e8400-e29b-41d4-a716-446655440000"}, }, { name: "no scope (platform)", diff --git a/internal/registry/activity/auditlog/storage_test.go b/internal/registry/activity/auditlog/storage_test.go index ab93fdb..93f9013 100644 --- a/internal/registry/activity/auditlog/storage_test.go +++ b/internal/registry/activity/auditlog/storage_test.go @@ -217,6 +217,18 @@ func TestQueryStorage_Create_ScopeExtraction(t *testing.T) { wantType: "project", wantName: "backend-api", }, + { + name: "user scope", + user: &user.DefaultInfo{ + Name: "user-scoped", + Extra: map[string][]string{ + ParentKindExtraKey: {"User"}, + ParentNameExtraKey: {"550e8400-e29b-41d4-a716-446655440000"}, + }, + }, + 
wantType: "user", + wantName: "550e8400-e29b-41d4-a716-446655440000", + }, { name: "platform scope (no extra)", user: &user.DefaultInfo{ diff --git a/internal/storage/clickhouse.go b/internal/storage/clickhouse.go index 01a5dbf..982d11a 100644 --- a/internal/storage/clickhouse.go +++ b/internal/storage/clickhouse.go @@ -474,11 +474,20 @@ func (s *ClickHouseStorage) buildQuery(ctx context.Context, spec v1alpha1.AuditL // Only add scope filters if not platform-wide query if scope.Type != "platform" { - conditions = append(conditions, "scope_type = ?") - args = append(args, scope.Type) + if scope.Type == "user" { + // For user scope, filter by user.uid instead of scope annotations. + // This allows querying all activity performed BY a specific user + // across all organizations and projects on the platform. + conditions = append(conditions, "user_uid = ?") + args = append(args, scope.Name) + } else { + // For organization/project scope, use the scope annotations + conditions = append(conditions, "scope_type = ?") + args = append(args, scope.Type) - conditions = append(conditions, "scope_name = ?") - args = append(args, scope.Name) + conditions = append(conditions, "scope_name = ?") + args = append(args, scope.Name) + } } // Use a single reference time for both timestamps to prevent sub-second drift @@ -542,14 +551,14 @@ func (s *ClickHouseStorage) buildQuery(ctx context.Context, spec v1alpha1.AuditL if scope.Type == "platform" { if hasUserFilter(spec.Filter) { // User filter present: use user_query_projection - query += " ORDER BY timestamp DESC, user DESC, api_group DESC, resource DESC" + query += " ORDER BY toStartOfHour(timestamp) DESC, user DESC, api_group DESC, resource DESC, audit_id DESC, timestamp DESC" } else { // No user filter: use platform_query_projection - query += " ORDER BY timestamp DESC, api_group DESC, resource DESC, audit_id DESC" + query += " ORDER BY toStartOfHour(timestamp) DESC, api_group DESC, resource DESC, audit_id DESC, timestamp DESC" } } 
else if scope.Type == "user" { - // User-scoped: use user_query_projection - query += " ORDER BY timestamp DESC, user DESC, api_group DESC, resource DESC" + // User-scoped: use user_uid_query_projection to filter by UID + query += " ORDER BY toStartOfHour(timestamp) DESC, user_uid DESC, api_group DESC, resource DESC, audit_id DESC, timestamp DESC" } else { // Tenant-scoped: match hour-bucketed primary key for efficient index use query += " ORDER BY toStartOfHour(timestamp) DESC, scope_type DESC, scope_name DESC, user DESC, audit_id DESC, timestamp DESC" diff --git a/migrations/001_initial_schema.sql b/migrations/001_initial_schema.sql index 18899c6..39a093c 100644 --- a/migrations/001_initial_schema.sql +++ b/migrations/001_initial_schema.sql @@ -12,9 +12,10 @@ CREATE TABLE IF NOT EXISTS audit.events event_json String CODEC(ZSTD(3)), -- Core timestamp (always queried) + -- Uses requestReceivedTimestamp which represents when the API server received the request. timestamp DateTime64(3) MATERIALIZED coalesce( - parseDateTime64BestEffortOrNull(JSONExtractString(event_json, 'stageTimestamp')), + parseDateTime64BestEffortOrNull(JSONExtractString(event_json, 'requestReceivedTimestamp')), now64(3) ), @@ -38,6 +39,12 @@ CREATE TABLE IF NOT EXISTS audit.events '' ), + user_uid String MATERIALIZED + coalesce( + JSONExtractString(event_json, 'user', 'uid'), + '' + ), + -- Request identity audit_id UUID MATERIALIZED toUUIDOrZero(coalesce(JSONExtractString(event_json, 'auditID'), '')), @@ -74,6 +81,7 @@ CREATE TABLE IF NOT EXISTS audit.events INDEX bf_api_resource (api_group, resource) TYPE bloom_filter(0.01) GRANULARITY 1, INDEX idx_verb_resource_bloom (verb, resource) TYPE bloom_filter(0.01) GRANULARITY 1, INDEX idx_user_bloom user TYPE bloom_filter(0.001) GRANULARITY 1, + INDEX idx_user_uid_bloom user_uid TYPE bloom_filter(0.001) GRANULARITY 1, -- Set indexes for low-cardinality columns INDEX idx_status_code_set status_code TYPE set(100) GRANULARITY 4, @@ -102,31 +110,33 @@ 
SETTINGS -- This projection is optimized for platform-wide queries that filter by -- timestamp, api_group, and resource (common for cross-tenant analytics). -- --- Sort order: (timestamp, api_group, resource, audit_id) +-- Sort order: (toStartOfHour(timestamp), api_group, resource, audit_id, timestamp) -- Use cases: -- - "All events for 'apps' API group and 'deployments' resource in last 24 hours" -- - "All events for core API 'pods' resource" -- - Platform-wide verb/resource filtering -- +-- Hour bucketing improves compression, data locality, and deduplication efficiency. ALTER TABLE audit.events ADD PROJECTION platform_query_projection ( SELECT * - ORDER BY (timestamp, api_group, resource, audit_id) + ORDER BY (toStartOfHour(timestamp), api_group, resource, audit_id, timestamp) ); -- ============================================================================ -- Step 4: Add User Query Projection -- ============================================================================ --- This projection is optimized for user-specific queries within time ranges. +-- This projection is optimized for username-based queries within time ranges. -- --- Sort order: (timestamp, user, api_group, resource) +-- Sort order: (toStartOfHour(timestamp), user, api_group, resource, audit_id, timestamp) -- Use cases: -- - "What did alice@example.com do in the last 24 hours?" -- - "All events by system:serviceaccount:kube-system:default" --- - User-specific verb/resource filtering +-- - Platform admin filtering by username in CEL expressions -- +-- Hour bucketing improves compression, data locality, and deduplication efficiency. -- ClickHouse automatically chooses the best projection for each query based -- on the WHERE clause filters. 
@@ -134,5 +144,29 @@ ALTER TABLE audit.events ADD PROJECTION user_query_projection ( SELECT * - ORDER BY (timestamp, user, api_group, resource) + ORDER BY (toStartOfHour(timestamp), user, api_group, resource, audit_id, timestamp) +); + +-- ============================================================================ +-- Step 5: Add User UID Query Projection +-- ============================================================================ +-- This projection is optimized for user-scoped queries by UID. +-- +-- Sort order: (toStartOfHour(timestamp), user_uid, api_group, resource, audit_id, timestamp) +-- Use cases: +-- - User-scoped queries: "Show all activity by user with UID abc-123" +-- - Cross-organization user activity tracking +-- - User-specific audit trail regardless of username changes +-- +-- This projection is used when scope.type == "user" to filter by user_uid +-- instead of scope_name, enabling queries for a user's activity across all +-- organizations and projects on the platform. +-- +-- Hour bucketing improves compression, data locality, and deduplication efficiency. 
+ +ALTER TABLE audit.events +ADD PROJECTION user_uid_query_projection +( + SELECT * + ORDER BY (toStartOfHour(timestamp), user_uid, api_group, resource, audit_id, timestamp) ); diff --git a/pkg/apis/activity/v1alpha1/types.go b/pkg/apis/activity/v1alpha1/types.go index bbe0e46..566ca4c 100644 --- a/pkg/apis/activity/v1alpha1/types.go +++ b/pkg/apis/activity/v1alpha1/types.go @@ -80,8 +80,9 @@ type AuditLogQuerySpec struct { // Available Fields: // verb - API action: get, list, create, update, patch, delete, watch // auditID - unique event identifier - // stageTimestamp - when this stage occurred (RFC3339 timestamp) + // requestReceivedTimestamp - when the API server received the request (RFC3339 timestamp) // user.username - who made the request (user or service account) + // user.uid - unique user identifier (stable across username changes) // responseStatus.code - HTTP response code (200, 201, 404, 500, etc.) // objectRef.namespace - target resource namespace // objectRef.resource - resource type (pods, deployments, secrets, configmaps, etc.) @@ -96,6 +97,7 @@ type AuditLogQuerySpec struct { // "verb in ['create', 'update', 'delete', 'patch']" - All write operations // "responseStatus.code >= 400" - Failed requests // "user.username.startsWith('system:serviceaccount:')" - Service account activity + // "user.uid == '550e8400-e29b-41d4-a716-446655440000'" - Specific user by UID // "objectRef.resource == 'secrets'" - Secret access // "verb == 'delete' && objectRef.namespace == 'production'" - Production deletions // @@ -132,8 +134,8 @@ type AuditLogQueryStatus struct { // Results contains matching audit events, sorted newest-first. 
// // Each event follows the Kubernetes audit.Event format with fields like: - // verb, user.username, objectRef.{namespace,resource,name}, stageTimestamp, - // responseStatus.code, requestObject, responseObject + // verb, user.username, objectRef.{namespace,resource,name}, requestReceivedTimestamp, + // stageTimestamp, responseStatus.code, requestObject, responseObject // // Empty results? Try broadening your filter or time range. // Full documentation: https://kubernetes.io/docs/reference/config-api/apiserver-audit.v1/ diff --git a/pkg/generated/openapi/zz_generated.openapi.go b/pkg/generated/openapi/zz_generated.openapi.go index 97123e5..29ee78a 100644 --- a/pkg/generated/openapi/zz_generated.openapi.go +++ b/pkg/generated/openapi/zz_generated.openapi.go @@ -468,7 +468,7 @@ func schema_pkg_apis_activity_v1alpha1_AuditLogQuerySpec(ref common.ReferenceCal }, "filter": { SchemaProps: spec.SchemaProps{ - Description: "Filter narrows results using CEL (Common Expression Language). Leave empty to get all events.\n\nAvailable Fields:\n verb - API action: get, list, create, update, patch, delete, watch\n auditID - unique event identifier\n stageTimestamp - when this stage occurred (RFC3339 timestamp)\n user.username - who made the request (user or service account)\n responseStatus.code - HTTP response code (200, 201, 404, 500, etc.)\n objectRef.namespace - target resource namespace\n objectRef.resource - resource type (pods, deployments, secrets, configmaps, etc.)\n objectRef.name - specific resource name\n\nOperators: ==, !=, <, >, <=, >=, &&, ||, in String Functions: startsWith(), endsWith(), contains()\n\nCommon Patterns:\n \"verb == 'delete'\" - All deletions\n \"objectRef.namespace == 'production'\" - Activity in production namespace\n \"verb in ['create', 'update', 'delete', 'patch']\" - All write operations\n \"responseStatus.code >= 400\" - Failed requests\n \"user.username.startsWith('system:serviceaccount:')\" - Service account activity\n \"objectRef.resource 
== 'secrets'\" - Secret access\n \"verb == 'delete' && objectRef.namespace == 'production'\" - Production deletions\n\nNote: Use single quotes for strings. Field names are case-sensitive. CEL reference: https://cel.dev", + Description: "Filter narrows results using CEL (Common Expression Language). Leave empty to get all events.\n\nAvailable Fields:\n verb - API action: get, list, create, update, patch, delete, watch\n auditID - unique event identifier\n requestReceivedTimestamp - when the API server received the request (RFC3339 timestamp)\n user.username - who made the request (user or service account)\n user.uid - unique user identifier (stable across username changes)\n responseStatus.code - HTTP response code (200, 201, 404, 500, etc.)\n objectRef.namespace - target resource namespace\n objectRef.resource - resource type (pods, deployments, secrets, configmaps, etc.)\n objectRef.name - specific resource name\n\nOperators: ==, !=, <, >, <=, >=, &&, ||, in String Functions: startsWith(), endsWith(), contains()\n\nCommon Patterns:\n \"verb == 'delete'\" - All deletions\n \"objectRef.namespace == 'production'\" - Activity in production namespace\n \"verb in ['create', 'update', 'delete', 'patch']\" - All write operations\n \"responseStatus.code >= 400\" - Failed requests\n \"user.username.startsWith('system:serviceaccount:')\" - Service account activity\n \"user.uid == '550e8400-e29b-41d4-a716-446655440000'\" - Specific user by UID\n \"objectRef.resource == 'secrets'\" - Secret access\n \"verb == 'delete' && objectRef.namespace == 'production'\" - Production deletions\n\nNote: Use single quotes for strings. Field names are case-sensitive. 
CEL reference: https://cel.dev", Type: []string{"string"}, Format: "", }, @@ -508,7 +508,7 @@ func schema_pkg_apis_activity_v1alpha1_AuditLogQueryStatus(ref common.ReferenceC }, }, SchemaProps: spec.SchemaProps{ - Description: "Results contains matching audit events, sorted newest-first.\n\nEach event follows the Kubernetes audit.Event format with fields like:\n verb, user.username, objectRef.{namespace,resource,name}, stageTimestamp,\n responseStatus.code, requestObject, responseObject\n\nEmpty results? Try broadening your filter or time range. Full documentation: https://kubernetes.io/docs/reference/config-api/apiserver-audit.v1/", + Description: "Results contains matching audit events, sorted newest-first.\n\nEach event follows the Kubernetes audit.Event format with fields like:\n verb, user.username, objectRef.{namespace,resource,name}, requestReceivedTimestamp,\n stageTimestamp, responseStatus.code, requestObject, responseObject\n\nEmpty results? Try broadening your filter or time range. Full documentation: https://kubernetes.io/docs/reference/config-api/apiserver-audit.v1/", Type: []string{"array"}, Items: &spec.SchemaOrArray{ Schema: &spec.Schema{