Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 41 additions & 7 deletions config/components/clickhouse-migrations/configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -328,9 +328,10 @@ data:
event_json String CODEC(ZSTD(3)),

-- Core timestamp (always queried)
-- Uses requestReceivedTimestamp which represents when the API server received the request.
timestamp DateTime64(3) MATERIALIZED
coalesce(
parseDateTime64BestEffortOrNull(JSONExtractString(event_json, 'stageTimestamp')),
parseDateTime64BestEffortOrNull(JSONExtractString(event_json, 'requestReceivedTimestamp')),
now64(3)
),

Expand All @@ -354,6 +355,12 @@ data:
''
),

user_uid String MATERIALIZED
coalesce(
JSONExtractString(event_json, 'user', 'uid'),
''
),

-- Request identity
audit_id UUID MATERIALIZED
toUUIDOrZero(coalesce(JSONExtractString(event_json, 'auditID'), '')),
Expand Down Expand Up @@ -390,6 +397,7 @@ data:
INDEX bf_api_resource (api_group, resource) TYPE bloom_filter(0.01) GRANULARITY 1,
INDEX idx_verb_resource_bloom (verb, resource) TYPE bloom_filter(0.01) GRANULARITY 1,
INDEX idx_user_bloom user TYPE bloom_filter(0.001) GRANULARITY 1,
INDEX idx_user_uid_bloom user_uid TYPE bloom_filter(0.001) GRANULARITY 1,

-- Set indexes for low-cardinality columns
INDEX idx_status_code_set status_code TYPE set(100) GRANULARITY 4,
Expand Down Expand Up @@ -418,38 +426,64 @@ data:
-- This projection is optimized for platform-wide queries that filter by
-- timestamp, api_group, and resource (common for cross-tenant analytics).
--
-- Sort order: (timestamp, api_group, resource, audit_id)
-- Sort order: (toStartOfHour(timestamp), api_group, resource, audit_id, timestamp)
-- Use cases:
-- - "All events for 'apps' API group and 'deployments' resource in last 24 hours"
-- - "All events for core API 'pods' resource"
-- - Platform-wide verb/resource filtering
--
-- Hour bucketing improves compression, data locality, and deduplication efficiency.

ALTER TABLE audit.events
ADD PROJECTION platform_query_projection
(
SELECT *
ORDER BY (timestamp, api_group, resource, audit_id)
ORDER BY (toStartOfHour(timestamp), api_group, resource, audit_id, timestamp)
);

-- ============================================================================
-- Step 4: Add User Query Projection
-- ============================================================================
-- This projection is optimized for user-specific queries within time ranges.
-- This projection is optimized for username-based queries within time ranges.
--
-- Sort order: (timestamp, user, api_group, resource)
-- Sort order: (toStartOfHour(timestamp), user, api_group, resource, audit_id, timestamp)
-- Use cases:
-- - "What did alice@example.com do in the last 24 hours?"
-- - "All events by system:serviceaccount:kube-system:default"
-- - User-specific verb/resource filtering
-- - Platform admin filtering by username in CEL expressions
--
-- Hour bucketing improves compression, data locality, and deduplication efficiency.
-- ClickHouse automatically chooses the best projection for each query based
-- on the WHERE clause filters.

ALTER TABLE audit.events
ADD PROJECTION user_query_projection
(
SELECT *
ORDER BY (timestamp, user, api_group, resource)
ORDER BY (toStartOfHour(timestamp), user, api_group, resource, audit_id, timestamp)
);

-- ============================================================================
-- Step 5: Add User UID Query Projection
-- ============================================================================
-- This projection is optimized for user-scoped queries by UID.
--
-- Sort order: (toStartOfHour(timestamp), user_uid, api_group, resource, audit_id, timestamp)
-- Use cases:
-- - User-scoped queries: "Show all activity by user with UID abc-123"
-- - Cross-organization user activity tracking
-- - User-specific audit trail regardless of username changes
--
-- This projection is used when scope.type == "user" to filter by user_uid
-- instead of scope_name, enabling queries for a user's activity across all
-- organizations and projects on the platform.
--
-- Hour bucketing improves compression, data locality, and deduplication efficiency.

-- The hour bucket leads the sort key and the exact timestamp trails it, so
-- within each hour rows are ordered by user_uid, api_group, resource, and
-- audit_id before the precise event time.
ALTER TABLE audit.events
ADD PROJECTION user_uid_query_projection
(
SELECT *
ORDER BY (toStartOfHour(timestamp), user_uid, api_group, resource, audit_id, timestamp)
);

4 changes: 2 additions & 2 deletions docs/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ _Appears in:_
| --- | --- | --- | --- |
| `startTime` _string_ | StartTime is the beginning of your search window (inclusive).<br /><br />Format Options:<br />- Relative: "now-30d", "now-2h", "now-30m" (units: s, m, h, d, w)<br /> Use for dashboards and recurring queries - they adjust automatically.<br />- Absolute: "2024-01-01T00:00:00Z" (RFC3339 with timezone)<br /> Use for historical analysis of specific time periods.<br /><br />Examples:<br /> "now-30d" → 30 days ago<br /> "2024-06-15T14:30:00-05:00" → specific time with timezone offset | | |
| `endTime` _string_ | EndTime is the end of your search window (exclusive).<br /><br />Uses the same formats as StartTime. Commonly "now" for current moment.<br />Must be greater than StartTime.<br /><br />Examples:<br /> "now" → current time<br /> "2024-01-02T00:00:00Z" → specific end point | | |
| `filter` _string_ | Filter narrows results using CEL (Common Expression Language). Leave empty to get all events.<br /><br />Available Fields:<br /> verb - API action: get, list, create, update, patch, delete, watch<br /> auditID - unique event identifier<br /> stageTimestamp - when this stage occurred (RFC3339 timestamp)<br /> user.username - who made the request (user or service account)<br /> responseStatus.code - HTTP response code (200, 201, 404, 500, etc.)<br /> objectRef.namespace - target resource namespace<br /> objectRef.resource - resource type (pods, deployments, secrets, configmaps, etc.)<br /> objectRef.name - specific resource name<br /><br />Operators: ==, !=, <, >, <=, >=, &&, \|\|, in<br />String Functions: startsWith(), endsWith(), contains()<br /><br />Common Patterns:<br /> "verb == 'delete'" - All deletions<br /> "objectRef.namespace == 'production'" - Activity in production namespace<br /> "verb in ['create', 'update', 'delete', 'patch']" - All write operations<br /> "responseStatus.code >= 400" - Failed requests<br /> "user.username.startsWith('system:serviceaccount:')" - Service account activity<br /> "objectRef.resource == 'secrets'" - Secret access<br /> "verb == 'delete' && objectRef.namespace == 'production'" - Production deletions<br /><br />Note: Use single quotes for strings. Field names are case-sensitive.<br />CEL reference: https://cel.dev | | |
| `filter` _string_ | Filter narrows results using CEL (Common Expression Language). Leave empty to get all events.<br /><br />Available Fields:<br /> verb - API action: get, list, create, update, patch, delete, watch<br /> auditID - unique event identifier<br /> requestReceivedTimestamp - when the API server received the request (RFC3339 timestamp)<br /> user.username - who made the request (user or service account)<br /> user.uid - unique user identifier (stable across username changes)<br /> responseStatus.code - HTTP response code (200, 201, 404, 500, etc.)<br /> objectRef.namespace - target resource namespace<br /> objectRef.resource - resource type (pods, deployments, secrets, configmaps, etc.)<br /> objectRef.name - specific resource name<br /><br />Operators: ==, !=, <, >, <=, >=, &&, \|\|, in<br />String Functions: startsWith(), endsWith(), contains()<br /><br />Common Patterns:<br /> "verb == 'delete'" - All deletions<br /> "objectRef.namespace == 'production'" - Activity in production namespace<br /> "verb in ['create', 'update', 'delete', 'patch']" - All write operations<br /> "responseStatus.code >= 400" - Failed requests<br /> "user.username.startsWith('system:serviceaccount:')" - Service account activity<br /> "user.uid == '550e8400-e29b-41d4-a716-446655440000'" - Specific user by UID<br /> "objectRef.resource == 'secrets'" - Secret access<br /> "verb == 'delete' && objectRef.namespace == 'production'" - Production deletions<br /><br />Note: Use single quotes for strings. Field names are case-sensitive.<br />CEL reference: https://cel.dev | | |
| `limit` _integer_ | Limit sets the maximum number of results per page.<br />Default: 100, Maximum: 1000.<br /><br />Use smaller values (10-50) for exploration, larger (500-1000) for data collection.<br />Use continue to fetch additional pages. | | |
| `continue` _string_ | Continue is the pagination cursor for fetching additional pages.<br /><br />Leave empty for the first page. If status.continue is non-empty after a query,<br />copy that value here in a new query with identical parameters to get the next page.<br />Repeat until status.continue is empty.<br /><br />Important: Keep all other parameters (startTime, endTime, filter, limit) identical<br />across paginated requests. The cursor is opaque - copy it exactly without modification. | | |

Expand All @@ -96,7 +96,7 @@ _Appears in:_

| Field | Description | Default | Validation |
| --- | --- | --- | --- |
| `results` _Event array_ | Results contains matching audit events, sorted newest-first.<br /><br />Each event follows the Kubernetes audit.Event format with fields like:<br /> verb, user.username, objectRef.\{namespace,resource,name\}, stageTimestamp,<br /> responseStatus.code, requestObject, responseObject<br /><br />Empty results? Try broadening your filter or time range.<br />Full documentation: https://kubernetes.io/docs/reference/config-api/apiserver-audit.v1/ | | |
| `results` _Event array_ | Results contains matching audit events, sorted newest-first.<br /><br />Each event follows the Kubernetes audit.Event format with fields like:<br /> verb, user.username, objectRef.\{namespace,resource,name\}, requestReceivedTimestamp,<br /> stageTimestamp, responseStatus.code, requestObject, responseObject<br /><br />Empty results? Try broadening your filter or time range.<br />Full documentation: https://kubernetes.io/docs/reference/config-api/apiserver-audit.v1/ | | |
| `continue` _string_ | Continue is the pagination cursor.<br />Non-empty means more results are available - copy this to spec.continue for the next page.<br />Empty means you have all results. | | |
| `effectiveStartTime` _string_ | EffectiveStartTime is the actual start time used for this query (RFC3339 format).<br /><br />When you use relative times like "now-7d", this shows the exact timestamp that was<br />calculated. Useful for understanding exactly what time range was queried, especially<br />for auditing, debugging, or recreating queries with absolute timestamps.<br /><br />Example: If you query with startTime="now-7d" at 2025-12-17T12:00:00Z,<br />this will be "2025-12-10T12:00:00Z". | | |
| `effectiveEndTime` _string_ | EffectiveEndTime is the actual end time used for this query (RFC3339 format).<br /><br />When you use relative times like "now", this shows the exact timestamp that was<br />calculated. Useful for understanding exactly what time range was queried.<br /><br />Example: If you query with endTime="now" at 2025-12-17T12:00:00Z,<br />this will be "2025-12-17T12:00:00Z". | | |
Expand Down
20 changes: 17 additions & 3 deletions internal/cel/cel_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,20 @@ func TestCELFilterWorkflow(t *testing.T) {
wantArgCount: 1,
wantErr: false,
},
{
name: "user.uid filter",
filter: "user.uid == '550e8400-e29b-41d4-a716-446655440000'",
wantSQL: "user_uid = {arg1}",
wantArgCount: 1,
wantErr: false,
},
{
name: "combined user.username and user.uid",
filter: "user.username == '[email protected]' || user.uid == '550e8400-e29b-41d4-a716-446655440000'",
wantSQL: "(user = {arg1} OR user_uid = {arg2})",
wantArgCount: 2,
wantErr: false,
},
}

for _, tt := range tests {
Expand Down Expand Up @@ -146,7 +160,7 @@ func TestCELFilterCompilation(t *testing.T) {
},
{
name: "valid timestamp comparison",
filter: "stageTimestamp >= timestamp('2024-01-01T00:00:00Z')",
filter: "requestReceivedTimestamp >= timestamp('2024-01-01T00:00:00Z')",
wantErr: false,
},
{
Expand Down Expand Up @@ -207,7 +221,7 @@ func TestSQLConversionEdgeCases(t *testing.T) {
}{
{
name: "timestamp parameter is correctly formatted",
filter: "stageTimestamp >= timestamp('2024-01-01T00:00:00Z')",
filter: "requestReceivedTimestamp >= timestamp('2024-01-01T00:00:00Z')",
validate: func(t *testing.T, sql string, args []interface{}) {
if len(args) != 1 {
t.Errorf("Expected 1 arg, got %d", len(args))
Expand Down Expand Up @@ -266,7 +280,7 @@ func TestEnvironment(t *testing.T) {
validExpressions := []string{
"auditID == 'test'",
"verb == 'delete'",
"stageTimestamp > timestamp('2024-01-01T00:00:00Z')",
"requestReceivedTimestamp > timestamp('2024-01-01T00:00:00Z')",
"objectRef.namespace == 'default'",
"objectRef.resource == 'pods'",
"objectRef.name == 'my-pod'",
Expand Down
2 changes: 1 addition & 1 deletion internal/cel/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ func formatFilterError(err error) string {
msg.WriteString(fmt.Sprintf("Invalid filter: %s", errMsg))
}

msg.WriteString(". Available fields: auditID, verb, stageTimestamp, objectRef.namespace, objectRef.resource, objectRef.name, user.username, user.groups, responseStatus.code")
msg.WriteString(". Available fields: auditID, verb, requestReceivedTimestamp, objectRef.namespace, objectRef.resource, objectRef.name, user.username, user.groups, responseStatus.code")
msg.WriteString(". See https://cel.dev for CEL syntax")

return msg.String()
Expand Down
15 changes: 10 additions & 5 deletions internal/cel/filter.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,11 @@ var tracer = otel.Tracer("activity-cel-filter")

// Environment creates a CEL environment for audit event filtering.
//
// Available fields: auditID, verb, stageTimestamp,
// objectRef.{namespace,resource,name}, user.username, responseStatus.code
// Available fields: auditID, verb, requestReceivedTimestamp,
// objectRef.{namespace,resource,name}, user.username, user.uid, responseStatus.code
//
// Note: stageTimestamp is intentionally NOT available for filtering as it should
// only be used for internal pipeline delay calculations, not for querying events.
//
// Supports standard CEL operators (==, &&, ||, in) and string methods
// (startsWith, endsWith, contains).
Expand All @@ -33,7 +36,7 @@ func Environment() (*cel.Env, error) {
return cel.NewEnv(
cel.Variable("auditID", cel.StringType),
cel.Variable("verb", cel.StringType),
cel.Variable("stageTimestamp", cel.TimestampType),
cel.Variable("requestReceivedTimestamp", cel.TimestampType),

cel.Variable("objectRef", objectRefType),
cel.Variable("user", userType),
Expand Down Expand Up @@ -320,7 +323,7 @@ func (c *sqlConverter) convertIdentExpr(ident *expr.Expr_Ident) (string, error)
return "audit_id", nil
case "verb":
return "verb", nil
case "stageTimestamp":
case "requestReceivedTimestamp":
return "timestamp", nil

case "objectRef", "user", "responseStatus":
Expand Down Expand Up @@ -381,13 +384,15 @@ func (c *sqlConverter) convertSelectExpr(sel *expr.Expr_Select) (string, error)

case baseObject == "user" && field == "username":
return "user", nil
case baseObject == "user" && field == "uid":
return "user_uid", nil

case baseObject == "responseStatus" && field == "code":
return "status_code", nil

default:
// Provide helpful suggestions for common fields that aren't filterable
return "", fmt.Errorf("field '%s.%s' is not available for filtering. Available fields: auditID, verb, stageTimestamp, objectRef.apiGroup, objectRef.namespace, objectRef.resource, objectRef.name, user.username, user.groups, responseStatus.code", baseObject, field)
return "", fmt.Errorf("field '%s.%s' is not available for filtering. Available fields: auditID, verb, requestReceivedTimestamp, objectRef.apiGroup, objectRef.namespace, objectRef.resource, objectRef.name, user.username, user.uid, user.groups, responseStatus.code", baseObject, field)
}
}

Expand Down
5 changes: 4 additions & 1 deletion internal/registry/activity/auditlog/scope.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,14 @@ const (
// Used to restrict query results to the appropriate organizational boundary.
type ScopeInfo struct {
Type string // "platform", "organization", "project", "user"
Name string // scope identifier (org name, project name, etc.)
Name string // scope identifier (org name, project name, user UID, etc.)
}

// ExtractScopeFromUser determines the audit log query scope from user authentication metadata.
// Defaults to platform-wide scope when no parent resource is specified.
//
// For user scope, the Name field contains the user's UID (not username), which enables
// querying all activity performed by that user across all organizations and projects.
func ExtractScopeFromUser(u user.Info) ScopeInfo {
if u.GetExtra() == nil {
return ScopeInfo{Type: "platform", Name: ""}
Expand Down
4 changes: 2 additions & 2 deletions internal/registry/activity/auditlog/scope_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,10 @@ func TestExtractScopeFromUser(t *testing.T) {
user: &user.DefaultInfo{
Extra: map[string][]string{
ParentKindExtraKey: {"User"},
ParentNameExtraKey: {"john.doe"},
ParentNameExtraKey: {"550e8400-e29b-41d4-a716-446655440000"},
},
},
expected: ScopeInfo{Type: "user", Name: "john.doe"},
expected: ScopeInfo{Type: "user", Name: "550e8400-e29b-41d4-a716-446655440000"},
},
{
name: "no scope (platform)",
Expand Down
12 changes: 12 additions & 0 deletions internal/registry/activity/auditlog/storage_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,18 @@ func TestQueryStorage_Create_ScopeExtraction(t *testing.T) {
wantType: "project",
wantName: "backend-api",
},
{
name: "user scope",
user: &user.DefaultInfo{
Name: "user-scoped",
Extra: map[string][]string{
ParentKindExtraKey: {"User"},
ParentNameExtraKey: {"550e8400-e29b-41d4-a716-446655440000"},
},
},
wantType: "user",
wantName: "550e8400-e29b-41d4-a716-446655440000",
},
{
name: "platform scope (no extra)",
user: &user.DefaultInfo{
Expand Down
Loading