Skip to content

Commit 3584b64

Browse files
authored
[ez][CH] Add default.workflow_job schema (#6848)
This is a noop, like all other schemas, this may not reflect the actual schema and is not actually enfored. It is here mainly to help with documentation and to attempt to track changes Add the schema so we can know what's there It's mostly the webhook that github sends for a workflow_job plus a few extra things: Found in dynamo: * torchci_classification - holds the log classification from log classifier * backfill - a column that tells if this is eligible for backfill from the backfill job Aliases * _inserted_at - pretty much what it says * repository_full_name - ex pytorch/pytorch, comes from the html_url, so we don't need to join with the workflow_run table to get this info * workflow_event - similar to above but uses dicts to efficiently get the information from workflow_run * workflow_created_at - same as above Keep going related things: (kg stands for keep going) * torchci_classification_temp - holds the log classifier result from the temp log while the job does continue through error/keep going * torchci_classification_kg - alias that combines torchci_classification_temp and torchci_classification for easy querying * conclusion_kg - similar to the above but for conclusion, basically returns "failure, or will fail" * log_url - alias for log url so we don't need to concat in queries all the time, and also takes into account the temp log from keep going
1 parent 9effa8e commit 3584b64

File tree

1 file changed

+68
-0
lines changed
  • clickhouse_db_schema/default.workflow_job

1 file changed

+68
-0
lines changed
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
-- Schema for GitHub Actions workflow_job rows: mostly the fields of the
-- GitHub `workflow_job` webhook payload, plus torchci-specific columns
-- (log-classifier results, backfill flag) and ALIAS helper columns.
-- NOTE(review): per the accompanying commit message this schema is
-- documentation-only and is not actually enforced against the data.
CREATE TABLE default.workflow_job
(
    `check_run_url` String,
    `completed_at` DateTime64(9),
    `conclusion` String,
    `created_at` DateTime64(9),
    `dynamoKey` String,
    `head_branch` String,
    `head_sha` String COMMENT 'Contains commit SHA that matches workflow_run.head_commit.id',
    `html_url` String,
    `id` Int64,
    `labels` Array(String),
    `name` String,
    `node_id` String,
    `run_attempt` Int64,
    `run_id` Int64,
    `run_url` String,
    `runner_group_id` Int64,
    `runner_group_name` String,
    `runner_id` Int64,
    `runner_name` String,
    `started_at` DateTime64(9),
    `status` String,
    -- Per-step detail nested inside each job row.
    `steps` Array(Tuple(
        completed_at DateTime64(9),
        conclusion String,
        name String,
        number Int64,
        started_at DateTime64(9),
        status String)),
    -- Log-classifier output for the job's final log.
    `torchci_classification` Tuple(
        captures Array(String),
        context Array(String),
        job_id Int64,
        line String,
        line_num Int64,
        rule String),
    `url` String,
    `workflow_name` String,
    -- Whether this row is eligible for backfill by the backfill job.
    `backfill` Bool DEFAULT true,
    -- Ingestion timestamp, computed at insert time.
    `_inserted_at` DateTime MATERIALIZED now(),
    -- ALIAS columns below are computed on read; they avoid joins with
    -- workflow_run by pulling fields out of the URL or via dictionaries.
    `repository_full_name` String ALIAS extractAll(url, 'https://api.github.com/repos/([^/]+/[^/]+)/actions/jobs/')[1] COMMENT 'Repository name in format "owner/repo" extracted from URL. Matches workflow_run.repository.full_name',
    `workflow_event` String ALIAS dictGet('default.workflow_events_dict', 'event', dictGet('default.workflow_run_dict', 'event_hash', run_id)) COMMENT 'Corresponds to the event column in workflow_run table',
    `workflow_created_at` DateTime ALIAS dictGet('default.workflow_run_dict', 'created_at', run_id) COMMENT 'Corresponds to the created_at column in workflow_run table',
    -- "Keep going" (kg) support: classification from the temp log while
    -- the job keeps running past a failure.
    `torchci_classification_temp` Tuple(
        captures Array(String),
        context Array(String),
        job_id Int64,
        line String,
        line_num Int64,
        rule String) COMMENT 'Holds the log classifier result from the temp log while the job does continue through error/keep going',
    `torchci_classification_kg` Tuple(
        captures Array(String),
        context Array(String),
        job_id Int64,
        line String,
        line_num Int64,
        rule String) ALIAS if(tupleElement(torchci_classification, 'line') = '', torchci_classification_temp, torchci_classification) COMMENT 'Combined torchci_classification and torchci_classification_temp for easier querying for keep going. kg stands for keep going',
    `conclusion_kg` String ALIAS if((conclusion = '') AND (tupleElement(torchci_classification_temp, 'line') != ''), 'failure', conclusion) COMMENT 'Altered conclusion for keep going',
    `log_url` String ALIAS multiIf(repository_full_name != 'pytorch/pytorch', concat('https://ossci-raw-job-status.s3.amazonaws.com/log/', repository_full_name, '/', CAST(id, 'String')), (tupleElement(torchci_classification_temp, 'line') != '') AND (conclusion = ''), concat('https://gha-artifacts.s3.us-east-1.amazonaws.com/temp_logs/', CAST(id, 'String')), concat('https://ossci-raw-job-status.s3.amazonaws.com/log/', CAST(id, 'String'))) COMMENT 'Log url for the job. Takes into account temp logs if possible',
    -- Data-skipping indexes for common filter columns.
    INDEX status_index status TYPE bloom_filter GRANULARITY 1,
    INDEX created_at_index created_at TYPE minmax GRANULARITY 1,
    INDEX started_at_index started_at TYPE minmax GRANULARITY 1,
    INDEX completed_at_index completed_at TYPE minmax GRANULARITY 1
)
ENGINE = SharedReplacingMergeTree('/clickhouse/tables/{uuid}/{shard}', '{replica}')
ORDER BY (id, run_id, dynamoKey)
SETTINGS index_granularity = 8192

0 commit comments

Comments
 (0)