Skip to content

Commit 3b952a3

Browse files
authored
[ghinfra] Set up ingestion from s3 -> clickhouse for cloudwatch (#6898)
Path: cloudwatch metrics -> firehose -> s3 (new bucket fbossci-cloudwatch-metrics) -> clickhouse. This is the s3 -> clickhouse part. I think ClickHouse has some built-in ingestion for Kinesis, but I'm lazy... Requires meta-pytorch/pytorch-gha-infra#751. Testing: ran the python code via `python tools/rockset_migration/s32ch.py --clickhouse-table "infra_metrics.cloudwatch_metrics" --stored-data t.json --s3-bucket fbossci-cloudwatch-metrics --s3-prefix ghci-related`
1 parent 22c52a0 commit 3b952a3

File tree

2 files changed

+46
-0
lines changed

2 files changed

+46
-0
lines changed

aws/lambda/clickhouse-replicator-s3/lambda_function.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -544,6 +544,25 @@ def disabled_tests_historical_adapter(table, bucket, key):
544544
general_adapter(table, bucket, key, schema, ["none"], "JSONEachRow")
545545

546546

547+
def cloudwatch_metrics_adapter(table, bucket, key):
    """Ingest one CloudWatch metrics object from S3 into ``table``.

    Declares the column layout of the incoming records and hands the
    actual work to ``general_adapter``, reading the object as
    ``JSONEachRow``.

    Args:
        table: Name of the destination table
            (``infra_metrics.cloudwatch_metrics`` in ``OBJECT_CONVERTER``).
        bucket: S3 bucket that holds the object.
        key: Key of the object inside ``bucket``.
    """
    # Column names/types of each JSON record. `value` nests the four
    # summary statistics (max/min/sum/count) carried by every datapoint.
    # NOTE(review): the table DDL also has a `_meta` column that is not
    # listed here -- presumably general_adapter supplies it; confirm.
    cloudwatch_columns = """
    `metric_stream_name` LowCardinality(String),
    `account_id` LowCardinality(String),
    `region` LowCardinality(String),
    `namespace` LowCardinality(String),
    `metric_name` LowCardinality(String),
    `dimensions` Map(String, String),
    `timestamp` DateTime,
    `value` Tuple(
        max Float32,
        min Float32,
        sum Float32,
        count Float32),
    `unit` LowCardinality(String)
    """
    # The meaning of the ["none"] argument is defined by general_adapter;
    # it is passed through unchanged, same as the sibling adapters do.
    general_adapter(
        table,
        bucket,
        key,
        cloudwatch_columns,
        ["none"],
        "JSONEachRow",
    )
564+
565+
547566
SUPPORTED_PATHS = {
548567
"merges": "default.merges",
549568
"queue_times_historical": "default.queue_times_historical",
@@ -564,6 +583,8 @@ def disabled_tests_historical_adapter(table, bucket, key):
564583
"util_metadata": "misc.oss_ci_utilization_metadata",
565584
"util_timeseries": "misc.oss_ci_time_series",
566585
"disabled_tests_historical": "misc.disabled_tests_historical",
586+
# fbossci-cloudwatch-metrics bucket
587+
"ghci-related": "infra_metrics.cloudwatch_metrics",
567588
}
568589

569590
OBJECT_CONVERTER = {
@@ -586,6 +607,7 @@ def disabled_tests_historical_adapter(table, bucket, key):
586607
"misc.oss_ci_utilization_metadata": oss_ci_util_metadata_adapter,
587608
"misc.oss_ci_time_series": oss_ci_util_time_series_adapter,
588609
"misc.disabled_tests_historical": disabled_tests_historical_adapter,
610+
"infra_metrics.cloudwatch_metrics": cloudwatch_metrics_adapter,
589611
}
590612

591613

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
-- Destination table for CloudWatch metric records replicated from S3
-- (bucket fbossci-cloudwatch-metrics, prefix "ghci-related").
CREATE TABLE infra_metrics.cloudwatch_metrics
(
    `metric_stream_name` LowCardinality(String),
    `account_id` LowCardinality(String),
    `region` LowCardinality(String),
    `namespace` LowCardinality(String),
    `metric_name` LowCardinality(String),
    `dimensions` Map(String, String),
    `timestamp` DateTime,
    -- Summary statistics reported for the datapoint.
    `value` Tuple(
        max Float32,
        min Float32,
        sum Float32,
        count Float32),
    `unit` LowCardinality(String),
    -- NOTE(review): presumably the S3 bucket/key the row was loaded
    -- from, filled in by the replicator -- confirm.
    `_meta` Tuple(
        bucket String,
        key String)
)
ENGINE = SharedMergeTree('/clickhouse/tables/{uuid}/{shard}', '{replica}')
-- One partition per calendar month of `timestamp`.
PARTITION BY toYYYYMM(timestamp)
ORDER BY (namespace, metric_name, timestamp, dimensions)
-- Rows expire 12 months after their `timestamp`.
TTL timestamp + toIntervalMonth(12)
SETTINGS index_granularity = 8192

0 commit comments

Comments
 (0)