|
-- Upsert one day of per-user site-hit counts into array_metrics.
-- Each row carries one array slot per processed day of the month:
--   * users that already have a row for the month get the day's count appended
--     (0 when they produced no events that day);
--   * users seen for the first time get zero-padding for the days already
--     elapsed in the month, followed by the day's count.
-- NOTE(review): the processing date and month start are hard-coded literals, and
-- the append is unconditional, so re-running this statement for a day that was
-- already processed adds a duplicate slot. Confirm the scheduler runs it once per day.
INSERT INTO array_metrics (user_id, month_start, metric_name, metric_array)
WITH daily_aggregate AS (
    -- Site hits per user for the day being processed.
    SELECT
        user_id,
        DATE(event_time) AS date,
        COUNT(1) AS num_site_hits
    FROM events
    WHERE DATE(event_time) = DATE('2023-01-01')
        AND user_id IS NOT NULL
    GROUP BY user_id, DATE(event_time)
),
yesterday_array AS (
    -- Arrays accumulated so far for the month. Restricted to 'site_hits' because
    -- metric_name is part of the conflict key: without this filter, rows that
    -- belong to other metrics would be joined by user_id and their arrays
    -- written back under the 'site_hits' name.
    SELECT
        user_id,
        month_start,
        metric_array
    FROM array_metrics
    WHERE month_start = DATE('2023-01-01')
        AND metric_name = 'site_hits'
)
SELECT
    -- The FULL OUTER JOIN leaves one side NULL for users present on only one side.
    COALESCE(da.user_id, ya.user_id) AS user_id,
    -- DATE_TRUNC yields a timestamp; cast back so both COALESCE branches are dates.
    COALESCE(ya.month_start, DATE(DATE_TRUNC('month', da.date))) AS month_start,
    'site_hits' AS metric_name,
    CASE
        WHEN ya.metric_array IS NOT NULL THEN
            -- Existing user: append the day's count, or 0 if there was no activity.
            ya.metric_array || ARRAY[COALESCE(da.num_site_hits, 0)]
        ELSE
            -- New user: pad with one zero per day between the month start and the
            -- processing date, then append the day's count.
            ARRAY_FILL(0, ARRAY[COALESCE(da.date - DATE(DATE_TRUNC('month', da.date)), 0)])
            || ARRAY[COALESCE(da.num_site_hits, 0)]
    END AS metric_array
FROM daily_aggregate AS da
FULL OUTER JOIN yesterday_array AS ya
    ON da.user_id = ya.user_id
ON CONFLICT (user_id, month_start, metric_name)
DO UPDATE SET metric_array = EXCLUDED.metric_array;
| 41 | + |
| 42 | +-- Uncomment and run the following query to verify the cardinality of metric_array |
| 43 | +-- SELECT cardinality(metric_array), COUNT(1) |
| 44 | +-- FROM array_metrics |
| 45 | +-- GROUP BY 1; |
| 46 | + |
-- Total the first three array slots across all rows of each
-- (metric_name, month_start) group, then expand the three totals into one
-- output row per slot, dated month_start plus (slot number - 1) days.
WITH monthly_totals AS (
    SELECT
        metric_name,
        month_start,
        ARRAY[
            SUM(metric_array[1]),
            SUM(metric_array[2]),
            SUM(metric_array[3])
        ] AS summed_array
    FROM array_metrics
    GROUP BY metric_name, month_start
)
SELECT
    mt.metric_name,
    -- WITH ORDINALITY numbers elements from 1, so slot 1 maps to month_start itself.
    mt.month_start + (slot.day_number - 1) * INTERVAL '1 day' AS adjusted_date,
    slot.total AS value
FROM monthly_totals AS mt
CROSS JOIN UNNEST(mt.summed_array) WITH ORDINALITY AS slot(total, day_number);
0 commit comments