Skip to content

Commit 6cdcef1

Browse files
authored
Adds bloom filter index to coordinates agg views (#6536)
1 parent 21572b9 commit 6cdcef1

File tree

3 files changed

+107
-0
lines changed

3 files changed

+107
-0
lines changed

.changeset/three-gifts-battle.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
---
2+
'hive': patch
3+
---
4+
5+
Adds an index to coordinates\_(daily,hourly,minutely) tables to speed up the
6+
get_top_operations_for_types ClickHouse query.
7+
8+
Reading of type and field usage statistics should be noticeably faster now on big datasets.
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
import { z } from 'zod';
2+
import type { Action } from '../clickhouse';
3+
4+
const SystemTablesModel = z.array(
5+
z.object({
6+
name: z.string(),
7+
uuid: z.string(),
8+
}),
9+
);
10+
11+
const StateTableModel = z.array(
12+
z.object({
13+
table: z.string(),
14+
idx_created: z.boolean(),
15+
idx_materialized: z.boolean(),
16+
}),
17+
);
18+
19+
// This migration adds an index for the `coordinate` field.
20+
// It improves the performance of queries that filter rows by the type's name.
21+
//
22+
// For example, when looking for `Member.*` coordinates we eliminate the need to scan the whole table,
23+
// by leveraging the idx_typename index.
24+
// We filter rows by the first part of the `coordinate` field (substringIndex(coordinate, '.', 1)).
25+
export const action: Action = async (exec, query) => {
26+
// Create a table to store the state of the migration
27+
await exec(`
28+
CREATE TABLE IF NOT EXISTS default.migration_coordinates_typename_index (
29+
table String,
30+
idx_created Bool DEFAULT false,
31+
idx_materialized Bool DEFAULT false
32+
) ENGINE = MergeTree() ORDER BY tuple()
33+
`);
34+
35+
const tables = await query(`
36+
SELECT uuid, name FROM system.tables WHERE name IN (
37+
'coordinates_daily',
38+
'coordinates_hourly',
39+
'coordinates_minutely'
40+
);
41+
`).then(async r => SystemTablesModel.parse(r.data));
42+
43+
if (tables.length !== 3) {
44+
throw new Error('Expected 3 tables');
45+
}
46+
47+
const tableStates = await query(`
48+
SELECT table, idx_created, idx_materialized FROM default.migration_coordinates_typename_index
49+
`).then(async r => StateTableModel.parse(r.data));
50+
51+
for (const { uuid, name } of tables) {
52+
let state = tableStates.find(s => s.table === name);
53+
54+
if (!state) {
55+
console.log(`Creating state for table ${name}`);
56+
await exec(`
57+
INSERT INTO default.migration_coordinates_typename_index (table) VALUES ('${name}')
58+
`);
59+
60+
state = { table: name, idx_created: false, idx_materialized: false };
61+
}
62+
63+
const innerTable = `.inner_id.${uuid}`;
64+
65+
if (state.idx_created) {
66+
console.log(`Skipping idx_typename for table ${name}`);
67+
} else {
68+
console.log(`Creating idx_typename for table ${name}`);
69+
await exec(
70+
`ALTER TABLE "${innerTable}" ADD INDEX IF NOT EXISTS idx_typename (substringIndex(coordinate, '.', 1)) TYPE ngrambf_v1(4, 1024, 2, 0) GRANULARITY 1`,
71+
);
72+
await exec(
73+
`ALTER TABLE default.migration_coordinates_typename_index UPDATE idx_created = true WHERE table = '${name}'`,
74+
{
75+
mutations_sync: '2',
76+
},
77+
);
78+
}
79+
80+
if (state.idx_materialized) {
81+
console.log(`Skipping materializing idx_typename for table ${name}`);
82+
} else {
83+
console.log(`Materializing idx_typename for table ${name}`);
84+
await exec(`ALTER TABLE "${innerTable}" MATERIALIZE INDEX idx_typename`);
85+
await exec(
86+
`ALTER TABLE default.migration_coordinates_typename_index UPDATE idx_materialized = true WHERE table = '${name}'`,
87+
{
88+
mutations_sync: '2',
89+
},
90+
);
91+
}
92+
}
93+
94+
console.log('Dropping migration state table');
95+
await exec(`
96+
DROP TABLE default.migration_coordinates_typename_index
97+
`);
98+
};

packages/migrations/src/clickhouse.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,7 @@ export async function migrateClickHouse(
169169
import('./clickhouse-actions/009-ttl-1-year'),
170170
import('./clickhouse-actions/010-app-deployment-operations'),
171171
import('./clickhouse-actions/011-audit-logs'),
172+
import('./clickhouse-actions/012-coordinates-typename-index'),
172173
]);
173174

174175
async function actionRunner(action: Action, index: number) {

0 commit comments

Comments
 (0)