Skip to content
Open
Show file tree
Hide file tree
Changes from 35 commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
63b935c
Remove unnecessary concat function, and use UTF8 compatible functions…
casab Feb 9, 2023
1ea8d11
Merge remote-tracking branch 'upstream/master' into feature/clickhous…
casab Jun 7, 2024
49111c2
Add integration testing for ordering with collation
casab Jun 7, 2024
153b3fd
Merge branch 'master' into feature/clickhouse_utf8_filter_order
casab Nov 22, 2024
b2c4ee9
Move case sensitive entry from ClickHouseDbRunner.js to ClickHouseDbR…
casab Jan 24, 2025
dd2fd32
Added back CONCAT() to prevent sql injection
casab Jan 24, 2025
f26f4fe
Merge branch 'master' into feature/clickhouse_utf8_filter_order
KSDaemon Jan 24, 2025
a5b279e
Change intentation from 4 spaces to 2 spaces
casab Jan 24, 2025
f7a4759
Only apply coalition if field's type is string
casab Jan 24, 2025
83437e9
Merge branch 'master' into feature/clickhouse_utf8_filter_order
KSDaemon Jan 27, 2025
600c1fa
Replaced Google with Gork to make ordering clear, added toValidUTF8 t…
casab Jan 27, 2025
eadfa1a
Added CUBEJS_DB_CLICKHOUSE_SORT_COLLATION env variable for collation …
casab Jan 27, 2025
0610d16
Merge branch 'master' into feature/clickhouse_utf8_filter_order
casab Jan 28, 2025
8807178
Only use collation for clickhouse if env variable is set
casab Jan 28, 2025
f095335
Merge branch 'master' into feature/clickhouse_utf8_filter_order
casab Feb 4, 2025
e76b91a
Merge branch 'master' into feature/clickhouse_utf8_filter_order
casab Feb 5, 2025
fdebe6e
Merge branch 'master' into feature/clickhouse_utf8_filter_order
casab Feb 6, 2025
be6875b
Add CUBEJS_DB_CLICKHOUSE_USE_COLLATION env var to disable collation, …
casab Feb 6, 2025
4fc9ed4
Fix default clickhouseSortCollection value
casab Feb 6, 2025
b44e57e
Use ILIKE instead of LIKE with lowerUTF8 and toValidUTF8
casab Feb 6, 2025
a2616fe
Merge branch 'master' into feature/clickhouse_utf8_filter_order
KSDaemon Feb 6, 2025
ce3ec56
Fix clickhouseUseCollation tests to check for default value
casab Feb 6, 2025
ee06aee
Merge branch 'master' into feature/clickhouse_utf8_filter_order
KSDaemon Feb 6, 2025
b649df2
Fix clickhouse integration tests for the missing values
casab Feb 6, 2025
716f24b
Merge branch 'master' into feature/clickhouse_utf8_filter_order
KSDaemon Feb 6, 2025
79ab001
Assert collation in the 'collation in order by' clickhouse integratio…
casab Feb 6, 2025
f660674
Merge branch 'master' into feature/clickhouse_utf8_filter_order
casab Feb 11, 2025
beac449
Fix getFieldType by using hash.id instead
casab Feb 11, 2025
bd29d16
Merge branch 'master' into feature/clickhouse_utf8_filter_order
casab Feb 13, 2025
a62eb42
Merge branch 'master' into feature/clickhouse_utf8_filter_order
casab Feb 13, 2025
225c19a
Merge branch 'master' into feature/clickhouse_utf8_filter_order
casab Feb 14, 2025
8971b45
Merge branch 'master' into feature/clickhouse_utf8_filter_order
KSDaemon Feb 18, 2025
f712829
Merge branch 'master' into feature/clickhouse_utf8_filter_order
KSDaemon Feb 18, 2025
d18b953
Merge branch 'master' into feature/clickhouse_utf8_filter_order
casab Feb 19, 2025
79373d6
Merge branch 'master' into feature/clickhouse_utf8_filter_order
KSDaemon Feb 20, 2025
8179923
Merge branch 'master' into feature/clickhouse_utf8_filter_order
casab Jul 20, 2025
7a809cd
Make getField and getCollation private
casab Jul 20, 2025
97aeae4
Use plain js instead of ramda for collation check
casab Jul 20, 2025
2379931
Move templates/select, templates/expressions/sort, templates/expressi…
casab Jul 20, 2025
8d61d13
Merge branch 'master' into feature/clickhouse_utf8_filter_order
casab Jul 23, 2025
8672872
Merge branch 'master' into feature/clickhouse_utf8_filter_order
casab Jul 24, 2025
ca0612c
Merge branch 'master' into feature/clickhouse_utf8_filter_order
casab Jul 25, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 47 additions & 0 deletions packages/cubejs-backend-shared/src/env.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1169,6 +1169,53 @@ const variables: Record<string, (...args: any) => any> = {
]
),

/**
* ClickHouse sort collation.
*/
clickhouseSortCollation: ({ dataSource }: {dataSource: string }) => {
const val = process.env[
keyByDataSource('CUBEJS_DB_CLICKHOUSE_SORT_COLLATION', dataSource)
];
if (!val) {
// Default to 'en' collation
return 'en';
}
return val;
},

/**
* Clickhouse use collation flag.
*/

clickhouseUseCollation: ({ dataSource }: { dataSource: string }) => {
const val = process.env[
keyByDataSource(
'CUBEJS_DB_CLICKHOUSE_USE_COLLATION',
dataSource,
)
];

if (val) {
if (val.toLocaleLowerCase() === 'true') {
return true;
} else if (val.toLowerCase() === 'false') {
return false;
} else {
throw new TypeError(
`The ${
keyByDataSource(
'CUBEJS_DB_CLICKHOUSE_USE_COLLATION',
dataSource,
)
} must be either 'true' or 'false'.`
);
}
} else {
// Default to true
return true;
}
},

/** ****************************************************************
* ElasticSearch Driver *
***************************************************************** */
Expand Down
71 changes: 71 additions & 0 deletions packages/cubejs-backend-shared/test/db_env_multi.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1539,6 +1539,77 @@ describe('Multiple datasources', () => {
);
});

test('getEnv("clickhouseSortCollation")', () => {
process.env.CUBEJS_DB_CLICKHOUSE_SORT_COLLATION = 'default1';
process.env.CUBEJS_DS_POSTGRES_DB_CLICKHOUSE_SORT_COLLATION = 'postgres1';
process.env.CUBEJS_DS_WRONG_DB_CLICKHOUSE_SORT_COLLATION = 'wrong1';
expect(getEnv('clickhouseSortCollation', { dataSource: 'default' })).toEqual('default1');
expect(getEnv('clickhouseSortCollation', { dataSource: 'postgres' })).toEqual('postgres1');
expect(() => getEnv('clickhouseSortCollation', { dataSource: 'wrong' })).toThrow(
'The wrong data source is missing in the declared CUBEJS_DATASOURCES.'
);

process.env.CUBEJS_DB_CLICKHOUSE_SORT_COLLATION = 'default2';
process.env.CUBEJS_DS_POSTGRES_DB_CLICKHOUSE_SORT_COLLATION = 'postgres2';
process.env.CUBEJS_DS_WRONG_DB_CLICKHOUSE_SORT_COLLATION = 'wrong2';
expect(getEnv('clickhouseSortCollation', { dataSource: 'default' })).toEqual('default2');
expect(getEnv('clickhouseSortCollation', { dataSource: 'postgres' })).toEqual('postgres2');
expect(() => getEnv('clickhouseSortCollation', { dataSource: 'wrong' })).toThrow(
'The wrong data source is missing in the declared CUBEJS_DATASOURCES.'
);

delete process.env.CUBEJS_DB_CLICKHOUSE_SORT_COLLATION;
delete process.env.CUBEJS_DS_POSTGRES_DB_CLICKHOUSE_SORT_COLLATION;
delete process.env.CUBEJS_DS_WRONG_DB_CLICKHOUSE_SORT_COLLATION;
expect(getEnv('clickhouseSortCollation', { dataSource: 'default' })).toEqual('en');
expect(getEnv('clickhouseSortCollation', { dataSource: 'postgres' })).toEqual('en');
expect(() => getEnv('clickhouseSortCollation', { dataSource: 'wrong' })).toThrow(
'The wrong data source is missing in the declared CUBEJS_DATASOURCES.'
);
});

test('getEnv("clickhouseUseCollation")', () => {
process.env.CUBEJS_DB_CLICKHOUSE_USE_COLLATION = 'true';
process.env.CUBEJS_DS_POSTGRES_DB_CLICKHOUSE_USE_COLLATION = 'true';
process.env.CUBEJS_DS_WRONG_DB_CLICKHOUSE_USE_COLLATION = 'true';
expect(getEnv('clickhouseUseCollation', { dataSource: 'default' })).toEqual(true);
expect(getEnv('clickhouseUseCollation', { dataSource: 'postgres' })).toEqual(true);
expect(() => getEnv('clickhouseUseCollation', { dataSource: 'wrong' })).toThrow(
'The wrong data source is missing in the declared CUBEJS_DATASOURCES.'
);

process.env.CUBEJS_DB_CLICKHOUSE_USE_COLLATION = 'false';
process.env.CUBEJS_DS_POSTGRES_DB_CLICKHOUSE_USE_COLLATION = 'false';
process.env.CUBEJS_DS_WRONG_DB_CLICKHOUSE_USE_COLLATION = 'false';
expect(getEnv('clickhouseUseCollation', { dataSource: 'default' })).toEqual(false);
expect(getEnv('clickhouseUseCollation', { dataSource: 'postgres' })).toEqual(false);
expect(() => getEnv('clickhouseUseCollation', { dataSource: 'wrong' })).toThrow(
'The wrong data source is missing in the declared CUBEJS_DATASOURCES.'
);

process.env.CUBEJS_DB_CLICKHOUSE_USE_COLLATION = 'wrong';
process.env.CUBEJS_DS_POSTGRES_DB_CLICKHOUSE_USE_COLLATION = 'wrong';
process.env.CUBEJS_DS_WRONG_DB_CLICKHOUSE_USE_COLLATION = 'wrong';
expect(() => getEnv('clickhouseUseCollation', { dataSource: 'default' })).toThrow(
'The CUBEJS_DB_CLICKHOUSE_USE_COLLATION must be either \'true\' or \'false\'.'
);
expect(() => getEnv('clickhouseUseCollation', { dataSource: 'postgres' })).toThrow(
'The CUBEJS_DS_POSTGRES_DB_CLICKHOUSE_USE_COLLATION must be either \'true\' or \'false\'.'
);
expect(() => getEnv('clickhouseUseCollation', { dataSource: 'wrong' })).toThrow(
'The wrong data source is missing in the declared CUBEJS_DATASOURCES.'
);

delete process.env.CUBEJS_DB_CLICKHOUSE_USE_COLLATION;
delete process.env.CUBEJS_DS_POSTGRES_DB_CLICKHOUSE_USE_COLLATION;
delete process.env.CUBEJS_DS_WRONG_DB_CLICKHOUSE_USE_COLLATION;
expect(getEnv('clickhouseUseCollation', { dataSource: 'default' })).toEqual(true);
expect(getEnv('clickhouseUseCollation', { dataSource: 'postgres' })).toEqual(true);
expect(() => getEnv('clickhouseUseCollation', { dataSource: 'wrong' })).toThrow(
'The wrong data source is missing in the declared CUBEJS_DATASOURCES.'
);
});

test('getEnv("elasticApiId")', () => {
process.env.CUBEJS_DB_ELASTIC_APIKEY_ID = 'default1';
process.env.CUBEJS_DS_POSTGRES_DB_ELASTIC_APIKEY_ID = 'postgres1';
Expand Down
45 changes: 45 additions & 0 deletions packages/cubejs-backend-shared/test/db_env_single.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -975,6 +975,51 @@ describe('Single datasources', () => {
expect(getEnv('clickhouseReadOnly', { dataSource: 'wrong' })).toBeUndefined();
});

test('getEnv("clickhouseSortCollation")', () => {
process.env.CUBEJS_DB_CLICKHOUSE_SORT_COLLATION = 'default1';
expect(getEnv('clickhouseSortCollation', { dataSource: 'default' })).toEqual('default1');
expect(getEnv('clickhouseSortCollation', { dataSource: 'postgres' })).toEqual('default1');
expect(getEnv('clickhouseSortCollation', { dataSource: 'wrong' })).toEqual('default1');

process.env.CUBEJS_DB_CLICKHOUSE_SORT_COLLATION = 'default2';
expect(getEnv('clickhouseSortCollation', { dataSource: 'default' })).toEqual('default2');
expect(getEnv('clickhouseSortCollation', { dataSource: 'postgres' })).toEqual('default2');
expect(getEnv('clickhouseSortCollation', { dataSource: 'wrong' })).toEqual('default2');

delete process.env.CUBEJS_DB_CLICKHOUSE_SORT_COLLATION;
expect(getEnv('clickhouseSortCollation', { dataSource: 'default' })).toEqual('en');
expect(getEnv('clickhouseSortCollation', { dataSource: 'postgres' })).toEqual('en');
expect(getEnv('clickhouseSortCollation', { dataSource: 'wrong' })).toEqual('en');
});

test('getEnv("clickhouseUseCollation")', () => {
process.env.CUBEJS_DB_CLICKHOUSE_USE_COLLATION = 'true';
expect(getEnv('clickhouseUseCollation', { dataSource: 'default' })).toEqual(true);
expect(getEnv('clickhouseUseCollation', { dataSource: 'postgres' })).toEqual(true);
expect(getEnv('clickhouseUseCollation', { dataSource: 'wrong' })).toEqual(true);

process.env.CUBEJS_DB_CLICKHOUSE_USE_COLLATION = 'false';
expect(getEnv('clickhouseUseCollation', { dataSource: 'default' })).toEqual(false);
expect(getEnv('clickhouseUseCollation', { dataSource: 'postgres' })).toEqual(false);
expect(getEnv('clickhouseUseCollation', { dataSource: 'wrong' })).toEqual(false);

process.env.CUBEJS_DB_CLICKHOUSE_USE_COLLATION = 'wrong';
expect(() => getEnv('clickhouseUseCollation', { dataSource: 'default' })).toThrow(
'The CUBEJS_DB_CLICKHOUSE_USE_COLLATION must be either \'true\' or \'false\'.'
);
expect(() => getEnv('clickhouseUseCollation', { dataSource: 'postgres' })).toThrow(
'The CUBEJS_DB_CLICKHOUSE_USE_COLLATION must be either \'true\' or \'false\'.'
);
expect(() => getEnv('clickhouseUseCollation', { dataSource: 'wrong' })).toThrow(
'The CUBEJS_DB_CLICKHOUSE_USE_COLLATION must be either \'true\' or \'false\'.'
);

delete process.env.CUBEJS_DB_CLICKHOUSE_USE_COLLATION;
expect(getEnv('clickhouseUseCollation', { dataSource: 'default' })).toEqual(true);
expect(getEnv('clickhouseUseCollation', { dataSource: 'postgres' })).toEqual(true);
expect(getEnv('clickhouseUseCollation', { dataSource: 'wrong' })).toEqual(true);
});

test('getEnv("elasticApiId")', () => {
process.env.CUBEJS_DB_ELASTIC_APIKEY_ID = 'default1';
expect(getEnv('elasticApiId', { dataSource: 'default' })).toEqual('default1');
Expand Down
87 changes: 80 additions & 7 deletions packages/cubejs-schema-compiler/src/adapter/ClickHouseQuery.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import { parseSqlInterval } from '@cubejs-backend/shared';
import R from 'ramda';

import { getEnv, parseSqlInterval } from '@cubejs-backend/shared';
import { BaseQuery } from './BaseQuery';
import { BaseFilter } from './BaseFilter';
import { UserError } from '../compiler/UserError';
Expand All @@ -18,7 +20,7 @@ class ClickHouseFilter extends BaseFilter {
public likeIgnoreCase(column, not, param, type) {
const p = (!type || type === 'contains' || type === 'ends') ? '%' : '';
const s = (!type || type === 'contains' || type === 'starts') ? '%' : '';
return `lower(${column}) ${not ? 'NOT' : ''} LIKE CONCAT('${p}', lower(${this.allocateParam(param)}), '${s}')`;
return `${column} ${not ? 'NOT' : ''} ILIKE CONCAT('${p}', ${this.allocateParam(param)}, '${s}')`;
}

public castParameter() {
Expand Down Expand Up @@ -123,7 +125,7 @@ export class ClickHouseQuery extends BaseQuery {
.join(' AND ');
}

public getFieldAlias(id) {
public getField(id) {
const equalIgnoreCase = (a, b) => (
typeof a === 'string' && typeof b === 'string' && a.toUpperCase() === b.toUpperCase()
);
Expand All @@ -134,16 +136,34 @@ export class ClickHouseQuery extends BaseQuery {
d => equalIgnoreCase(d.dimension, id),
);

if (!field) {
field = this.measures.find(
d => equalIgnoreCase(d.measure, id) || equalIgnoreCase(d.expressionName, id),
);
}

return field;
}

public getFieldAlias(id) {
const field = this.getField(id);

if (field) {
return field.aliasName();
}

field = this.measures.find(
d => equalIgnoreCase(d.measure, id) || equalIgnoreCase(d.expressionName, id),
);
return null;
}

public getFieldType(hash) {
if (!hash || !hash.id) {
return null;
}

const field = this.getField(hash.id);

if (field) {
return field.aliasName();
return field.definition().type;
}

return null;
Expand All @@ -168,6 +188,43 @@ export class ClickHouseQuery extends BaseQuery {
return `${fieldAlias} ${direction}`;
}

public getCollation() {
const useCollation = getEnv('clickhouseUseCollation', { dataSource: this.dataSource });
if (useCollation) {
return getEnv('clickhouseSortCollation', { dataSource: this.dataSource });
}
return null;
}

public override orderBy() {
//
// ClickHouse orders string by bytes, so we need to use COLLATE 'en' to order by string
//
if (R.isEmpty(this.order)) {
return '';
}

const collation = this.getCollation();

const orderByString = R.pipe(
R.map((order) => {
let orderString = this.orderHashToString(order);
if (collation && this.getFieldType(order) === 'string') {
orderString = `${orderString} COLLATE '${collation}'`;
}
return orderString;
}),
R.reject(R.isNil),
R.join(', ')
)(this.order);

if (!orderByString) {
return '';
}

return ` ORDER BY ${orderByString}`;
}

public groupByClause() {
if (this.ungrouped) {
return '';
Expand Down Expand Up @@ -281,6 +338,22 @@ export class ClickHouseQuery extends BaseQuery {
// ClickHouse intervals have a distinct type for each granularity
delete templates.types.interval;
delete templates.types.binary;

const collation = this.getCollation();

if (collation) {
templates.expressions.sort = `${templates.expressions.sort}{% if data_type and data_type == 'string' %} COLLATE '${collation}'{% endif %}`;
templates.expressions.order_by = `${templates.expressions.order_by}{% if data_type and data_type == 'string' %} COLLATE '${collation}'{% endif %}`;

const oldOrderBy = '{% if order_by %}\nORDER BY {{ order_by | map(attribute=\'expr\') | join(\', \') }}{% endif %}';

const newOrderBy =
'{% if order_by %}\nORDER BY {% for item in order_by %}{{ item.expr }}' +
`{%- if item.data_type and item.data_type == 'string' %} COLLATE '${collation}'{% endif %}` +
'{%- if not loop.last %}, {% endif %}{% endfor %}{% endif %}';

templates.statements.select = templates.statements.select.replace(oldOrderBy, newOrderBy);
}
return templates;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ export class ClickHouseDbRunner extends BaseDbRunner {
(3, 300, '2017-01-05 16:00:00', '2017-01-19 16:00:00', 2, 'google', 120.120, 70.60),
(4, 400, '2017-01-06 16:00:00', '2017-01-24 16:00:00', 2, null, 120.120, 10.60),
(5, 500, '2017-01-06 16:00:00', '2017-01-24 16:00:00', 2, null, 120.120, 58.10),
(6, 500, '2016-09-06 16:00:00', '2016-09-06 16:00:00', 2, null, 120.120, 58.10)
(6, 500, '2016-09-06 16:00:00', '2016-09-06 16:00:00', 2, null, 120.120, 58.10),
(7, 300, '2017-01-07 16:00:00', '2017-01-25 16:00:00', 2, 'Gork', 120.120, 59.60)
` });

await clickHouse.command({ query: `
Expand Down
Loading
Loading