Skip to content

Commit be6875b

Browse files
committed
Add CUBEJS_DB_CLICKHOUSE_USE_COLLATION env var to disable collation; add data_type to AliasedColumn and TemplateColumn so the data type can be used in the templates
1 parent fdebe6e commit be6875b

File tree

6 files changed

+169
-8
lines changed

6 files changed

+169
-8
lines changed

packages/cubejs-backend-shared/src/env.ts

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1147,10 +1147,43 @@ const variables: Record<string, (...args: any) => any> = {
11471147
dataSource: string,
11481148
}) => (
11491149
process.env[
1150-
keyByDataSource('CUBEJS_DB_CLICKHOUSE_SORT_COLLATION', dataSource)
1150+
keyByDataSource('CUBEJS_DB_CLICKHOUSE_SORT_COLLATION', dataSource) || 'en'
11511151
]
11521152
),
11531153

1154+
/**
1155+
* Clickhouse use collation flag.
1156+
*/
1157+
1158+
clickhouseUseCollation: ({ dataSource }: { dataSource: string }) => {
1159+
const val = process.env[
1160+
keyByDataSource(
1161+
'CUBEJS_DB_CLICKHOUSE_USE_COLLATION',
1162+
dataSource,
1163+
)
1164+
];
1165+
1166+
if (val) {
1167+
if (val.toLocaleLowerCase() === 'true') {
1168+
return true;
1169+
} else if (val.toLowerCase() === 'false') {
1170+
return false;
1171+
} else {
1172+
throw new TypeError(
1173+
`The ${
1174+
keyByDataSource(
1175+
'CUBEJS_DB_CLICKHOUSE_USE_COLLATION',
1176+
dataSource,
1177+
)
1178+
} must be either 'true' or 'false'.`
1179+
);
1180+
}
1181+
} else {
1182+
// Default to true
1183+
return true;
1184+
}
1185+
},
1186+
11541187
/** ****************************************************************
11551188
* ElasticSearch Driver *
11561189
***************************************************************** */

packages/cubejs-backend-shared/test/db_env_multi.test.ts

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1568,6 +1568,48 @@ describe('Multiple datasources', () => {
15681568
);
15691569
});
15701570

1571+
test('getEnv("clickhouseUseCollation")', () => {
1572+
process.env.CUBEJS_DB_CLICKHOUSE_USE_COLLATION = 'true';
1573+
process.env.CUBEJS_DS_POSTGRES_DB_CLICKHOUSE_USE_COLLATION = 'true';
1574+
process.env.CUBEJS_DS_WRONG_DB_CLICKHOUSE_USE_COLLATION = 'true';
1575+
expect(getEnv('clickhouseUseCollation', { dataSource: 'default' })).toEqual(true);
1576+
expect(getEnv('clickhouseUseCollation', { dataSource: 'postgres' })).toEqual(true);
1577+
expect(() => getEnv('clickhouseUseCollation', { dataSource: 'wrong' })).toThrow(
1578+
'The wrong data source is missing in the declared CUBEJS_DATASOURCES.'
1579+
);
1580+
1581+
process.env.CUBEJS_DB_CLICKHOUSE_USE_COLLATION = 'false';
1582+
process.env.CUBEJS_DS_POSTGRES_DB_CLICKHOUSE_USE_COLLATION = 'false';
1583+
process.env.CUBEJS_DS_WRONG_DB_CLICKHOUSE_USE_COLLATION = 'false';
1584+
expect(getEnv('clickhouseUseCollation', { dataSource: 'default' })).toEqual(false);
1585+
expect(getEnv('clickhouseUseCollation', { dataSource: 'postgres' })).toEqual(false);
1586+
expect(() => getEnv('clickhouseUseCollation', { dataSource: 'wrong' })).toThrow(
1587+
'The wrong data source is missing in the declared CUBEJS_DATASOURCES.'
1588+
);
1589+
1590+
process.env.CUBEJS_DB_CLICKHOUSE_USE_COLLATION = 'wrong';
1591+
process.env.CUBEJS_DS_POSTGRES_DB_CLICKHOUSE_USE_COLLATION = 'wrong';
1592+
process.env.CUBEJS_DS_WRONG_DB_CLICKHOUSE_USE_COLLATION = 'wrong';
1593+
expect(() => getEnv('clickhouseUseCollation', { dataSource: 'default' })).toThrow(
1594+
'The CUBEJS_DB_CLICKHOUSE_USE_COLLATION must be either \'true\' or \'false\'.'
1595+
);
1596+
expect(() => getEnv('clickhouseUseCollation', { dataSource: 'postgres' })).toThrow(
1597+
'The CUBEJS_DS_POSTGRES_DB_CLICKHOUSE_USE_COLLATION must be either \'true\' or \'false\'.'
1598+
);
1599+
expect(() => getEnv('clickhouseUseCollation', { dataSource: 'wrong' })).toThrow(
1600+
'The wrong data source is missing in the declared CUBEJS_DATASOURCES.'
1601+
);
1602+
1603+
delete process.env.CUBEJS_DB_CLICKHOUSE_USE_COLLATION;
1604+
delete process.env.CUBEJS_DS_POSTGRES_DB_CLICKHOUSE_USE_COLLATION;
1605+
delete process.env.CUBEJS_DS_WRONG_DB_CLICKHOUSE_USE_COLLATION;
1606+
expect(getEnv('clickhouseUseCollation', { dataSource: 'default' })).toEqual(true);
1607+
expect(getEnv('clickhouseUseCollation', { dataSource: 'postgres' })).toEqual(true);
1608+
expect(() => getEnv('clickhouseUseCollation', { dataSource: 'wrong' })).toThrow(
1609+
'The wrong data source is missing in the declared CUBEJS_DATASOURCES.'
1610+
);
1611+
});
1612+
15711613
test('getEnv("elasticApiId")', () => {
15721614
process.env.CUBEJS_DB_ELASTIC_APIKEY_ID = 'default1';
15731615
process.env.CUBEJS_DS_POSTGRES_DB_ELASTIC_APIKEY_ID = 'postgres1';

packages/cubejs-backend-shared/test/db_env_single.test.ts

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -992,6 +992,34 @@ describe('Single datasources', () => {
992992
expect(getEnv('clickhouseSortCollation', { dataSource: 'wrong' })).toBeUndefined();
993993
});
994994

995+
test('getEnv("clickhouseUseCollation")', () => {
996+
process.env.CUBEJS_DB_CLICKHOUSE_USE_COLLATION = 'true';
997+
expect(getEnv('clickhouseUseCollation', { dataSource: 'default' })).toEqual(true);
998+
expect(getEnv('clickhouseUseCollation', { dataSource: 'postgres' })).toEqual(true);
999+
expect(getEnv('clickhouseUseCollation', { dataSource: 'wrong' })).toEqual(true);
1000+
1001+
process.env.CUBEJS_DB_CLICKHOUSE_USE_COLLATION = 'false';
1002+
expect(getEnv('clickhouseUseCollation', { dataSource: 'default' })).toEqual(false);
1003+
expect(getEnv('clickhouseUseCollation', { dataSource: 'postgres' })).toEqual(false);
1004+
expect(getEnv('clickhouseUseCollation', { dataSource: 'wrong' })).toEqual(false);
1005+
1006+
process.env.CUBEJS_DB_CLICKHOUSE_USE_COLLATION = 'wrong';
1007+
expect(() => getEnv('clickhouseUseCollation', { dataSource: 'default' })).toThrow(
1008+
'The CUBEJS_DB_CLICKHOUSE_USE_COLLATION must be either \'true\' or \'false\'.'
1009+
);
1010+
expect(() => getEnv('clickhouseUseCollation', { dataSource: 'postgres' })).toThrow(
1011+
'The CUBEJS_DB_CLICKHOUSE_USE_COLLATION must be either \'true\' or \'false\'.'
1012+
);
1013+
expect(() => getEnv('clickhouseUseCollation', { dataSource: 'wrong' })).toThrow(
1014+
'The CUBEJS_DB_CLICKHOUSE_USE_COLLATION must be either \'true\' or \'false\'.'
1015+
);
1016+
1017+
delete process.env.CUBEJS_DB_CLICKHOUSE_USE_COLLATION;
1018+
expect(getEnv('clickhouseUseCollation', { dataSource: 'default' })).toEqual(true);
1019+
expect(getEnv('clickhouseUseCollation', { dataSource: 'postgres' })).toEqual(true);
1020+
expect(getEnv('clickhouseUseCollation', { dataSource: 'wrong' })).toEqual(true);
1021+
});
1022+
9951023
test('getEnv("elasticApiId")', () => {
9961024
process.env.CUBEJS_DB_ELASTIC_APIKEY_ID = 'default1';
9971025
expect(getEnv('elasticApiId', { dataSource: 'default' })).toEqual('default1');

packages/cubejs-schema-compiler/src/adapter/ClickHouseQuery.ts

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,14 @@ export class ClickHouseQuery extends BaseQuery {
184184
return `${fieldAlias} ${direction}`;
185185
}
186186

187+
public getCollation() {
188+
const useCollation = getEnv('clickhouseUseCollation', { dataSource: this.dataSource });
189+
if (useCollation) {
190+
return getEnv('clickhouseSortCollation', { dataSource: this.dataSource });
191+
}
192+
return null;
193+
}
194+
187195
public override orderBy() {
188196
//
189197
// ClickHouse orders string by bytes, so we need to use COLLATE 'en' to order by string
@@ -192,12 +200,12 @@ export class ClickHouseQuery extends BaseQuery {
192200
return '';
193201
}
194202

195-
const collation = getEnv('clickhouseSortCollation');
203+
const collation = this.getCollation();
196204

197205
const orderByString = R.pipe(
198206
R.map((order) => {
199207
let orderString = this.orderHashToString(order);
200-
if (this.getFieldType(order) === 'string' && collation !== '') {
208+
if (collation && this.getFieldType(order) === 'string') {
201209
orderString = `${orderString} COLLATE '${collation}'`;
202210
}
203211
return orderString;
@@ -326,6 +334,22 @@ export class ClickHouseQuery extends BaseQuery {
326334
// ClickHouse intervals have a distinct type for each granularity
327335
delete templates.types.interval;
328336
delete templates.types.binary;
337+
338+
const collation = this.getCollation();
339+
340+
if (collation) {
341+
templates.expressions.sort = `${templates.expressions.sort}{% if data_type and data_type == 'string' %} COLLATE '${collation}'{% endif %}`;
342+
templates.expressions.order_by = `${templates.expressions.order_by}{% if data_type and data_type == 'string' %} COLLATE '${collation}'{% endif %}`;
343+
344+
const oldOrderBy = '{% if order_by %}\nORDER BY {{ order_by | map(attribute=\'expr\') | join(\', \') }}{% endif %}';
345+
346+
const newOrderBy =
347+
'{% if order_by %}\nORDER BY {% for item in order_by %}{{ item.expr }}' +
348+
`{%- if item.data_type and item.data_type == 'string' %} COLLATE '${collation}'{% endif %}` +
349+
'{%- if not loop.last %}, {% endif %}{% endfor %}{% endif %}';
350+
351+
templates.statements.select = templates.statements.select.replace(oldOrderBy, newOrderBy);
352+
}
329353
return templates;
330354
}
331355
}

rust/cubesql/cubesql/src/compile/engine/df/wrapper.rs

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@ use cubeclient::models::{V1LoadRequestQuery, V1LoadRequestQueryJoinSubquery};
2323
use datafusion::{
2424
error::{DataFusionError, Result},
2525
logical_plan::{
26-
plan::Extension, replace_col, Column, DFSchema, DFSchemaRef, Expr, GroupingSet, JoinType,
27-
LogicalPlan, UserDefinedLogicalNode,
26+
plan::Extension, replace_col, Column, DFSchema, DFSchemaRef, Expr, ExprSchemable,
27+
GroupingSet, JoinType, LogicalPlan, UserDefinedLogicalNode,
2828
},
2929
physical_plan::{aggregates::AggregateFunction, functions::BuiltinScalarFunction},
3030
scalar::ScalarValue,
@@ -797,6 +797,12 @@ impl CubeScanWrapperNode {
797797
// When generating column expression that points to literal member it would render literal and generate alias
798798
// Here it should just generate the literal
799799
// 2. It would not allow to provide aliases for expressions, instead it usually generates them
800+
801+
let data_type = expr
802+
.get_type(&node.schema)
803+
.and_then(|dt| Self::generate_sql_type(generator.clone(), dt))
804+
.unwrap_or_else(|_| "".to_string());
805+
800806
let (expr, sql) = Self::generate_sql_for_expr(
801807
plan.clone(),
802808
new_sql,
@@ -806,7 +812,11 @@ impl CubeScanWrapperNode {
806812
Arc::new(HashMap::new()),
807813
)
808814
.await?;
809-
columns.push(AliasedColumn { expr, alias });
815+
columns.push(AliasedColumn {
816+
expr,
817+
alias,
818+
data_type,
819+
});
810820
new_sql = sql;
811821
}
812822

@@ -1221,13 +1231,21 @@ impl CubeScanWrapperNode {
12211231
let join_condition = join_condition[0].expr.clone();
12221232
sql = new_sql;
12231233

1234+
let data_type = condition
1235+
.get_type(&schema)
1236+
.and_then(|dt| {
1237+
Self::generate_sql_type(generator.clone(), dt)
1238+
})
1239+
.unwrap_or_else(|_| "".to_string());
1240+
12241241
let join_sql_expression = {
12251242
// TODO this is NOT a proper way to generate member expr here
12261243
// TODO Do we even want a full-blown member expression here? or arguments + expr will be enough?
12271244
let res = Self::make_member_def(
12281245
&AliasedColumn {
12291246
expr: join_condition,
12301247
alias: "__join__alias__unused".to_string(),
1248+
data_type,
12311249
},
12321250
&ungrouped_scan_node.used_cubes,
12331251
)?;
@@ -1518,6 +1536,13 @@ impl CubeScanWrapperNode {
15181536
} else {
15191537
original_expr.clone()
15201538
};
1539+
//let data_type = expr.get_type(&schema.clone())?;
1540+
//let data_type = Self::generate_sql_type(generator.clone(), data_type.clone())?;
1541+
let data_type = expr
1542+
.get_type(&schema)
1543+
.and_then(|dt| Self::generate_sql_type(generator.clone(), dt))
1544+
.unwrap_or_else(|_| "".to_string());
1545+
15211546
let (expr_sql, new_sql_query) = Self::generate_sql_for_expr(
15221547
plan.clone(),
15231548
sql,
@@ -1535,6 +1560,7 @@ impl CubeScanWrapperNode {
15351560
aliased_columns.push(AliasedColumn {
15361561
expr: expr_sql,
15371562
alias,
1563+
data_type,
15381564
});
15391565
}
15401566
Ok((aliased_columns, sql))
@@ -2040,6 +2066,10 @@ impl CubeScanWrapperNode {
20402066
asc,
20412067
nulls_first,
20422068
} => {
2069+
let data_type = expr
2070+
.get_type(plan.schema())
2071+
.and_then(|dt| Self::generate_sql_type(sql_generator.clone(), dt))
2072+
.unwrap_or_else(|_| "".to_string());
20432073
let (expr, sql_query) = Self::generate_sql_for_expr(
20442074
plan.clone(),
20452075
sql_query,
@@ -2051,7 +2081,7 @@ impl CubeScanWrapperNode {
20512081
.await?;
20522082
let resulting_sql = sql_generator
20532083
.get_sql_templates()
2054-
.sort_expr(expr, asc, nulls_first)
2084+
.sort_expr(expr, asc, nulls_first, data_type)
20552085
.map_err(|e| {
20562086
DataFusionError::Internal(format!(
20572087
"Can't generate SQL for sort expr: {}",

rust/cubesql/cubesql/src/transport/service.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -349,6 +349,7 @@ pub struct SqlTemplates {
349349
pub struct AliasedColumn {
350350
pub expr: String,
351351
pub alias: String,
352+
pub data_type: String,
352353
}
353354

354355
#[derive(Debug, Clone, Serialize, Deserialize)]
@@ -357,6 +358,7 @@ pub struct TemplateColumn {
357358
pub alias: String,
358359
pub aliased: String,
359360
pub index: usize,
361+
pub data_type: String,
360362
}
361363

362364
impl SqlTemplates {
@@ -498,6 +500,7 @@ impl SqlTemplates {
498500
alias: c.alias.to_string(),
499501
aliased: self.alias_expr(&c.expr, &c.alias)?,
500502
index: i + 1,
503+
data_type: c.data_type,
501504
})
502505
})
503506
.collect::<Result<Vec<_>, _>>()
@@ -708,10 +711,11 @@ impl SqlTemplates {
708711
expr: String,
709712
asc: bool,
710713
nulls_first: bool,
714+
data_type: String,
711715
) -> Result<String, CubeError> {
712716
self.render_template(
713717
"expressions/sort",
714-
context! { expr => expr, asc => asc, nulls_first => nulls_first },
718+
context! { expr => expr, asc => asc, nulls_first => nulls_first, data_type => data_type },
715719
)
716720
}
717721

0 commit comments

Comments (0)