Skip to content

Commit 765cd77

Browse files
authored
feat(schema-compiler): Support time series queries in MySQL dialect in Tesseract (#10078)
* fix formatInterval() as MySQL doesn't support milliseconds
* more types
* add ilike templates for tesseract
* add time_series_* templates
* update snapshots
* add mysql + tesseract tests in CI
* skip some tests for tesseract
* cast as TIMESTAMP
* skip some tests for tesseract
* fix templates
* skip some tests for tesseract
* fix templates
* update snapshots
* fix skip tests
1 parent 8a67db4 commit 765cd77

File tree

4 files changed

+13690
-5295
lines changed

4 files changed

+13690
-5295
lines changed

.github/workflows/drivers-tests.yml

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -285,6 +285,8 @@ jobs:
285285
use_tesseract_sql_planner: true
286286
- database: databricks-jdbc
287287
use_tesseract_sql_planner: true
288+
- database: mysql
289+
use_tesseract_sql_planner: true
288290
fail-fast: false
289291

290292
steps:

packages/cubejs-schema-compiler/src/adapter/MysqlQuery.ts

Lines changed: 74 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -3,20 +3,21 @@ import { getEnv, parseSqlInterval } from '@cubejs-backend/shared';
33
import { BaseQuery } from './BaseQuery';
44
import { BaseFilter } from './BaseFilter';
55
import { UserError } from '../compiler/UserError';
6+
import { BaseTimeDimension } from './BaseTimeDimension';
67

78
const GRANULARITY_TO_INTERVAL = {
8-
day: (date) => `DATE_FORMAT(${date}, '%Y-%m-%dT00:00:00.000')`,
9-
week: (date) => `DATE_FORMAT(DATE_ADD('1900-01-01', INTERVAL TIMESTAMPDIFF(WEEK, '1900-01-01', ${date}) WEEK), '%Y-%m-%dT00:00:00.000')`,
10-
hour: (date) => `DATE_FORMAT(${date}, '%Y-%m-%dT%H:00:00.000')`,
11-
minute: (date) => `DATE_FORMAT(${date}, '%Y-%m-%dT%H:%i:00.000')`,
12-
second: (date) => `DATE_FORMAT(${date}, '%Y-%m-%dT%H:%i:%S.000')`,
13-
month: (date) => `DATE_FORMAT(${date}, '%Y-%m-01T00:00:00.000')`,
14-
quarter: (date) => `DATE_ADD('1900-01-01', INTERVAL TIMESTAMPDIFF(QUARTER, '1900-01-01', ${date}) QUARTER)`,
15-
year: (date) => `DATE_FORMAT(${date}, '%Y-01-01T00:00:00.000')`
9+
day: (date: string) => `DATE_FORMAT(${date}, '%Y-%m-%dT00:00:00.000')`,
10+
week: (date: string) => `DATE_FORMAT(DATE_ADD('1900-01-01', INTERVAL TIMESTAMPDIFF(WEEK, '1900-01-01', ${date}) WEEK), '%Y-%m-%dT00:00:00.000')`,
11+
hour: (date: string) => `DATE_FORMAT(${date}, '%Y-%m-%dT%H:00:00.000')`,
12+
minute: (date: string) => `DATE_FORMAT(${date}, '%Y-%m-%dT%H:%i:00.000')`,
13+
second: (date: string) => `DATE_FORMAT(${date}, '%Y-%m-%dT%H:%i:%S.000')`,
14+
month: (date: string) => `DATE_FORMAT(${date}, '%Y-%m-01T00:00:00.000')`,
15+
quarter: (date: string) => `DATE_ADD('1900-01-01', INTERVAL TIMESTAMPDIFF(QUARTER, '1900-01-01', ${date}) QUARTER)`,
16+
year: (date: string) => `DATE_FORMAT(${date}, '%Y-01-01T00:00:00.000')`
1617
};
1718

1819
class MysqlFilter extends BaseFilter {
19-
public likeIgnoreCase(column, not, param, type) {
20+
public likeIgnoreCase(column: string, not: boolean, param, type: string) {
2021
const p = (!type || type === 'contains' || type === 'ends') ? '%' : '';
2122
const s = (!type || type === 'contains' || type === 'starts') ? '%' : '';
2223
return `${column}${not ? ' NOT' : ''} LIKE CONCAT('${p}', ${this.allocateParam(param)}, '${s}')`;
@@ -125,44 +126,59 @@ export class MysqlQuery extends BaseQuery {
125126
return `'${intervalParsed.hour}:${intervalParsed.minute}:${intervalParsed.second}' HOUR_SECOND`;
126127
} else if (intervalParsed.minute && intervalParsed.second && intKeys === 2) {
127128
return `'${intervalParsed.minute}:${intervalParsed.second}' MINUTE_SECOND`;
129+
} else if (intervalParsed.hour && intKeys === 1) {
130+
return `${intervalParsed.hour} HOUR`;
131+
} else if (intervalParsed.minute && intKeys === 1) {
132+
return `${intervalParsed.minute} MINUTE`;
133+
} else if (intervalParsed.second && intKeys === 1) {
134+
return `${intervalParsed.second} SECOND`;
135+
} else if (intervalParsed.millisecond && intKeys === 1) {
136+
// MySQL doesn't support MILLISECOND, use MICROSECOND instead (1ms = 1000μs)
137+
return `${intervalParsed.millisecond * 1000} MICROSECOND`;
128138
}
129139

130-
// No need to support microseconds.
131-
132140
throw new Error(`Cannot transform interval expression "${interval}" to MySQL dialect`);
133141
}
134142

135-
public escapeColumnName(name) {
143+
public escapeColumnName(name: string): string {
136144
return `\`${name}\``;
137145
}
138146

139-
public seriesSql(timeDimension) {
147+
public seriesSql(timeDimension: BaseTimeDimension): string {
140148
const values = timeDimension.timeSeries().map(
141149
([from, to]) => `select '${from}' f, '${to}' t`
142150
).join(' UNION ALL ');
143151
return `SELECT TIMESTAMP(dates.f) date_from, TIMESTAMP(dates.t) date_to FROM (${values}) AS dates`;
144152
}
145153

146-
public concatStringsSql(strings) {
154+
public concatStringsSql(strings: string[]): string {
147155
return `CONCAT(${strings.join(', ')})`;
148156
}
149157

150-
public unixTimestampSql() {
158+
public unixTimestampSql(): string {
151159
return 'UNIX_TIMESTAMP()';
152160
}
153161

154-
public wrapSegmentForDimensionSelect(sql) {
162+
public wrapSegmentForDimensionSelect(sql: string): string {
155163
return `IF(${sql}, 1, 0)`;
156164
}
157165

158-
public preAggregationTableName(cube, preAggregationName, skipSchema) {
166+
public preAggregationTableName(cube: string, preAggregationName: string, skipSchema: boolean): string {
159167
const name = super.preAggregationTableName(cube, preAggregationName, skipSchema);
160168
if (name.length > 64) {
161169
throw new UserError(`MySQL can not work with table names that longer than 64 symbols. Consider using the 'sqlAlias' attribute in your cube and in your pre-aggregation definition for ${name}.`);
162170
}
163171
return name;
164172
}
165173

174+
public supportGeneratedSeriesForCustomTd(): boolean {
175+
return true;
176+
}
177+
178+
public intervalString(interval: string): string {
179+
return this.formatInterval(interval);
180+
}
181+
166182
public sqlTemplates() {
167183
const templates = super.sqlTemplates();
168184
// PERCENTILE_CONT works but requires PARTITION BY
@@ -172,11 +188,51 @@ export class MysqlQuery extends BaseQuery {
172188
// NOTE: this template contains a comma; two order expressions are being generated
173189
templates.expressions.sort = '{{ expr }} IS NULL {% if nulls_first %}DESC{% else %}ASC{% endif %}, {{ expr }} {% if asc %}ASC{% else %}DESC{% endif %}';
174190
delete templates.expressions.ilike;
175-
templates.types.string = 'VARCHAR';
191+
templates.types.string = 'CHAR';
176192
templates.types.boolean = 'TINYINT';
177193
templates.types.timestamp = 'DATETIME';
178194
delete templates.types.interval;
179195
templates.types.binary = 'BLOB';
196+
197+
templates.expressions.concat_strings = 'CONCAT({{ strings | join(\',\' ) }})';
198+
199+
templates.filters.like_pattern = 'CONCAT({% if start_wild %}\'%\'{% else %}\'\'{% endif %}, LOWER({{ value }}), {% if end_wild %}\'%\'{% else %}\'\'{% endif %})';
200+
templates.tesseract.ilike = 'LOWER({{ expr }}) {% if negated %}NOT {% endif %}LIKE {{ pattern }}';
201+
202+
templates.statements.time_series_select = 'SELECT TIMESTAMP(dates.f) date_from, TIMESTAMP(dates.t) date_to \n' +
203+
'FROM (\n' +
204+
'{% for time_item in seria %}' +
205+
' select \'{{ time_item[0] }}\' f, \'{{ time_item[1] }}\' t \n' +
206+
'{% if not loop.last %} UNION ALL\n{% endif %}' +
207+
'{% endfor %}' +
208+
') AS dates';
209+
210+
templates.statements.generated_time_series_select =
211+
'WITH RECURSIVE date_series AS (\n' +
212+
' SELECT TIMESTAMP({{ start }}) AS date_from\n' +
213+
' UNION ALL\n' +
214+
' SELECT DATE_ADD(date_from, INTERVAL {{ granularity }})\n' +
215+
' FROM date_series\n' +
216+
' WHERE DATE_ADD(date_from, INTERVAL {{ granularity }}) <= TIMESTAMP({{ end }})\n' +
217+
')\n' +
218+
'SELECT CAST(date_from AS DATETIME) AS date_from,\n' +
219+
' CAST(DATE_SUB(DATE_ADD(date_from, INTERVAL {{ granularity }}), INTERVAL 1000 MICROSECOND) AS DATETIME) AS date_to\n' +
220+
'FROM date_series';
221+
222+
templates.statements.generated_time_series_with_cte_range_source =
223+
'WITH RECURSIVE date_series AS (\n' +
224+
' SELECT {{ range_source }}.{{ min_name }} AS date_from,\n' +
225+
' {{ range_source }}.{{ max_name }} AS max_date\n' +
226+
' FROM {{ range_source }}\n' +
227+
' UNION ALL\n' +
228+
' SELECT DATE_ADD(date_from, INTERVAL {{ granularity }}), max_date\n' +
229+
' FROM date_series\n' +
230+
' WHERE DATE_ADD(date_from, INTERVAL {{ granularity }}) <= max_date\n' +
231+
')\n' +
232+
'SELECT CAST(date_from AS DATETIME) AS date_from,\n' +
233+
' CAST(DATE_SUB(DATE_ADD(date_from, INTERVAL {{ granularity }}), INTERVAL 1000 MICROSECOND) AS DATETIME) AS date_to\n' +
234+
'FROM date_series';
235+
180236
return templates;
181237
}
182238
}

packages/cubejs-testing-drivers/fixtures/mysql.json

Lines changed: 41 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -174,11 +174,50 @@
174174
"SQL API: Extended nested Rollup over asterisk",
175175
"SQL API: SQL push down push to cube quoted alias",
176176
"SQL API: Date/time comparison with date_trunc with SQL push down",
177+
"SQL API: Rolling Window YTD (year + month + day + date_trunc equal)",
178+
"SQL API: Rolling Window YTD (year + month + day + date_trunc IN)"
179+
],
180+
"tesseractSkip": [
181+
"querying custom granularities ECommerce: count by three_months_by_march + no dimension",
182+
"querying custom granularities ECommerce: count by three_months_by_march + dimension",
183+
"querying BigECommerce: rolling window YTD (month + week)",
184+
"querying BigECommerce: rolling window YTD (month + week + no gran)",
185+
177186

178187
"---------------------------------------",
179-
"Error during rewrite: Can't detect Cube query and it may be not supported yet.",
188+
"SKIPPED SQL API (Need work)",
180189
"---------------------------------------",
190+
"SQL API: reuse params",
191+
"SQL API: Nested Rollup",
192+
"SQL API: Nested Rollup with aliases",
181193
"SQL API: Rolling Window YTD (year + month + day + date_trunc equal)",
182-
"SQL API: Rolling Window YTD (year + month + day + date_trunc IN)"
194+
"SQL API: Rolling Window YTD (year + month + day + date_trunc IN)",
195+
"SQL API: SQL push down push to cube quoted alias",
196+
"SQL API: Date/time comparison with date_trunc with SQL push down",
197+
198+
"---- Different results comparing to baseQuery version. Need to investigate ----",
199+
"SQL API: Timeshift measure from cube",
200+
"querying ECommerce: dimensions",
201+
"querying ECommerce: dimensions + order",
202+
"querying ECommerce: dimensions + limit",
203+
"querying ECommerce: dimensions + total",
204+
"querying ECommerce: dimensions + order + limit + total",
205+
"querying ECommerce: dimensions + order + total + offset",
206+
"querying ECommerce: dimensions + order + limit + total + offset",
207+
"filtering ECommerce: contains dimensions, first",
208+
"filtering ECommerce: contains dimensions, second",
209+
"filtering ECommerce: startsWith + dimensions, first",
210+
"filtering ECommerce: startsWith + dimensions, second",
211+
"filtering ECommerce: endsWith + dimensions, first",
212+
"filtering ECommerce: endsWith + dimensions, second",
213+
"querying BigECommerce: rolling window YTD without date range",
214+
"querying BigECommerce: rolling window YTD (month + week + day + no gran)",
215+
"querying BigECommerce: rolling window YTD (month + week + day)",
216+
"querying BigECommerce: rolling window YTD (month)",
217+
"querying BigECommerce: rolling window by 2 month without date range",
218+
"querying BigECommerce: rolling window by 2 month",
219+
"querying BigECommerce: rolling window by 2 week",
220+
"querying BigECommerce: rolling window by 2 day without date range",
221+
"querying BigECommerce: rolling window by 2 day"
183222
]
184223
}

0 commit comments

Comments
 (0)