From ad251c27e24423a8fa5aba4d218d51c48faaeec2 Mon Sep 17 00:00:00 2001 From: Alexandr Romanenko Date: Fri, 16 May 2025 18:56:52 +0200 Subject: [PATCH 1/5] feat(tesseract): Initial BigQuery support --- .../src/adapter/BaseQuery.js | 24 ++++++++--- .../src/adapter/BigqueryQuery.ts | 11 +++++ .../src/adapter/CubeStoreQuery.ts | 5 +++ .../fixtures/bigquery.json | 3 +- .../fixtures/postgres.json | 3 +- .../src/cube_bridge/base_query_options.rs | 2 + .../src/cube_bridge/base_tools.rs | 4 ++ .../src/physical_plan_builder/builder.rs | 23 ++++++++-- .../cubesqlplanner/src/plan/builder/select.rs | 12 ++++++ .../cubesqlplanner/src/plan/expression.rs | 2 + .../cubesqlplanner/src/plan/join.rs | 4 +- .../cubesqlplanner/src/plan/time_series.rs | 8 ++-- .../cubesqlplanner/src/planner/base_query.rs | 5 +-- .../src/planner/filter/base_filter.rs | 43 +++++++++++-------- .../planners/dimension_subquery_planner.rs | 1 + .../multi_stage/member_query_planner.rs | 2 + .../multi_stage/rolling_window_planner.rs | 6 +-- .../src/planner/query_properties.rs | 22 ++++++++-- .../cubesqlplanner/src/planner/query_tools.rs | 31 +++---------- .../sql_evaluator/sql_nodes/auto_prefix.rs | 26 +++++++++-- .../sql_evaluator/sql_nodes/evaluate_sql.rs | 15 ++++--- .../final_pre_aggregation_measure.rs | 11 ++--- .../sql_nodes/render_references.rs | 11 ++--- .../sql_evaluator/sql_nodes/time_dimension.rs | 4 +- .../src/planner/sql_templates/filter.rs | 22 +--------- .../src/planner/sql_templates/plan.rs | 31 ++++--------- 26 files changed, 194 insertions(+), 137 deletions(-) diff --git a/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js b/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js index c7617ddf8e9b2..1141cd9a73efa 100644 --- a/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js +++ b/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js @@ -326,7 +326,7 @@ export class BaseQuery { this.allFilters = this.timeDimensions.concat(this.segments).concat(this.filters); this.useNativeSqlPlanner = this.options.useNativeSqlPlanner ?? getEnv('nativeSqlPlanner'); this.canUseNativeSqlPlannerPreAggregation = false; - if (this.useNativeSqlPlanner) { + if (this.useNativeSqlPlanner && !this.neverUseSqlPlannerPreaggregation()) { const hasMultiStageMeasures = this.fullKeyQueryAggregateMeasures({ hasMultipliedForPreAggregation: true }).multiStageMembers.length > 0; this.canUseNativeSqlPlannerPreAggregation = hasMultiStageMeasures; } @@ -349,6 +349,11 @@ export class BaseQuery { this.initUngrouped(); } + // Temporary workaround to avoid checking for multistage in CubeStoreQuery, since that could lead to errors when HLL functions are present in the query. + neverUseSqlPlannerPreaggregation() { + return false; + } + prebuildJoin() { try { // TODO allJoinHints should contain join hints form pre-agg @@ -747,7 +752,8 @@ export class BaseQuery { } } - return this.buildSqlAndParamsRust(exportAnnotatedSql); + const res = this.buildSqlAndParamsRust(exportAnnotatedSql); + return res; } if (!this.options.preAggregationQuery && !this.options.disableExternalPreAggregations && this.externalQueryClass) { @@ -756,7 +762,7 @@ export class BaseQuery { } } - return this.compilers.compiler.withQuery( + const res = this.compilers.compiler.withQuery( this, () => this.cacheValue( ['buildSqlAndParams', exportAnnotatedSql], @@ -768,6 +774,7 @@ export class BaseQuery { { cache: this.queryCache } ) ); + return res; } buildSqlAndParamsRust(exportAnnotatedSql) { @@ -775,7 +782,6 @@ export class BaseQuery { R.map((hash) => ((!hash || !hash.id) ? null : hash)), R.reject(R.isNil), )(this.options.order); - const queryParams = { measures: this.options.measures, dimensions: this.options.dimensions, @@ -792,7 +798,8 @@ export class BaseQuery { baseTools: this, ungrouped: this.options.ungrouped, exportAnnotatedSql: exportAnnotatedSql === true, - preAggregationQuery: this.options.preAggregationQuery + preAggregationQuery: this.options.preAggregationQuery, + totalQuery: this.options.totalQuery, }; const buildResult = nativeBuildSqlAndParams(queryParams); @@ -871,12 +878,12 @@ export class BaseQuery { // FIXME helper for native generator, maybe should be moved entirely to rust generateTimeSeries(granularity, dateRange) { - return timeSeriesBase(granularity, dateRange); + return timeSeriesBase(granularity, dateRange, { timestampPrecision: this.timestampPrecision() }); } // FIXME helper for native generator, maybe should be moved entirely to rust generateCustomTimeSeries(granularityInterval, dateRange, origin) { - return timeSeriesFromCustomInterval(granularityInterval, dateRange, moment(origin), { timestampPrecision: 3 }); + return timeSeriesFromCustomInterval(granularityInterval, dateRange, moment(origin), { timestampPrecision: this.timestampPrecision() }); } getPreAggregationByName(cube, preAggregationName) { @@ -3827,6 +3834,9 @@ export class BaseQuery { like_escape: '{{ like_expr }} ESCAPE {{ escape_char }}', concat_strings: '{{ strings | join(\' || \' ) }}', }, + tesseract: { + ilike: '{{ expr }} {% if negated %}NOT {% endif %}ILIKE {{ pattern }}', // May require different overloads in Tesseract than the ilike from expressions used in SQLAPI. + }, filters: { equals: '{{ column }} = {{ value }}{{ is_null_check }}', not_equals: '{{ column }} <> {{ value }}{{ is_null_check }}', diff --git a/packages/cubejs-schema-compiler/src/adapter/BigqueryQuery.ts b/packages/cubejs-schema-compiler/src/adapter/BigqueryQuery.ts index db7e0e4056f2c..27d6816263e2c 100644 --- a/packages/cubejs-schema-compiler/src/adapter/BigqueryQuery.ts +++ b/packages/cubejs-schema-compiler/src/adapter/BigqueryQuery.ts @@ -261,13 +261,24 @@ export class BigqueryQuery extends BaseQuery { templates.expressions.timestamp_literal = 'TIMESTAMP(\'{{ value }}\')'; delete templates.expressions.ilike; delete templates.expressions.like_escape; + templates.filters.like_pattern = 'CONCAT({% if start_wild %}\'%\'{% else %}\'\'{% endif %}, LOWER({{ value }}), {% if end_wild %}\'%\'{% else %}\'\'{% endif %})'; + templates.tesseract.ilike = 'LOWER({{ expr }}) {% if negated %}NOT {% endif %} LIKE {{ pattern }}'; templates.types.boolean = 'BOOL'; templates.types.float = 'FLOAT64'; templates.types.double = 'FLOAT64'; templates.types.decimal = 'BIGDECIMAL({{ precision }},{{ scale }})'; templates.types.binary = 'BYTES'; + templates.expressions.cast_to_string = 'CAST({{ expr }} AS STRING)'; templates.operators.is_not_distinct_from = 'IS NOT DISTINCT FROM'; templates.join_types.full = 'FULL'; + templates.statements.time_series_select = 'SELECT DATETIME(TIMESTAMP(f)) date_from, DATETIME(TIMESTAMP(t)) date_to \n' + + 'FROM (\n' + + '{% for time_item in seria %}' + + ' select \'{{ time_item[0] }}\' f, \'{{ time_item[1] }}\' t \n' + + '{% if not loop.last %} UNION ALL\n{% endif %}' + + '{% endfor %}' + + ') AS dates'; + return templates; } } diff --git a/packages/cubejs-schema-compiler/src/adapter/CubeStoreQuery.ts b/packages/cubejs-schema-compiler/src/adapter/CubeStoreQuery.ts index c64ef3fd0984c..5717969612fd7 100644 --- a/packages/cubejs-schema-compiler/src/adapter/CubeStoreQuery.ts +++ b/packages/cubejs-schema-compiler/src/adapter/CubeStoreQuery.ts @@ -68,6 +68,11 @@ export class CubeStoreQuery extends BaseQuery { return `date_trunc('${GRANULARITY_TO_INTERVAL[granularity]}', ${dimension})`; } + // Temporary workaround to avoid checking for multistage in CubeStoreQuery, since that could lead to errors when HLL functions are present in the query. + public neverUseSqlPlannerPreaggregation() { + return true; + } + /** * Returns sql for source expression floored to timestamps aligned with * intervals relative to origin timestamp point. diff --git a/packages/cubejs-testing-drivers/fixtures/bigquery.json b/packages/cubejs-testing-drivers/fixtures/bigquery.json index 9af4155bb8da8..d762448755f00 100644 --- a/packages/cubejs-testing-drivers/fixtures/bigquery.json +++ b/packages/cubejs-testing-drivers/fixtures/bigquery.json @@ -17,7 +17,8 @@ "CUBESQL_SQL_PUSH_DOWN": "true", "CUBEJS_DB_EXPORT_BUCKET": "cube-open-source-export-bucket", - "CUBEJS_DB_EXPORT_BUCKET_TYPE": "gcp" + "CUBEJS_DB_EXPORT_BUCKET_TYPE": "gcp", + "_CUBEJS_TESSERACT_SQL_PLANNER": "true" }, "ports" : ["4000", "5656"] }, diff --git a/packages/cubejs-testing-drivers/fixtures/postgres.json b/packages/cubejs-testing-drivers/fixtures/postgres.json index df16d61ecc11f..4427f0f8f5c99 100644 --- a/packages/cubejs-testing-drivers/fixtures/postgres.json +++ b/packages/cubejs-testing-drivers/fixtures/postgres.json @@ -12,7 +12,8 @@ "CUBEJS_PG_SQL_PORT": "5656", "CUBEJS_SQL_USER": "admin", "CUBEJS_SQL_PASSWORD": "admin_password", - "CUBESQL_SQL_PUSH_DOWN": "true" + "CUBESQL_SQL_PUSH_DOWN": "true", + "_CUBEJS_TESSERACT_SQL_PLANNER": "true" }, "depends_on": ["data"], "links": ["data"], diff --git a/rust/cubesqlplanner/cubesqlplanner/src/cube_bridge/base_query_options.rs b/rust/cubesqlplanner/cubesqlplanner/src/cube_bridge/base_query_options.rs index da4ebe8da8c6b..94687ba9d1971 100644 --- a/rust/cubesqlplanner/cubesqlplanner/src/cube_bridge/base_query_options.rs +++ b/rust/cubesqlplanner/cubesqlplanner/src/cube_bridge/base_query_options.rs @@ -63,6 +63,8 @@ pub struct BaseQueryOptionsStatic { pub export_annotated_sql: bool, #[serde(rename = "preAggregationQuery")] pub pre_aggregation_query: Option, + #[serde(rename = "totalQuery")] + pub total_query: Option, } #[nativebridge::native_bridge(BaseQueryOptionsStatic)] diff --git a/rust/cubesqlplanner/cubesqlplanner/src/cube_bridge/base_tools.rs b/rust/cubesqlplanner/cubesqlplanner/src/cube_bridge/base_tools.rs index 3bfdb34a29907..99a5463bca518 100644 --- a/rust/cubesqlplanner/cubesqlplanner/src/cube_bridge/base_tools.rs +++ b/rust/cubesqlplanner/cubesqlplanner/src/cube_bridge/base_tools.rs @@ -34,6 +34,8 @@ pub trait BaseTools { used_filters: Option>, ) -> Result, CubeError>; fn timestamp_precision(&self) -> Result; + fn time_stamp_cast(&self, field: String) -> Result; //TODO move to templates + fn date_time_cast(&self, field: String) -> Result; //TODO move to templates fn in_db_time_zone(&self, date: String) -> Result; fn generate_time_series( &self, @@ -47,6 +49,8 @@ pub trait BaseTools { origin: String, ) -> Result>, CubeError>; fn get_allocated_params(&self) -> Result, CubeError>; + fn subtract_interval(&self, date: String, interval: String) -> Result; + fn add_interval(&self, date: String, interval: String) -> Result; fn all_cube_members(&self, path: String) -> Result, CubeError>; //===== TODO Move to templates fn hll_init(&self, sql: String) -> Result; diff --git a/rust/cubesqlplanner/cubesqlplanner/src/physical_plan_builder/builder.rs b/rust/cubesqlplanner/cubesqlplanner/src/physical_plan_builder/builder.rs index 695f1f1773131..efb56be1ff25c 100644 --- a/rust/cubesqlplanner/cubesqlplanner/src/physical_plan_builder/builder.rs +++ b/rust/cubesqlplanner/cubesqlplanner/src/physical_plan_builder/builder.rs @@ -16,6 +16,8 @@ use itertools::Itertools; use std::collections::HashMap; use std::collections::HashSet; use std::rc::Rc; +const TOTAL_COUNT: &'static str = "total_count"; +const ORIGINAL_QUERY: &'static str = "original_query"; #[derive(Clone, Debug)] struct PhysicalPlanBuilderContext { @@ -56,7 +58,7 @@ pub struct PhysicalPlanBuilder { impl PhysicalPlanBuilder { pub fn new(query_tools: Rc) -> Self { - let plan_sql_templates = PlanSqlTemplates::new(query_tools.templates_render()); + let plan_sql_templates = query_tools.plan_sql_templates(); Self { query_tools, plan_sql_templates, @@ -67,10 +69,25 @@ impl PhysicalPlanBuilder { &self, logical_plan: Rc, original_sql_pre_aggregations: HashMap, + total_query: bool, ) -> Result, CubeError> { let mut context = PhysicalPlanBuilderContext::default(); context.original_sql_pre_aggregations = original_sql_pre_aggregations; - self.build_impl(logical_plan, &context) + let query = self.build_impl(logical_plan, &context)?; + let query = if total_query { + self.build_total_count(query, &context)? + } else { + query + }; + Ok(query) + } + + fn build_total_count(&self, source: Rc, context: &PhysicalPlanBuilderContext) -> Result, CubeError> { + fn build_total_count( + &self, + source: Rc