From 9cb6f98b05d3ff054e8fa6dced858a4649f2dcf0 Mon Sep 17 00:00:00 2001 From: Konstantin Burkalev Date: Fri, 28 Mar 2025 18:05:25 +0200 Subject: [PATCH 1/3] feat(schema-compiler)!: Set preAggregation.allowNonStrictDateRangeMatch = true by default --- packages/cubejs-backend-shared/src/env.ts | 3 +++ packages/cubejs-schema-compiler/src/compiler/CubeSymbols.ts | 6 ++++++ packages/cubejs-schema-compiler/test/unit/schema.test.ts | 4 ++++ 3 files changed, 13 insertions(+) diff --git a/packages/cubejs-backend-shared/src/env.ts b/packages/cubejs-backend-shared/src/env.ts index eaad63fc5d03f..88359d4ec2c0e 100644 --- a/packages/cubejs-backend-shared/src/env.ts +++ b/packages/cubejs-backend-shared/src/env.ts @@ -226,6 +226,9 @@ const variables: Record any> = { transpilationWorkerThreads: () => get('CUBEJS_TRANSPILATION_WORKER_THREADS') .default('false') .asBoolStrict(), + allowNonStrictDateRangeMatching: () => get('CUBEJS_PRE_AGGREGATIONS_ALLOW_NON_STRICT_DATE_RANGE_MATCH') + .default('true') + .asBoolStrict(), transpilationWorkerThreadsCount: () => get('CUBEJS_TRANSPILATION_WORKER_THREADS_COUNT') .default('0') .asInt(), diff --git a/packages/cubejs-schema-compiler/src/compiler/CubeSymbols.ts b/packages/cubejs-schema-compiler/src/compiler/CubeSymbols.ts index dffa59b9e017c..bf1ee3a74681b 100644 --- a/packages/cubejs-schema-compiler/src/compiler/CubeSymbols.ts +++ b/packages/cubejs-schema-compiler/src/compiler/CubeSymbols.ts @@ -248,6 +248,12 @@ export class CubeSymbols { preAggregation.type = 'rollup'; } + if (preAggregation.allowNonStrictDateRangeMatch === undefined && + ['originalSql', 'rollupJoin', 'rollup'].includes(preAggregation.type) && + (preAggregation.timeDimension || preAggregation.timeDimensions)) { + preAggregation.allowNonStrictDateRangeMatch = getEnv('allowNonStrictDateRangeMatching'); + } + if (preAggregation.scheduledRefresh === undefined && preAggregation.type !== 'rollupJoin' && preAggregation.type !== 'rollupLambda') { preAggregation.scheduledRefresh = 
getEnv('scheduledRefreshDefault'); } diff --git a/packages/cubejs-schema-compiler/test/unit/schema.test.ts b/packages/cubejs-schema-compiler/test/unit/schema.test.ts index aab99d32ccf97..66579d665f9c1 100644 --- a/packages/cubejs-schema-compiler/test/unit/schema.test.ts +++ b/packages/cubejs-schema-compiler/test/unit/schema.test.ts @@ -69,6 +69,7 @@ describe('Schema Testing', () => { refreshRangeEnd: { sql: expect.any(Function), }, + allowNonStrictDateRangeMatch: true, }, countCreatedAt: { external: true, @@ -102,6 +103,7 @@ describe('Schema Testing', () => { refreshRangeEnd: { sql: expect.any(Function), }, + allowNonStrictDateRangeMatch: true, } }); }); @@ -128,6 +130,7 @@ describe('Schema Testing', () => { refreshRangeEnd: { sql: expect.any(Function), }, + allowNonStrictDateRangeMatch: true, }, countCreatedAt: { // because preview @@ -162,6 +165,7 @@ describe('Schema Testing', () => { refreshRangeEnd: { sql: expect.any(Function), }, + allowNonStrictDateRangeMatch: true, } }); }); From d57abc9b19577a8043de6d2cdee02844f86f3b58 Mon Sep 17 00:00:00 2001 From: Konstantin Burkalev Date: Fri, 28 Mar 2025 19:31:44 +0200 Subject: [PATCH 2/3] fix tests --- .../test/integration/postgres/pre-aggregations.test.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/cubejs-schema-compiler/test/integration/postgres/pre-aggregations.test.ts b/packages/cubejs-schema-compiler/test/integration/postgres/pre-aggregations.test.ts index b3d191d8f5066..160c5037a2f74 100644 --- a/packages/cubejs-schema-compiler/test/integration/postgres/pre-aggregations.test.ts +++ b/packages/cubejs-schema-compiler/test/integration/postgres/pre-aggregations.test.ts @@ -233,7 +233,8 @@ describe('PreAggregations', () => { countCustomGranularity: { measures: [count], timeDimension: createdAt, - granularity: 'hourTenMinOffset' + granularity: 'hourTenMinOffset', + allowNonStrictDateRangeMatch: false }, sourceAndIdRollup: { measures: [count], From 
9fea92f742e929c553cc17991a1966d599ec0be2 Mon Sep 17 00:00:00 2001 From: Igor Lukanin Date: Mon, 31 Mar 2025 13:03:13 +0200 Subject: [PATCH 3/3] Update docs --- .../caching/matching-pre-aggregations.mdx | 7 +++--- .../visualization-tools/superset.mdx | 9 ------- .../configuration/environment-variables.mdx | 13 ++++++++-- .../reference/data-model/pre-aggregations.mdx | 24 ++++++++++++------- 4 files changed, 31 insertions(+), 22 deletions(-) diff --git a/docs/pages/product/caching/matching-pre-aggregations.mdx b/docs/pages/product/caching/matching-pre-aggregations.mdx index 9916c05b63dd0..857978358d3a2 100644 --- a/docs/pages/product/caching/matching-pre-aggregations.mdx +++ b/docs/pages/product/caching/matching-pre-aggregations.mdx @@ -77,9 +77,10 @@ the values should be the start and end days of the month, i.e., `['2020-01-01T00:00:00.000', '2020-01-31T23:59:59.999']`; when the granularity is `day`, the values should be the start and end hours of the day, i.e., `['2020-01-01T00:00:00.000', '2020-01-01T23:59:59.999']`. Date ranges are -inclusive, and the minimum granularity is `second`. Use the -[`allow_non_strict_date_range_match`][ref-non-strict-date-range-match] to allow -a pre-aggregation to match a non-strict date range anyway. +inclusive, and the minimum granularity is `second`. This requirement is relaxed +via the [`allow_non_strict_date_range_match`][ref-non-strict-date-range-match] +parameter of pre-aggregations: it allows matching _non-strict date ranges_ and is +set to `true` by default. 
- **The time zone in the query must match the time zone of a pre-aggregation.** You can configure a list of time zones that pre-aggregations will be built for using the [`scheduled_refresh_time_zones`][ref-conf-scheduled-refresh-time-zones] diff --git a/docs/pages/product/configuration/visualization-tools/superset.mdx b/docs/pages/product/configuration/visualization-tools/superset.mdx index 8815134c3e9e7..d0d06a477704c 100644 --- a/docs/pages/product/configuration/visualization-tools/superset.mdx +++ b/docs/pages/product/configuration/visualization-tools/superset.mdx @@ -117,15 +117,6 @@ The `COUNT(*)` aggregate function is being mapped to a measure of type [count](/reference/data-model/types-and-formats#count) in Cube's **Orders** data model file. -## Additional configuration - -### Pre-aggregations - -To allow queries from Superset to match pre-aggregations in Cube, [the -`allow_non_strict_date_range_match` -property][ref-schema-ref-preagg-allownonstrict] must be set to `true` in the -pre-aggregation definition. This is because Superset uses loose date ranges when -generating SQL queries. [ref-getting-started]: /product/getting-started/cloud [ref-schema-ref-preagg-allownonstrict]: diff --git a/docs/pages/reference/configuration/environment-variables.mdx b/docs/pages/reference/configuration/environment-variables.mdx index 4e7d712972f1d..5bb4da70027bf 100644 --- a/docs/pages/reference/configuration/environment-variables.mdx +++ b/docs/pages/reference/configuration/environment-variables.mdx @@ -1057,6 +1057,16 @@ to use for storing pre-aggregations. It can be also set using the [`pre_aggregations_schema` configuration option](/reference/configuration/config#pre_aggregations_schema). +## `CUBEJS_PRE_AGGREGATIONS_ALLOW_NON_STRICT_DATE_RANGE_MATCH` + +If `true`, allows non-strict date range matching for pre-aggregations by default. 
+This can be overridden for individual pre-aggregations using the +[`allow_non_strict_date_range_match`][ref-schema-ref-preagg-allownonstrict] parameter. + +| Possible Values | Default in Development | Default in Production | +| --------------- | ---------------------- | --------------------- | +| `true`, `false` | `true` | `true` | + ## `CUBEJS_REFRESH_WORKER` If `true`, this instance of Cube will **only** refresh pre-aggregations. @@ -1078,8 +1088,6 @@ mode](/product/caching/using-pre-aggregations#rollup-only-mode) for details. It can be also set using the [`orchestrator_options.rollupOnlyMode` configuration option](/reference/configuration/config#orchestrator_options). - - ## `CUBEJS_SCHEDULED_REFRESH_TIMEZONES` A comma-separated [list of timezones to schedule refreshes @@ -1688,3 +1696,4 @@ The port for a Cube deployment to listen to API connections on. [ref-sql-api-streaming]: /product/apis-integrations/sql-api#streaming [ref-row-limit]: /product/apis-integrations/queries#row-limit [mysql-server-tz-support]: https://dev.mysql.com/doc/refman/8.4/en/time-zone-support.html +[ref-schema-ref-preagg-allownonstrict]: /reference/data-model/pre-aggregations#allow_non_strict_date_range_match diff --git a/docs/pages/reference/data-model/pre-aggregations.mdx b/docs/pages/reference/data-model/pre-aggregations.mdx index eb18db356addd..47518e1f54743 100644 --- a/docs/pages/reference/data-model/pre-aggregations.mdx +++ b/docs/pages/reference/data-model/pre-aggregations.mdx @@ -1110,14 +1110,20 @@ refreshes. ### `allow_non_strict_date_range_match` -The `allow_non_strict_date_range_match` property can be used to allow queries to -match this pre-aggregation when using non-strict date ranges. -By default Cube checks if requested date range exactly matches pre-aggregation granularity. -For example if you're requesting half of a day or your date range filter is just one millisecond off for daily granularity pre-aggregation Cube would not use such a pre-aggregation by default. 
-Use this flag to lift this strict check. -This is useful -when using a visualization tools such as Tableau or [Apache -Superset][ref-config-downstream-superset] that use loose date ranges by default. +The `allow_non_strict_date_range_match` parameter is used to allow queries to match a +pre-aggregation even if a query contains a _non-strict date range_. It is set to `true` +by default via the [`CUBEJS_PRE_AGGREGATIONS_ALLOW_NON_STRICT_DATE_RANGE_MATCH`][ref-env-allow-non-strict] +environment variable. + +If this flag is set to `false`, Cube checks whether the requested date range exactly matches +the pre-aggregation granularity. For example, if you're requesting half of a day or your date +range filter is just one millisecond off for a pre-aggregation with daily granularity, +Cube will not use such a pre-aggregation. + +With this flag set to `true`, that strict check is lifted. It allows queries from BI tools +to still match pre-aggregations at the cost of a slight potential data discrepancy. +This is convenient when using Cube with visualization tools such as [Tableau][ref-config-downstream-tableau] +or [Apache Superset][ref-config-downstream-superset] that use loose date ranges. @@ -1767,3 +1773,5 @@ cubes: [ref-pre-aggs]: /product/caching/using-pre-aggregations [ref-ref-cubes]: /reference/data-model/cube [ref-custom-granularity]: /reference/data-model/dimensions#granularities +[ref-env-allow-non-strict]: /reference/configuration/environment-variables#cubejs-pre-aggregations-allow-non-strict-date-range-match +[ref-config-downstream-tableau]: /product/configuration/visualization-tools/tableau