Commit 37d8d17

Merge branch 'master' into feat/scheduledRefreshTimeZones
2 parents d2ca1d1 + 95021f2 commit 37d8d17

249 files changed: +7251 −2306 lines changed

Lines changed: 10 additions & 0 deletions
```diff
@@ -0,0 +1,10 @@
+#!/bin/bash
+set -eo pipefail
+
+# Debug log for test containers
+export DEBUG=testcontainers
+
+echo "::group::Dremio [cloud]"
+yarn lerna run --concurrency 1 --stream --no-prefix integration:dremio
+
+echo "::endgroup::"
```

.github/workflows/push.yml

Lines changed: 16 additions & 1 deletion
```diff
@@ -327,6 +327,7 @@ jobs:
       env:
         CLOUD_DATABASES: >
           firebolt
+          dremio
         # Athena (just to check for secrets availability)
         DRIVERS_TESTS_ATHENA_CUBEJS_AWS_KEY: ${{ secrets.DRIVERS_TESTS_ATHENA_CUBEJS_AWS_KEY }}
@@ -335,7 +336,7 @@ jobs:
         node-version: [20.x]
         db: [
           'clickhouse', 'druid', 'elasticsearch', 'mssql', 'mysql', 'postgres', 'prestodb',
-          'mysql-aurora-serverless', 'crate', 'mongobi', 'firebolt'
+          'mysql-aurora-serverless', 'crate', 'mongobi', 'firebolt', 'dremio'
         ]
         fail-fast: false
@@ -397,6 +398,10 @@ jobs:
         DRIVERS_TESTS_FIREBOLT_CUBEJS_FIREBOLT_ACCOUNT: ${{ secrets.DRIVERS_TESTS_FIREBOLT_CUBEJS_FIREBOLT_ACCOUNT }}
         DRIVERS_TESTS_FIREBOLT_CUBEJS_DB_USER: ${{ secrets.DRIVERS_TESTS_FIREBOLT_CUBEJS_DB_USER }}
         DRIVERS_TESTS_FIREBOLT_CUBEJS_DB_PASS: ${{ secrets.DRIVERS_TESTS_FIREBOLT_CUBEJS_DB_PASS }}
+        # Dremio Integration
+        DRIVERS_TESTS_DREMIO_CUBEJS_DB_URL: ${{ secrets.DRIVERS_TESTS_DREMIO_CUBEJS_DB_URL }}
+        DRIVERS_TESTS_DREMIO_CUBEJS_DB_NAME: ${{ secrets.DRIVERS_TESTS_DREMIO_CUBEJS_DB_NAME }}
+        DRIVERS_TESTS_DREMIO_CUBEJS_DB_DREMIO_AUTH_TOKEN: ${{ secrets.DRIVERS_TESTS_DREMIO_CUBEJS_DB_DREMIO_AUTH_TOKEN }}

   integration-smoke:
     needs: [ latest-tag-sha, build-cubestore ]
@@ -407,6 +412,7 @@ jobs:
     strategy:
       matrix:
         node-version: [ 20.x ]
+        python-version: [ 3.11 ]
       fail-fast: false

     steps:
@@ -432,6 +438,10 @@ jobs:
         uses: actions/setup-node@v4
         with:
           node-version: ${{ matrix.node-version }}
+      - name: Install Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
       - name: Get yarn cache directory path
         id: yarn-cache-dir-path
         run: echo "dir=$(yarn cache dir)" >> "$GITHUB_OUTPUT"
@@ -459,6 +469,11 @@ jobs:
         uses: GoodManWEN/oracle-client-action@main
       - name: Build client
         run: yarn build
+      - name: Build cubejs-backend-native (with Python)
+        run: yarn run native:build-release-python
+        working-directory: ./packages/cubejs-backend-native
+        env:
+          PYO3_PYTHON: python${{ matrix.python-version }}
       - name: Lerna tsc
         run: yarn tsc
       - name: Download cubestored-x86_64-unknown-linux-gnu-release artifact
```

CHANGELOG.md

Lines changed: 33 additions & 0 deletions
```diff
@@ -3,6 +3,39 @@
 All notable changes to this project will be documented in this file.
 See [Conventional Commits](https://conventionalcommits.org) for commit guidelines.

+## [1.1.11](https://github.com/cube-js/cube/compare/v1.1.10...v1.1.11) (2024-12-16)
+
+### Bug Fixes
+
+* TypeError: Cannot read properties of undefined (reading 'joins') ([14adaeb](https://github.com/cube-js/cube/commit/14adaebdd1c3d398bcd2997012da070999e47d9d))
+
+## [1.1.10](https://github.com/cube-js/cube/compare/v1.1.9...v1.1.10) (2024-12-16)
+
+### Bug Fixes
+
+* **api-gateway:** allow switch sql user when the new user is the same ([#9037](https://github.com/cube-js/cube/issues/9037)) ([a69c28f](https://github.com/cube-js/cube/commit/a69c28f524fa0625b825b98a38e7f5a211a98f74))
+* **api-gateway:** make sure DAP works sql pushdown ([#9021](https://github.com/cube-js/cube/issues/9021)) ([23695b2](https://github.com/cube-js/cube/commit/23695b2b5e886b5b7daf8b3f74003bb04e5b2e0b))
+* **cubestore:** Allow create an index from expressions ([#9006](https://github.com/cube-js/cube/issues/9006)) ([222cab8](https://github.com/cube-js/cube/commit/222cab897c289bfc929f217483e4905204bac12f))
+* **schema-compiler:** fix DAP with query_rewrite and python config ([#9033](https://github.com/cube-js/cube/issues/9033)) ([849790f](https://github.com/cube-js/cube/commit/849790f965dd0d9fddba11e3d8d124b84397ca9b))
+* **schema-compiler:** join relationship aliases ([ad4e8e3](https://github.com/cube-js/cube/commit/ad4e8e3872307ab77e035709e5208b0191f87f5b))
+
+### Features
+
+* **cubesql:** Basic VALUES support in rewrite engine ([#9041](https://github.com/cube-js/cube/issues/9041)) ([368671f](https://github.com/cube-js/cube/commit/368671fd1b53b2ed5ad8df6af113492982f23c0c))
+* **dremio-driver:** Add Dremio Cloud Support ([#8956](https://github.com/cube-js/cube/issues/8956)) ([d2c2fcd](https://github.com/cube-js/cube/commit/d2c2fcdaf8944ea7dd27e73b63c0b151c317022e))
+* **tesseract:** Support multiple join paths within single query ([#9047](https://github.com/cube-js/cube/issues/9047)) ([b62446e](https://github.com/cube-js/cube/commit/b62446e3c3893068f8dd8aa32d7204ea06a16f98))
+
 ## [1.1.9](https://github.com/cube-js/cube/compare/v1.1.8...v1.1.9) (2024-12-08)
```

docs/pages/product/apis-integrations/ai-api.mdx

Lines changed: 38 additions & 0 deletions
````diff
@@ -181,6 +181,44 @@ to give the AI context on possible values in a categorical dimension:
   - completed
 ```

+### Value search
+
+By default, the AI API has no ability to see the contents of your data (for privacy reasons).
+However, this makes it difficult for the AI API to generate correct filters for some queries.
+
+Imagine you have a categorical `order_status` dimension with the possible values "shipped",
+"processing", and "completed". Without value search, asking "how many complete orders did
+we have today" might get you a query filtering on `order_status = 'Complete'` instead of
+the correct `order_status = 'completed'`.
+
+To solve this, the AI API can perform "value searches" where it introspects the values in
+selected categorical dimensions before running a query. Value search is opt-in and dimensions
+must be enabled for it individually. Currently, the AI API performs value search by running
+Cube queries using the `contains` filter operator against one or more chosen dimensions.
+The LLM selects dimensions from among those you have enabled, based on the question asked,
+and generates possible values dynamically.
+
+<InfoBox>
+When running value search queries, the AI API passes through the security context used
+for the AI API request, so security is maintained and only dimensions the end user has
+access to can be searched.
+</InfoBox>
+
+To enable value search on a dimension, set the `searchable` field to true under the `ai`
+meta tag, as shown below:
+```yaml
+- name: order_status
+  sql: order_status
+  type: string
+  meta:
+    ai:
+      searchable: true
+```
+
+Note that enabling value search may lead to slightly longer AI API response times when it
+is used, but should result in significantly more accurate queries in many situations. Value
+search can only be used on string dimensions.
 ### Other LLM providers

 <InfoBox>
````

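As a concrete illustration of the mechanism described in the value-search section above, such an introspection query would take the shape of an ordinary Cube REST query using the `contains` filter operator. The dimension name, search term, and limit below are hypothetical, not taken from the docs:

```python
# Hypothetical value-search query: introspect a categorical dimension by
# running a Cube query with the `contains` filter operator.
value_search_query = {
    "dimensions": ["orders.order_status"],
    "filters": [
        {
            "member": "orders.order_status",
            "operator": "contains",
            # The LLM generates candidate search terms from the question,
            # e.g. "complete" for "how many complete orders did we have today".
            "values": ["complete"],
        }
    ],
    "limit": 100,
}

# The matching rows reveal the actual stored values (e.g. "completed"),
# which can then be used to build an exact equality filter.
print(value_search_query["filters"][0]["operator"])  # contains
```

Because the security context is passed through, this query returns only values the requesting end user could see anyway.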
docs/pages/product/auth/context.mdx

Lines changed: 1 addition & 1 deletion
````diff
@@ -220,7 +220,7 @@ def masked(sql, security_context):
     if is_trusted_team:
         return sql
     else:
-        return "\"'--- masked ---'\""
+        return "'--- masked ---'"
 ```
````

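The fix above removes the superfluous escaped double quotes so the masked value is returned as a plain SQL string literal. A minimal self-contained sketch of such a masking function, assuming a hypothetical `team` claim in the security context (the real docs derive `is_trusted_team` from their own context shape):

```python
def masked(sql, security_context):
    # Hypothetical check: treat members of a "trusted" team as allowed to
    # see the raw column SQL; everyone else gets a masked literal.
    is_trusted_team = (security_context or {}).get("team") == "trusted"
    if is_trusted_team:
        return sql
    # Return a plain SQL string literal so the masked value is selectable
    # as-is (no extra escaped double quotes around it).
    return "'--- masked ---'"

print(masked("email", {"team": "trusted"}))  # email
print(masked("email", {"team": "other"}))    # '--- masked ---'
```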
docs/pages/product/caching.mdx

Lines changed: 17 additions & 12 deletions
```diff
@@ -257,21 +257,26 @@ versions.

 Any query that is fulfilled by Cube will use one of the following cache types:

-- **[Pre-aggregations](#pre-aggregations) in Cube Store.** This is the most
-  advantageous and performant option.
+- **[Pre-aggregations](#pre-aggregations) in Cube Store.** This cache type
+  indicates that the query utilized existing pre-aggregations in Cube Store,
+  so it did not need to go to the database for processing.
 - **Pre-aggregations in Cube Store with a suboptimal query plan.** This cache
-  type indicates that queries still benefit from pre-aggregations in Cube Store
-  but it's possible to get a performance boost by [using indexes][ref-indexes].
+  type indicates that the query utilized pre-aggregations in Cube Store,
+  but that it's possible to get a performance boost by [using indexes][ref-indexes].
 - **Pre-aggregations in the data source.** This cache type indicates that
-  queries don't benefit from pre-aggregations in Cube Store and it's possible
-  to get a massive performance boost by using Cube Store as [pre-aggregation
+  the query utilized pre-aggregations in the upstream data source.
+  These queries could gain a performance boost by using Cube Store as [pre-aggregation
   storage][ref-storage].
-- **[In-memory cache.](#in-memory-cache)** This cache type indicates that
-  queries don't benefit from pre-aggregations at all. Queries directly hit the
-  upstream data source and in-memory cache is used to speed up the execution of
-  identical queries that arrive within a short period of time.
-- **No cache.** This cache type indicates queries that directly hit the
-  upstream data source and have the worst performance possible.
+- **[In-memory cache.](#in-memory-cache)** This cache type indicates that the
+  results were retrieved from Cube's in-memory cache. All query results
+  are stored in Cube's in-memory cache, and if the same query is
+  run within a certain time frame, the results will be retrieved from in-memory
+  cache instead of being processed in the database or in Cube Store. This is the
+  fastest query retrieval method, but it requires that the exact same query was
+  run very recently.
+- **No cache.** This cache type indicates that the query was processed in the upstream
+  data source and was not accelerated using pre-aggregations. These queries could have
+  a significant performance boost if pre-aggregations and Cube Store were utilized.

 In [Query History][ref-query-history] and throughout Cube Cloud, colored bolt
 icons are used to indicate the cache type. Also, [Performance
```

docs/pages/product/workspace/ai-assistant.mdx

Lines changed: 6 additions & 0 deletions
````diff
@@ -97,6 +97,11 @@ to give the AI context on possible values in a categorical dimension:
   - completed
 ```

+### Value search
+
+Value search can be enabled for AI Assistant in the same way as for the AI API. See the
+[AI API's documentation][ref-ai-api-value-search] for details and instructions.
+
 ### Other LLM providers

 See the [AI API's documentation][ref-ai-api-providers] for information on how to "bring your own" LLM.
@@ -127,3 +132,4 @@
 [ref-playground]: /product/workspace/playground
 [ref-catalog-downstream]: /product/workspace/semantic-catalog#connecting-downstream-tools
 [ref-ai-api-providers]: /product/apis-integrations/ai-api#other-llm-providers
+[ref-ai-api-value-search]: /product/apis-integrations/ai-api#value-search
````

docs/pages/reference/configuration/environment-variables.mdx

Lines changed: 2 additions & 2 deletions
```diff
@@ -572,8 +572,8 @@ The timeout value for any queries made to the database by Cube.
 <InfoBox>

 There's a hard limit of 20 minutes for queries that ingest data into Cube Store
-when pre-aggregations are built. If you bump into this limit, consider using an
-export bucket and splitting pre-aggregations into partitions.
+when pre-aggregations are built. If you bump into this limit, consider using
+an export bucket and splitting pre-aggregations into partitions.

 </InfoBox>
```

docs/pages/reference/data-model/joins.mdx

Lines changed: 113 additions & 0 deletions
````diff
@@ -404,6 +404,119 @@ cubes:

 </CodeTabs>

+## Chasm and fan traps
+
+Cube automatically detects chasm and fan traps based on the `many_to_one` and `one_to_many` relationships defined in joins.
+When detected, Cube generates a deduplication query that evaluates all distinct primary keys within the multiplied measure's cube and then joins those distinct primary keys back to the cube to calculate the aggregation result.
+If there's more than one multiplied measure in a query, such a query is generated for every multiplied measure, and the results are joined.
+Cube resolves chasm and fan traps at query time.
+If there's a pre-aggregation that fits the measure multiplication requirements, it will be leveraged to serve such a query.
+Such pre-aggregations and queries are always considered non-additive for the purpose of pre-aggregation matching.
+
+Let's consider an example data model:
+
+<CodeTabs>
+
+```javascript
+cube(`orders`, {
+  sql_table: `orders`,
+
+  dimensions: {
+    id: {
+      sql: `id`,
+      type: `number`,
+      primary_key: true
+    },
+    city: {
+      sql: `city`,
+      type: `string`
+    }
+  },
+
+  joins: {
+    customers: {
+      relationship: `many_to_one`,
+      sql: `${CUBE}.customer_id = ${customers.id}`,
+    },
+  },
+});
+
+cube(`customers`, {
+  sql_table: `customers`,
+
+  measures: {
+    average_age: {
+      sql: `age`,
+      type: `avg`,
+    }
+  },
+
+  dimensions: {
+    id: {
+      sql: `id`,
+      type: `number`,
+      primary_key: true
+    }
+  }
+});
+```
+
+```yaml
+cubes:
+  - name: orders
+    sql_table: orders
+
+    dimensions:
+      - name: id
+        sql: id
+        type: number
+        primary_key: true
+      - name: city
+        sql: city
+        type: string
+
+    joins:
+      - name: customers
+        relationship: many_to_one
+        sql: "{orders}.customer_id = {customers.id}"
+
+  - name: customers
+    sql_table: customers
+
+    dimensions:
+      - name: id
+        sql: id
+        type: number
+        primary_key: true
+
+    measures:
+      - name: average_age
+        sql: age
+        type: avg
+```
+
+</CodeTabs>
+
+If we try to query `customers.average_age` by `orders.city`, Cube detects that the `average_age` measure in the `customers` cube would be multiplied by the `orders`-to-`customers` join and generates SQL similar to:
+
+```sql
+SELECT
+  "keys"."orders__city",
+  avg("customers_key__customers".age) "customers__average_age"
+FROM
+  (
+    SELECT
+      DISTINCT "customers_key__orders".city "orders__city",
+      "customers_key__customers".id "customers__id"
+    FROM
+      orders AS "customers_key__orders"
+      LEFT JOIN customers AS "customers_key__customers" ON "customers_key__orders".customer_id = "customers_key__customers".id
+  ) AS "keys"
+  LEFT JOIN customers AS "customers_key__customers" ON "keys"."customers__id" = "customers_key__customers".id
+GROUP BY
+  1
+```
 ## CUBE reference

 When you have several joined cubes, you should accurately use columns’ names to
````

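The deduplication strategy described in the new "Chasm and fan traps" section above can be demonstrated end-to-end with a tiny in-memory SQLite database (the table contents here are illustrative, not taken from the commit): a naive join counts each customer once per order, skewing `avg(age)`, while the distinct-primary-key rewrite counts each customer exactly once.

```python
import sqlite3

conn = sqlite3.connect(":memory:")
conn.executescript("""
    CREATE TABLE customers (id INTEGER PRIMARY KEY, age INTEGER);
    CREATE TABLE orders (id INTEGER PRIMARY KEY, customer_id INTEGER, city TEXT);
    INSERT INTO customers VALUES (1, 20), (2, 40);
    -- Customer 1 has three orders, customer 2 has one: a classic fan trap.
    INSERT INTO orders VALUES (1, 1, 'NYC'), (2, 1, 'NYC'), (3, 1, 'NYC'), (4, 2, 'NYC');
""")

# Naive join: customer 1's age is counted three times, skewing the average.
naive = conn.execute("""
    SELECT avg(c.age)
    FROM orders o LEFT JOIN customers c ON o.customer_id = c.id
""").fetchone()[0]

# Deduplicated: collect distinct (city, customer id) pairs first, then join
# back to customers so each customer contributes exactly once per group.
dedup = conn.execute("""
    SELECT avg(c2.age)
    FROM (
        SELECT DISTINCT o.city AS city, c.id AS customer_id
        FROM orders o LEFT JOIN customers c ON o.customer_id = c.id
    ) AS keys
    LEFT JOIN customers c2 ON keys.customer_id = c2.id
""").fetchone()[0]

print(naive)  # 25.0  -> (20*3 + 40) / 4, multiplied by the fan trap
print(dedup)  # 30.0  -> (20 + 40) / 2, the correct average age
```

This mirrors the generated SQL shown in the section: the inner `SELECT DISTINCT` plays the role of the `"keys"` subquery.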
docs/pages/reference/data-model/pre-aggregations.mdx

Lines changed: 1 addition & 1 deletion
```diff
@@ -921,7 +921,7 @@ cubes:

 </CodeTabs>

-For possible `every` parameter values please refer to
+To have a pre-aggregation rebuild at a specific time of day, you can use a CRON string with some limitations. For more details about values that can be used with the `every` parameter, please refer to the
 [`refreshKey`][ref-cube-refreshkey] documentation.

 You can also use `every` with `sql`:
```

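To make the CRON form mentioned in the updated paragraph concrete, a hedged sketch of a pre-aggregation whose `every` uses a CRON string to rebuild daily at 08:00 (the cube member names are illustrative, not from the commit):

```yaml
pre_aggregations:
  - name: orders_by_day
    measures:
      - CUBE.count
    time_dimension: CUBE.created_at
    granularity: day
    refresh_key:
      # CRON fields: minute hour day-of-month month day-of-week
      every: "0 8 * * *"
```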