diff --git a/packages/cubejs-duckdb-driver/src/DuckDBQuery.ts b/packages/cubejs-duckdb-driver/src/DuckDBQuery.ts index 96cd50a725128..6561568aa50e7 100644 --- a/packages/cubejs-duckdb-driver/src/DuckDBQuery.ts +++ b/packages/cubejs-duckdb-driver/src/DuckDBQuery.ts @@ -12,6 +12,16 @@ const GRANULARITY_TO_INTERVAL: Record string> = { }; class DuckDBFilter extends BaseFilter { + public castParameter() { + const numberTypes = ['number', 'count', 'count_distinct', 'count_distinct_approx', 'sum', 'avg', 'min', 'max']; + const definition = this.definition(); + + if (numberTypes.includes(definition.type)) { + return 'CAST(? AS DOUBLE)'; + } + + return '?'; + } } export class DuckDBQuery extends BaseQuery { @@ -55,4 +65,12 @@ export class DuckDBQuery extends BaseQuery { templates.functions.GREATEST = 'GREATEST({{ args_concat }})'; return templates; } + + public timeStampParam(timeDimension: any) { + if (timeDimension.measure) { + // For time measures, we don't need to check dateFieldType + return super.timeStampCast('?'); + } + return super.timeStampParam(timeDimension); + } } diff --git a/packages/cubejs-testing/birdbox-fixtures/duckdb/schema/Orders.js b/packages/cubejs-testing/birdbox-fixtures/duckdb/schema/Orders.js index 54bd827a1806c..85d185a069e4a 100644 --- a/packages/cubejs-testing/birdbox-fixtures/duckdb/schema/Orders.js +++ b/packages/cubejs-testing/birdbox-fixtures/duckdb/schema/Orders.js @@ -1,15 +1,15 @@ cube(`Orders`, { sql: ` - select id, amount, status from ( - select 1 as id, 100 as amount, 'new' status + select id, amount, status, created_at, is_paid from ( + select 1 as id, 100 as amount, 'new' status, TIMESTAMPTZ '2020-01-01 00:00:00' created_at, TRUE as is_paid UNION ALL - select 2 as id, 200 as amount, 'new' status + select 2 as id, 200 as amount, 'new' status, TIMESTAMPTZ '2020-01-02 00:00:00' created_at, TRUE as is_paid UNION ALL - select 3 as id, 300 as amount, 'processed' status + select 3 as id, 300 as amount, 'processed' status, TIMESTAMPTZ '2020-01-03 00:00:00' created_at, TRUE as is_paid UNION ALL - select 4 as id, 500 as amount, 'processed' status + select 4 as id, 500 as amount, 'processed' status, TIMESTAMPTZ '2020-01-04 00:00:00' created_at, FALSE as is_paid UNION ALL - select 5 as id, 600 as amount, 'shipped' status + select 5 as id, 600 as amount, 'shipped' status, TIMESTAMPTZ '2020-01-05 00:00:00' created_at, FALSE as is_paid ) `, measures: { @@ -20,6 +20,26 @@ cube(`Orders`, { sql: `amount`, type: `sum`, }, + avgAmount: { + sql: `amount`, + type: `avg`, + }, + statusList: { + sql: `status`, + type: `string`, + }, + lastStatus: { + sql: `MAX(status)`, + type: `string`, + }, + hasUnpaidOrders: { + sql: `BOOL_OR(NOT is_paid)`, + type: `boolean`, + }, + maxCreatedAt: { + sql: `MAX(created_at)`, + type: `time`, + }, toRemove: { type: `count`, }, @@ -29,5 +49,17 @@ cube(`Orders`, { sql: `status`, type: `string`, }, + amount: { + sql: `amount`, + type: `number`, + }, + isPaid: { + sql: `is_paid`, + type: `boolean`, + }, + createdAt: { + sql: `created_at`, + type: `time`, + }, }, }); diff --git a/packages/cubejs-testing/test/smoke-duckdb.test.ts b/packages/cubejs-testing/test/smoke-duckdb.test.ts index b487e31a6168e..7da0e57b4a3b8 100644 --- a/packages/cubejs-testing/test/smoke-duckdb.test.ts +++ b/packages/cubejs-testing/test/smoke-duckdb.test.ts @@ -1,6 +1,6 @@ import cubejs, { CubeApi } from '@cubejs-client/core'; // eslint-disable-next-line import/no-extraneous-dependencies -import { afterAll, beforeAll, jest } from '@jest/globals'; +import { afterAll, beforeAll, expect, jest, test } from '@jest/globals'; import { BirdBox, getBirdbox } from '../src'; import { DEFAULT_API_TOKEN, @@ -37,4 +37,529 @@ describe('duckdb', () => { }, JEST_AFTER_ALL_DEFAULT_TIMEOUT); test('query measure', () => testQueryMeasure(client)); + + test('numeric measure filter - uses CAST', async () => { + const response = await client.load({ + measures: [ + 'Orders.count' + ], + filters: [ + { + member: 'Orders.totalAmount', + operator: 'gte', + values: ['300'] + } + ] + }); + + // The HAVING clause applies after counting all records + expect(response.rawData()[0]['Orders.count']).toBe('5'); + }); + + test('numeric dimension filter - uses CAST', async () => { + // This test verifies that the WHERE clause for numeric dimensions uses CAST(? AS DOUBLE) + const response = await client.load({ + measures: [ + 'Orders.count' + ], + filters: [ + { + member: 'Orders.amount', + operator: 'gte', + values: ['300'] + } + ] + }); + + // Only 3 orders have amount >= 300 + expect(response.rawData()[0]['Orders.count']).toBe('3'); + }); + + test('string filter - does NOT use CAST', async () => { + // This test verifies that the WHERE clause for string dimensions does not use CAST + const response = await client.load({ + measures: [ + 'Orders.count' + ], + filters: [ + { + member: 'Orders.status', + operator: 'equals', + values: ['processed'] + } + ] + }); + + // There are 2 'processed' orders + expect(response.rawData()[0]['Orders.count']).toBe('2'); + }); + + test('numeric exact equality filter - also uses CAST', async () => { + // This test verifies that even for equality comparisons on numeric values, CAST is used + const response = await client.load({ + measures: [ + 'Orders.count' + ], + filters: [ + { + member: 'Orders.amount', + operator: 'equals', + values: ['300'] + } + ] + }); + + // Only 1 order has amount exactly 300 + expect(response.rawData()[0]['Orders.count']).toBe('1'); + }); + + test('numeric string comparison - values are properly handled with CAST', async () => { + // This test verifies that numeric strings are properly cast as numbers + // This is important because '100' and 100 are different in string comparisons + // but should be treated the same with numeric CAST + const response = await client.load({ + measures: [ + 'Orders.count' + ], + filters: [ + { + member: 'Orders.amount', + operator: 'equals', + values: ['100'] // string '100' should be cast to number 100 + } + ] + }); + + // Only 1 order has amount 100 + expect(response.rawData()[0]['Orders.count']).toBe('1'); + }); + + test('multiple string values in filter - none use CAST', async () => { + // This test verifies that IN (...) clauses for string dimensions don't use CAST + const response = await client.load({ + measures: [ + 'Orders.count' + ], + filters: [ + { + member: 'Orders.status', + operator: 'equals', + values: ['new', 'processed'] + } + ] + }); + + // There are 4 orders with status 'new' or 'processed' + expect(response.rawData()[0]['Orders.count']).toBe('4'); + }); + + test('string measure filter - should NOT use CAST', async () => { + // This test verifies that filters on string measures don't use CAST + const response = await client.load({ + measures: [ + 'Orders.count' + ], + filters: [ + { + member: 'Orders.lastStatus', + operator: 'equals', + values: ['shipped'] + } + ] + }); + + // Output should match the shipped orders + expect(response.rawData()[0]['Orders.count']).toBe('5'); + }); + + test('boolean measure filter - should NOT use CAST', async () => { + // This test verifies that filters on boolean measures don't use CAST + const response = await client.load({ + measures: [ + 'Orders.count' + ], + filters: [ + { + member: 'Orders.hasUnpaidOrders', + operator: 'equals', + values: ['true'] + } + ] + }); + + // Output should match all orders since there are unpaid orders + expect(response.rawData()[0]['Orders.count']).toBe('5'); + }); + + test('time measure filter - afterDate operator', async () => { + // This test verifies that filters on time measures don't use CAST + const response = await client.load({ + measures: [ + 'Orders.count' + ], + filters: [ + { + member: 'Orders.maxCreatedAt', + operator: 'afterDate', + values: ['2020-01-03'] + } + ] + }); + + // Orders after 2020-01-03 + expect(response.rawData()[0]['Orders.count']).toBe('5'); + }); + + test('time measure filter - beforeDate operator', async () => { + // This test verifies that beforeDate operator works correctly with time measures + const response = await client.load({ + measures: [ + 'Orders.count' + ], + filters: [ + { + member: 'Orders.maxCreatedAt', + operator: 'beforeDate', + values: ['2020-01-06'] + } + ] + }); + // All orders should be counted since max date (2020-01-05) is before 2020-01-06 + expect(response.rawData()[0]['Orders.count']).toBe('5'); + }); + + test('time measure filter - beforeOrOnDate operator', async () => { + // This test verifies that beforeOrOnDate operator works correctly with time measures + const response = await client.load({ + measures: [ + 'Orders.count' + ], + filters: [ + { + member: 'Orders.maxCreatedAt', + operator: 'beforeOrOnDate', + values: ['2020-01-05'] + } + ] + }); + // All orders should be counted since max date (2020-01-05) is equal to 2020-01-05 + expect(response.rawData()[0]['Orders.count']).toBe('5'); + }); + + test('time measure filter - afterOrOnDate operator', async () => { + // This test verifies that afterOrOnDate operator works correctly with time measures + const response = await client.load({ + measures: [ + 'Orders.count' + ], + filters: [ + { + member: 'Orders.maxCreatedAt', + operator: 'afterOrOnDate', + values: ['2020-01-05'] + } + ] + }); + // All orders should be counted since max date (2020-01-05) is equal to 2020-01-05 + expect(response.rawData()[0]['Orders.count']).toBe('5'); + }); + + test('time measure filter - inDateRange operator', async () => { + // This test verifies that inDateRange operator works correctly with time measures + const response = await client.load({ + measures: [ + 'Orders.count' + ], + filters: [ + { + member: 'Orders.maxCreatedAt', + operator: 'inDateRange', + values: ['2020-01-04', '2020-01-06'] + } + ] + }); + // All orders should be counted since max date (2020-01-05) is within the range + expect(response.rawData()[0]['Orders.count']).toBe('5'); + }); + + test('time measure filter - notInDateRange operator', async () => { + // This test verifies that notInDateRange operator works correctly with time measures + const response = await client.load({ + measures: [ + 'Orders.count' + ], + filters: [ + { + member: 'Orders.maxCreatedAt', + operator: 'notInDateRange', + values: ['2020-01-06', '2020-01-07'] + } + ] + }); + // All orders should be counted since max date (2020-01-05) is outside the range + expect(response.rawData()[0]['Orders.count']).toBe('5'); + }); + + test('time measure filter - equals operator', async () => { + // This test verifies that equals operator works correctly with time measures + const response = await client.load({ + measures: [ + 'Orders.count' + ], + filters: [ + { + member: 'Orders.maxCreatedAt', + operator: 'equals', + values: ['2020-01-05'] + } + ] + }); + // All orders should be counted since max date equals 2020-01-05 + expect(response.rawData()[0]['Orders.count']).toBe('5'); + }); + + test('time measure filter - notEquals operator', async () => { + // This test verifies that notEquals operator works correctly with time measures + const response = await client.load({ + measures: [ + 'Orders.count' + ], + filters: [ + { + member: 'Orders.maxCreatedAt', + operator: 'notEquals', + values: ['2020-01-06'] + } + ] + }); + // All orders should be counted since max date (2020-01-05) is not equal to 2020-01-06 + expect(response.rawData()[0]['Orders.count']).toBe('5'); + }); + + test('boolean dimension filter - should NOT use CAST', async () => { + // This test verifies that filters on boolean dimensions don't use CAST + const response = await client.load({ + measures: [ + 'Orders.count' + ], + filters: [ + { + member: 'Orders.isPaid', + operator: 'equals', + values: ['false'] + } + ] + }); + + // There are 2 unpaid orders + expect(response.rawData()[0]['Orders.count']).toBe('2'); + }); + + test('time dimension filter - should NOT use CAST', async () => { + // This test verifies that filters on time types don't use CAST + const response = await client.load({ + measures: [ + 'Orders.count' + ], + filters: [ + { + member: 'Orders.createdAt', + operator: 'afterDate', + values: ['2020-01-03'] + } + ] + }); + + // There are 2 orders after 2020-01-03 + expect(response.rawData()[0]['Orders.count']).toBe('2'); + }); + + test('time dimension filter - beforeDate operator', async () => { + // This test verifies that the beforeDate operator works correctly + const response = await client.load({ + measures: [ + 'Orders.count' + ], + filters: [ + { + member: 'Orders.createdAt', + operator: 'beforeDate', + values: ['2020-01-03'] + } + ] + }); + + // There are 2 orders before 2020-01-03 + expect(response.rawData()[0]['Orders.count']).toBe('2'); + }); + + test('time dimension filter - beforeOrOnDate operator', async () => { + // This test verifies that the beforeOrOnDate operator works correctly + const response = await client.load({ + measures: [ + 'Orders.count' + ], + filters: [ + { + member: 'Orders.createdAt', + operator: 'beforeOrOnDate', + values: ['2020-01-03'] + } + ] + }); + + // There are 3 orders before or on 2020-01-03 + expect(response.rawData()[0]['Orders.count']).toBe('3'); + }); + + test('time dimension filter - afterOrOnDate operator', async () => { + // This test verifies that the afterOrOnDate operator works correctly + const response = await client.load({ + measures: [ + 'Orders.count' + ], + filters: [ + { + member: 'Orders.createdAt', + operator: 'afterOrOnDate', + values: ['2020-01-03'] + } + ] + }); + + // There are 3 orders after or on 2020-01-03 + expect(response.rawData()[0]['Orders.count']).toBe('3'); + }); + + test('time dimension filter - inDateRange operator', async () => { + // This test verifies that the inDateRange operator works correctly + const response = await client.load({ + measures: [ + 'Orders.count' + ], + filters: [ + { + member: 'Orders.createdAt', + operator: 'inDateRange', + values: ['2020-01-02', '2020-01-04'] + } + ] + }); + + // There are 3 orders between 2020-01-02 and 2020-01-04 (inclusive) + expect(response.rawData()[0]['Orders.count']).toBe('3'); + }); + + test('time dimension filter - notInDateRange operator', async () => { + // This test verifies that the notInDateRange operator works correctly + const response = await client.load({ + measures: [ + 'Orders.count' + ], + filters: [ + { + member: 'Orders.createdAt', + operator: 'notInDateRange', + values: ['2020-01-02', '2020-01-04'] + } + ] + }); + + // There are 2 orders outside the range 2020-01-02 to 2020-01-04 + expect(response.rawData()[0]['Orders.count']).toBe('2'); + }); + + test('time dimension filter - set operator', async () => { + // This test verifies that the set operator works correctly + const response = await client.load({ + measures: [ + 'Orders.count' + ], + filters: [ + { + member: 'Orders.createdAt', + operator: 'set' + } + ] + }); + + // All 5 orders have createdAt set + expect(response.rawData()[0]['Orders.count']).toBe('5'); + }); + + test('time measure filter - set operator', async () => { + // This test verifies that the set operator works correctly with measures + const response = await client.load({ + measures: [ + 'Orders.count' + ], + filters: [ + { + member: 'Orders.maxCreatedAt', + operator: 'set' + } + ] + }); + + // All 5 orders have maxCreatedAt set + expect(response.rawData()[0]['Orders.count']).toBe('5'); + }); + + // We can't effectively test notSet with the current schema since all records have dates + // This test is more for API completeness + test('time dimension filter - notSet operator', async () => { + // This test verifies that the notSet operator works correctly + const response = await client.load({ + measures: [ + 'Orders.count' + ], + filters: [ + { + member: 'Orders.createdAt', + operator: 'notSet' + } + ] + }); + + // No orders have createdAt as null + expect(response.rawData()[0]['Orders.count']).toBe('0'); + }); + + test('time dimension filter - equals operator', async () => { + // This test verifies that equals operator works correctly with time dimensions + const response = await client.load({ + measures: [ + 'Orders.count' + ], + filters: [ + { + member: 'Orders.createdAt', + operator: 'equals', + values: ['2020-01-03'] + } + ] + }); + + // There is 1 order on exactly 2020-01-03 + expect(response.rawData()[0]['Orders.count']).toBe('1'); + }); + + test('time dimension filter - notEquals operator', async () => { + // This test verifies that notEquals operator works correctly with time dimensions + const response = await client.load({ + measures: [ + 'Orders.count' + ], + filters: [ + { + member: 'Orders.createdAt', + operator: 'notEquals', + values: ['2020-01-03'] + } + ] + }); + + // There are 4 orders not on 2020-01-03 + expect(response.rawData()[0]['Orders.count']).toBe('4'); + }); });