Skip to content

Commit da231ed

Browse files
authored
feat(databricks-jdbc-driver): Support HLL (#8257)
1 parent 66aa01d commit da231ed

File tree

24 files changed

+4939
-2084
lines changed

24 files changed

+4939
-2084
lines changed

packages/cubejs-databricks-jdbc-driver/src/DatabricksQuery.ts

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,18 @@ export class DatabricksQuery extends BaseQuery {
2525
return new DatabricksFilter(this, filter);
2626
}
2727

28+
/**
 * Wraps the given SQL expression in a `hll_sketch_agg(...)` aggregate call.
 *
 * NOTE(review): presumably this produces the initial HyperLogLog sketch state
 * for an approximate distinct count on Databricks — confirm against the
 * Databricks SQL function reference.
 *
 * @param sql SQL expression to aggregate; it is interpolated verbatim.
 * @returns The generated SQL fragment.
 */
public hllInit(sql: string) {
29+
return `hll_sketch_agg(${sql})`;
30+
}
31+
32+
/**
 * Wraps the given SQL expression in a `hll_union_agg(...)` aggregate call.
 *
 * NOTE(review): the generated SQL contains only the union aggregate, with no
 * estimate/cardinality call around it. Verify that callers expect a merged
 * sketch rather than a numeric count.
 *
 * @param sql SQL expression to merge; it is interpolated verbatim.
 * @returns The generated SQL fragment.
 */
public hllMerge(sql: string) {
33+
return `hll_union_agg(${sql})`;
34+
}
35+
36+
/**
 * Wraps the given SQL expression in an `approx_count_distinct(...)` call.
 *
 * @param sql SQL expression whose distinct values are counted; it is
 *   interpolated verbatim.
 * @returns The generated SQL fragment.
 */
public countDistinctApprox(sql: string) {
37+
return `approx_count_distinct(${sql})`;
38+
}
39+
2840
/**
 * Builds a `from_utc_timestamp(<field>, '<timezone>')` SQL expression using
 * the timezone stored on this query (`this.timezone`).
 *
 * NOTE(review): `this.timezone` is placed inside a single-quoted SQL literal
 * without escaping — presumably it is validated upstream; confirm.
 *
 * @param field SQL expression for the timestamp; it is interpolated verbatim.
 * @returns The generated SQL fragment.
 */
public convertTz(field: string) {
2941
return `from_utc_timestamp(${field}, '${this.timezone}')`;
3042
}

packages/cubejs-jdbc-driver/src/JDBCDriver.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,7 @@ export class JDBCDriver extends BaseDriver {
294294
reject(err);
295295
return;
296296
}
297+
297298
const rowStream = new QueryStream(res.rows.next, highWaterMark);
298299
resolve({
299300
rowStream,
@@ -322,6 +323,7 @@ export class JDBCDriver extends BaseDriver {
322323
if (options.streamImport) {
323324
return this.stream(query, values, options);
324325
}
326+
325327
return super.downloadQueryResults(query, values, options);
326328
}
327329

packages/cubejs-jdbc-driver/src/QueryStream.ts

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
1-
/* eslint-disable import/no-extraneous-dependencies */
21
import { Readable } from 'stream';
3-
import { getEnv } from '@cubejs-backend/shared';
42

53
export type Row = {
64
[field: string]: boolean | number | string
@@ -25,6 +23,18 @@ export class QueryStream extends Readable {
2523
this.next = nextFn;
2624
}
2725

26+
/**
 * Post-processes a single result row before it is pushed to the stream.
 *
 * Every field whose value is an `Int8Array` is replaced with the base64
 * string of its bytes. The row is modified in place and the same object is
 * returned.
 *
 * NOTE(review): presumably `Int8Array` is how binary columns (e.g. HLL
 * sketches) arrive from the JDBC bridge — confirm.
 *
 * @param row Row object keyed by field name.
 * @returns The same `row` object after conversion.
 */
protected transformRow(row: any) {
27+
// eslint-disable-next-line no-restricted-syntax
28+
for (const [name, field] of Object.entries(row)) {
29+
// Only Int8Array values are re-encoded; every other field is left untouched.
30+
if (field instanceof Int8Array) {
31+
row[name] = Buffer.from(field).toString('base64');
32+
}
33+
}
34+
35+
return row;
36+
}
37+
2838
/**
2939
* @override
3040
*/
@@ -34,7 +44,7 @@ export class QueryStream extends Readable {
3444
if (this.next) {
3545
const row = this.next();
3646
if (row.value) {
37-
this.push(row.value);
47+
this.push(this.transformRow(row.value));
3848
}
3949
if (row.done) {
4050
this.push(null);

packages/cubejs-testing-drivers/fixtures/_schemas.json

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,11 @@
4848
"type": "count",
4949
"sql": "customer_id"
5050
},
51+
{
52+
"name": "countApproxByCustomer",
53+
"type": "count_distinct_approx",
54+
"sql": "customer_id"
55+
},
5156
{
5257
"name": "runningTotal",
5358
"type": "count",
@@ -127,6 +132,11 @@
127132
"type": "count",
128133
"sql": "customer_id"
129134
},
135+
{
136+
"name": "countApproxByCustomer",
137+
"type": "count_distinct_approx",
138+
"sql": "customer_id"
139+
},
130140
{
131141
"name": "totalQuantity",
132142
"sql": "quantity",
@@ -244,6 +254,11 @@
244254
"type": "count",
245255
"sql": "customer_id"
246256
},
257+
{
258+
"name": "countApproxByCustomer",
259+
"type": "count_distinct_approx",
260+
"sql": "customer_id"
261+
},
247262
{
248263
"name": "totalQuantity",
249264
"sql": "quantity",

packages/cubejs-testing-drivers/fixtures/athena.json

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,16 @@
6262
"CUBE.totalSales",
6363
"CUBE.totalProfit"
6464
]
65+
},
66+
{
67+
"name": "CountByProduct",
68+
"time_dimension": "CUBE.orderDate",
69+
"granularity": "month",
70+
"partition_granularity": "month",
71+
"dimensions": ["CUBE.productName"],
72+
"measures": [
73+
"CUBE.countApproxByCustomer"
74+
]
6575
}
6676
],
6777
"BigECommerce": [
@@ -77,6 +87,16 @@
7787
"CUBE.totalSales",
7888
"CUBE.totalProfit"
7989
]
90+
},
91+
{
92+
"name": "CountByProduct",
93+
"time_dimension": "CUBE.orderDate",
94+
"granularity": "month",
95+
"partition_granularity": "month",
96+
"dimensions": ["CUBE.productName"],
97+
"measures": [
98+
"CUBE.countApproxByCustomer"
99+
]
80100
}
81101
]
82102
},

packages/cubejs-testing-drivers/fixtures/bigquery.json

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,16 @@
6363
"CUBE.totalSales",
6464
"CUBE.totalProfit"
6565
]
66+
},
67+
{
68+
"name": "CountByProduct",
69+
"time_dimension": "CUBE.orderDate",
70+
"granularity": "month",
71+
"partition_granularity": "month",
72+
"dimensions": ["CUBE.productName"],
73+
"measures": [
74+
"CUBE.countApproxByCustomer"
75+
]
6676
}
6777
],
6878
"BigECommerce": [
@@ -78,6 +88,16 @@
7888
"CUBE.totalSales",
7989
"CUBE.totalProfit"
8090
]
91+
},
92+
{
93+
"name": "CountByProduct",
94+
"time_dimension": "CUBE.orderDate",
95+
"granularity": "month",
96+
"partition_granularity": "month",
97+
"dimensions": ["CUBE.productName"],
98+
"measures": [
99+
"CUBE.countApproxByCustomer"
100+
]
81101
}
82102
]
83103
},

packages/cubejs-testing-drivers/fixtures/databricks-jdbc.json

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,16 @@
7373
"CUBE.totalSales",
7474
"CUBE.totalProfit"
7575
]
76+
},
77+
{
78+
"name": "CountByProduct",
79+
"time_dimension": "CUBE.orderDate",
80+
"granularity": "month",
81+
"partition_granularity": "month",
82+
"dimensions": ["CUBE.productName"],
83+
"measures": [
84+
"CUBE.countApproxByCustomer"
85+
]
7686
}
7787
],
7888
"BigECommerce": [
@@ -88,6 +98,16 @@
8898
"CUBE.totalSales",
8999
"CUBE.totalProfit"
90100
]
101+
},
102+
{
103+
"name": "CountByProduct",
104+
"time_dimension": "CUBE.orderDate",
105+
"granularity": "month",
106+
"partition_granularity": "month",
107+
"dimensions": ["CUBE.productName"],
108+
"measures": [
109+
"CUBE.countApproxByCustomer"
110+
]
91111
}
92112
]
93113
},

packages/cubejs-testing-drivers/fixtures/postgres.json

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
"ports" : ["4000", "5656"]
2020
},
2121
"data": {
22-
"image": "postgres:13",
22+
"image": "hbontempo/postgres-hll:16-v2.18",
2323
"environment": [
2424
"POSTGRES_PASSWORD=test",
2525
"POSTGRES_USER=test",
@@ -74,6 +74,16 @@
7474
"CUBE.totalSales",
7575
"CUBE.totalProfit"
7676
]
77+
},
78+
{
79+
"name": "CountByProduct",
80+
"time_dimension": "CUBE.orderDate",
81+
"granularity": "month",
82+
"partition_granularity": "month",
83+
"dimensions": ["CUBE.productName"],
84+
"measures": [
85+
"CUBE.countApproxByCustomer"
86+
]
7787
}
7888
],
7989
"BigECommerce": [
@@ -89,6 +99,16 @@
8999
"CUBE.totalSales",
90100
"CUBE.totalProfit"
91101
]
102+
},
103+
{
104+
"name": "CountByProduct",
105+
"time_dimension": "CUBE.orderDate",
106+
"granularity": "month",
107+
"partition_granularity": "month",
108+
"dimensions": ["CUBE.productName"],
109+
"measures": [
110+
"CUBE.countApproxByCustomer"
111+
]
92112
}
93113
]
94114
},

packages/cubejs-testing-drivers/fixtures/snowflake.json

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,16 @@
7474
"CUBE.totalSales",
7575
"CUBE.totalProfit"
7676
]
77+
},
78+
{
79+
"name": "CountByProduct",
80+
"time_dimension": "CUBE.orderDate",
81+
"granularity": "month",
82+
"partition_granularity": "month",
83+
"dimensions": ["CUBE.productName"],
84+
"measures": [
85+
"CUBE.countApproxByCustomer"
86+
]
7787
}
7888
],
7989
"BigECommerce": [
@@ -89,6 +99,16 @@
8999
"CUBE.totalSales",
90100
"CUBE.totalProfit"
91101
]
102+
},
103+
{
104+
"name": "CountByProduct",
105+
"time_dimension": "CUBE.orderDate",
106+
"granularity": "month",
107+
"partition_granularity": "month",
108+
"dimensions": ["CUBE.productName"],
109+
"measures": [
110+
"CUBE.countApproxByCustomer"
111+
]
92112
}
93113
]
94114
},

packages/cubejs-testing-drivers/src/dataset.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -208,9 +208,9 @@ export const BigECommerce = {
208208
select 7293 as row_id, 'CA-2017-109183' as order_id, ${DATE_PREFIX}'2020-12-04'${DATE_SUFFIX} as order_date, 'LR-16915' as customer_id, 'Columbus' as city, 'Technology' as category, 'Machines' as sub_category, 'Okidata C610n Printer' as product_name, 649.00000 as sales, 2 as quantity, 0.50000 as discount, -272.58000 as profit, ${falseLiteral} as is_returning
209209
`;
210210
if (!GENERATE_BIG_SERIES) {
211-
return `SELECT row_id as id, row_id, order_id, order_date, city, category, sub_category, product_name, sales, quantity, discount, profit, is_returning from (${data}) d`;
211+
return `SELECT row_id as id, row_id, order_id, order_date, city, category, sub_category, product_name, customer_id, sales, quantity, discount, profit, is_returning from (${data}) d`;
212212
}
213-
return `select value * 10000 + row_id as id, row_id, order_id, order_date, city, category, sub_category, product_name, sales, quantity, discount, profit, is_returning from ${GENERATE_BIG_SERIES} CROSS JOIN (${data}) d`;
213+
return `select value * 10000 + row_id as id, row_id, order_id, order_date, city, category, sub_category, product_name, customer_id, sales, quantity, discount, profit, is_returning from ${GENERATE_BIG_SERIES} CROSS JOIN (${data}) d`;
214214
},
215215
create: (cast: Cast, name: string, suf?: string) => create(name, BigECommerce.select(cast), cast, suf),
216216
};

0 commit comments

Comments
 (0)