Skip to content

Commit 5849a1d

Browse files
KSDaemon and marianore-muttdata
authored and committed
feat(databricks-jdbc-driver): Switch to the latest OSS Databricks JDBC driver (cube-js#9450)
* feat(databricks-jdbc-driver): Switch to the latest OSS Databricks JDBC driver
* remove UID and PWD from URL
* fix some notes after review
* update driver version
* add comment
* Revert "update driver version" — this reverts commit 5e9f7dc.
1 parent b399509 commit 5849a1d

File tree

8 files changed

+82
-113
lines changed

8 files changed

+82
-113
lines changed

packages/cubejs-backend-shared/src/env.ts

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -960,10 +960,17 @@ const variables: Record<string, (...args: any) => any> = {
960960
/**
961961
* Accept Databricks policy flag. This environment variable doesn't
962962
* need to be split by the data source.
963+
* TODO: Tech-debt: Remove totally someday
963964
*/
964-
databrickAcceptPolicy: () => (
965-
get('CUBEJS_DB_DATABRICKS_ACCEPT_POLICY').asBoolStrict()
966-
),
965+
databrickAcceptPolicy: () => {
  // Deprecated flag: it no longer affects anything. We still read it strictly
  // (so an invalid value keeps failing fast) and warn when it is present,
  // nudging users to drop it from their environment.
  if (get('CUBEJS_DB_DATABRICKS_ACCEPT_POLICY').asBoolStrict() !== undefined) {
    console.warn(
      'The CUBEJS_DB_DATABRICKS_ACCEPT_POLICY is not needed anymore. Please, remove it'
    );
  }
},
967974

968975
/**
969976
* Databricks jdbc-connection url.

packages/cubejs-backend-shared/src/http-utils.ts

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,9 +66,11 @@ export async function streamWithProgress(
6666
type DownloadAndExtractFile = {
6767
showProgress: boolean;
6868
cwd: string;
69+
skipExtract?: boolean;
70+
dstFileName?: string;
6971
};
7072

71-
export async function downloadAndExtractFile(url: string, { cwd }: DownloadAndExtractFile) {
73+
export async function downloadAndExtractFile(url: string, { cwd, skipExtract, dstFileName }: DownloadAndExtractFile) {
7274
const request = new Request(url, {
7375
headers: new Headers({
7476
'Content-Type': 'application/octet-stream',
@@ -99,7 +101,18 @@ export async function downloadAndExtractFile(url: string, { cwd }: DownloadAndEx
99101
});
100102
});
101103

102-
await decompress(savedFilePath, cwd);
104+
if (skipExtract) {
105+
if (dstFileName) {
106+
fs.copyFileSync(savedFilePath, path.resolve(path.join(cwd, dstFileName)));
107+
} else {
108+
// We still need some name for a file
109+
const tmpFileName = path.basename(savedFilePath);
110+
const destPath = path.join(cwd, tmpFileName);
111+
fs.copyFileSync(savedFilePath, destPath);
112+
}
113+
} else {
114+
await decompress(savedFilePath, cwd);
115+
}
103116

104117
try {
105118
fs.unlinkSync(savedFilePath);

packages/cubejs-backend-shared/test/db_env_multi.test.ts

Lines changed: 0 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1192,34 +1192,6 @@ describe('Multiple datasources', () => {
11921192
);
11931193
});
11941194

1195-
test('getEnv("databrickAcceptPolicy")', () => {
1196-
process.env.CUBEJS_DB_DATABRICKS_ACCEPT_POLICY = 'true';
1197-
expect(getEnv('databrickAcceptPolicy', { dataSource: 'default' })).toEqual(true);
1198-
expect(getEnv('databrickAcceptPolicy', { dataSource: 'postgres' })).toEqual(true);
1199-
expect(getEnv('databrickAcceptPolicy', { dataSource: 'wrong' })).toEqual(true);
1200-
1201-
process.env.CUBEJS_DB_DATABRICKS_ACCEPT_POLICY = 'false';
1202-
expect(getEnv('databrickAcceptPolicy', { dataSource: 'default' })).toEqual(false);
1203-
expect(getEnv('databrickAcceptPolicy', { dataSource: 'postgres' })).toEqual(false);
1204-
expect(getEnv('databrickAcceptPolicy', { dataSource: 'wrong' })).toEqual(false);
1205-
1206-
process.env.CUBEJS_DB_DATABRICKS_ACCEPT_POLICY = 'wrong';
1207-
expect(() => getEnv('databrickAcceptPolicy', { dataSource: 'default' })).toThrow(
1208-
'env-var: "CUBEJS_DB_DATABRICKS_ACCEPT_POLICY" should be either "true", "false", "TRUE", or "FALSE"'
1209-
);
1210-
expect(() => getEnv('databrickAcceptPolicy', { dataSource: 'postgres' })).toThrow(
1211-
'env-var: "CUBEJS_DB_DATABRICKS_ACCEPT_POLICY" should be either "true", "false", "TRUE", or "FALSE"'
1212-
);
1213-
expect(() => getEnv('databrickAcceptPolicy', { dataSource: 'wrong' })).toThrow(
1214-
'env-var: "CUBEJS_DB_DATABRICKS_ACCEPT_POLICY" should be either "true", "false", "TRUE", or "FALSE"'
1215-
);
1216-
1217-
delete process.env.CUBEJS_DB_DATABRICKS_ACCEPT_POLICY;
1218-
expect(getEnv('databrickAcceptPolicy', { dataSource: 'default' })).toBeUndefined();
1219-
expect(getEnv('databrickAcceptPolicy', { dataSource: 'postgres' })).toBeUndefined();
1220-
expect(getEnv('databrickAcceptPolicy', { dataSource: 'wrong' })).toBeUndefined();
1221-
});
1222-
12231195
test('getEnv("athenaAwsKey")', () => {
12241196
process.env.CUBEJS_AWS_KEY = 'default1';
12251197
process.env.CUBEJS_DS_POSTGRES_AWS_KEY = 'postgres1';

packages/cubejs-backend-shared/test/db_env_single.test.ts

Lines changed: 0 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -760,34 +760,6 @@ describe('Single datasources', () => {
760760
expect(getEnv('databricksCatalog', { dataSource: 'wrong' })).toBeUndefined();
761761
});
762762

763-
test('getEnv("databrickAcceptPolicy")', () => {
764-
process.env.CUBEJS_DB_DATABRICKS_ACCEPT_POLICY = 'true';
765-
expect(getEnv('databrickAcceptPolicy', { dataSource: 'default' })).toEqual(true);
766-
expect(getEnv('databrickAcceptPolicy', { dataSource: 'postgres' })).toEqual(true);
767-
expect(getEnv('databrickAcceptPolicy', { dataSource: 'wrong' })).toEqual(true);
768-
769-
process.env.CUBEJS_DB_DATABRICKS_ACCEPT_POLICY = 'false';
770-
expect(getEnv('databrickAcceptPolicy', { dataSource: 'default' })).toEqual(false);
771-
expect(getEnv('databrickAcceptPolicy', { dataSource: 'postgres' })).toEqual(false);
772-
expect(getEnv('databrickAcceptPolicy', { dataSource: 'wrong' })).toEqual(false);
773-
774-
process.env.CUBEJS_DB_DATABRICKS_ACCEPT_POLICY = 'wrong';
775-
expect(() => getEnv('databrickAcceptPolicy', { dataSource: 'default' })).toThrow(
776-
'env-var: "CUBEJS_DB_DATABRICKS_ACCEPT_POLICY" should be either "true", "false", "TRUE", or "FALSE"'
777-
);
778-
expect(() => getEnv('databrickAcceptPolicy', { dataSource: 'postgres' })).toThrow(
779-
'env-var: "CUBEJS_DB_DATABRICKS_ACCEPT_POLICY" should be either "true", "false", "TRUE", or "FALSE"'
780-
);
781-
expect(() => getEnv('databrickAcceptPolicy', { dataSource: 'wrong' })).toThrow(
782-
'env-var: "CUBEJS_DB_DATABRICKS_ACCEPT_POLICY" should be either "true", "false", "TRUE", or "FALSE"'
783-
);
784-
785-
delete process.env.CUBEJS_DB_DATABRICKS_ACCEPT_POLICY;
786-
expect(getEnv('databrickAcceptPolicy', { dataSource: 'default' })).toBeUndefined();
787-
expect(getEnv('databrickAcceptPolicy', { dataSource: 'postgres' })).toBeUndefined();
788-
expect(getEnv('databrickAcceptPolicy', { dataSource: 'wrong' })).toBeUndefined();
789-
});
790-
791763
test('getEnv("athenaAwsKey")', () => {
792764
process.env.CUBEJS_AWS_KEY = 'default1';
793765
expect(getEnv('athenaAwsKey', { dataSource: 'default' })).toEqual('default1');

packages/cubejs-databricks-jdbc-driver/src/DatabricksDriver.ts

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,10 @@ import {
1818
} from '@cubejs-backend/base-driver';
1919
import { JDBCDriver, JDBCDriverConfiguration, } from '@cubejs-backend/jdbc-driver';
2020
import { DatabricksQuery } from './DatabricksQuery';
21-
import { extractUidFromJdbcUrl, resolveJDBCDriver } from './helpers';
21+
import {
22+
extractAndRemoveUidPwdFromJdbcUrl,
23+
resolveJDBCDriver
24+
} from './helpers';
2225

2326
const SUPPORTED_BUCKET_TYPES = ['s3', 'gcs', 'azure'];
2427

@@ -133,7 +136,7 @@ export class DatabricksDriver extends JDBCDriver {
133136
/**
134137
* Show warning message flag.
135138
*/
136-
private showSparkProtocolWarn: boolean;
139+
private readonly showSparkProtocolWarn: boolean;
137140

138141
/**
139142
* Driver Configuration.
@@ -187,20 +190,20 @@ export class DatabricksDriver extends JDBCDriver {
187190
url = url.replace('jdbc:spark://', 'jdbc:databricks://');
188191
}
189192

193+
const [uid, pwd, cleanedUrl] = extractAndRemoveUidPwdFromJdbcUrl(url);
194+
190195
const config: DatabricksDriverConfiguration = {
191196
...conf,
192-
url,
197+
url: cleanedUrl,
193198
dbType: 'databricks',
194199
drivername: 'com.databricks.client.jdbc.Driver',
195200
customClassPath: undefined,
196201
properties: {
197-
UID: extractUidFromJdbcUrl(url),
198-
// PWD-parameter passed to the connection string has higher priority,
199-
// so we can set this one to an empty string to avoid a Java error.
202+
UID: uid,
200203
PWD:
201204
conf?.token ||
202205
getEnv('databrickToken', { dataSource }) ||
203-
'',
206+
pwd,
204207
UserAgentEntry: 'CubeDev_Cube',
205208
},
206209
catalog:
Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import fs from 'fs';
22
import path from 'path';
33

4-
import { downloadJDBCDriver } from './installer';
4+
import { downloadJDBCDriver, OSS_DRIVER_VERSION } from './installer';
55

66
async function fileExistsOr(
77
fsPath: string,
@@ -15,26 +15,39 @@ async function fileExistsOr(
1515

1616
export async function resolveJDBCDriver(): Promise<string> {
1717
return fileExistsOr(
18-
path.join(process.cwd(), 'DatabricksJDBC42.jar'),
18+
path.join(process.cwd(), `databricks-jdbc-${OSS_DRIVER_VERSION}-oss.jar`),
1919
async () => fileExistsOr(
20-
path.join(__dirname, '..', 'download', 'DatabricksJDBC42.jar'),
20+
path.join(__dirname, '..', 'download', `databricks-jdbc-${OSS_DRIVER_VERSION}-oss.jar`),
2121
async () => {
2222
const pathOrNull = await downloadJDBCDriver();
2323
if (pathOrNull) {
2424
return pathOrNull;
2525
}
2626
throw new Error(
27-
'Please download and place DatabricksJDBC42.jar inside your ' +
27+
`Please download and place databricks-jdbc-${OSS_DRIVER_VERSION}-oss.jar inside your ` +
2828
'project directory'
2929
);
3030
}
3131
)
3232
);
3333
}
3434

35-
export function extractUidFromJdbcUrl(jdbcUrl: string): string {
36-
const { pathname } = new URL(jdbcUrl);
37-
const [_, ...params] = pathname.split(';');
38-
const searchParams = new URLSearchParams(params.join('&'));
39-
return searchParams.get('UID') || 'token';
35+
/**
36+
* Extract if exist UID and PWD from URL and return UID, PWD and URL without these params.
37+
* New Databricks OSS driver throws an error if UID and PWD are provided in the URL and as a separate params
38+
* passed to the driver instance. That's why we strip them out from the URL if they exist there.
39+
* @param jdbcUrl
40+
*/
41+
export function extractAndRemoveUidPwdFromJdbcUrl(jdbcUrl: string): [uid: string, pwd: string, cleanedUrl: string] {
42+
const uidMatch = jdbcUrl.match(/UID=([^;]*)/i);
43+
const pwdMatch = jdbcUrl.match(/PWD=([^;]*)/i);
44+
45+
const uid = uidMatch?.[1] || 'token';
46+
const pwd = pwdMatch?.[1] || '';
47+
48+
const cleanedUrl = jdbcUrl
49+
.replace(/;?UID=[^;]*/i, '')
50+
.replace(/;?PWD=[^;]*/i, '');
51+
52+
return [uid, pwd, cleanedUrl];
4053
}
Lines changed: 23 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,38 +1,32 @@
11
import path from 'path';
22
import { downloadAndExtractFile, getEnv } from '@cubejs-backend/shared';
33

4-
function acceptedByEnv() {
5-
const acceptStatus = getEnv('databrickAcceptPolicy');
6-
if (acceptStatus) {
7-
console.log('You accepted Terms & Conditions for JDBC driver from DataBricks by CUBEJS_DB_DATABRICKS_ACCEPT_POLICY');
8-
}
9-
10-
if (acceptStatus === false) {
11-
console.log('You declined Terms & Conditions for JDBC driver from DataBricks by CUBEJS_DB_DATABRICKS_ACCEPT_POLICY');
12-
console.log('Installation will be skipped');
13-
}
14-
15-
return acceptStatus;
16-
}
17-
4+
export const OSS_DRIVER_VERSION = '1.0.2';
5+
6+
/**
7+
* In the beginning of 2025 Databricks released their open-source version of JDBC driver and encourage
8+
* all users to migrate to it, as the company plans to focus on improving and evolving it over the legacy Simba driver.
9+
* More info about OSS Driver could be found at https://docs.databricks.com/aws/en/integrations/jdbc/oss
10+
* As of March 2025, to use the Databricks JDBC Driver (OSS), the following requirements must be met:
11+
* Java Runtime Environment (JRE) 11.0 or above. CI testing is supported on JRE 11, 17, and 21.
12+
*/
1813
export async function downloadJDBCDriver(): Promise<string | null> {
19-
const driverAccepted = acceptedByEnv();
20-
21-
if (driverAccepted) {
22-
console.log('Downloading DatabricksJDBC42-2.6.29.1051');
14+
// TODO: Just to throw a console warning that this ENV is obsolete and could be safely removed
15+
getEnv('databrickAcceptPolicy');
2316

24-
await downloadAndExtractFile(
25-
'https://databricks-bi-artifacts.s3.us-east-2.amazonaws.com/simbaspark-drivers/jdbc/2.6.29/DatabricksJDBC42-2.6.29.1051.zip',
26-
{
27-
showProgress: true,
28-
cwd: path.resolve(path.join(__dirname, '..', 'download')),
29-
}
30-
);
17+
console.log(`Downloading databricks-jdbc-${OSS_DRIVER_VERSION}-oss.jar`);
3118

32-
console.log('Release notes: https://databricks-bi-artifacts.s3.us-east-2.amazonaws.com/simbaspark-drivers/jdbc/2.6.29/docs/release-notes.txt');
19+
await downloadAndExtractFile(
20+
`https://repo1.maven.org/maven2/com/databricks/databricks-jdbc/${OSS_DRIVER_VERSION}-oss/databricks-jdbc-${OSS_DRIVER_VERSION}-oss.jar`,
21+
{
22+
showProgress: true,
23+
cwd: path.resolve(path.join(__dirname, '..', 'download')),
24+
skipExtract: true,
25+
dstFileName: `databricks-jdbc-${OSS_DRIVER_VERSION}-oss.jar`,
26+
}
27+
);
3328

34-
return path.resolve(path.join(__dirname, '..', 'download', 'DatabricksJDBC42.jar'));
35-
}
29+
console.log(`Release notes: https://mvnrepository.com/artifact/com.databricks/databricks-jdbc/${OSS_DRIVER_VERSION}-oss`);
3630

37-
return null;
31+
return path.resolve(path.join(__dirname, '..', 'download', `databricks-jdbc-${OSS_DRIVER_VERSION}-oss.jar`));
3832
}

packages/cubejs-databricks-jdbc-driver/src/post-install.ts

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,11 @@
11
import 'source-map-support/register';
22

33
import { displayCLIError } from '@cubejs-backend/shared';
4-
5-
import fs from 'fs';
6-
import path from 'path';
7-
import { downloadJDBCDriver } from './installer';
4+
import { resolveJDBCDriver } from './helpers';
85

96
(async () => {
107
try {
11-
if (!fs.existsSync(path.join(__dirname, '..', 'download', 'SparkJDBC42.jar'))) {
12-
await downloadJDBCDriver();
13-
}
8+
await resolveJDBCDriver();
149
} catch (e: any) {
1510
await displayCLIError(e, 'Cube.js Databricks JDBC Installer');
1611
}

0 commit comments

Comments (0)