diff --git a/packages/cubejs-backend-shared/src/env.ts b/packages/cubejs-backend-shared/src/env.ts index c56ffc62cd0ee..9e48c7fc1fbb5 100644 --- a/packages/cubejs-backend-shared/src/env.ts +++ b/packages/cubejs-backend-shared/src/env.ts @@ -954,10 +954,17 @@ const variables: Record any> = { /** * Accept Databricks policy flag. This environment variable doesn't * need to be split by the data source. + * TODO: Tech-debt: Remove totally someday */ - databrickAcceptPolicy: () => ( - get('CUBEJS_DB_DATABRICKS_ACCEPT_POLICY').asBoolStrict() - ), + databrickAcceptPolicy: () => { + const val = get('CUBEJS_DB_DATABRICKS_ACCEPT_POLICY').asBoolStrict(); + + if (val !== undefined) { + console.warn( + 'The CUBEJS_DB_DATABRICKS_ACCEPT_POLICY is not needed anymore. Please, remove it' + ); + } + }, /** * Databricks jdbc-connection url. diff --git a/packages/cubejs-backend-shared/src/http-utils.ts b/packages/cubejs-backend-shared/src/http-utils.ts index a69de11928854..246c95d569e3e 100644 --- a/packages/cubejs-backend-shared/src/http-utils.ts +++ b/packages/cubejs-backend-shared/src/http-utils.ts @@ -66,9 +66,11 @@ export async function streamWithProgress( type DownloadAndExtractFile = { showProgress: boolean; cwd: string; + skipExtract?: boolean; + dstFileName?: string; }; -export async function downloadAndExtractFile(url: string, { cwd }: DownloadAndExtractFile) { +export async function downloadAndExtractFile(url: string, { cwd, skipExtract, dstFileName }: DownloadAndExtractFile) { const request = new Request(url, { headers: new Headers({ 'Content-Type': 'application/octet-stream', @@ -99,7 +101,18 @@ export async function downloadAndExtractFile(url: string, { cwd }: DownloadAndEx }); }); - await decompress(savedFilePath, cwd); + if (skipExtract) { + if (dstFileName) { + fs.copyFileSync(savedFilePath, path.resolve(path.join(cwd, dstFileName))); + } else { + // We still need some name for a file + const tmpFileName = path.basename(savedFilePath); + const destPath = path.join(cwd, tmpFileName); + fs.copyFileSync(savedFilePath, destPath); + } + } else { + await decompress(savedFilePath, cwd); + } try { fs.unlinkSync(savedFilePath); diff --git a/packages/cubejs-backend-shared/test/db_env_multi.test.ts b/packages/cubejs-backend-shared/test/db_env_multi.test.ts index f42e177fbbb83..7219d039e422a 100644 --- a/packages/cubejs-backend-shared/test/db_env_multi.test.ts +++ b/packages/cubejs-backend-shared/test/db_env_multi.test.ts @@ -1192,34 +1192,6 @@ describe('Multiple datasources', () => { ); }); - test('getEnv("databrickAcceptPolicy")', () => { - process.env.CUBEJS_DB_DATABRICKS_ACCEPT_POLICY = 'true'; - expect(getEnv('databrickAcceptPolicy', { dataSource: 'default' })).toEqual(true); - expect(getEnv('databrickAcceptPolicy', { dataSource: 'postgres' })).toEqual(true); - expect(getEnv('databrickAcceptPolicy', { dataSource: 'wrong' })).toEqual(true); - - process.env.CUBEJS_DB_DATABRICKS_ACCEPT_POLICY = 'false'; - expect(getEnv('databrickAcceptPolicy', { dataSource: 'default' })).toEqual(false); - expect(getEnv('databrickAcceptPolicy', { dataSource: 'postgres' })).toEqual(false); - expect(getEnv('databrickAcceptPolicy', { dataSource: 'wrong' })).toEqual(false); - - process.env.CUBEJS_DB_DATABRICKS_ACCEPT_POLICY = 'wrong'; - expect(() => getEnv('databrickAcceptPolicy', { dataSource: 'default' })).toThrow( - 'env-var: "CUBEJS_DB_DATABRICKS_ACCEPT_POLICY" should be either "true", "false", "TRUE", or "FALSE"' - ); - expect(() => getEnv('databrickAcceptPolicy', { dataSource: 'postgres' })).toThrow( - 'env-var: "CUBEJS_DB_DATABRICKS_ACCEPT_POLICY" should be either "true", "false", "TRUE", or "FALSE"' - ); - expect(() => getEnv('databrickAcceptPolicy', { dataSource: 'wrong' })).toThrow( - 'env-var: "CUBEJS_DB_DATABRICKS_ACCEPT_POLICY" should be either "true", "false", "TRUE", or "FALSE"' - ); - - delete process.env.CUBEJS_DB_DATABRICKS_ACCEPT_POLICY; - expect(getEnv('databrickAcceptPolicy', { dataSource: 'default' })).toBeUndefined(); - expect(getEnv('databrickAcceptPolicy', { dataSource: 'postgres' })).toBeUndefined(); - expect(getEnv('databrickAcceptPolicy', { dataSource: 'wrong' })).toBeUndefined(); - }); - test('getEnv("athenaAwsKey")', () => { process.env.CUBEJS_AWS_KEY = 'default1'; process.env.CUBEJS_DS_POSTGRES_AWS_KEY = 'postgres1'; diff --git a/packages/cubejs-backend-shared/test/db_env_single.test.ts b/packages/cubejs-backend-shared/test/db_env_single.test.ts index f5de389afe9a2..1dd5612309f32 100644 --- a/packages/cubejs-backend-shared/test/db_env_single.test.ts +++ b/packages/cubejs-backend-shared/test/db_env_single.test.ts @@ -760,34 +760,6 @@ describe('Single datasources', () => { expect(getEnv('databricksCatalog', { dataSource: 'wrong' })).toBeUndefined(); }); - test('getEnv("databrickAcceptPolicy")', () => { - process.env.CUBEJS_DB_DATABRICKS_ACCEPT_POLICY = 'true'; - expect(getEnv('databrickAcceptPolicy', { dataSource: 'default' })).toEqual(true); - expect(getEnv('databrickAcceptPolicy', { dataSource: 'postgres' })).toEqual(true); - expect(getEnv('databrickAcceptPolicy', { dataSource: 'wrong' })).toEqual(true); - - process.env.CUBEJS_DB_DATABRICKS_ACCEPT_POLICY = 'false'; - expect(getEnv('databrickAcceptPolicy', { dataSource: 'default' })).toEqual(false); - expect(getEnv('databrickAcceptPolicy', { dataSource: 'postgres' })).toEqual(false); - expect(getEnv('databrickAcceptPolicy', { dataSource: 'wrong' })).toEqual(false); - - process.env.CUBEJS_DB_DATABRICKS_ACCEPT_POLICY = 'wrong'; - expect(() => getEnv('databrickAcceptPolicy', { dataSource: 'default' })).toThrow( - 'env-var: "CUBEJS_DB_DATABRICKS_ACCEPT_POLICY" should be either "true", "false", "TRUE", or "FALSE"' - ); - expect(() => getEnv('databrickAcceptPolicy', { dataSource: 'postgres' })).toThrow( - 'env-var: "CUBEJS_DB_DATABRICKS_ACCEPT_POLICY" should be either "true", "false", "TRUE", or "FALSE"' - ); - expect(() => getEnv('databrickAcceptPolicy', { dataSource: 'wrong' })).toThrow( - 'env-var: "CUBEJS_DB_DATABRICKS_ACCEPT_POLICY" should be either "true", "false", "TRUE", or "FALSE"' - ); - - delete process.env.CUBEJS_DB_DATABRICKS_ACCEPT_POLICY; - expect(getEnv('databrickAcceptPolicy', { dataSource: 'default' })).toBeUndefined(); - expect(getEnv('databrickAcceptPolicy', { dataSource: 'postgres' })).toBeUndefined(); - expect(getEnv('databrickAcceptPolicy', { dataSource: 'wrong' })).toBeUndefined(); - }); - test('getEnv("athenaAwsKey")', () => { process.env.CUBEJS_AWS_KEY = 'default1'; expect(getEnv('athenaAwsKey', { dataSource: 'default' })).toEqual('default1'); diff --git a/packages/cubejs-databricks-jdbc-driver/src/DatabricksDriver.ts b/packages/cubejs-databricks-jdbc-driver/src/DatabricksDriver.ts index 15c6d2c04cab2..34eb5547fa429 100644 --- a/packages/cubejs-databricks-jdbc-driver/src/DatabricksDriver.ts +++ b/packages/cubejs-databricks-jdbc-driver/src/DatabricksDriver.ts @@ -18,7 +18,10 @@ import { } from '@cubejs-backend/base-driver'; import { JDBCDriver, JDBCDriverConfiguration, } from '@cubejs-backend/jdbc-driver'; import { DatabricksQuery } from './DatabricksQuery'; -import { extractUidFromJdbcUrl, resolveJDBCDriver } from './helpers'; +import { + extractAndRemoveUidPwdFromJdbcUrl, + resolveJDBCDriver +} from './helpers'; const SUPPORTED_BUCKET_TYPES = ['s3', 'gcs', 'azure']; @@ -133,7 +136,7 @@ export class DatabricksDriver extends JDBCDriver { /** * Show warning message flag. */ - private showSparkProtocolWarn: boolean; + private readonly showSparkProtocolWarn: boolean; /** * Driver Configuration. @@ -187,20 +190,20 @@ export class DatabricksDriver extends JDBCDriver { url = url.replace('jdbc:spark://', 'jdbc:databricks://'); } + const [uid, pwd, cleanedUrl] = extractAndRemoveUidPwdFromJdbcUrl(url); + const config: DatabricksDriverConfiguration = { ...conf, - url, + url: cleanedUrl, dbType: 'databricks', drivername: 'com.databricks.client.jdbc.Driver', customClassPath: undefined, properties: { - UID: extractUidFromJdbcUrl(url), - // PWD-parameter passed to the connection string has higher priority, - // so we can set this one to an empty string to avoid a Java error. + UID: uid, PWD: conf?.token || getEnv('databrickToken', { dataSource }) || - '', + pwd, UserAgentEntry: 'CubeDev_Cube', }, catalog: diff --git a/packages/cubejs-databricks-jdbc-driver/src/helpers.ts b/packages/cubejs-databricks-jdbc-driver/src/helpers.ts index b864d07b4c007..35b05dba3298f 100644 --- a/packages/cubejs-databricks-jdbc-driver/src/helpers.ts +++ b/packages/cubejs-databricks-jdbc-driver/src/helpers.ts @@ -1,7 +1,7 @@ import fs from 'fs'; import path from 'path'; -import { downloadJDBCDriver } from './installer'; +import { downloadJDBCDriver, OSS_DRIVER_VERSION } from './installer'; async function fileExistsOr( fsPath: string, @@ -15,16 +15,16 @@ async function fileExistsOr( export async function resolveJDBCDriver(): Promise { return fileExistsOr( - path.join(process.cwd(), 'DatabricksJDBC42.jar'), + path.join(process.cwd(), `databricks-jdbc-${OSS_DRIVER_VERSION}-oss.jar`), async () => fileExistsOr( - path.join(__dirname, '..', 'download', 'DatabricksJDBC42.jar'), + path.join(__dirname, '..', 'download', `databricks-jdbc-${OSS_DRIVER_VERSION}-oss.jar`), async () => { const pathOrNull = await downloadJDBCDriver(); if (pathOrNull) { return pathOrNull; } throw new Error( - 'Please download and place DatabricksJDBC42.jar inside your ' + + `Please download and place databricks-jdbc-${OSS_DRIVER_VERSION}-oss.jar inside your ` + 'project directory' ); } @@ -32,9 +32,22 @@ export async function resolveJDBCDriver(): Promise { ); } -export function extractUidFromJdbcUrl(jdbcUrl: string): string { - const { pathname } = new URL(jdbcUrl); - const [_, ...params] = pathname.split(';'); - const searchParams = new URLSearchParams(params.join('&')); - return searchParams.get('UID') || 'token'; +/** + * Extract if exist UID and PWD from URL and return UID, PWD and URL without these params. + * New Databricks OSS driver throws an error if UID and PWD are provided in the URL and as a separate params + * passed to the driver instance. That's why we strip them out from the URL if they exist there. + * @param jdbcUrl + */ +export function extractAndRemoveUidPwdFromJdbcUrl(jdbcUrl: string): [uid: string, pwd: string, cleanedUrl: string] { + const uidMatch = jdbcUrl.match(/UID=([^;]*)/i); + const pwdMatch = jdbcUrl.match(/PWD=([^;]*)/i); + + const uid = uidMatch?.[1] || 'token'; + const pwd = pwdMatch?.[1] || ''; + + const cleanedUrl = jdbcUrl + .replace(/;?UID=[^;]*/i, '') + .replace(/;?PWD=[^;]*/i, ''); + + return [uid, pwd, cleanedUrl]; } diff --git a/packages/cubejs-databricks-jdbc-driver/src/installer.ts b/packages/cubejs-databricks-jdbc-driver/src/installer.ts index fb405e325a701..fce2a28330468 100644 --- a/packages/cubejs-databricks-jdbc-driver/src/installer.ts +++ b/packages/cubejs-databricks-jdbc-driver/src/installer.ts @@ -1,38 +1,32 @@ import path from 'path'; import { downloadAndExtractFile, getEnv } from '@cubejs-backend/shared'; -function acceptedByEnv() { - const acceptStatus = getEnv('databrickAcceptPolicy'); - if (acceptStatus) { - console.log('You accepted Terms & Conditions for JDBC driver from DataBricks by CUBEJS_DB_DATABRICKS_ACCEPT_POLICY'); - } - - if (acceptStatus === false) { - console.log('You declined Terms & Conditions for JDBC driver from DataBricks by CUBEJS_DB_DATABRICKS_ACCEPT_POLICY'); - console.log('Installation will be skipped'); - } - - return acceptStatus; -} - +export const OSS_DRIVER_VERSION = '1.0.2'; + +/** + * In the beginning of 2025 Databricks released their open-source version of JDBC driver and encourage + * all users to migrate to it as company plans to focus on improving and evolving it over legacy simba driver. + * More info about OSS Driver could be found at https://docs.databricks.com/aws/en/integrations/jdbc/oss + * As of March 2025 To use the Databricks JDBC Driver (OSS), the following requirements must be met: + * Java Runtime Environment (JRE) 11.0 or above. CI testing is supported on JRE 11, 17, and 21. + */ export async function downloadJDBCDriver(): Promise { - const driverAccepted = acceptedByEnv(); - - if (driverAccepted) { - console.log('Downloading DatabricksJDBC42-2.6.29.1051'); + // TODO: Just to throw a console warning that this ENV is obsolete and could be safely removed + getEnv('databrickAcceptPolicy'); - await downloadAndExtractFile( - 'https://databricks-bi-artifacts.s3.us-east-2.amazonaws.com/simbaspark-drivers/jdbc/2.6.29/DatabricksJDBC42-2.6.29.1051.zip', - { - showProgress: true, - cwd: path.resolve(path.join(__dirname, '..', 'download')), - } - ); + console.log(`Downloading databricks-jdbc-${OSS_DRIVER_VERSION}-oss.jar`); - console.log('Release notes: https://databricks-bi-artifacts.s3.us-east-2.amazonaws.com/simbaspark-drivers/jdbc/2.6.29/docs/release-notes.txt'); + await downloadAndExtractFile( + `https://repo1.maven.org/maven2/com/databricks/databricks-jdbc/${OSS_DRIVER_VERSION}-oss/databricks-jdbc-${OSS_DRIVER_VERSION}-oss.jar`, + { + showProgress: true, + cwd: path.resolve(path.join(__dirname, '..', 'download')), + skipExtract: true, + dstFileName: `databricks-jdbc-${OSS_DRIVER_VERSION}-oss.jar`, + } + ); - return path.resolve(path.join(__dirname, '..', 'download', 'DatabricksJDBC42.jar')); - } + console.log(`Release notes: https://mvnrepository.com/artifact/com.databricks/databricks-jdbc/${OSS_DRIVER_VERSION}-oss`); - return null; + return path.resolve(path.join(__dirname, '..', 'download', `databricks-jdbc-${OSS_DRIVER_VERSION}-oss.jar`)); } diff --git a/packages/cubejs-databricks-jdbc-driver/src/post-install.ts b/packages/cubejs-databricks-jdbc-driver/src/post-install.ts index 8f934ef9b9103..51e4a506d4588 100644 --- a/packages/cubejs-databricks-jdbc-driver/src/post-install.ts +++ b/packages/cubejs-databricks-jdbc-driver/src/post-install.ts @@ -1,16 +1,11 @@ import 'source-map-support/register'; import { displayCLIError } from '@cubejs-backend/shared'; - -import fs from 'fs'; -import path from 'path'; -import { downloadJDBCDriver } from './installer'; +import { resolveJDBCDriver } from './helpers'; (async () => { try { - if (!fs.existsSync(path.join(__dirname, '..', 'download', 'SparkJDBC42.jar'))) { - await downloadJDBCDriver(); - } + await resolveJDBCDriver(); } catch (e: any) { await displayCLIError(e, 'Cube.js Databricks JDBC Installer'); }