diff --git a/.github/workflows/drivers-tests.yml b/.github/workflows/drivers-tests.yml index b323c47961c0a..56ccaacaad269 100644 --- a/.github/workflows/drivers-tests.yml +++ b/.github/workflows/drivers-tests.yml @@ -355,6 +355,8 @@ jobs: DRIVERS_TESTS_CUBEJS_DB_DATABRICKS_TOKEN: ${{ secrets.DRIVERS_TESTS_CUBEJS_DB_DATABRICKS_TOKEN }} DRIVERS_TESTS_CUBEJS_DB_EXPORT_BUCKET_AWS_KEY: ${{ secrets.DRIVERS_TESTS_CUBEJS_DB_EXPORT_BUCKET_AWS_KEY }} DRIVERS_TESTS_CUBEJS_DB_EXPORT_BUCKET_AWS_SECRET: ${{ secrets.DRIVERS_TESTS_CUBEJS_DB_EXPORT_BUCKET_AWS_SECRET }} + DRIVERS_TESTS_CUBEJS_DB_DATABRICKS_OAUTH_CLIENT_ID: ${{ secrets.DRIVERS_TESTS_CUBEJS_DB_DATABRICKS_OAUTH_CLIENT_ID }} + DRIVERS_TESTS_CUBEJS_DB_DATABRICKS_OAUTH_CLIENT_SECRET: ${{ secrets.DRIVERS_TESTS_CUBEJS_DB_DATABRICKS_OAUTH_CLIENT_SECRET }} # Redshift DRIVERS_TESTS_CUBEJS_DB_REDSHIFT_HOST: ${{ secrets.DRIVERS_TESTS_CUBEJS_DB_REDSHIFT_HOST }} diff --git a/packages/cubejs-backend-shared/src/env.ts b/packages/cubejs-backend-shared/src/env.ts index c7b0c2e3bd584..d30d2eba44ae9 100644 --- a/packages/cubejs-backend-shared/src/env.ts +++ b/packages/cubejs-backend-shared/src/env.ts @@ -951,25 +951,10 @@ const variables: Record any> = { * Databricks Driver * ***************************************************************** */ - /** - * Accept Databricks policy flag. This environment variable doesn't - * need to be split by the data source. - * TODO: Tech-debt: Remove totally someday - */ - databrickAcceptPolicy: () => { - const val = get('CUBEJS_DB_DATABRICKS_ACCEPT_POLICY').asBoolStrict(); - - if (val !== undefined) { - console.warn( - 'The CUBEJS_DB_DATABRICKS_ACCEPT_POLICY is not needed anymore. Please, remove it' - ); - } - }, - /** * Databricks jdbc-connection url. */ - databrickUrl: ({ + databricksUrl: ({ dataSource, }: { dataSource: string, @@ -990,7 +975,7 @@ const variables: Record any> = { /** * Databricks jdbc-connection token. 
*/ - databrickToken: ({ + databricksToken: ({ dataSource, }: { dataSource: string, @@ -1012,6 +997,32 @@ const variables: Record any> = { keyByDataSource('CUBEJS_DB_DATABRICKS_CATALOG', dataSource) ], + /** + * Databricks OAuth Client ID (Same as the service principal UUID) + */ + databricksOAuthClientId: ({ + dataSource, + }: { + dataSource: string, + }) => ( + process.env[ + keyByDataSource('CUBEJS_DB_DATABRICKS_OAUTH_CLIENT_ID', dataSource) + ] + ), + + /** + * Databricks OAuth Client Secret. + */ + databricksOAuthClientSecret: ({ + dataSource, + }: { + dataSource: string, + }) => ( + process.env[ + keyByDataSource('CUBEJS_DB_DATABRICKS_OAUTH_CLIENT_SECRET', dataSource) + ] + ), + /** **************************************************************** * Athena Driver * ***************************************************************** */ diff --git a/packages/cubejs-backend-shared/test/db_env_multi.test.ts b/packages/cubejs-backend-shared/test/db_env_multi.test.ts index 7219d039e422a..c93b0fa4329fb 100644 --- a/packages/cubejs-backend-shared/test/db_env_multi.test.ts +++ b/packages/cubejs-backend-shared/test/db_env_multi.test.ts @@ -1105,31 +1105,31 @@ describe('Multiple datasources', () => { process.env.CUBEJS_DB_DATABRICKS_URL = 'default1'; process.env.CUBEJS_DS_POSTGRES_DB_DATABRICKS_URL = 'postgres1'; process.env.CUBEJS_DS_WRONG_DB_DATABRICKS_URL = 'wrong1'; - expect(getEnv('databrickUrl', { dataSource: 'default' })).toEqual('default1'); - expect(getEnv('databrickUrl', { dataSource: 'postgres' })).toEqual('postgres1'); - expect(() => getEnv('databrickUrl', { dataSource: 'wrong' })).toThrow( + expect(getEnv('databricksUrl', { dataSource: 'default' })).toEqual('default1'); + expect(getEnv('databricksUrl', { dataSource: 'postgres' })).toEqual('postgres1'); + expect(() => getEnv('databricksUrl', { dataSource: 'wrong' })).toThrow( 'The wrong data source is missing in the declared CUBEJS_DATASOURCES.' 
); process.env.CUBEJS_DB_DATABRICKS_URL = 'default2'; process.env.CUBEJS_DS_POSTGRES_DB_DATABRICKS_URL = 'postgres2'; process.env.CUBEJS_DS_WRONG_DB_DATABRICKS_URL = 'wrong2'; - expect(getEnv('databrickUrl', { dataSource: 'default' })).toEqual('default2'); - expect(getEnv('databrickUrl', { dataSource: 'postgres' })).toEqual('postgres2'); - expect(() => getEnv('databrickUrl', { dataSource: 'wrong' })).toThrow( + expect(getEnv('databricksUrl', { dataSource: 'default' })).toEqual('default2'); + expect(getEnv('databricksUrl', { dataSource: 'postgres' })).toEqual('postgres2'); + expect(() => getEnv('databricksUrl', { dataSource: 'wrong' })).toThrow( 'The wrong data source is missing in the declared CUBEJS_DATASOURCES.' ); delete process.env.CUBEJS_DB_DATABRICKS_URL; delete process.env.CUBEJS_DS_POSTGRES_DB_DATABRICKS_URL; delete process.env.CUBEJS_DS_WRONG_DB_DATABRICKS_URL; - expect(() => getEnv('databrickUrl', { dataSource: 'default' })).toThrow( + expect(() => getEnv('databricksUrl', { dataSource: 'default' })).toThrow( 'The CUBEJS_DB_DATABRICKS_URL is required and missing.' ); - expect(() => getEnv('databrickUrl', { dataSource: 'postgres' })).toThrow( + expect(() => getEnv('databricksUrl', { dataSource: 'postgres' })).toThrow( 'The CUBEJS_DS_POSTGRES_DB_DATABRICKS_URL is required and missing.' ); - expect(() => getEnv('databrickUrl', { dataSource: 'wrong' })).toThrow( + expect(() => getEnv('databricksUrl', { dataSource: 'wrong' })).toThrow( 'The wrong data source is missing in the declared CUBEJS_DATASOURCES.' 
); }); @@ -1138,27 +1138,27 @@ describe('Multiple datasources', () => { process.env.CUBEJS_DB_DATABRICKS_TOKEN = 'default1'; process.env.CUBEJS_DS_POSTGRES_DB_DATABRICKS_TOKEN = 'postgres1'; process.env.CUBEJS_DS_WRONG_DB_DATABRICKS_TOKEN = 'wrong1'; - expect(getEnv('databrickToken', { dataSource: 'default' })).toEqual('default1'); - expect(getEnv('databrickToken', { dataSource: 'postgres' })).toEqual('postgres1'); - expect(() => getEnv('databrickToken', { dataSource: 'wrong' })).toThrow( + expect(getEnv('databricksToken', { dataSource: 'default' })).toEqual('default1'); + expect(getEnv('databricksToken', { dataSource: 'postgres' })).toEqual('postgres1'); + expect(() => getEnv('databricksToken', { dataSource: 'wrong' })).toThrow( 'The wrong data source is missing in the declared CUBEJS_DATASOURCES.' ); process.env.CUBEJS_DB_DATABRICKS_TOKEN = 'default2'; process.env.CUBEJS_DS_POSTGRES_DB_DATABRICKS_TOKEN = 'postgres2'; process.env.CUBEJS_DS_WRONG_DB_DATABRICKS_TOKEN = 'wrong2'; - expect(getEnv('databrickToken', { dataSource: 'default' })).toEqual('default2'); - expect(getEnv('databrickToken', { dataSource: 'postgres' })).toEqual('postgres2'); - expect(() => getEnv('databrickToken', { dataSource: 'wrong' })).toThrow( + expect(getEnv('databricksToken', { dataSource: 'default' })).toEqual('default2'); + expect(getEnv('databricksToken', { dataSource: 'postgres' })).toEqual('postgres2'); + expect(() => getEnv('databricksToken', { dataSource: 'wrong' })).toThrow( 'The wrong data source is missing in the declared CUBEJS_DATASOURCES.' 
); delete process.env.CUBEJS_DB_DATABRICKS_TOKEN; delete process.env.CUBEJS_DS_POSTGRES_DB_DATABRICKS_TOKEN; delete process.env.CUBEJS_DS_WRONG_DB_DATABRICKS_TOKEN; - expect(getEnv('databrickToken', { dataSource: 'default' })).toBeUndefined(); - expect(getEnv('databrickToken', { dataSource: 'postgres' })).toBeUndefined(); - expect(() => getEnv('databrickToken', { dataSource: 'wrong' })).toThrow( + expect(getEnv('databricksToken', { dataSource: 'default' })).toBeUndefined(); + expect(getEnv('databricksToken', { dataSource: 'postgres' })).toBeUndefined(); + expect(() => getEnv('databricksToken', { dataSource: 'wrong' })).toThrow( 'The wrong data source is missing in the declared CUBEJS_DATASOURCES.' ); }); diff --git a/packages/cubejs-backend-shared/test/db_env_single.test.ts b/packages/cubejs-backend-shared/test/db_env_single.test.ts index 1dd5612309f32..411aa0eb79558 100644 --- a/packages/cubejs-backend-shared/test/db_env_single.test.ts +++ b/packages/cubejs-backend-shared/test/db_env_single.test.ts @@ -705,42 +705,42 @@ describe('Single datasources', () => { test('getEnv("databrickUrl")', () => { process.env.CUBEJS_DB_DATABRICKS_URL = 'default1'; - expect(getEnv('databrickUrl', { dataSource: 'default' })).toEqual('default1'); - expect(getEnv('databrickUrl', { dataSource: 'postgres' })).toEqual('default1'); - expect(getEnv('databrickUrl', { dataSource: 'wrong' })).toEqual('default1'); + expect(getEnv('databricksUrl', { dataSource: 'default' })).toEqual('default1'); + expect(getEnv('databricksUrl', { dataSource: 'postgres' })).toEqual('default1'); + expect(getEnv('databricksUrl', { dataSource: 'wrong' })).toEqual('default1'); process.env.CUBEJS_DB_DATABRICKS_URL = 'default2'; - expect(getEnv('databrickUrl', { dataSource: 'default' })).toEqual('default2'); - expect(getEnv('databrickUrl', { dataSource: 'postgres' })).toEqual('default2'); - expect(getEnv('databrickUrl', { dataSource: 'wrong' })).toEqual('default2'); + expect(getEnv('databricksUrl', { dataSource: 
'default' })).toEqual('default2'); + expect(getEnv('databricksUrl', { dataSource: 'postgres' })).toEqual('default2'); + expect(getEnv('databricksUrl', { dataSource: 'wrong' })).toEqual('default2'); delete process.env.CUBEJS_DB_DATABRICKS_URL; - expect(() => getEnv('databrickUrl', { dataSource: 'default' })).toThrow( + expect(() => getEnv('databricksUrl', { dataSource: 'default' })).toThrow( 'The CUBEJS_DB_DATABRICKS_URL is required and missing.' ); - expect(() => getEnv('databrickUrl', { dataSource: 'postgres' })).toThrow( + expect(() => getEnv('databricksUrl', { dataSource: 'postgres' })).toThrow( 'The CUBEJS_DB_DATABRICKS_URL is required and missing.' ); - expect(() => getEnv('databrickUrl', { dataSource: 'wrong' })).toThrow( + expect(() => getEnv('databricksUrl', { dataSource: 'wrong' })).toThrow( 'The CUBEJS_DB_DATABRICKS_URL is required and missing.' ); }); test('getEnv("databrickToken")', () => { process.env.CUBEJS_DB_DATABRICKS_TOKEN = 'default1'; - expect(getEnv('databrickToken', { dataSource: 'default' })).toEqual('default1'); - expect(getEnv('databrickToken', { dataSource: 'postgres' })).toEqual('default1'); - expect(getEnv('databrickToken', { dataSource: 'wrong' })).toEqual('default1'); + expect(getEnv('databricksToken', { dataSource: 'default' })).toEqual('default1'); + expect(getEnv('databricksToken', { dataSource: 'postgres' })).toEqual('default1'); + expect(getEnv('databricksToken', { dataSource: 'wrong' })).toEqual('default1'); process.env.CUBEJS_DB_DATABRICKS_TOKEN = 'default2'; - expect(getEnv('databrickToken', { dataSource: 'default' })).toEqual('default2'); - expect(getEnv('databrickToken', { dataSource: 'postgres' })).toEqual('default2'); - expect(getEnv('databrickToken', { dataSource: 'wrong' })).toEqual('default2'); + expect(getEnv('databricksToken', { dataSource: 'default' })).toEqual('default2'); + expect(getEnv('databricksToken', { dataSource: 'postgres' })).toEqual('default2'); + expect(getEnv('databricksToken', { dataSource: 'wrong' 
})).toEqual('default2'); delete process.env.CUBEJS_DB_DATABRICKS_TOKEN; - expect(getEnv('databrickToken', { dataSource: 'default' })).toBeUndefined(); - expect(getEnv('databrickToken', { dataSource: 'postgres' })).toBeUndefined(); - expect(getEnv('databrickToken', { dataSource: 'wrong' })).toBeUndefined(); + expect(getEnv('databricksToken', { dataSource: 'default' })).toBeUndefined(); + expect(getEnv('databricksToken', { dataSource: 'postgres' })).toBeUndefined(); + expect(getEnv('databricksToken', { dataSource: 'wrong' })).toBeUndefined(); }); test('getEnv("databricksCatalog")', () => { diff --git a/packages/cubejs-databricks-jdbc-driver/src/DatabricksDriver.ts b/packages/cubejs-databricks-jdbc-driver/src/DatabricksDriver.ts index 85cc01578951a..efcc602b5eb63 100644 --- a/packages/cubejs-databricks-jdbc-driver/src/DatabricksDriver.ts +++ b/packages/cubejs-databricks-jdbc-driver/src/DatabricksDriver.ts @@ -90,6 +90,16 @@ export type DatabricksDriverConfiguration = JDBCDriverConfiguration & */ token?: string, + /** + * Databricks OAuth Client ID. + */ + oauthClientId?: string, + + /** + * Databricks OAuth Client Secret. 
+ */ + oauthClientSecret?: string, + /** * Azure tenant Id */ @@ -152,6 +162,10 @@ export class DatabricksDriver extends JDBCDriver { private readonly parsedConnectionProperties: ParsedConnectionProperties; + private accessToken: string | undefined; + + private accessTokenExpires: number = 0; + public static dialectClass() { return DatabricksQuery; } @@ -192,7 +206,7 @@ export class DatabricksDriver extends JDBCDriver { let showSparkProtocolWarn = false; let url: string = conf?.url || - getEnv('databrickUrl', { dataSource }) || + getEnv('databricksUrl', { dataSource }) || getEnv('jdbcUrl', { dataSource }); if (url.indexOf('jdbc:spark://') !== -1) { showSparkProtocolWarn = true; @@ -200,6 +214,39 @@ export class DatabricksDriver extends JDBCDriver { } const [uid, pwd, cleanedUrl] = extractAndRemoveUidPwdFromJdbcUrl(url); + const passwd = conf?.token || + getEnv('databricksToken', { dataSource }) || + pwd; + const oauthClientId = conf?.oauthClientId || getEnv('databricksOAuthClientId', { dataSource }); + const oauthClientSecret = conf?.oauthClientSecret || getEnv('databricksOAuthClientSecret', { dataSource }); + + if (oauthClientId && !oauthClientSecret) { + throw new Error('Invalid credentials: No OAuth Client Secret provided'); + } else if (!oauthClientId && oauthClientSecret) { + throw new Error('Invalid credentials: No OAuth Client ID provided'); + } else if (!oauthClientId && !oauthClientSecret && !passwd) { + throw new Error('No credentials provided'); + } + + let authProps: Record<string, any> = {}; + + // OAuth has an advantage over UID+PWD + // For magic numbers below - see Databricks docs: + // https://docs.databricks.com/aws/en/integrations/jdbc-oss/configure#authenticate-the-driver + if (oauthClientId) { + authProps = { + OAuth2ClientID: oauthClientId, + OAuth2Secret: oauthClientSecret, + AuthMech: 11, + Auth_Flow: 1, + }; + } else { + authProps = { + UID: uid, + PWD: passwd, + AuthMech: 3, + }; + } const config: DatabricksDriverConfiguration = { ...conf, @@ -208,11
+255,7 @@ export class DatabricksDriver extends JDBCDriver { drivername: 'com.databricks.client.jdbc.Driver', customClassPath: undefined, properties: { - UID: uid, - PWD: - conf?.token || - getEnv('databrickToken', { dataSource }) || - pwd, + ...authProps, UserAgentEntry: 'CubeDev_Cube', }, catalog: @@ -291,8 +334,55 @@ export class DatabricksDriver extends JDBCDriver { this.showDeprecations(); } + private async fetchAccessToken(): Promise<void> { + // Need to exchange client ID + Secret => Access token + + const basicAuth = Buffer.from(`${this.config.properties.OAuth2ClientID}:${this.config.properties.OAuth2Secret}`).toString('base64'); + + const res = await fetch(`https://${this.parsedConnectionProperties.host}/oidc/v1/token`, { + method: 'POST', + headers: { + Authorization: `Basic ${basicAuth}`, + 'Content-Type': 'application/x-www-form-urlencoded', + }, + body: new URLSearchParams({ + grant_type: 'client_credentials', + scope: 'all-apis', + }), + }); + + if (!res.ok) { + throw new Error(`Failed to get access token: ${res.statusText}`); + } + + const resp = await res.json(); + + this.accessToken = resp.access_token; + this.accessTokenExpires = Date.now() + resp.expires_in * 1000 - 60_000; + } + + private async getValidAccessToken(): Promise<string> { + if ( + !this.accessToken || + !this.accessTokenExpires || + Date.now() >= this.accessTokenExpires + ) { + await this.fetchAccessToken(); + } + return this.accessToken!; + } + public override async testConnection() { - const token = `Bearer ${this.config.properties.PWD}`; + let token: string; + + // Databricks docs on accessing REST API + // https://docs.databricks.com/aws/en/dev-tools/auth/oauth-m2m + if (this.config.properties.OAuth2Secret) { + const at = await this.getValidAccessToken(); + token = `Bearer ${at}`; + } else { + token = `Bearer ${this.config.properties.PWD}`; + } const res = await fetch(`https://${this.parsedConnectionProperties.host}/api/2.0/sql/warehouses/${this.parsedConnectionProperties.warehouseId}`, {
headers: { Authorization: token }, diff --git a/packages/cubejs-databricks-jdbc-driver/src/helpers.ts b/packages/cubejs-databricks-jdbc-driver/src/helpers.ts index 7e62951d32a9c..75333b178e348 100644 --- a/packages/cubejs-databricks-jdbc-driver/src/helpers.ts +++ b/packages/cubejs-databricks-jdbc-driver/src/helpers.ts @@ -35,7 +35,7 @@ export async function resolveJDBCDriver(): Promise { /** * Extract if exist UID and PWD from URL and return UID, PWD and URL without these params. - * New Databricks OSS driver throws an error if UID and PWD are provided in the URL and as a separate params + * New Databricks OSS driver throws an error if any parameter is provided in the URL and as a separate param * passed to the driver instance. That's why we strip them out from the URL if they exist there. * @param jdbcUrl */ @@ -48,7 +48,8 @@ export function extractAndRemoveUidPwdFromJdbcUrl(jdbcUrl: string): [uid: string const cleanedUrl = jdbcUrl .replace(/;?UID=[^;]*/i, '') - .replace(/;?PWD=[^;]*/i, ''); + .replace(/;?PWD=[^;]*/i, '') + .replace(/;?AuthMech=[^;]*/i, ''); return [uid, pwd, cleanedUrl]; } diff --git a/packages/cubejs-databricks-jdbc-driver/src/installer.ts b/packages/cubejs-databricks-jdbc-driver/src/installer.ts index fce2a28330468..d0cd5339201ad 100644 --- a/packages/cubejs-databricks-jdbc-driver/src/installer.ts +++ b/packages/cubejs-databricks-jdbc-driver/src/installer.ts @@ -1,7 +1,7 @@ import path from 'path'; import { downloadAndExtractFile, getEnv } from '@cubejs-backend/shared'; -export const OSS_DRIVER_VERSION = '1.0.2'; +export const OSS_DRIVER_VERSION = '1.0.6'; /** * In the beginning of 2025 Databricks released their open-source version of JDBC driver and encourage @@ -11,9 +11,6 @@ export const OSS_DRIVER_VERSION = '1.0.2'; * Java Runtime Environment (JRE) 11.0 or above. CI testing is supported on JRE 11, 17, and 21. 
*/ export async function downloadJDBCDriver(): Promise<string> { - // TODO: Just to throw a console warning that this ENV is obsolete and could be safely removed - getEnv('databrickAcceptPolicy'); - console.log(`Downloading databricks-jdbc-${OSS_DRIVER_VERSION}-oss.jar`); await downloadAndExtractFile( diff --git a/packages/cubejs-testing-drivers/fixtures/databricks-jdbc.json b/packages/cubejs-testing-drivers/fixtures/databricks-jdbc.json index fb19c793fe6c2..0dc7bd8106dfe 100644 --- a/packages/cubejs-testing-drivers/fixtures/databricks-jdbc.json +++ b/packages/cubejs-testing-drivers/fixtures/databricks-jdbc.json @@ -7,7 +7,11 @@ "CUBEJS_DB_EXPORT_BUCKET": "s3://databricks-drivers-tests-preaggs", "CUBEJS_DB_EXPORT_BUCKET_AWS_KEY": "${DRIVERS_TESTS_CUBEJS_DB_EXPORT_BUCKET_AWS_KEY}", "CUBEJS_DB_EXPORT_BUCKET_AWS_SECRET": "${DRIVERS_TESTS_CUBEJS_DB_EXPORT_BUCKET_AWS_SECRET}", - "CUBEJS_DB_EXPORT_BUCKET_AWS_REGION": "us-east-1" + "CUBEJS_DB_EXPORT_BUCKET_AWS_REGION": "us-east-1", + "Cannot_left_comments_in_json": "Use OAuth machine-to-machine (M2M) authentication here for testing it too", + "CUBEJS_DB_DATABRICKS_TOKEN": "", + "CUBEJS_DB_DATABRICKS_OAUTH_CLIENT_ID": "${DRIVERS_TESTS_CUBEJS_DB_DATABRICKS_OAUTH_CLIENT_ID}", + "CUBEJS_DB_DATABRICKS_OAUTH_CLIENT_SECRET": "${DRIVERS_TESTS_CUBEJS_DB_DATABRICKS_OAUTH_CLIENT_SECRET}" } } },