4 changes: 4 additions & 0 deletions .github/workflows/drivers-tests.yml
@@ -210,6 +210,8 @@ jobs:
databricks-jdbc-export-bucket-s3-prefix
databricks-jdbc-export-bucket-azure
databricks-jdbc-export-bucket-azure-prefix
databricks-jdbc-export-bucket-gcs
databricks-jdbc-export-bucket-gcs-prefix
redshift
redshift-export-bucket-s3
snowflake
@@ -237,6 +239,8 @@ jobs:
- databricks-jdbc-export-bucket-s3-prefix
- databricks-jdbc-export-bucket-azure
- databricks-jdbc-export-bucket-azure-prefix
- databricks-jdbc-export-bucket-gcs
- databricks-jdbc-export-bucket-gcs-prefix
- mssql
- mysql
- postgres
@@ -122,6 +122,24 @@ CUBEJS_DB_EXPORT_BUCKET_AWS_SECRET=<AWS_SECRET>
CUBEJS_DB_EXPORT_BUCKET_AWS_REGION=<AWS_REGION>
```

#### Google Cloud Storage

<InfoBox>

When using an export bucket, remember to assign the **Storage Object Admin**
role to your Google Cloud credentials (`CUBEJS_DB_EXPORT_GCS_CREDENTIALS`).

</InfoBox>

To use Google Cloud Storage as an export bucket, first complete [the Databricks guide on
connecting to cloud object storage using Unity Catalog][databricks-docs-uc-gcs].

```dotenv
CUBEJS_DB_EXPORT_BUCKET=gs://databricks-export-bucket
CUBEJS_DB_EXPORT_BUCKET_TYPE=gcs
CUBEJS_DB_EXPORT_GCS_CREDENTIALS=<BASE64_ENCODED_SERVICE_CREDENTIALS_JSON>
```
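
The value for `CUBEJS_DB_EXPORT_GCS_CREDENTIALS` is the contents of a service account key file, base64-encoded. A minimal Node.js sketch for producing it (the key file path is an assumption):

```typescript
// Encode a GCS service account key file for CUBEJS_DB_EXPORT_GCS_CREDENTIALS.
// The path below is illustrative; point it at your own key file.
import { readFileSync } from 'node:fs';

const json = readFileSync('./service-account.json', 'utf8');
process.stdout.write(Buffer.from(json, 'utf8').toString('base64'));
```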

#### Azure Blob Storage

To use Azure Blob Storage as an export bucket, follow [the Databricks guide on
@@ -136,7 +154,7 @@ CUBEJS_DB_EXPORT_BUCKET=wasbs://[email protected]
CUBEJS_DB_EXPORT_BUCKET_AZURE_KEY=<AZURE_STORAGE_ACCOUNT_ACCESS_KEY>
```

The access key provides full access to the configuration and data.
For fine-grained control over access to storage resources, follow [the Databricks guide on authorizing with Azure Active Directory][authorize-with-azure-active-directory].

[Create the service principal][azure-authentication-with-service-principal] and replace the access key as follows:
@@ -173,6 +191,8 @@ bucket][self-preaggs-export-bucket] **must be** configured.
https://docs.databricks.com/data/data-sources/azure/azure-storage.html
[databricks-docs-uc-s3]:
https://docs.databricks.com/en/connect/unity-catalog/index.html
[databricks-docs-uc-gcs]:
https://docs.databricks.com/gcp/en/connect/unity-catalog/cloud-storage.html
[databricks-docs-jdbc-url]:
https://docs.databricks.com/integrations/bi/jdbc-odbc-bi.html#get-server-hostname-port-http-path-and-jdbc-url
[databricks-docs-pat]:
76 changes: 49 additions & 27 deletions packages/cubejs-databricks-jdbc-driver/src/DatabricksDriver.ts
@@ -4,27 +4,23 @@
* @fileoverview The `DatabricksDriver` and related types declaration.
*/

import { assertDataSource, getEnv, } from '@cubejs-backend/shared';
import {
  DatabaseStructure,
  DriverCapabilities,
  GenericDataBaseType,
  QueryColumnsResult,
  QueryOptions,
  QuerySchemasResult,
  QueryTablesResult,
  TableColumn,
  UnloadOptions,
} from '@cubejs-backend/base-driver';
import { JDBCDriver, JDBCDriverConfiguration, } from '@cubejs-backend/jdbc-driver';
import { DatabricksQuery } from './DatabricksQuery';
import { extractUidFromJdbcUrl, resolveJDBCDriver } from './helpers';

const SUPPORTED_BUCKET_TYPES = ['s3', 'gcs', 'azure'];

export type DatabricksDriverConfiguration = JDBCDriverConfiguration &
{
@@ -103,6 +99,11 @@ export type DatabricksDriverConfiguration = JDBCDriverConfiguration &
* Azure service principal client secret
*/
azureClientSecret?: string,

/**
* GCS credentials JSON content
*/
gcsCredentials?: string,
};
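
For context, a minimal sketch, assumed rather than taken from this PR's code, of how a base64-encoded `gcsCredentials` value could be decoded into a client for reading exported files, assuming the official `@google-cloud/storage` package:

```typescript
// Decode base64-encoded service credentials JSON into a GCS client.
// Illustrative helper only; not part of the driver.
import { Storage } from '@google-cloud/storage';

function gcsClientFromCredentials(encoded: string): Storage {
  const credentials = JSON.parse(
    Buffer.from(encoded, 'base64').toString('utf8'),
  );
  return new Storage({ credentials });
}
```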

type ShowTableRow = {
@@ -209,7 +210,7 @@ export class DatabricksDriver extends JDBCDriver {
// common export bucket config
bucketType:
conf?.bucketType ||
getEnv('dbExportBucketType', { supported: ['s3', 'azure'], dataSource }),
getEnv('dbExportBucketType', { supported: SUPPORTED_BUCKET_TYPES, dataSource }),
exportBucket:
conf?.exportBucket ||
getEnv('dbExportBucket', { dataSource }),
Expand Down Expand Up @@ -246,6 +247,10 @@ export class DatabricksDriver extends JDBCDriver {
azureClientSecret:
conf?.azureClientSecret ||
getEnv('dbExportBucketAzureClientSecret', { dataSource }),
// GCS credentials
gcsCredentials:
conf?.gcsCredentials ||
getEnv('dbExportGCSCredentials', { dataSource }),
};
if (config.readOnly === undefined) {
// we can set readonly to true if there is no bucket config provided
@@ -429,8 +434,7 @@ export class DatabricksDriver extends JDBCDriver {
metadata[database] = {};
}

const columns = await this.tableColumnTypes(`${database}.${tableName}`);
metadata[database][tableName] = columns;
metadata[database][tableName] = await this.tableColumnTypes(`${database}.${tableName}`);
}));

return metadata;
@@ -527,7 +531,7 @@ export class DatabricksDriver extends JDBCDriver {
* Returns table columns types.
*/
public override async tableColumnTypes(table: string): Promise<TableColumn[]> {
let tableFullName = '';
let tableFullName: string;
const tableArray = table.split('.');

if (tableArray.length === 3) {
@@ -643,7 +647,7 @@
* export bucket data.
*/
public async unload(tableName: string, options: UnloadOptions) {
if (!['azure', 's3'].includes(this.config.bucketType as string)) {
if (!SUPPORTED_BUCKET_TYPES.includes(this.config.bucketType as string)) {
throw new Error(`Unsupported export bucket type: ${
this.config.bucketType
}`);
Expand Down Expand Up @@ -733,6 +737,12 @@ export class DatabricksDriver extends JDBCDriver {
url.host,
objectSearchPrefix,
);
} else if (this.config.bucketType === 'gcs') {
return this.extractFilesFromGCS(
{ credentials: this.config.gcsCredentials },
url.host,
objectSearchPrefix,
);
} else {
throw new Error(`Unsupported export bucket type: ${
this.config.bucketType
@@ -759,16 +769,22 @@
*
* For Azure blob storage you need to configure account access key in
* Cluster -> Configuration -> Advanced options
* (https://docs.databricks.com/data/data-sources/azure/azure-storage.html#access-azure-blob-storage-directly)
* https://docs.databricks.com/data/data-sources/azure/azure-storage.html#access-azure-blob-storage-directly
*
* `fs.azure.account.key.<storage-account-name>.blob.core.windows.net <storage-account-access-key>`
*
* For S3 bucket storage you need to configure AWS access key and secret in
* Cluster -> Configuration -> Advanced options
* (https://docs.databricks.com/data/data-sources/aws/amazon-s3.html#access-s3-buckets-directly)
* https://docs.databricks.com/data/data-sources/aws/amazon-s3.html#access-s3-buckets-directly
*
* `fs.s3a.access.key <aws-access-key>`
* `fs.s3a.secret.key <aws-secret-key>`
*
* For Google Cloud Storage, you can configure storage credentials and create
* an external location to access it, or configure a service account key (legacy):
* https://docs.databricks.com/gcp/en/connect/unity-catalog/cloud-storage/storage-credentials
* https://docs.databricks.com/gcp/en/connect/unity-catalog/cloud-storage/external-locations
* https://docs.databricks.com/aws/en/connect/storage/gcs
*/
private async createExternalTableFromSql(tableFullName: string, sql: string, params: unknown[], columns: ColumnInfo[]) {
let select = sql;
@@ -780,15 +796,15 @@
try {
await this.query(
`
CREATE TABLE ${tableFullName}
USING CSV LOCATION '${this.config.exportBucketMountDir || this.config.exportBucket}/${tableFullName}.csv'
CREATE TABLE ${tableFullName}_tmp
USING CSV LOCATION '${this.config.exportBucketMountDir || this.config.exportBucket}/${tableFullName}'
OPTIONS (escape = '"')
AS (${select});
`,
params,
);
} finally {
await this.query(`DROP TABLE IF EXISTS ${tableFullName};`, []);
await this.query(`DROP TABLE IF EXISTS ${tableFullName}_tmp;`, []);
}
}

@@ -798,30 +814,36 @@
*
* For Azure blob storage you need to configure account access key in
* Cluster -> Configuration -> Advanced options
* (https://docs.databricks.com/data/data-sources/azure/azure-storage.html#access-azure-blob-storage-directly)
* https://docs.databricks.com/data/data-sources/azure/azure-storage.html#access-azure-blob-storage-directly
*
* `fs.azure.account.key.<storage-account-name>.blob.core.windows.net <storage-account-access-key>`
*
* For S3 bucket storage you need to configure AWS access key and secret in
* Cluster -> Configuration -> Advanced options
* (https://docs.databricks.com/data/data-sources/aws/amazon-s3.html#access-s3-buckets-directly)
* https://docs.databricks.com/data/data-sources/aws/amazon-s3.html#access-s3-buckets-directly
*
* `fs.s3a.access.key <aws-access-key>`
* `fs.s3a.secret.key <aws-secret-key>`
*
* For Google Cloud Storage, you can configure storage credentials and create
* an external location to access it, or configure a service account key (legacy):
* https://docs.databricks.com/gcp/en/connect/unity-catalog/cloud-storage/storage-credentials
* https://docs.databricks.com/gcp/en/connect/unity-catalog/cloud-storage/external-locations
* https://docs.databricks.com/aws/en/connect/storage/gcs
*/
private async createExternalTableFromTable(tableFullName: string, columns: ColumnInfo[]) {
try {
await this.query(
`
CREATE TABLE _${tableFullName}
USING CSV LOCATION '${this.config.exportBucketMountDir || this.config.exportBucket}/${tableFullName}.csv'
CREATE TABLE ${tableFullName}_tmp
USING CSV LOCATION '${this.config.exportBucketMountDir || this.config.exportBucket}/${tableFullName}'
OPTIONS (escape = '"')
AS SELECT ${this.generateTableColumnsForExport(columns)} FROM ${tableFullName}
`,
[],
);
} finally {
await this.query(`DROP TABLE IF EXISTS _${tableFullName};`, []);
await this.query(`DROP TABLE IF EXISTS ${tableFullName}_tmp;`, []);
}
}
}
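
For reference, a condensed sketch of the unload flow the two methods above implement, with illustrative table and bucket names: query results are written to the bucket as CSV via a temporary external table, which is then dropped so only the exported files remain.

```typescript
// Sketch of the export-bucket unload flow (names are illustrative).
// A temporary external table materializes the CSV files in the bucket;
// dropping it removes only table metadata, not the exported files.
async function unloadToBucket(
  query: (sql: string, params: unknown[]) => Promise<unknown>,
) {
  const tableFullName = 'demo_db.orders__export';
  const location = 'gs://databricks-export-bucket/demo_db.orders__export';
  try {
    await query(
      `CREATE TABLE ${tableFullName}_tmp
       USING CSV LOCATION '${location}'
       OPTIONS (escape = '"')
       AS SELECT * FROM demo_db.orders`,
      [],
    );
  } finally {
    await query(`DROP TABLE IF EXISTS ${tableFullName}_tmp;`, []);
  }
}
```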
18 changes: 18 additions & 0 deletions packages/cubejs-testing-drivers/fixtures/databricks-jdbc.json
@@ -39,6 +39,24 @@
"CUBEJS_DB_EXPORT_BUCKET_AZURE_KEY": "${DRIVERS_TESTS_CUBEJS_DB_EXPORT_BUCKET_AZURE_KEY}"
}
}
},
"export-bucket-gcs": {
"cube": {
"environment": {
"CUBEJS_DB_EXPORT_BUCKET_TYPE": "gcs",
"CUBEJS_DB_EXPORT_BUCKET": "gs://databricks-drivers-tests-preaggs",
"CUBEJS_DB_EXPORT_GCS_CREDENTIALS": "${DRIVERS_TESTS_CUBEJS_DB_EXPORT_GCS_CREDENTIALS}"
}
}
},
"export-bucket-gcs-prefix": {
"cube": {
"environment": {
"CUBEJS_DB_EXPORT_BUCKET_TYPE": "gcs",
"CUBEJS_DB_EXPORT_BUCKET": "gs://databricks-drivers-tests-preaggs/testing_prefix/for_export_buckets",
"CUBEJS_DB_EXPORT_GCS_CREDENTIALS": "${DRIVERS_TESTS_CUBEJS_DB_EXPORT_GCS_CREDENTIALS}"
}
}
}
},
"cube": {
2 changes: 2 additions & 0 deletions packages/cubejs-testing-drivers/package.json
@@ -32,6 +32,8 @@
"databricks-jdbc-export-bucket-s3-prefix-full": "yarn test-driver -i dist/test/databricks-jdbc-export-bucket-s3-prefix-full.test.js",
"databricks-jdbc-export-bucket-azure-full": "yarn test-driver -i dist/test/databricks-jdbc-export-bucket-azure-full.test.js",
"databricks-jdbc-export-bucket-azure-prefix-full": "yarn test-driver -i dist/test/databricks-jdbc-export-bucket-azure-prefix-full.test.js",
"databricks-jdbc-export-bucket-gcs-full": "yarn test-driver -i dist/test/databricks-jdbc-export-bucket-gcs-full.test.js",
"databricks-jdbc-export-bucket-gcs-prefix-full": "yarn test-driver -i dist/test/databricks-jdbc-export-bucket-gcs-prefix-full.test.js",
"mssql-driver": "yarn test-driver -i dist/test/mssql-driver.test.js",
"mssql-core": "yarn test-driver -i dist/test/mssql-core.test.js",
"mssql-full": "yarn test-driver -i dist/test/mssql-full.test.js",