Skip to content

Commit fc5097b

Browse files
committed
Databricks export bucket for Google Cloud Storage
1 parent 7e363f7 commit fc5097b

File tree

2 files changed

+40
-4
lines changed

2 files changed

+40
-4
lines changed

docs/pages/product/configuration/data-sources/databricks-jdbc.mdx

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,19 @@ CUBEJS_DB_EXPORT_BUCKET_AZURE_CLIENT_ID=<AZURE_CLIENT_ID>
147147
CUBEJS_DB_EXPORT_BUCKET_AZURE_CLIENT_SECRET=<AZURE_CLIENT_SECRET>
148148
```
149149

150+
#### Google Cloud Storage
151+
152+
To use Google Cloud Storage as an export bucket, follow [the Databricks guide on
153+
connecting to Google Cloud Storage][databricks-docs-uc-gcs].
154+
155+
[Create the service account][databricks-docs-uc-gcs-service-account] and provide its base64-encoded service account credentials JSON as follows:
156+
157+
```dotenv
158+
CUBEJS_DB_EXPORT_BUCKET=gs://my-bucket-on-gcs
159+
CUBEJS_DB_EXPORT_BUCKET_TYPE=gcs
160+
CUBEJS_DB_EXPORT_GCS_CREDENTIALS=<BASE64_ENCODED_SERVICE_CREDENTIALS_JSON>
161+
```
162+
150163
## SSL/TLS
151164

152165
Cube does not require any additional configuration to enable SSL/TLS for
@@ -173,6 +186,8 @@ bucket][self-preaggs-export-bucket] **must be** configured.
173186
https://docs.databricks.com/data/data-sources/azure/azure-storage.html
174187
[databricks-docs-uc-s3]:
175188
https://docs.databricks.com/en/connect/unity-catalog/index.html
189+
[databricks-docs-uc-gcs]:
190+
https://docs.databricks.com/gcp/en/connect/unity-catalog/cloud-storage.html
176191
[databricks-docs-jdbc-url]:
177192
https://docs.databricks.com/integrations/bi/jdbc-odbc-bi.html#get-server-hostname-port-http-path-and-jdbc-url
178193
[databricks-docs-pat]:

packages/cubejs-databricks-jdbc-driver/src/DatabricksDriver.ts

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,11 @@ export type DatabricksDriverConfiguration = JDBCDriverConfiguration &
103103
* Azure service principal client secret
104104
*/
105105
azureClientSecret?: string,
106+
107+
/**
108+
* GCS credentials JSON content
109+
*/
110+
gcsCredentials?: string,
106111
};
107112

108113
type ShowTableRow = {
@@ -209,7 +214,7 @@ export class DatabricksDriver extends JDBCDriver {
209214
// common export bucket config
210215
bucketType:
211216
conf?.bucketType ||
212-
getEnv('dbExportBucketType', { supported: ['s3', 'azure'], dataSource }),
217+
getEnv('dbExportBucketType', { supported: ['s3', 'azure', 'gcs'], dataSource }),
213218
exportBucket:
214219
conf?.exportBucket ||
215220
getEnv('dbExportBucket', { dataSource }),
@@ -246,6 +251,10 @@ export class DatabricksDriver extends JDBCDriver {
246251
azureClientSecret:
247252
conf?.azureClientSecret ||
248253
getEnv('dbExportBucketAzureClientSecret', { dataSource }),
254+
// GCS credentials
255+
gcsCredentials:
256+
conf?.gcsCredentials ||
257+
getEnv('dbExportGCSCredentials', { dataSource }),
249258
};
250259
if (config.readOnly === undefined) {
251260
// we can set readonly to true if there is no bucket config provided
@@ -643,16 +652,19 @@ export class DatabricksDriver extends JDBCDriver {
643652
* export bucket data.
644653
*/
645654
public async unload(tableName: string, options: UnloadOptions) {
646-
if (!['azure', 's3'].includes(this.config.bucketType as string)) {
655+
if (!['azure', 's3', 'gcs'].includes(this.config.bucketType as string)) {
647656
throw new Error(`Unsupported export bucket type: ${
648657
this.config.bucketType
649658
}`);
650659
}
660+
// Construct a fully qualified table name with proper quoting
661+
// 1. Quotes are needed to handle special characters in identifiers, e.g. `my-table`
662+
// 2. Table name may include schema (e.g. 'schema.table'), so we split and quote each part, e.g. `schema`.`table`
651663
const tableFullName = `${
652664
this.config.catalog
653-
? `${this.config.catalog}.`
665+
? `${this.quoteIdentifier(this.config.catalog)}.`
654666
: ''
655-
}${tableName}`;
667+
}${tableName.split('.').map(part => this.quoteIdentifier(part)).join('.')}`;
656668
const types = options.query
657669
? await this.unloadWithSql(
658670
tableFullName,
@@ -733,6 +745,12 @@ export class DatabricksDriver extends JDBCDriver {
733745
url.host,
734746
objectSearchPrefix,
735747
);
748+
} else if (this.config.bucketType === 'gcs') {
749+
return this.extractFilesFromGCS(
750+
{ credentials: this.config.gcsCredentials },
751+
url.host,
752+
objectSearchPrefix,
753+
);
736754
} else {
737755
throw new Error(`Unsupported export bucket type: ${
738756
this.config.bucketType
@@ -769,6 +787,9 @@ export class DatabricksDriver extends JDBCDriver {
769787
*
770788
* `fs.s3a.access.key <aws-access-key>`
771789
* `fs.s3a.secret.key <aws-secret-key>`
790+
* For Google Cloud Storage, you can configure storage credentials and create an external location to access it
791+
* (https://docs.databricks.com/gcp/en/connect/unity-catalog/cloud-storage/storage-credentials
792+
* https://docs.databricks.com/gcp/en/connect/unity-catalog/cloud-storage/external-locations)
772793
*/
773794
private async createExternalTableFromSql(tableFullName: string, sql: string, params: unknown[], columns: ColumnInfo[]) {
774795
let select = sql;

0 commit comments

Comments
 (0)