Skip to content

Commit 6bf53a4

Browse files
committed
Merge branch 'feature/gcs-export-bucket' of github.com:qiao-x/cube into qiao-x-feature/gcs-export-bucket
2 parents fcb5711 + 7df6bac commit 6bf53a4

File tree

2 files changed

+38
-2
lines changed

2 files changed

+38
-2
lines changed

docs/pages/product/configuration/data-sources/databricks-jdbc.mdx

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,17 @@ CUBEJS_DB_EXPORT_BUCKET_AZURE_CLIENT_ID=<AZURE_CLIENT_ID>
147147
CUBEJS_DB_EXPORT_BUCKET_AZURE_CLIENT_SECRET=<AZURE_CLIENT_SECRET>
148148
```
149149

150+
#### Google Cloud Storage
151+
152+
To use Google Cloud Storage as an export bucket, follow [the Databricks guide on
153+
connecting to Google Cloud Storage][databricks-docs-uc-gcs].
154+
155+
```dotenv
156+
CUBEJS_DB_EXPORT_BUCKET=gs://my-bucket-on-gcs
157+
CUBEJS_DB_EXPORT_BUCKET_TYPE=gcs
158+
CUBEJS_DB_EXPORT_GCS_CREDENTIALS=<BASE64_ENCODED_SERVICE_CREDENTIALS_JSON>
159+
```
160+
150161
## SSL/TLS
151162

152163
Cube does not require any additional configuration to enable SSL/TLS for
@@ -173,6 +184,8 @@ bucket][self-preaggs-export-bucket] **must be** configured.
173184
https://docs.databricks.com/data/data-sources/azure/azure-storage.html
174185
[databricks-docs-uc-s3]:
175186
https://docs.databricks.com/en/connect/unity-catalog/index.html
187+
[databricks-docs-uc-gcs]:
188+
https://docs.databricks.com/gcp/en/connect/unity-catalog/cloud-storage.html
176189
[databricks-docs-jdbc-url]:
177190
https://docs.databricks.com/integrations/bi/jdbc-odbc-bi.html#get-server-hostname-port-http-path-and-jdbc-url
178191
[databricks-docs-pat]:

packages/cubejs-databricks-jdbc-driver/src/DatabricksDriver.ts

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ import {
2626
import { DatabricksQuery } from './DatabricksQuery';
2727
import { resolveJDBCDriver, extractUidFromJdbcUrl } from './helpers';
2828

29+
const SUPPORTED_BUCKET_TYPES = ['s3', 'gcs', 'azure'];
30+
2931
export type DatabricksDriverConfiguration = JDBCDriverConfiguration &
3032
{
3133
/**
@@ -103,6 +105,11 @@ export type DatabricksDriverConfiguration = JDBCDriverConfiguration &
103105
* Azure service principal client secret
104106
*/
105107
azureClientSecret?: string,
108+
109+
/**
110+
* GCS credentials JSON content
111+
*/
112+
gcsCredentials?: string,
106113
};
107114

108115
type ShowTableRow = {
@@ -209,7 +216,7 @@ export class DatabricksDriver extends JDBCDriver {
209216
// common export bucket config
210217
bucketType:
211218
conf?.bucketType ||
212-
getEnv('dbExportBucketType', { supported: ['s3', 'azure'], dataSource }),
219+
getEnv('dbExportBucketType', { supported: SUPPORTED_BUCKET_TYPES, dataSource }),
213220
exportBucket:
214221
conf?.exportBucket ||
215222
getEnv('dbExportBucket', { dataSource }),
@@ -246,6 +253,10 @@ export class DatabricksDriver extends JDBCDriver {
246253
azureClientSecret:
247254
conf?.azureClientSecret ||
248255
getEnv('dbExportBucketAzureClientSecret', { dataSource }),
256+
// GCS credentials
257+
gcsCredentials:
258+
conf?.gcsCredentials ||
259+
getEnv('dbExportGCSCredentials', { dataSource }),
249260
};
250261
if (config.readOnly === undefined) {
251262
// we can set readonly to true if there is no bucket config provided
@@ -643,7 +654,7 @@ export class DatabricksDriver extends JDBCDriver {
643654
* export bucket data.
644655
*/
645656
public async unload(tableName: string, options: UnloadOptions) {
646-
if (!['azure', 's3'].includes(this.config.bucketType as string)) {
657+
if (!SUPPORTED_BUCKET_TYPES.includes(this.config.bucketType as string)) {
647658
throw new Error(`Unsupported export bucket type: ${
648659
this.config.bucketType
649660
}`);
@@ -733,6 +744,15 @@ export class DatabricksDriver extends JDBCDriver {
733744
url.host,
734745
objectSearchPrefix,
735746
);
747+
} else if (this.config.bucketType === 'gcs') {
748+
return this.extractFilesFromGCS(
749+
{ credentials: this.config.gcsCredentials },
750+
url.host,
751+
objectSearchPrefix+".csv",
752+
).then(files => files.filter(file =>
753+
decodeURIComponent(new URL(file).pathname).endsWith('.csv') ||
754+
decodeURIComponent(new URL(file).pathname).endsWith('.csv.gz')
755+
));
736756
} else {
737757
throw new Error(`Unsupported export bucket type: ${
738758
this.config.bucketType
@@ -769,6 +789,9 @@ export class DatabricksDriver extends JDBCDriver {
769789
*
770790
* `fs.s3a.access.key <aws-access-key>`
771791
* `fs.s3a.secret.key <aws-secret-key>`
792+
 * For Google Cloud Storage, you can configure storage credentials and create an external location to access it
793+
* (https://docs.databricks.com/gcp/en/connect/unity-catalog/cloud-storage/storage-credentials
794+
* https://docs.databricks.com/gcp/en/connect/unity-catalog/cloud-storage/external-locations)
772795
*/
773796
private async createExternalTableFromSql(tableFullName: string, sql: string, params: unknown[], columns: ColumnInfo[]) {
774797
let select = sql;

0 commit comments

Comments
 (0)