Commit e1f22da

feat(snowflake-driver): Add support for export Buckets with paths (#9587)
* refactor: implement parseBucketUrl() in baseDriver
* typo
* feat(snowflake-driver): Add support for export Buckets with paths
* refactor DatabricksDriver
* update docs
* fix BaseDriver
1 parent dd53c38 commit e1f22da

File tree

13 files changed: +49420 −42 lines changed


.github/workflows/drivers-tests.yml

Lines changed: 6 additions & 0 deletions
@@ -228,9 +228,12 @@ jobs:
           snowflake
           snowflake-encrypted-pk
           snowflake-export-bucket-s3
+          snowflake-export-bucket-s3-prefix
           snowflake-export-bucket-azure
+          snowflake-export-bucket-azure-prefix
           snowflake-export-bucket-azure-via-storage-integration
           snowflake-export-bucket-gcs
+          snowflake-export-bucket-gcs-prefix
           # As per docs:
           # Secrets cannot be directly referenced in if: conditionals. Instead, consider setting
           # secrets as job-level environment variables, then referencing the environment variables
@@ -259,9 +262,12 @@ jobs:
           - snowflake
           - snowflake-encrypted-pk
           - snowflake-export-bucket-s3
+          - snowflake-export-bucket-s3-prefix
           - snowflake-export-bucket-azure
+          - snowflake-export-bucket-azure-prefix
           - snowflake-export-bucket-azure-via-storage-integration
           - snowflake-export-bucket-gcs
+          - snowflake-export-bucket-gcs-prefix
         fail-fast: false

       steps:

packages/cubejs-base-driver/src/BaseDriver.ts

Lines changed: 53 additions & 0 deletions
@@ -95,6 +95,21 @@ export type GoogleStorageClientConfig = {
   credentials: any,
 };
 
+export type ParsedBucketUrl = {
+  /**
+   * May be 's3', 'wasbs', 'gs', 'azure', etc.
+   */
+  schema?: string;
+  bucketName: string;
+  /**
+   * Prefix/path without leading or trailing slashes, or an empty string if not present.
+   */
+  path: string;
+  username?: string;
+  password?: string;
+  original: string;
+};
+
 const sortByKeys = (unordered: any) => {
   const ordered: any = {};
 
@@ -695,6 +710,44 @@ export abstract class BaseDriver implements DriverInterface {
     query.query = `SELECT * FROM (${query.query}) AS t LIMIT ${query.limit}`;
   }
 
+  /**
+   * Returns the parsed bucket structure.
+   * Supported variants:
+   *   s3://my-bucket-name/prefix/longer/
+   *   s3://my-bucket-name
+   *   my-bucket-name/some-path
+   *   my-bucket-name
+   */
+  protected parseBucketUrl(input: string | null | undefined): ParsedBucketUrl {
+    const original = input?.trim() || '';
+
+    if (!original) {
+      return {
+        bucketName: '',
+        path: '',
+        original,
+      };
+    }
+
+    const hasSchema = /^[a-zA-Z][a-zA-Z0-9+\-.]*:\/\//.test(original);
+    const normalized = hasSchema ? original : `schema://${original}`;
+
+    const url = new URL(normalized);
+
+    const path = url.pathname.replace(/^\/+|\/+$/g, '');
+    const schema = url.protocol.replace(/:$/, '');
+
+    return {
+      schema: schema || undefined,
+      bucketName: url.hostname,
+      path,
+      username: url.username || undefined,
+      password: url.password || undefined,
+      original,
+    };
+  }
+
   /**
    * Returns an array of signed AWS S3 URLs of the unloaded csv files.
    */
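
For illustration, the snippet below mirrors the parsing rules of the new parseBucketUrl() helper against the variants listed in its doc comment. Because the method is protected, the logic is re-stated here rather than called on a driver instance; the bucket names are hypothetical and the snippet is a sketch, not part of the commit.

// Standalone mirror of the parseBucketUrl() logic above, for illustration only.
type ParsedBucketUrl = {
  schema?: string;
  bucketName: string;
  path: string;
  username?: string;
  password?: string;
  original: string;
};

function parseBucketUrl(input: string | null | undefined): ParsedBucketUrl {
  const original = input?.trim() || '';
  if (!original) {
    return { bucketName: '', path: '', original };
  }
  // A dummy "schema://" is prepended so plain bucket names go through the URL parser too.
  const hasSchema = /^[a-zA-Z][a-zA-Z0-9+\-.]*:\/\//.test(original);
  const normalized = hasSchema ? original : `schema://${original}`;
  const url = new URL(normalized);
  const path = url.pathname.replace(/^\/+|\/+$/g, '');
  const schema = url.protocol.replace(/:$/, '');
  return {
    schema: schema || undefined,
    bucketName: url.hostname,
    path,
    username: url.username || undefined,
    password: url.password || undefined,
    original,
  };
}

// Hypothetical inputs covering the variants from the doc comment:
console.log(parseBucketUrl('s3://my-bucket-name/prefix/longer/'));
// -> schema: 's3', bucketName: 'my-bucket-name', path: 'prefix/longer'
console.log(parseBucketUrl('my-bucket-name/some-path'));
// -> schema: 'schema' (the dummy placeholder), bucketName: 'my-bucket-name', path: 'some-path'
console.log(parseBucketUrl('my-bucket-name'));
// -> bucketName: 'my-bucket-name', path: ''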

packages/cubejs-clickhouse-driver/src/ClickHouseDriver.ts

Lines changed: 4 additions & 18 deletions
@@ -584,33 +584,19 @@ export class ClickHouseDriver extends BaseDriver implements DriverInterface {
     );
   }
 
-  /**
-   * Returns clean S3 bucket name and prefix path ending with / (if set)
-   */
-  private parseS3Path(input: string): { bucket: string; prefix: string | null } {
-    let trimmed = input.startsWith('s3://') ? input.slice(5) : input;
-    trimmed = trimmed.endsWith('/') ? trimmed.slice(0, -1) : trimmed;
-    const parts = trimmed.split('/');
-    const bucket = parts[0];
-    const prefixParts = parts.slice(1);
-    const prefix = prefixParts.length > 0 ? `${prefixParts.join('/')}/` : null;
-
-    return { bucket, prefix };
-  }
-
   public async unloadFromQuery(sql: string, params: unknown[], _options: UnloadOptions): Promise<DownloadTableCSVData> {
     if (!this.config.exportBucket) {
       throw new Error('Unload is not configured');
     }
 
     const types = await this.queryColumnTypes(`(${sql})`, params);
-    const { bucket, prefix } = this.parseS3Path(this.config.exportBucket.bucketName);
-    const exportPrefix = prefix ? `${prefix}${uuidv4()}` : uuidv4();
+    const { bucketName, path } = this.parseBucketUrl(this.config.exportBucket.bucketName);
+    const exportPrefix = path ? `${path}/${uuidv4()}` : uuidv4();
 
     const formattedQuery = sqlstring.format(`
       INSERT INTO FUNCTION
         s3(
-          'https://${bucket}.s3.${this.config.exportBucket.region}.amazonaws.com/${exportPrefix}/export.csv.gz',
+          'https://${bucketName}.s3.${this.config.exportBucket.region}.amazonaws.com/${exportPrefix}/export.csv.gz',
           '${this.config.exportBucket.keyId}',
           '${this.config.exportBucket.secretKey}',
           'CSV'
@@ -628,7 +614,7 @@ export class ClickHouseDriver extends BaseDriver implements DriverInterface {
         },
         region: this.config.exportBucket.region,
       },
-      bucket,
+      bucketName,
       exportPrefix,
     );
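
As a rough sketch of what this change does to the generated S3 URL (the bucket, prefix, and region below are hypothetical, and crypto.randomUUID stands in for the uuid package the driver uses), a configured prefix now lands between the bucket host and the per-unload UUID:

import { randomUUID } from 'crypto';

// Hypothetical values; the real ones come from this.config.exportBucket.
// CUBEJS_DB_EXPORT_BUCKET = "s3://my-bucket/preaggs/exports"
const bucketName = 'my-bucket';   // parseBucketUrl(...).bucketName
const path = 'preaggs/exports';   // parseBucketUrl(...).path
const region = 'us-east-1';

const exportPrefix = path ? `${path}/${randomUUID()}` : randomUUID();

// Mirrors the URL built inside the INSERT INTO FUNCTION s3(...) statement above.
const url = `https://${bucketName}.s3.${region}.amazonaws.com/${exportPrefix}/export.csv.gz`;
console.log(url);
// e.g. https://my-bucket.s3.us-east-1.amazonaws.com/preaggs/exports/<uuid>/export.csv.gz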

packages/cubejs-databricks-jdbc-driver/src/DatabricksDriver.ts

Lines changed: 17 additions & 10 deletions
@@ -721,10 +721,6 @@ export class DatabricksDriver extends JDBCDriver {
     // s3://real-bucket-name
     // wasbs://[email protected]
     // The extractors in BaseDriver expect just clean bucket name
-    const url = new URL(this.config.exportBucket || '');
-    const prefix = url.pathname.slice(1);
-    const delimiter = (prefix && !prefix.endsWith('/')) ? '/' : '';
-    const objectSearchPrefix = `${prefix}${delimiter}${tableName}`;
 
     if (this.config.bucketType === 'azure') {
       const {
@@ -733,14 +729,22 @@
         azureTenantId: tenantId,
         azureClientSecret: clientSecret
       } = this.config;
+
+      const { bucketName, path, username } = this.parseBucketUrl(this.config.exportBucket);
+      const azureBucketPath = `${bucketName}/${username}`;
+      const exportPrefix = path ? `${path}/${tableName}` : tableName;
+
       return this.extractFilesFromAzure(
         { azureKey, clientId, tenantId, clientSecret },
         // Databricks uses different bucket address form, so we need to transform it
         // to the one understandable by extractFilesFromAzure implementation
-        `${url.host}/${url.username}`,
-        objectSearchPrefix,
+        azureBucketPath,
+        exportPrefix,
       );
     } else if (this.config.bucketType === 's3') {
+      const { bucketName, path } = this.parseBucketUrl(this.config.exportBucket);
+      const exportPrefix = path ? `${path}/${tableName}` : tableName;
+
       return this.extractUnloadedFilesFromS3(
         {
           credentials: {
@@ -749,14 +753,17 @@
           },
           region: this.config.awsRegion || '',
         },
-        url.host,
-        objectSearchPrefix,
+        bucketName,
+        exportPrefix,
       );
     } else if (this.config.bucketType === 'gcs') {
+      const { bucketName, path } = this.parseBucketUrl(this.config.exportBucket);
+      const exportPrefix = path ? `${path}/${tableName}` : tableName;
+
       return this.extractFilesFromGCS(
         { credentials: this.config.gcsCredentials },
-        url.host,
-        objectSearchPrefix,
+        bucketName,
+        exportPrefix,
       );
     } else {
       throw new Error(`Unsupported export bucket type: ${
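
A small sketch (with a hypothetical account, container, and table name) of how a wasbs-style export bucket URL now maps onto the two arguments passed to extractFilesFromAzure; it mirrors what parseBucketUrl() returns rather than calling the protected method:

// Hypothetical Databricks-style Azure export bucket URL and table name.
const exportBucket = 'wasbs://my-container@myaccount.blob.core.windows.net/preaggs/exports';
const tableName = 'dev_pre_aggregations__orders';

// parseBucketUrl() would yield roughly:
//   schema: 'wasbs', bucketName: 'myaccount.blob.core.windows.net',
//   username: 'my-container', path: 'preaggs/exports'
const url = new URL(exportBucket);
const bucketName = url.hostname;
const username = url.username;
const path = url.pathname.replace(/^\/+|\/+$/g, '');

// The "<account host>/<container>" form expected by extractFilesFromAzure.
const azureBucketPath = `${bucketName}/${username}`;
const exportPrefix = path ? `${path}/${tableName}` : tableName;

console.log(azureBucketPath); // myaccount.blob.core.windows.net/my-container
console.log(exportPrefix);    // preaggs/exports/dev_pre_aggregations__orders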

packages/cubejs-snowflake-driver/src/SnowflakeDriver.ts

Lines changed: 58 additions & 14 deletions
@@ -490,7 +490,7 @@ export class SnowflakeDriver extends BaseDriver implements DriverInterface {
   }
 
   /**
-   * Executes query and rerutns queried rows.
+   * Executes query and returns queried rows.
    */
   public async query<R = unknown>(query: string, values?: unknown[]): Promise<R> {
     return this.getConnection().then((connection) => this.execute<R>(connection, query, values));
@@ -545,10 +545,25 @@
     } else {
       const types = await this.queryColumnTypes(options.query.sql, options.query.params);
       const connection = await this.getConnection();
-      const { bucketType, bucketName } =
+      const { bucketType } =
         <SnowflakeDriverExportBucket> this.config.exportBucket;
+
+      let bucketName: string;
+      let exportPrefix: string;
+      let path: string;
+
+      if (bucketType === 'azure') {
+        ({ bucketName, path } = this.parseBucketUrl(this.config.exportBucket!.bucketName));
+        const pathArr = path.split('/');
+        bucketName = `${bucketName}/${pathArr[0]}`;
+        exportPrefix = pathArr.length > 1 ? `${pathArr.slice(1).join('/')}/${tableName}` : tableName;
+      } else {
+        ({ bucketName, path } = this.parseBucketUrl(this.config.exportBucket!.bucketName));
+        exportPrefix = path ? `${path}/${tableName}` : tableName;
+      }
+
       const unloadSql = `
-      COPY INTO '${bucketType}://${bucketName}/${tableName}/'
+      COPY INTO '${bucketType}://${bucketName}/${exportPrefix}/'
       FROM (${options.query.sql})
       ${this.exportOptionsClause(options)}`;
       const result = await this.execute<UnloadResponse[]>(
@@ -594,10 +609,25 @@
   ): Promise<TableStructure> {
     const types = await this.tableColumnTypes(tableName);
     const connection = await this.getConnection();
-    const { bucketType, bucketName } =
+    const { bucketType } =
       <SnowflakeDriverExportBucket> this.config.exportBucket;
+
+    let bucketName: string;
+    let exportPrefix: string;
+    let path: string;
+
+    if (bucketType === 'azure') {
+      ({ bucketName, path } = this.parseBucketUrl(this.config.exportBucket!.bucketName));
+      const pathArr = path.split('/');
+      bucketName = `${bucketName}/${pathArr[0]}`;
+      exportPrefix = pathArr.length > 1 ? `${pathArr.slice(1).join('/')}/${tableName}` : tableName;
+    } else {
+      ({ bucketName, path } = this.parseBucketUrl(this.config.exportBucket!.bucketName));
+      exportPrefix = path ? `${path}/${tableName}` : tableName;
+    }
+
     const unloadSql = `
-      COPY INTO '${bucketType}://${bucketName}/${tableName}/'
+      COPY INTO '${bucketType}://${bucketName}/${exportPrefix}/'
       FROM ${tableName}
       ${this.exportOptionsClause(options)}`;
     const result = await this.execute<UnloadResponse[]>(
@@ -695,36 +725,50 @@
    * Returns an array of signed URLs of the unloaded csv files.
    */
   private async getCsvFiles(tableName: string): Promise<string[]> {
-    const { bucketType, bucketName } =
+    const { bucketType } =
       <SnowflakeDriverExportBucket> this.config.exportBucket;
 
     if (bucketType === 's3') {
-      const cfg = <SnowflakeDriverExportAWS> this.config.exportBucket;
+      const { keyId, secretKey, region } = <SnowflakeDriverExportAWS> this.config.exportBucket;
+
+      const { bucketName, path } = this.parseBucketUrl(this.config.exportBucket!.bucketName);
+      const exportPrefix = path ? `${path}/${tableName}` : tableName;
 
       return this.extractUnloadedFilesFromS3(
         {
           credentials: {
-            accessKeyId: cfg.keyId,
-            secretAccessKey: cfg.secretKey,
+            accessKeyId: keyId,
+            secretAccessKey: secretKey,
           },
-          region: cfg.region,
+          region,
         },
         bucketName,
-        tableName,
+        exportPrefix,
      );
     } else if (bucketType === 'gcs') {
       const { credentials } = (
         <SnowflakeDriverExportGCS> this.config.exportBucket
       );
-      return this.extractFilesFromGCS({ credentials }, bucketName, tableName);
+
+      const { bucketName, path } = this.parseBucketUrl(this.config.exportBucket!.bucketName);
+      const exportPrefix = path ? `${path}/${tableName}` : tableName;
+
+      return this.extractFilesFromGCS({ credentials }, bucketName, exportPrefix);
     } else if (bucketType === 'azure') {
       const { azureKey, sasToken, clientId, tenantId, tokenFilePath } = (
         <SnowflakeDriverExportAzure> this.config.exportBucket
       );
+
+      const { bucketName, path } = this.parseBucketUrl(this.config.exportBucket!.bucketName);
+      const pathArr = path.split('/');
+      const azureBucketPath = `${bucketName}/${pathArr[0]}`;
+
+      const exportPrefix = pathArr.length > 1 ? `${pathArr.slice(1).join('/')}/${tableName}` : tableName;
+
       return this.extractFilesFromAzure(
         { azureKey, sasToken, clientId, tenantId, tokenFilePath },
-        bucketName,
-        tableName,
+        azureBucketPath,
+        exportPrefix,
       );
     } else {
       throw new Error(`Unsupported export bucket type: ${bucketType}`);
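
To make the azure-versus-everything-else branching above concrete, here is a sketch (hypothetical bucket values and table name) of the COPY INTO target for a prefixed S3 bucket and for a prefixed Azure bucket, where the first path segment is the container and therefore stays on the bucket side:

const tableName = 'dev_pre_aggregations__orders';

// S3 / GCS: the whole parsed path goes into the export prefix.
// Hypothetical CUBEJS_DB_EXPORT_BUCKET = "my-preaggs-bucket/testing_prefix/for_export_buckets"
{
  const bucketName = 'my-preaggs-bucket';
  const path = 'testing_prefix/for_export_buckets';
  const exportPrefix = path ? `${path}/${tableName}` : tableName;
  console.log(`COPY INTO 's3://${bucketName}/${exportPrefix}/'`);
  // COPY INTO 's3://my-preaggs-bucket/testing_prefix/for_export_buckets/dev_pre_aggregations__orders/'
}

// Azure: the first path segment is the container, so it is folded into the bucket part.
// Hypothetical CUBEJS_DB_EXPORT_BUCKET =
//   "myaccount.blob.core.windows.net/my-container/testing_prefix/for_export_buckets"
{
  let bucketName = 'myaccount.blob.core.windows.net';
  const path = 'my-container/testing_prefix/for_export_buckets';
  const pathArr = path.split('/');
  bucketName = `${bucketName}/${pathArr[0]}`;
  const exportPrefix = pathArr.length > 1
    ? `${pathArr.slice(1).join('/')}/${tableName}`
    : tableName;
  console.log(`COPY INTO 'azure://${bucketName}/${exportPrefix}/'`);
  // COPY INTO 'azure://myaccount.blob.core.windows.net/my-container/testing_prefix/for_export_buckets/dev_pre_aggregations__orders/'
}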

packages/cubejs-testing-drivers/fixtures/snowflake.json

Lines changed: 31 additions & 0 deletions
@@ -11,6 +11,17 @@
       }
     }
   },
+  "export-bucket-s3-prefix": {
+    "cube": {
+      "environment": {
+        "CUBEJS_DB_EXPORT_BUCKET_TYPE": "s3",
+        "CUBEJS_DB_EXPORT_BUCKET": "snowflake-drivers-tests-preaggs/testing_prefix/for_export_buckets",
+        "CUBEJS_DB_EXPORT_BUCKET_AWS_KEY": "${DRIVERS_TESTS_CUBEJS_DB_EXPORT_BUCKET_AWS_KEY}",
+        "CUBEJS_DB_EXPORT_BUCKET_AWS_SECRET": "${DRIVERS_TESTS_CUBEJS_DB_EXPORT_BUCKET_AWS_SECRET}",
+        "CUBEJS_DB_EXPORT_BUCKET_AWS_REGION": "us-west-1"
+      }
+    }
+  },
   "export-bucket-azure": {
     "cube": {
       "environment": {
@@ -21,6 +32,16 @@
       }
     }
   },
+  "export-bucket-azure-prefix": {
+    "cube": {
+      "environment": {
+        "CUBEJS_DB_EXPORT_BUCKET_TYPE": "azure",
+        "CUBEJS_DB_EXPORT_BUCKET": "coreteamdevtest.blob.core.windows.net/snowflake-drivers-tests-preaggs/testing_prefix/for_export_buckets/",
+        "CUBEJS_DB_EXPORT_BUCKET_AZURE_KEY": "${DRIVERS_TESTS_CUBEJS_DB_EXPORT_BUCKET_AZURE_KEY}",
+        "CUBEJS_DB_EXPORT_BUCKET_AZURE_SAS_TOKEN": "${DRIVERS_TESTS_CUBEJS_DB_EXPORT_BUCKET_AZURE_SAS_TOKEN}"
+      }
+    }
+  },
   "export-bucket-azure-via-storage-integration": {
     "cube": {
       "environment": {
@@ -41,6 +62,16 @@
       }
     }
   },
+  "export-bucket-gcs-prefix": {
+    "cube": {
+      "environment": {
+        "CUBEJS_DB_EXPORT_BUCKET_TYPE": "gcs",
+        "CUBEJS_DB_EXPORT_BUCKET": "snowflake-drivers-tests-preaggs/testing_prefix/for_export_buckets",
+        "CUBEJS_DB_EXPORT_INTEGRATION": "drivers_tests_preaggs_gcs",
+        "CUBEJS_DB_EXPORT_GCS_CREDENTIALS": "${DRIVERS_TESTS_CUBEJS_DB_EXPORT_GCS_CREDENTIALS}"
+      }
+    }
+  },
   "encrypted-pk": {
     "cube": {
       "environment": {

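For context, here is a brief sketch of how the prefixed CUBEJS_DB_EXPORT_BUCKET fixture values above split into a bucket and a path; the split follows the parseBucketUrl() rules, and only the fixture strings themselves come from this commit.

// How the prefixed fixture values resolve to bucket + path (illustrative only).
const fixtureBuckets = [
  'snowflake-drivers-tests-preaggs/testing_prefix/for_export_buckets',
  'coreteamdevtest.blob.core.windows.net/snowflake-drivers-tests-preaggs/testing_prefix/for_export_buckets/',
];

for (const value of fixtureBuckets) {
  const hasSchema = /^[a-zA-Z][a-zA-Z0-9+\-.]*:\/\//.test(value);
  const url = new URL(hasSchema ? value : `schema://${value}`);
  console.log({
    bucketName: url.hostname,
    path: url.pathname.replace(/^\/+|\/+$/g, ''),
  });
}
// -> { bucketName: 'snowflake-drivers-tests-preaggs', path: 'testing_prefix/for_export_buckets' }
// -> { bucketName: 'coreteamdevtest.blob.core.windows.net',
//      path: 'snowflake-drivers-tests-preaggs/testing_prefix/for_export_buckets' }
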
packages/cubejs-testing-drivers/package.json

Lines changed: 3 additions & 0 deletions
@@ -49,9 +49,12 @@
     "snowflake-full": "yarn test-driver -i dist/test/snowflake-full.test.js",
     "snowflake-encrypted-pk-full": "yarn test-driver -i dist/test/snowflake-encrypted-pk-full.test.js",
     "snowflake-export-bucket-s3-full": "yarn test-driver -i dist/test/snowflake-export-bucket-s3-full.test.js",
+    "snowflake-export-bucket-s3-prefix-full": "yarn test-driver -i dist/test/snowflake-export-bucket-s3-prefix-full.test.js",
     "snowflake-export-bucket-azure-full": "yarn test-driver -i dist/test/snowflake-export-bucket-azure-full.test.js",
+    "snowflake-export-bucket-azure-prefix-full": "yarn test-driver -i dist/test/snowflake-export-bucket-azure-prefix-full.test.js",
     "snowflake-export-bucket-azure-via-storage-integration-full": "yarn test-driver -i dist/test/snowflake-export-bucket-azure-via-storage-integration-full.test.js",
     "snowflake-export-bucket-gcs-full": "yarn test-driver -i dist/test/snowflake-export-bucket-gcs-full.test.js",
+    "snowflake-export-bucket-gcs-prefix-full": "yarn test-driver -i dist/test/snowflake-export-bucket-gcs-prefix-full.test.js",
     "redshift-driver": "yarn test-driver -i dist/test/redshift-driver.test.js",
     "redshift-core": "yarn test-driver -i dist/test/redshift-core.test.js",
     "redshift-full": "yarn test-driver -i dist/test/redshift-full.test.js",
