Commit 2e47147

feat(clickhouse-driver): Add support for S3 Bucket with paths (#9585)

1 parent 6f5f3bc

6 files changed: +9001 -14 lines changed

.github/workflows/drivers-tests.yml

Lines changed: 2 additions & 0 deletions

@@ -215,6 +215,7 @@ jobs:
           athena-export-bucket-s3
           bigquery-export-bucket-gcs
           clickhouse-export-bucket-s3
+          clickhouse-export-bucket-s3-prefix
           databricks-jdbc
           databricks-jdbc-export-bucket-s3
           databricks-jdbc-export-bucket-s3-prefix
@@ -242,6 +243,7 @@ jobs:
           - bigquery-export-bucket-gcs
           - clickhouse
           - clickhouse-export-bucket-s3
+          - clickhouse-export-bucket-s3-prefix
           - databricks-jdbc
           - databricks-jdbc-export-bucket-s3
           - databricks-jdbc-export-bucket-s3-prefix

packages/cubejs-clickhouse-driver/src/ClickHouseDriver.ts

Lines changed: 25 additions & 14 deletions

@@ -33,6 +33,8 @@ import sqlstring from 'sqlstring';
 
 import { transformRow, transformStreamRow } from './HydrationStream';
 
+const SUPPORTED_BUCKET_TYPES = ['s3'];
+
 const ClickhouseTypeToGeneric: Record<string, string> = {
   enum: 'text',
   string: 'text',
@@ -489,11 +491,9 @@ export class ClickHouseDriver extends BaseDriver implements DriverInterface {
   protected getExportBucket(
     dataSource: string,
   ): ClickhouseDriverExportAWS | null {
-    const supportedBucketTypes = ['s3'];
-
     const requiredExportBucket: ClickhouseDriverExportRequiredAWS = {
       bucketType: getEnv('dbExportBucketType', {
-        supported: supportedBucketTypes,
+        supported: SUPPORTED_BUCKET_TYPES,
         dataSource,
       }),
       bucketName: getEnv('dbExportBucket', { dataSource }),
@@ -507,9 +507,9 @@ export class ClickHouseDriver extends BaseDriver implements DriverInterface {
     };
 
     if (exportBucket.bucketType) {
-      if (!supportedBucketTypes.includes(exportBucket.bucketType)) {
+      if (!SUPPORTED_BUCKET_TYPES.includes(exportBucket.bucketType)) {
         throw new Error(
-          `Unsupported EXPORT_BUCKET_TYPE, supported: ${supportedBucketTypes.join(',')}`
+          `Unsupported EXPORT_BUCKET_TYPE, supported: ${SUPPORTED_BUCKET_TYPES.join(',')}`
         );
       }
 
@@ -529,11 +529,7 @@ export class ClickHouseDriver extends BaseDriver implements DriverInterface {
   }
 
   public async isUnloadSupported() {
-    if (this.config.exportBucket) {
-      return true;
-    }
-
-    return false;
+    return !!this.config.exportBucket;
   }
 
   /**
@@ -588,18 +584,33 @@ export class ClickHouseDriver extends BaseDriver implements DriverInterface {
     );
   }
 
-  public async unloadFromQuery(sql: string, params: unknown[], options: UnloadOptions): Promise<DownloadTableCSVData> {
+  /**
+   * Returns clean S3 bucket name and prefix path ending with / (if set)
+   */
+  private parseS3Path(input: string): { bucket: string; prefix: string | null } {
+    let trimmed = input.startsWith('s3://') ? input.slice(5) : input;
+    trimmed = trimmed.endsWith('/') ? trimmed.slice(0, -1) : trimmed;
+    const parts = trimmed.split('/');
+    const bucket = parts[0];
+    const prefixParts = parts.slice(1);
+    const prefix = prefixParts.length > 0 ? `${prefixParts.join('/')}/` : null;
+
+    return { bucket, prefix };
+  }
+
+  public async unloadFromQuery(sql: string, params: unknown[], _options: UnloadOptions): Promise<DownloadTableCSVData> {
     if (!this.config.exportBucket) {
       throw new Error('Unload is not configured');
     }
 
     const types = await this.queryColumnTypes(`(${sql})`, params);
-    const exportPrefix = uuidv4();
+    const { bucket, prefix } = this.parseS3Path(this.config.exportBucket.bucketName);
+    const exportPrefix = prefix ? `${prefix}${uuidv4()}` : uuidv4();
 
     const formattedQuery = sqlstring.format(`
       INSERT INTO FUNCTION
       s3(
-        'https://${this.config.exportBucket.bucketName}.s3.${this.config.exportBucket.region}.amazonaws.com/${exportPrefix}/export.csv.gz',
+        'https://${bucket}.s3.${this.config.exportBucket.region}.amazonaws.com/${exportPrefix}/export.csv.gz',
         '${this.config.exportBucket.keyId}',
         '${this.config.exportBucket.secretKey}',
         'CSV'
@@ -617,7 +628,7 @@ export class ClickHouseDriver extends BaseDriver implements DriverInterface {
        },
        region: this.config.exportBucket.region,
      },
-     this.config.exportBucket.bucketName,
+     bucket,
      exportPrefix,
    );
 
packages/cubejs-testing-drivers/fixtures/clickhouse.json

Lines changed: 11 additions & 0 deletions

@@ -10,6 +10,17 @@
           "CUBEJS_DB_EXPORT_BUCKET_AWS_REGION": "us-east-1"
         }
       }
+    },
+    "export-bucket-s3-prefix": {
+      "cube": {
+        "environment": {
+          "CUBEJS_DB_EXPORT_BUCKET_TYPE": "s3",
+          "CUBEJS_DB_EXPORT_BUCKET": "clickhouse-drivers-tests-preaggs/testing_prefix/for_export_buckets/",
+          "CUBEJS_DB_EXPORT_BUCKET_AWS_KEY": "${DRIVERS_TESTS_CUBEJS_DB_EXPORT_BUCKET_AWS_KEY}",
+          "CUBEJS_DB_EXPORT_BUCKET_AWS_SECRET": "${DRIVERS_TESTS_CUBEJS_DB_EXPORT_BUCKET_AWS_SECRET}",
+          "CUBEJS_DB_EXPORT_BUCKET_AWS_REGION": "us-east-1"
+        }
+      }
     }
   },
   "cube": {

packages/cubejs-testing-drivers/package.json

Lines changed: 1 addition & 0 deletions

@@ -25,6 +25,7 @@
     "clickhouse-core": "yarn test-driver -i dist/test/clickhouse-core.test.js",
     "clickhouse-full": "yarn test-driver -i dist/test/clickhouse-full.test.js",
     "clickhouse-export-bucket-s3-full": "yarn test-driver -i dist/test/clickhouse-export-bucket-s3-full.test.js",
+    "clickhouse-export-bucket-s3-prefix-full": "yarn test-driver -i dist/test/clickhouse-export-bucket-s3-prefix-full.test.js",
     "databricks-jdbc-driver": "yarn test-driver -i dist/test/databricks-jdbc-driver.test.js",
     "databricks-jdbc-core": "yarn test-driver -i dist/test/databricks-jdbc-core.test.js",
     "databricks-jdbc-full": "yarn test-driver -i dist/test/databricks-jdbc-full.test.js",
