@@ -15,20 +15,6 @@ import {
1515 isSslCert ,
1616} from '@cubejs-backend/shared' ;
1717import fs from 'fs' ;
18- import { getSignedUrl } from '@aws-sdk/s3-request-presigner' ;
19- import { S3 , GetObjectCommand , S3ClientConfig } from '@aws-sdk/client-s3' ;
20- import { Storage } from '@google-cloud/storage' ;
21- import {
22- BlobServiceClient ,
23- StorageSharedKeyCredential ,
24- ContainerSASPermissions ,
25- SASProtocol ,
26- generateBlobSASQueryParameters ,
27- } from '@azure/storage-blob' ;
28- import {
29- DefaultAzureCredential ,
30- ClientSecretCredential ,
31- } from '@azure/identity' ;
3218
3319import { cancelCombinator } from './utils' ;
3420import {
@@ -57,44 +43,10 @@ import {
5743 InformationSchemaColumn ,
5844} from './driver.interface' ;
5945
60- /**
61- * @see {@link DefaultAzureCredential } constructor options
62- */
63- export type AzureStorageClientConfig = {
64- azureKey ?: string ,
65- sasToken ?: string ,
66- /**
67- * The client ID of a Microsoft Entra app registration.
68- * In case of DefaultAzureCredential flow if it is omitted
69- * the Azure library will try to use the AZURE_CLIENT_ID env
70- */
71- clientId ?: string ,
72- /**
73- * ID of the application's Microsoft Entra tenant. Also called its directory ID.
74- * In case of DefaultAzureCredential flow if it is omitted
75- * the Azure library will try to use the AZURE_TENANT_ID env
76- */
77- tenantId ?: string ,
78- /**
79- * Azure service principal client secret.
80- * Enables authentication to Microsoft Entra ID using a client secret that was generated
81- * for an App Registration. More information on how to configure a client secret can be found here:
82- * https://learn.microsoft.com/entra/identity-platform/quickstart-configure-app-access-web-apis#add-credentials-to-your-web-application
83- * In case of DefaultAzureCredential flow if it is omitted
84- * the Azure library will try to use the AZURE_CLIENT_SECRET env
85- */
86- clientSecret ?: string ,
87- /**
88- * The path to a file containing a Kubernetes service account token that authenticates the identity.
89- * In case of DefaultAzureCredential flow if it is omitted
90- * the Azure library will try to use the AZURE_FEDERATED_TOKEN_FILE env
91- */
92- tokenFilePath ?: string ,
93- } ;
94-
95- export type GoogleStorageClientConfig = {
96- credentials : any ,
97- } ;
46+ // Import only types, because these SDKs are quite large and should be loaded lazily
47+ import type { AzureStorageClientConfig } from './storage-fs/azure.fs' ;
48+ import type { S3StorageClientConfig } from './storage-fs/aws.fs' ;
49+ import type { GoogleStorageClientConfig } from './storage-fs/gcs.fs' ;
9850
9951export type ParsedBucketUrl = {
10052 /**
@@ -800,38 +752,12 @@ export abstract class BaseDriver implements DriverInterface {
800752 * Returns an array of signed AWS S3 URLs of the unloaded csv files.
801753 */
802754 protected async extractUnloadedFilesFromS3 (
803- clientOptions : S3ClientConfig ,
755+ clientOptions : S3StorageClientConfig ,
804756 bucketName : string ,
805757 prefix : string
806758 ) : Promise < string [ ] > {
807- const storage = new S3 ( clientOptions ) ;
808- // It looks that different driver configurations use different formats
809- // for the bucket - some expect only names, some - full url-like names.
810- // So we unify this.
811- bucketName = bucketName . replace ( / ^ [ a - z A - Z ] + : \/ \/ / , '' ) ;
812-
813- const list = await storage . listObjectsV2 ( {
814- Bucket : bucketName ,
815- Prefix : prefix ,
816- } ) ;
817- if ( list ) {
818- if ( ! list . Contents ) {
819- return [ ] ;
820- } else {
821- const csvFiles = await Promise . all (
822- list . Contents . map ( async ( file ) => {
823- const command = new GetObjectCommand ( {
824- Bucket : bucketName ,
825- Key : file . Key ,
826- } ) ;
827- return getSignedUrl ( storage , command , { expiresIn : 3600 } ) ;
828- } )
829- ) ;
830- return csvFiles ;
831- }
832- }
833-
834- throw new Error ( 'Unable to retrieve list of files from S3 storage after unloading.' ) ;
759+ // Lazy loading, because it's using AWS SDK, which is quite heavy.
760+ return ( await import ( './storage-fs/aws.fs' ) ) . extractUnloadedFilesFromS3 ( clientOptions , bucketName , prefix ) ;
835761 }
836762
837763 /**
@@ -842,124 +768,16 @@ export abstract class BaseDriver implements DriverInterface {
842768 bucketName : string ,
843769 tableName : string
844770 ) : Promise < string [ ] > {
845- const storage = new Storage (
846- gcsConfig . credentials
847- ? { credentials : gcsConfig . credentials , projectId : gcsConfig . credentials . project_id }
848- : undefined
849- ) ;
850- const bucket = storage . bucket ( bucketName ) ;
851- const [ files ] = await bucket . getFiles ( { prefix : `${ tableName } /` } ) ;
852- if ( files . length ) {
853- const csvFiles = await Promise . all ( files . map ( async ( file ) => {
854- const [ url ] = await file . getSignedUrl ( {
855- action : 'read' ,
856- expires : new Date ( new Date ( ) . getTime ( ) + 60 * 60 * 1000 )
857- } ) ;
858- return url ;
859- } ) ) ;
860-
861- return csvFiles ;
862- } else {
863- throw new Error ( 'No CSV files were obtained from the bucket' ) ;
864- }
771+ // Lazy loading, because it's using Google Cloud SDK, which is quite heavy.
772+ return ( await import ( './storage-fs/gcs.fs' ) ) . extractFilesFromGCS ( gcsConfig , bucketName , tableName ) ;
865773 }
866774
867775 protected async extractFilesFromAzure (
868776 azureConfig : AzureStorageClientConfig ,
869777 bucketName : string ,
870778 tableName : string
871779 ) : Promise < string [ ] > {
872- const splitter = bucketName . includes ( 'blob.core' ) ? '.blob.core.windows.net/' : '.dfs.core.windows.net/' ;
873- const parts = bucketName . split ( splitter ) ;
874- const account = parts [ 0 ] ;
875- const container = parts [ 1 ] . split ( '/' ) [ 0 ] ;
876- let credential : StorageSharedKeyCredential | ClientSecretCredential | DefaultAzureCredential ;
877- let blobServiceClient : BlobServiceClient ;
878- let getSas ;
879-
880- if ( azureConfig . azureKey ) {
881- credential = new StorageSharedKeyCredential ( account , azureConfig . azureKey ) ;
882- getSas = async ( name : string , startsOn : Date , expiresOn : Date ) => generateBlobSASQueryParameters (
883- {
884- containerName : container ,
885- blobName : name ,
886- permissions : ContainerSASPermissions . parse ( 'r' ) ,
887- startsOn,
888- expiresOn,
889- protocol : SASProtocol . Https ,
890- version : '2020-08-04' ,
891- } ,
892- credential as StorageSharedKeyCredential
893- ) . toString ( ) ;
894- } else if ( azureConfig . clientSecret && azureConfig . tenantId && azureConfig . clientId ) {
895- credential = new ClientSecretCredential (
896- azureConfig . tenantId ,
897- azureConfig . clientId ,
898- azureConfig . clientSecret ,
899- ) ;
900- getSas = async ( name : string , startsOn : Date , expiresOn : Date ) => {
901- const userDelegationKey = await blobServiceClient . getUserDelegationKey ( startsOn , expiresOn ) ;
902- return generateBlobSASQueryParameters (
903- {
904- containerName : container ,
905- blobName : name ,
906- permissions : ContainerSASPermissions . parse ( 'r' ) ,
907- startsOn,
908- expiresOn,
909- protocol : SASProtocol . Https ,
910- version : '2020-08-04' ,
911- } ,
912- userDelegationKey ,
913- account
914- ) . toString ( ) ;
915- } ;
916- } else {
917- const opts = {
918- tenantId : azureConfig . tenantId ,
919- clientId : azureConfig . clientId ,
920- tokenFilePath : azureConfig . tokenFilePath ,
921- } ;
922- credential = new DefaultAzureCredential ( opts ) ;
923- getSas = async ( name : string , startsOn : Date , expiresOn : Date ) => {
924- // getUserDelegationKey works only for authorization with Microsoft Entra ID
925- const userDelegationKey = await blobServiceClient . getUserDelegationKey ( startsOn , expiresOn ) ;
926- return generateBlobSASQueryParameters (
927- {
928- containerName : container ,
929- blobName : name ,
930- permissions : ContainerSASPermissions . parse ( 'r' ) ,
931- startsOn,
932- expiresOn,
933- protocol : SASProtocol . Https ,
934- version : '2020-08-04' ,
935- } ,
936- userDelegationKey ,
937- account ,
938- ) . toString ( ) ;
939- } ;
940- }
941-
942- const url = `https://${ account } .blob.core.windows.net` ;
943- blobServiceClient = azureConfig . sasToken ?
944- new BlobServiceClient ( `${ url } ?${ azureConfig . sasToken } ` ) :
945- new BlobServiceClient ( url , credential ) ;
946-
947- const csvFiles : string [ ] = [ ] ;
948- const containerClient = blobServiceClient . getContainerClient ( container ) ;
949- const blobsList = containerClient . listBlobsFlat ( { prefix : `${ tableName } ` } ) ;
950- for await ( const blob of blobsList ) {
951- if ( blob . name && ( blob . name . endsWith ( '.csv.gz' ) || blob . name . endsWith ( '.csv' ) ) ) {
952- const starts = new Date ( ) ;
953- const expires = new Date ( starts . valueOf ( ) + 1000 * 60 * 60 ) ;
954- const sas = await getSas ( blob . name , starts , expires ) ;
955- csvFiles . push ( `${ url } /${ container } /${ blob . name } ?${ sas } ` ) ;
956- }
957- }
958-
959- if ( csvFiles . length === 0 ) {
960- throw new Error ( 'No CSV files were obtained from the bucket' ) ;
961- }
962-
963- return csvFiles ;
780+ // Lazy loading, because it's using azure SDK, which is quite (extremely) heavy.
781+ return ( await import ( './storage-fs/azure.fs' ) ) . extractFilesFromAzure ( azureConfig , bucketName , tableName ) ;
964782 }
965783}