@@ -5,147 +5,45 @@ import rds = require('@aws-cdk/aws-rds');
55import glue = require( '@aws-cdk/aws-glue' ) ;
66import s3 = require( '@aws-cdk/aws-s3' ) ;
77import s3assets = require( '@aws-cdk/aws-s3-assets' ) ;
8- import { DataSetEnrollmentProps , DataLakeEnrollmentWorkflow } from './data-set-enrollment' ;
8+ import { RDSdataSetSetEnrollmentProps , RDSPostgresDataSetEnrollment } from './rds- data-set-enrollment' ;
99
1010
11- export interface RDSdataSetSetEnrollmentProps extends DataSetEnrollmentProps {
11+ export interface ChemblStackEnrollmentProps extends cdk . StackProps {
1212 databaseSecret : rds . DatabaseSecret ;
1313 database : rds . DatabaseInstance ;
1414 accessSecurityGroup : ec2 . SecurityGroup ;
15+ dataLakeBucket : s3 . Bucket ;
1516}
1617
17- export class Chembl25Stack extends cdk . Stack {
18-
19- constructor ( scope : cdk . Construct , id : string , props : RDSdataSetSetEnrollmentProps ) {
18+ export class ChemblStack extends cdk . Stack {
19+ constructor ( scope : cdk . Construct , id : string , props : ChemblStackEnrollmentProps ) {
2020 super ( scope , id , props ) ;
21-
22-
23- const chembl_25_src = new glue . Database ( this , 'chembl-25-src' , {
24- databaseName : 'chembl_25_src' ,
25- locationUri : `s3://${ props . dataLakeBucket . bucketName } /chembl/src/chembl25`
26- } ) ;
27- const chembl_25_dl = new glue . Database ( this , 'chembl-25-dl' , {
28- databaseName : 'chembl_25_dl' ,
29- locationUri : `s3://${ props . dataLakeBucket . bucketName } /chembl/dl/chembl25`
30- } ) ;
31-
32- var chemblConnectionInput = {
33- connectionProperties : {
34- USERNAME : props . databaseSecret . secretValueFromJson ( 'username' ) ,
35- JDBC_ENFORCE_SSL : "false" ,
36- PASSWORD : props . databaseSecret . secretValueFromJson ( 'password' ) ,
37- JDBC_CONNECTION_URL : `jdbc:postgresql://${ props . database . dbInstanceEndpointAddress } :5432/chembl_25`
38- }
39- , connectionType : "JDBC"
40- , description : "chembl-25-src connection"
41- , name : "chembl-25-src"
42- , physicalConnectionRequirements : {
43- availabilityZone : props . database . vpc . privateSubnets [ 0 ] . availabilityZone ,
44- subnetId : props . database . vpc . privateSubnets [ 0 ] . subnetId ,
45- securityGroupIdList : [ props . accessSecurityGroup . securityGroupId ] ,
46-
47- }
48- } ;
49-
50- const chemblConnection = new glue . CfnConnection ( this , 'chembl-25-src-connection' , {
51- catalogId : chembl_25_src . catalogId ,
52- connectionInput : chemblConnectionInput
53- } ) ;
54-
55- const chemblGlueRole = new iam . Role ( this , 'chembleGlueROle' , {
56- assumedBy : new iam . ServicePrincipal ( 'glue.amazonaws.com' )
57- } ) ;
58-
59- chemblGlueRole . addManagedPolicy ( iam . ManagedPolicy . fromAwsManagedPolicyName ( 'service-role/AWSGlueServiceRole' ) ) ;
60- chemblGlueRole . addManagedPolicy ( iam . ManagedPolicy . fromAwsManagedPolicyName ( 'CloudWatchAgentServerPolicy' ) ) ;
61- props . dataLakeBucket . grantReadWrite ( chemblGlueRole ) ;
62-
63-
64- const chembl25Crawler = new glue . CfnCrawler ( this , 'chembl25-crawler' , {
65- name : "chembl_25_src_crawler" ,
66- targets : {
67- jdbcTargets : [
68- {
69- path : "chembl_25/%" ,
70- exclusions : [ ] ,
71- connectionName : chemblConnectionInput . name
72- }
73- ] ,
74- catalogTargets : [ ] ,
75- s3Targets : [ ] ,
76- } ,
77- role : chemblGlueRole . roleName ,
78- databaseName : chembl_25_src . databaseName ,
79- schemaChangePolicy : {
80- deleteBehavior : "DEPRECATE_IN_DATABASE" ,
81- updateBehavior : "UPDATE_IN_DATABASE" ,
82- } ,
83- tablePrefix : "" ,
84- classifiers : [ ]
85- } ) ;
86-
87-
88- const chemblCopyTablesSparkScript = new s3assets . Asset ( this , 'chemblCopyTablesSparkScript' , {
89- path : 'scripts/glue.s3importchembl25.py'
90- } ) ;
91- chemblCopyTablesSparkScript . grantRead ( chemblGlueRole ) ;
92-
93-
94- const chembl_etl_job = new glue . CfnJob ( this , 'chembl_etl_job' , {
95- executionProperty : {
96- maxConcurrentRuns : 1
97- } ,
98- name : "chembl_src_to_dl_etl" ,
99- timeout : 2880 ,
100- glueVersion : "1.0" ,
101- maxCapacity : 11.0 ,
102- connections : {
103- connections : [
104- chemblConnectionInput . name
105- ]
106- } ,
107- command : {
108- scriptLocation : `s3://${ chemblCopyTablesSparkScript . s3BucketName } /${ chemblCopyTablesSparkScript . s3ObjectKey } ` ,
109- name : "glueetl" ,
110- pythonVersion : "3"
111- } ,
112- role : chemblGlueRole . roleArn ,
113- maxRetries : 0 ,
114- defaultArguments : {
21+
22+
23+ const dataSetName = "chembl_25" ;
24+
25+ new RDSPostgresDataSetEnrollment ( this , 'chembl-25-enrollment' , {
26+ databaseSecret : props . databaseSecret ,
27+ database : props . database ,
28+ accessSecurityGroup : props . accessSecurityGroup ,
29+ dataLakeBucket : props . dataLakeBucket ,
30+ DataSetName : dataSetName ,
31+ JdbcTargetIncludePaths : [ "chembl_25/%" ] ,
32+ GlueScriptPath : "scripts/glue.s3importchembl25.py" ,
33+ GlueScriptArguments : {
11534 "--job-language" : "python" ,
11635 "--job-bookmark-option" : "job-bookmark-disable" ,
11736 "--enable-metrics" : "" ,
11837 "--DL_BUCKET" : props . dataLakeBucket . bucketName ,
11938 "--DL_PREFIX" : "/chembl/25/" ,
12039 "--DL_REGION" : cdk . Stack . of ( this ) . region ,
121- "--GLUE_SRC_DATABASE" : chembl_25_src . databaseName
122- }
123- } ) ;
124-
125- const chembl_datalake_crawler = new glue . CfnCrawler ( this , 'chembl_datalake_crawler' , {
126- name : "chembl_25_dl_crawler" ,
127- targets : {
128- s3Targets : [
129- {
130- path : `s3://${ props . dataLakeBucket . bucketName } /chembl/25/`
131- }
132- ]
133- } ,
134- role : chemblGlueRole . roleArn ,
135- databaseName : chembl_25_dl . databaseName ,
136- schemaChangePolicy : {
137- deleteBehavior : "DEPRECATE_IN_DATABASE" ,
138- updateBehavior : "UPDATE_IN_DATABASE"
139- } ,
140- tablePrefix : ""
141- } ) ;
142-
143- const datalakeEnrollmentWorkflow = new DataLakeEnrollmentWorkflow ( this , 'chemblDataLakeWorkflow' , {
144- workfowName : "chemblDataLakeEnrollmentWorkflow" ,
145- srcCrawler : chembl25Crawler ,
146- etlJob : chembl_etl_job ,
147- datalakeCrawler : chembl_datalake_crawler
148-
149- } )
40+ "--GLUE_SRC_DATABASE" : "chembl_25_src"
41+ }
42+
43+ } ) ;
15044 }
151- }
45+ }
46+
47+
48+
49+
0 commit comments