Skip to content

Commit da66e41

Browse files
committed
[sdlf-foundations][cdk] data_bucket method to handle raw/stage/analytics buckets creation
1 parent f709ee1 commit da66e41

1 file changed

Lines changed: 59 additions & 112 deletions

File tree

sdlf-foundations/src/foundations.py

Lines changed: 59 additions & 112 deletions
Original file line numberDiff line numberDiff line change
@@ -135,29 +135,29 @@ def __init__(self, scope: Construct, id: str, **kwargs) -> None:
135135
],
136136
)
137137

138-
lakeformationdataaccess_role = iam.Role(
138+
self.lakeformationdataaccess_role = iam.Role(
139139
self,
140140
"rLakeFormationDataAccessRole",
141141
assumed_by=iam.CompositePrincipal(
142142
iam.ServicePrincipal("lakeformation.amazonaws.com"),
143143
iam.ServicePrincipal("glue.amazonaws.com"),
144144
),
145145
)
146-
lakeformationdataaccess_role.attach_inline_policy(lakeformationdataaccess_role_policy)
146+
self.lakeformationdataaccess_role.attach_inline_policy(lakeformationdataaccess_role_policy)
147147

148148
ssm.StringParameter(
149149
self,
150150
"rLakeFormationDataAccessRoleSsm",
151151
description="Lake Formation Data Access Role",
152152
parameter_name="/SDLF/IAM/LakeFormationDataAccessRoleArn",
153-
string_value=lakeformationdataaccess_role.role_arn,
153+
string_value=self.lakeformationdataaccess_role.role_arn,
154154
)
155155
ssm.StringParameter(
156156
self,
157157
"rLakeFormationDataAccessRoleNameSsm",
158158
description="Lake Formation Data Access Role",
159159
parameter_name="/SDLF/IAM/LakeFormationDataAccessRole",
160-
string_value=lakeformationdataaccess_role.role_name,
160+
string_value=self.lakeformationdataaccess_role.role_name,
161161
)
162162

163163
######## KMS #########
@@ -264,36 +264,36 @@ def __init__(self, scope: Construct, id: str, **kwargs) -> None:
264264
sid="Allow LakeFormation access",
265265
effect=iam.Effect.ALLOW,
266266
principals=[
267-
iam.ArnPrincipal(lakeformationdataaccess_role.role_arn),
267+
iam.ArnPrincipal(self.lakeformationdataaccess_role.role_arn),
268268
],
269269
actions=["kms:Encrypt*", "kms:Decrypt*", "kms:ReEncrypt*", "kms:GenerateDataKey*", "kms:Describe*"],
270270
resources=["*"],
271271
),
272272
]
273273
)
274274

275-
kms_key = kms.Key(
275+
self.kms_key = kms.Key(
276276
self,
277277
"rKMSKey",
278278
removal_policy=RemovalPolicy.RETAIN_ON_UPDATE_OR_DELETE,
279279
description="SDLF Foundations KMS Key",
280280
enable_key_rotation=True,
281281
policy=kms_key_policy,
282282
)
283-
kms_key.add_alias("alias/sdlf-kms-key").apply_removal_policy(RemovalPolicy.RETAIN_ON_UPDATE_OR_DELETE)
283+
self.kms_key.add_alias("alias/sdlf-kms-key").apply_removal_policy(RemovalPolicy.RETAIN_ON_UPDATE_OR_DELETE)
284284

285285
ssm.StringParameter(
286286
self,
287287
"rKMSKeySsm",
288288
description="ARN of the KMS key",
289289
parameter_name="/SDLF/KMS/KeyArn",
290-
string_value=kms_key.key_arn,
290+
string_value=self.kms_key.key_arn,
291291
)
292292

293293
######## S3 #########
294294
####### Access Logging Bucket ######
295295
access_logs_bucket_name = f"{p_org.value_as_string}-{p_domain.value_as_string}-{p_environment.value_as_string}-{scope.region}-{scope.account}-s3logs"
296-
access_logs_bucket = s3.Bucket(
296+
self.access_logs_bucket = s3.Bucket(
297297
self,
298298
"rS3AccessLogsBucket",
299299
bucket_name=access_logs_bucket_name, # TODO
@@ -318,7 +318,7 @@ def __init__(self, scope: Construct, id: str, **kwargs) -> None:
318318
),
319319
],
320320
encryption=s3.BucketEncryption.KMS,
321-
encryption_key=kms_key,
321+
encryption_key=self.kms_key,
322322
bucket_key_enabled=True,
323323
block_public_access=s3.BlockPublicAccess.BLOCK_ALL,
324324
)
@@ -327,18 +327,18 @@ def __init__(self, scope: Construct, id: str, **kwargs) -> None:
327327
"rS3AccessLogsBucketSsm",
328328
description="S3 Access Logs Bucket",
329329
parameter_name="/SDLF/S3/AccessLogsBucket",
330-
string_value=access_logs_bucket.bucket_name,
330+
string_value=self.access_logs_bucket.bucket_name,
331331
)
332332

333333
artifacts_bucket_name = f"{p_org.value_as_string}-{p_domain.value_as_string}-{p_environment.value_as_string}-{scope.region}-{scope.account}-artifacts"
334334
artifacts_bucket = s3.Bucket(
335335
self,
336336
"rArtifactsBucket",
337337
bucket_name=artifacts_bucket_name, # TODO
338-
server_access_logs_bucket=access_logs_bucket, # automatically add policy statement to access logs bucket policy
338+
server_access_logs_bucket=self.access_logs_bucket, # automatically add policy statement to access logs bucket policy
339339
server_access_logs_prefix=artifacts_bucket_name,
340340
encryption=s3.BucketEncryption.KMS,
341-
encryption_key=kms_key,
341+
encryption_key=self.kms_key,
342342
bucket_key_enabled=True,
343343
block_public_access=s3.BlockPublicAccess.BLOCK_ALL,
344344
enforce_ssl=True,
@@ -351,105 +351,19 @@ def __init__(self, scope: Construct, id: str, **kwargs) -> None:
351351
string_value=artifacts_bucket.bucket_name,
352352
)
353353

354-
raw_bucket_name = f"{p_org.value_as_string}-{p_domain.value_as_string}-{p_environment.value_as_string}-{scope.region}-{scope.account}-raw"
355-
raw_bucket = s3.Bucket(
356-
self,
357-
"rRawBucket",
358-
bucket_name=raw_bucket_name, # TODO
359-
server_access_logs_bucket=access_logs_bucket,
360-
server_access_logs_prefix=raw_bucket_name,
361-
encryption=s3.BucketEncryption.KMS,
362-
encryption_key=kms_key,
363-
bucket_key_enabled=True,
364-
block_public_access=s3.BlockPublicAccess.BLOCK_ALL,
365-
enforce_ssl=True,
366-
versioned=True,
367-
event_bridge_enabled=True,
368-
)
369-
lakeformation.CfnResource(
370-
self,
371-
"rRawBucketLakeFormationS3Registration",
372-
resource_arn=f"{raw_bucket.bucket_arn}/", # the trailing slash is important to Lake Formation somehow
373-
use_service_linked_role=False,
374-
role_arn=lakeformationdataaccess_role.role_arn,
375-
)
376-
ssm.StringParameter(
377-
self,
378-
"rS3RawBucketSsm",
379-
description="Name of the Raw S3 bucket",
380-
parameter_name="/SDLF/S3/RawBucket",
381-
string_value=raw_bucket.bucket_name,
382-
)
383-
384-
stage_bucket_name = f"{p_org.value_as_string}-{p_domain.value_as_string}-{p_environment.value_as_string}-{scope.region}-{scope.account}-stage"
385-
stage_bucket = s3.Bucket(
386-
self,
387-
"rStageBucket",
388-
bucket_name=stage_bucket_name, # TODO
389-
server_access_logs_bucket=access_logs_bucket,
390-
server_access_logs_prefix=stage_bucket_name,
391-
encryption=s3.BucketEncryption.KMS,
392-
encryption_key=kms_key,
393-
bucket_key_enabled=True,
394-
block_public_access=s3.BlockPublicAccess.BLOCK_ALL,
395-
enforce_ssl=True,
396-
versioned=True,
397-
event_bridge_enabled=True,
398-
)
399-
lakeformation.CfnResource(
400-
self,
401-
"rStageBucketLakeFormationS3Registration",
402-
resource_arn=f"{stage_bucket.bucket_arn}/", # the trailing slash is important to Lake Formation somehow
403-
use_service_linked_role=False,
404-
role_arn=lakeformationdataaccess_role.role_arn,
405-
)
406-
ssm.StringParameter(
407-
self,
408-
"rS3StageBucketSsm",
409-
description="Name of the Stage S3 bucket",
410-
parameter_name="/SDLF/S3/StageBucket",
411-
string_value=stage_bucket.bucket_name,
412-
)
413-
414-
analytics_bucket_name = f"{p_org.value_as_string}-{p_domain.value_as_string}-{p_environment.value_as_string}-{scope.region}-{scope.account}-analytics"
415-
analytics_bucket = s3.Bucket(
416-
self,
417-
"rAnalyticsBucket",
418-
bucket_name=analytics_bucket_name, # TODO
419-
server_access_logs_bucket=access_logs_bucket,
420-
server_access_logs_prefix=analytics_bucket_name,
421-
encryption=s3.BucketEncryption.KMS,
422-
encryption_key=kms_key,
423-
bucket_key_enabled=True,
424-
block_public_access=s3.BlockPublicAccess.BLOCK_ALL,
425-
enforce_ssl=True,
426-
versioned=True,
427-
event_bridge_enabled=True,
428-
)
429-
lakeformation.CfnResource(
430-
self,
431-
"rAnalyticsBucketLakeFormationS3Registration",
432-
resource_arn=f"{analytics_bucket.bucket_arn}/", # the trailing slash is important to Lake Formation somehow
433-
use_service_linked_role=False,
434-
role_arn=lakeformationdataaccess_role.role_arn,
435-
)
436-
ssm.StringParameter(
437-
self,
438-
"rS3AnalyticsBucketSsm",
439-
description="Name of the Analytics S3 bucket",
440-
parameter_name="/SDLF/S3/AnalyticsBucket",
441-
string_value=analytics_bucket.bucket_name,
442-
)
354+
raw_bucket = self.data_bucket(p_org.value_as_string, p_domain.value_as_string, p_environment.value_as_string, scope.region, scope.account, "raw")
355+
stage_bucket = self.data_bucket(p_org.value_as_string, p_domain.value_as_string, p_environment.value_as_string, scope.region, scope.account, "stage")
356+
analytics_bucket = self.data_bucket(p_org.value_as_string, p_domain.value_as_string, p_environment.value_as_string, scope.region, scope.account, "analytics")
443357

444358
athena_bucket_name = f"{p_org.value_as_string}-{p_domain.value_as_string}-{p_environment.value_as_string}-{scope.region}-{scope.account}-athena"
445359
athena_bucket = s3.Bucket(
446360
self,
447361
"rAthenaBucket",
448362
bucket_name=athena_bucket_name, # TODO
449-
server_access_logs_bucket=access_logs_bucket,
363+
server_access_logs_bucket=self.access_logs_bucket,
450364
server_access_logs_prefix=athena_bucket_name,
451365
encryption=s3.BucketEncryption.KMS,
452-
encryption_key=kms_key,
366+
encryption_key=self.kms_key,
453367
bucket_key_enabled=True,
454368
block_public_access=s3.BlockPublicAccess.BLOCK_ALL,
455369
enforce_ssl=True,
@@ -507,7 +421,7 @@ def __init__(self, scope: Construct, id: str, **kwargs) -> None:
507421
),
508422
],
509423
)
510-
lakeformationdataaccess_role.attach_inline_policy(s3_lakeformationdataaccess_role_policy)
424+
self.lakeformationdataaccess_role.attach_inline_policy(s3_lakeformationdataaccess_role_policy)
511425

512426
######## Lambda & SQS #########
513427
catalog_dlq = sqs.Queue(
@@ -517,7 +431,7 @@ def __init__(self, scope: Construct, id: str, **kwargs) -> None:
517431
queue_name="sdlf-catalog-dlq",
518432
retention_period=Duration.days(14),
519433
visibility_timeout=Duration.seconds(60),
520-
encryption_master_key=kms_key,
434+
encryption_master_key=self.kms_key,
521435
)
522436

523437
catalog_queue = sqs.Queue(
@@ -527,7 +441,7 @@ def __init__(self, scope: Construct, id: str, **kwargs) -> None:
527441
queue_name="sdlf-catalog-queue",
528442
retention_period=Duration.days(7),
529443
visibility_timeout=Duration.seconds(60),
530-
encryption_master_key=kms_key,
444+
encryption_master_key=self.kms_key,
531445
dead_letter_queue=sqs.DeadLetterQueue(
532446
max_receive_count=1,
533447
queue=catalog_dlq,
@@ -633,7 +547,7 @@ def __init__(self, scope: Construct, id: str, **kwargs) -> None:
633547
"kms:GenerateDataKey*",
634548
"kms:ReEncrypt*",
635549
],
636-
resources=[kms_key.key_arn],
550+
resources=[self.kms_key.key_arn],
637551
),
638552
iam.PolicyStatement(
639553
actions=[
@@ -671,7 +585,7 @@ def __init__(self, scope: Construct, id: str, **kwargs) -> None:
671585
timeout=Duration.seconds(60),
672586
role=lambdaexecution_role,
673587
environment={"ENV": p_environment.value_as_string},
674-
environment_encryption=kms_key,
588+
environment_encryption=self.kms_key,
675589
# vpcconfig TODO
676590
)
677591

@@ -687,7 +601,7 @@ def __init__(self, scope: Construct, id: str, **kwargs) -> None:
687601
timeout=Duration.seconds(60),
688602
role=lambdaexecution_role,
689603
environment={"QUEUE": catalog_queue.queue_name, "DLQ": catalog_dlq.queue_name},
690-
environment_encryption=kms_key,
604+
environment_encryption=self.kms_key,
691605
# vpcconfig TODO
692606
)
693607
logs.LogGroup(
@@ -697,7 +611,7 @@ def __init__(self, scope: Construct, id: str, **kwargs) -> None:
697611
log_group_name=f"/aws/lambda/{catalog_redrive_function.function_name}",
698612
retention=logs.RetentionDays.ONE_MONTH,
699613
# retention=Duration.days(p_cloudwatchlogsretentionindays.value_as_number),
700-
encryption_key=kms_key,
614+
encryption_key=self.kms_key,
701615
)
702616

703617
catalog_function.add_event_source(eventsources.SqsEventSource(catalog_queue, batch_size=10))
@@ -715,7 +629,7 @@ def __init__(self, scope: Construct, id: str, **kwargs) -> None:
715629
table_name=f"octagon-ObjectMetadata-{p_environment.value_as_string}",
716630
stream=ddb.StreamViewType.NEW_AND_OLD_IMAGES,
717631
encryption=ddb.TableEncryption.CUSTOMER_MANAGED,
718-
encryption_key=kms_key,
632+
encryption_key=self.kms_key,
719633
point_in_time_recovery=True,
720634
)
721635
ssm.StringParameter(
@@ -740,3 +654,36 @@ def __init__(self, scope: Construct, id: str, **kwargs) -> None:
740654
description="Name of the domain's Artifacts S3 bucket",
741655
value=artifacts_bucket.bucket_name,
742656
)
657+
658+
def data_bucket(self, org, domain, environment, region, account, bucket_layer):
    """Create one data-lake layer bucket and its companion resources.

    Three constructs are created in this scope, in order:

    1. An S3 bucket named
       ``{org}-{domain}-{environment}-{region}-{account}-{bucket_layer}``,
       encrypted with ``self.kms_key``, versioned, SSL-enforced, with
       public access blocked and EventBridge notifications enabled.
       Server access logs go to ``self.access_logs_bucket`` under a prefix
       equal to the bucket name.
    2. A Lake Formation registration of the bucket location, using
       ``self.lakeformationdataaccess_role`` rather than the
       service-linked role.
    3. An SSM string parameter ``/SDLF/S3/<Layer>Bucket`` holding the
       bucket name.

    ``<Layer>`` is ``bucket_layer.capitalize()`` and is used in every
    construct ID, so each layer must be created at most once per scope.

    The method reads ``self.access_logs_bucket``, ``self.kms_key`` and
    ``self.lakeformationdataaccess_role``; they must already be assigned
    when it is called.

    Args:
        org: Organization component of the bucket name.
        domain: Domain component of the bucket name.
        environment: Environment component of the bucket name.
        region: Region component of the bucket name.
        account: Account component of the bucket name.
        bucket_layer: Layer suffix of the bucket name (the visible callers
            pass "raw", "stage" and "analytics").

    Returns:
        The created ``s3.Bucket`` construct.
    """
    # Capitalized once and reused for every construct ID / parameter name.
    layer = bucket_layer.capitalize()
    full_name = f"{org}-{domain}-{environment}-{region}-{account}-{bucket_layer}"

    layer_bucket = s3.Bucket(
        self,
        f"r{layer}Bucket",
        bucket_name=full_name,  # TODO
        server_access_logs_bucket=self.access_logs_bucket,
        server_access_logs_prefix=full_name,
        encryption=s3.BucketEncryption.KMS,
        encryption_key=self.kms_key,
        bucket_key_enabled=True,
        block_public_access=s3.BlockPublicAccess.BLOCK_ALL,
        enforce_ssl=True,
        versioned=True,
        event_bridge_enabled=True,
    )

    # The trailing slash is important to Lake Formation somehow.
    registered_location = f"{layer_bucket.bucket_arn}/"
    lakeformation.CfnResource(
        self,
        f"r{layer}BucketLakeFormationS3Registration",
        resource_arn=registered_location,
        use_service_linked_role=False,
        role_arn=self.lakeformationdataaccess_role.role_arn,
    )

    # Publish the bucket name so it can be looked up through SSM.
    ssm.StringParameter(
        self,
        f"rS3{layer}BucketSsm",
        description=f"Name of the {layer} S3 bucket",
        parameter_name=f"/SDLF/S3/{layer}Bucket",
        string_value=layer_bucket.bucket_name,
    )

    return layer_bucket

0 commit comments

Comments
 (0)