
Commit 5ef1783

[sdlf-stage-glue] remove pDataset parameter
It was wrongly used in place of pBucketPrefix, and was used unnecessarily in resource names.
1 parent 0de54db commit 5ef1783
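In practical terms, the stage's Lambda functions no longer read DATASET, PIPELINE, or PIPELINE_STAGE from their environment; the deployment instance becomes the single identifier, and the dataset's bucket prefix reaches the handlers as S3_PREFIX, resolved by the template from either the new pBucketPrefix parameter or SSM. A minimal sketch of the handler-side contract after this commit, using only the variables that glue.yaml sets in the diff below:

import os

# Removed by this commit:
#   dataset = os.environ["DATASET"]
#   pipeline = os.environ["PIPELINE"]
#   pipeline_stage = os.environ["PIPELINE_STAGE"]

# The deployment instance is now the sole stage identifier:
deployment_instance = os.environ["DEPLOYMENT_INSTANCE"]

# The bucket prefix arrives pre-resolved by CloudFormation, taken from
# pBucketPrefix or from the /sdlf/dataset/rS3Prefix/<instance> SSM parameter:
s3_prefix = os.environ["S3_PREFIX"]

# Dataset-scoped lookups (PEH and manifests tables) still key off this:
dataset_deployment_instance = os.environ["DATASET_DEPLOYMENT_INSTANCE"]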

5 files changed: +36 additions, -47 deletions


sdlf-stage-glue/src/glue.yaml
Lines changed: 27 additions & 28 deletions

@@ -32,6 +32,10 @@ Parameters:
    Description: KMS key set as infra key for the dataset
    Type: String
    Default: "" # if not provided, pDatasetDeploymentInstance must be specified
+  pBucketPrefix:
+    Description: Bucket prefix for the dataset
+    Type: String
+    Default: "" # if not provided, pDatasetDeploymentInstance must be specified
  pEventBus:
    Description: EventBridge bus for the dataset
    Type: String
@@ -56,10 +60,6 @@ Parameters:
  # Description: Analytics bucket
  # Type: String
  # Default: "" # if not provided, pStorageDeploymentInstance must be specified
-  pDataset:
-    Description: The name of the dataset (all lowercase, no symbols or spaces)
-    Type: String
-    AllowedPattern: "[a-z0-9]{2,14}"
  pStageEnabled:
    Description: Whether the stage is enabled or not
    Type: String
@@ -158,7 +158,7 @@ Globals:
      - "{{resolve:ssm:/SDLF/Lambda/LatestDatalakeLibraryLayer}}"
    Environment:
      Variables:
-        DATASET: !Ref pDataset
+        S3_PREFIX: !If [FetchFromDatasetSsm, !Sub "{{resolve:ssm:/sdlf/dataset/rS3Prefix/${pDatasetDeploymentInstance}}}", !Ref pBucketPrefix]
        DEPLOYMENT_INSTANCE: !Ref pDeploymentInstance
        STORAGE_DEPLOYMENT_INSTANCE: !Ref pStorageDeploymentInstance
        DATASET_DEPLOYMENT_INSTANCE: !Ref pDatasetDeploymentInstance
@@ -182,7 +182,6 @@ Resources:
        pInfraKmsKey: !Ref pInfraKmsKey
        pEventBus: !Ref pEventBus
        pScheduleGroup: !Ref pScheduleGroup
-        pDataset: !Ref pDataset
        pStageEnabled: !Ref pStageEnabled
        pTriggerType: !Ref pTriggerType
        pSchedule: !Ref pSchedule
@@ -193,7 +192,7 @@ Resources:
  rLambdaCommonPolicy:
    Type: AWS::IAM::ManagedPolicy
    Properties:
-      Path: !Sub /sdlf-${pDataset}/
+      Path: !Sub /sdlf-${pDeploymentInstance}/
      PolicyDocument:
        Version: "2012-10-17"
        Statement:
@@ -203,7 +202,7 @@ Resources:
            - logs:CreateLogStream
            - logs:PutLogEvents
          Resource:
-            - !Sub arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/sdlf-${pDataset}-${pDeploymentInstance}-*
+            - !Sub arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/sdlf-${pDeploymentInstance}-*
        - Effect: Allow
          Action:
            - ssm:GetParameter
@@ -238,7 +237,7 @@ Resources:
  rRoleLambdaExecutionRoutingStep:
    Type: AWS::IAM::Role
    Properties:
-      Path: !Sub /sdlf-${pDataset}/
+      Path: !Sub /sdlf-${pDeploymentInstance}/
      # PermissionsBoundary: !Sub "{{resolve:ssm:/SDLF/IAM/${pDataset}/TeamPermissionsBoundary}}"
      ManagedPolicyArns:
        - !Ref rLambdaCommonPolicy
@@ -254,7 +253,7 @@ Resources:
              Service: lambda.amazonaws.com
            Action: sts:AssumeRole
      Policies:
-        - PolicyName: !Sub sdlf-${pDataset}-${pDeploymentInstance}-routing
+        - PolicyName: !Sub sdlf-${pDeploymentInstance}-routing
          PolicyDocument:
            Version: "2012-10-17"
            Statement:
@@ -274,14 +273,14 @@ Resources:
                  - sqs:ReceiveMessage
                  - sqs:SendMessage
                Resource:
-                  - !Sub arn:${AWS::Partition}:sqs:${AWS::Region}:${AWS::AccountId}:sdlf-${pDataset}-${pDeploymentInstance}-queue-*
-                  - !Sub arn:${AWS::Partition}:sqs:${AWS::Region}:${AWS::AccountId}:sdlf-${pDataset}-${pDeploymentInstance}-dlq-*
+                  - !Sub arn:${AWS::Partition}:sqs:${AWS::Region}:${AWS::AccountId}:sdlf-${pDeploymentInstance}-queue.fifo
+                  - !Sub arn:${AWS::Partition}:sqs:${AWS::Region}:${AWS::AccountId}:sdlf-${pDeploymentInstance}-dlq.fifo

  # Metadata Step Role (fetch metadata, update pipeline execution history...)
  rRoleLambdaExecutionMetadataStep:
    Type: AWS::IAM::Role
    Properties:
-      Path: !Sub /sdlf-${pDataset}/
+      Path: !Sub /sdlf-${pDeploymentInstance}/
      # PermissionsBoundary: !Sub "{{resolve:ssm:/SDLF/IAM/${pDataset}/TeamPermissionsBoundary}}"
      ManagedPolicyArns:
        - !Ref rLambdaCommonPolicy
@@ -301,7 +300,7 @@ Resources:
  rRoleLambdaExecutionErrorStep:
    Type: AWS::IAM::Role
    Properties:
-      Path: !Sub /sdlf-${pDataset}/
+      Path: !Sub /sdlf-${pDeploymentInstance}/
      # PermissionsBoundary: !Sub "{{resolve:ssm:/SDLF/IAM/${pDataset}/TeamPermissionsBoundary}}"
      ManagedPolicyArns:
        - !Ref rLambdaCommonPolicy
@@ -317,7 +316,7 @@ Resources:
              Service: lambda.amazonaws.com
            Action: sts:AssumeRole
      Policies:
-        - PolicyName: !Sub sdlf-${pDataset}-${pDeploymentInstance}-error
+        - PolicyName: !Sub sdlf-${pDeploymentInstance}-error
          PolicyDocument:
            Version: "2012-10-17"
            Statement:
@@ -332,7 +331,7 @@ Resources:
                  - sqs:ReceiveMessage
                  - sqs:SendMessage
                Resource:
-                  - !Sub arn:${AWS::Partition}:sqs:${AWS::Region}:${AWS::AccountId}:sdlf-${pDataset}-${pDeploymentInstance}-dlq-*
+                  - !Sub arn:${AWS::Partition}:sqs:${AWS::Region}:${AWS::AccountId}:sdlf-${pDeploymentInstance}-dlq.fifo

  ######## LAMBDA FUNCTIONS #########
  rLambdaRoutingStep:
@@ -344,7 +343,7 @@ Resources:
            reason: Permissions to write CloudWatch Logs are granted by rLambdaCommonPolicy
    Properties:
      CodeUri: ./lambda/routing/src
-      FunctionName: !Sub sdlf-${pDataset}-${pDeploymentInstance}-routing
+      FunctionName: !Sub sdlf-${pDeploymentInstance}-routing
      Description: Checks if items are to be processed and route them to state machine
      Environment:
        Variables:
@@ -362,7 +361,7 @@ Resources:
            reason: Permissions to write CloudWatch Logs are granted by rLambdaCommonPolicy
    Properties:
      CodeUri: ./lambda/redrive/src
-      FunctionName: !Sub sdlf-${pDataset}-${pDeploymentInstance}-redrive
+      FunctionName: !Sub sdlf-${pDeploymentInstance}-redrive
      Description: Redrives Failed messages to the routing queue
      MemorySize: 192
      Timeout: 300
@@ -377,7 +376,7 @@ Resources:
            reason: Permissions to write CloudWatch Logs are granted by rLambdaCommonPolicy
    Properties:
      CodeUri: ./lambda/postupdate-metadata/src
-      FunctionName: !Sub sdlf-${pDataset}-${pDeploymentInstance}-postupdate
+      FunctionName: !Sub sdlf-${pDeploymentInstance}-postupdate
      Description: Post-Update the metadata in the DynamoDB Catalog table
      MemorySize: 192
      Timeout: 300
@@ -392,7 +391,7 @@ Resources:
            reason: Permissions to write CloudWatch Logs are granted by rLambdaCommonPolicy
    Properties:
      CodeUri: ./lambda/error/src
-      FunctionName: !Sub sdlf-${pDataset}-${pDeploymentInstance}-error
+      FunctionName: !Sub sdlf-${pDeploymentInstance}-error
      Description: Fallback lambda to handle messages which failed processing
      MemorySize: 192
      Timeout: 300
@@ -405,7 +404,7 @@ Resources:
      Name: !Sub /sdlf/pipeline/rLambdaRoutingStep/${pDeploymentInstance}
      Type: String
      Value: !GetAtt rLambdaRoutingStep.Arn
-      Description: !Sub "ARN of the ${pDataset} ${pDeploymentInstance} Routing Lambda" # TODO
+      Description: !Sub "ARN of the ${pDeploymentInstance} Routing Lambda" # TODO

  rLambdaRoutingStepLogGroup:
    Type: AWS::Logs::LogGroup
@@ -452,7 +451,7 @@ Resources:
        - id: W11
          reason: The actions with "*" are all ones that do not have resource limitations associated with them
    Properties:
-      Path: !Sub /sdlf-${pDataset}/
+      Path: !Sub /sdlf-${pDeploymentInstance}/
      # PermissionsBoundary: !Sub "{{resolve:ssm:/SDLF/IAM/${pDataset}/TeamPermissionsBoundary}}"
      AssumeRolePolicyDocument:
        Version: "2012-10-17"
@@ -466,26 +465,26 @@ Resources:
            StringEquals:
              "aws:SourceAccount": !Sub ${AWS::AccountId}
      Policies:
-        - PolicyName: !Sub sdlf-${pDataset}-${pDeploymentInstance}-sm
+        - PolicyName: !Sub sdlf-${pDeploymentInstance}-sm
          PolicyDocument:
            Version: "2012-10-17"
            Statement:
              - Effect: Allow
                Action:
                  - lambda:InvokeFunction
-                Resource: !Sub arn:${AWS::Partition}:lambda:${AWS::Region}:${AWS::AccountId}:function:sdlf-${pDataset}-${pDeploymentInstance}-*
+                Resource: !Sub arn:${AWS::Partition}:lambda:${AWS::Region}:${AWS::AccountId}:function:sdlf-${pDeploymentInstance}-*
              - Effect: Allow
                Action:
                  - glue:StartJobRun
                  - glue:GetJobRun
                  - glue:GetJobRuns
                  - glue:BatchStopJobRun
-                Resource: !Sub arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:job/sdlf-${pDataset}-*
+                Resource: !Sub arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:job/sdlf-${pDeploymentInstance}-*
              - Effect: Allow
                Action:
                  - glue:StartCrawler
                  - glue:GetCrawler
-                Resource: !Sub arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:crawler/sdlf-${pDataset}-*
+                Resource: !Sub arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:crawler/sdlf-${pDatasetDeploymentInstance}-*
              - Effect: Allow
                Action:
                  - xray:PutTraceSegments # W11 exception
@@ -514,7 +513,7 @@ Resources:
  rStateMachine:
    Type: AWS::Serverless::StateMachine
    Properties:
-      Name: !Sub sdlf-${pDataset}-${pDeploymentInstance}-sm
+      Name: !Sub sdlf-${pDeploymentInstance}-sm
      DefinitionUri: ./state-machine/stage-glue.asl.json
      DefinitionSubstitutions:
        lPostMetadata: !GetAtt rLambdaPostMetadataStep.Arn
@@ -537,7 +536,7 @@ Resources:
      Name: !Sub /sdlf/pipeline/rStateMachine/${pDeploymentInstance}
      Type: String
      Value: !Ref rStateMachine
-      Description: !Sub "ARN of the ${pDataset} ${pDeploymentInstance} State Machine" # TODO
+      Description: !Sub "ARN of the ${pDeploymentInstance} State Machine" # TODO

Outputs:
  oPipelineReference:
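One asymmetry in the state-machine policy hunk above is worth noting: Glue job ARNs are now scoped by the stage's own pDeploymentInstance, while crawler ARNs are scoped by pDatasetDeploymentInstance, presumably because crawlers are provisioned with the dataset rather than with the stage (an inference from the parameter names; the commit message does not say). A hypothetical Python helper, names illustrative only, spelling out the two patterns the policy grants:

def glue_arns(partition: str, region: str, account: str,
              deployment_instance: str, dataset_deployment_instance: str) -> tuple[str, str]:
    # Jobs follow the stage's deployment instance...
    job_arn = f"arn:{partition}:glue:{region}:{account}:job/sdlf-{deployment_instance}-*"
    # ...while crawlers follow the dataset's deployment instance.
    crawler_arn = f"arn:{partition}:glue:{region}:{account}:crawler/sdlf-{dataset_deployment_instance}-*"
    return job_arn, crawler_arn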

sdlf-stage-glue/src/lambda/error/src/lambda_function.py
File mode changed: 100755 → 100644
Lines changed: 0 additions & 3 deletions

@@ -6,9 +6,6 @@
from datalake_library.sdlf import SQSConfiguration

logger = init_logger(__name__)
-dataset = os.environ["DATASET"]
-pipeline = os.environ["PIPELINE"]
-pipeline_stage = os.environ["PIPELINE_STAGE"]
deployment_instance = os.environ["DEPLOYMENT_INSTANCE"]

sdlf-stage-glue/src/lambda/postupdate-metadata/src/lambda_function.py
File mode changed: 100755 → 100644
Lines changed: 2 additions & 5 deletions

@@ -4,9 +4,6 @@
from datalake_library.sdlf import PipelineExecutionHistoryAPI

logger = init_logger(__name__)
-dataset = os.environ["DATASET"]
-pipeline = os.environ["PIPELINE"]
-pipeline_stage = os.environ["PIPELINE_STAGE"]
deployment_instance = os.environ["DEPLOYMENT_INSTANCE"]
peh_table_instance = os.environ["DATASET_DEPLOYMENT_INSTANCE"]
manifests_table_instance = os.environ["DATASET_DEPLOYMENT_INSTANCE"]
@@ -42,7 +39,7 @@ def lambda_handler(event, context):
        if not partial_failure:
            pipeline_execution.update_pipeline_execution(
-                status=f"{pipeline}-{pipeline_stage} {component} Processing", component=component
+                status=f"{deployment_instance} {component} Processing", component=component
            )
            pipeline_execution.end_pipeline_execution_success()
        else:
@@ -51,6 +48,6 @@
    except Exception as e:
        logger.error("Fatal error", exc_info=True)
        pipeline_execution.end_pipeline_execution_failed(
-            component=component, issue_comment=f"{pipeline}-{pipeline_stage} {component} Error: {repr(e)}"
+            component=component, issue_comment=f"{deployment_instance} {component} Error: {repr(e)}"
        )
        raise e

sdlf-stage-glue/src/lambda/redrive/src/lambda_function.py
File mode changed: 100755 → 100644

sdlf-stage-glue/src/lambda/routing/src/lambda_function.py
File mode changed: 100755 → 100644
Lines changed: 7 additions & 11 deletions

@@ -12,12 +12,8 @@
)

logger = init_logger(__name__)
-dataset = os.environ["DATASET"]
-pipeline = os.environ["PIPELINE"]
-pipeline_stage = os.environ["PIPELINE_STAGE"]
deployment_instance = os.environ["DEPLOYMENT_INSTANCE"]
-peh_table_instance = os.environ["DATASET_DEPLOYMENT_INSTANCE"]
-manifests_table_instance = os.environ["DATASET_DEPLOYMENT_INSTANCE"]
+dataset_deployment_instance = peh_table_instance = manifests_table_instance = os.environ["DATASET_DEPLOYMENT_INSTANCE"]


def serializer(obj):
@@ -31,8 +27,8 @@ def serializer(obj):
def pipeline_start(pipeline_execution, event):
    peh_id = pipeline_execution.start_pipeline_execution(
-        pipeline_name=f"{pipeline}-{pipeline_stage}",
-        dataset_name=f"{dataset}",
+        pipeline_name=deployment_instance,
+        dataset_name=dataset_deployment_instance,
        comment=event, # TODO test maximum size
    )
    logger.info(f"peh_id: {peh_id}")
@@ -53,14 +49,14 @@ def get_source_records(event):
        logger.info("Stage trigger: event-schedule")
        min_items_to_process = 1
        max_items_to_process = 100
-    logger.info(f"Pipeline is {pipeline}, stage is {pipeline_stage}")
+    logger.info(f"Pipeline is {deployment_instance}")
    logger.info(
        f"Pipeline stage configuration: min_items_to_process {min_items_to_process}, max_items_to_process {max_items_to_process}"
    )

    sqs_config = SQSConfiguration(instance=deployment_instance)
    queue_interface = SQSInterface(sqs_config.stage_queue)
-    logger.info(f"Querying {dataset} {pipeline}-{pipeline_stage} objects waiting for processing")
+    logger.info(f"Querying {deployment_instance} objects waiting for processing")
    messages = queue_interface.receive_min_max_messages(min_items_to_process, max_items_to_process)
    logger.info(f"{len(messages)} Objects ready for processing")
@@ -115,7 +111,7 @@ def lambda_handler(event, context):
                state_config.stage_state_machine, json.dumps(records, default=serializer)
            )
            pipeline_execution.update_pipeline_execution(
-                status=f"{pipeline}-{pipeline_stage} Transform Processing", component="Transform"
+                status=f"{deployment_instance} Transform Processing", component="Transform"
            )
        else:
            logger.info("Nothing to process, exiting pipeline")
@@ -126,6 +122,6 @@
    except Exception as e:
        component = context.function_name.split("-")[-2].title()
        pipeline_execution.end_pipeline_execution_failed(
            component=component,
-            issue_comment=f"{pipeline}-{pipeline_stage} {component} Error: {repr(e)}",
+            issue_comment=f"{deployment_instance} {component} Error: {repr(e)}",
        )
        raise e
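The renames across both files follow a single convention: the dataset segment disappears, every stage resource is keyed by the deployment instance alone, and the SQS grants switch from name wildcards to the exact FIFO queue names. A small illustration of the resulting scheme, with hypothetical helper functions that are not part of the codebase ("dev1" is an example instance value):

def stage_resource_name(deployment_instance: str, suffix: str) -> str:
    # e.g. stage_resource_name("dev1", "routing") == "sdlf-dev1-routing",
    # where the name used to be "sdlf-<dataset>-dev1-routing"
    return f"sdlf-{deployment_instance}-{suffix}"

def stage_queue_names(deployment_instance: str) -> tuple[str, str]:
    # The IAM policies now reference these exact FIFO queues instead of the
    # former "sdlf-<dataset>-<instance>-queue-*" wildcards.
    return (
        f"sdlf-{deployment_instance}-queue.fifo",
        f"sdlf-{deployment_instance}-dlq.fifo",
    )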
