|
| 1 | +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. |
| 2 | +// SPDX-License-Identifier: Apache-2.0 |
| 3 | + |
| 4 | +package com.myorg; |
| 5 | + |
| 6 | +import software.amazon.awscdk.*; |
| 7 | +import software.amazon.awscdk.services.iam.*; |
| 8 | +import software.amazon.awscdk.services.s3.*; |
| 9 | +import software.amazon.awscdk.services.glue.*; |
| 10 | +import software.constructs.Construct; |
| 11 | + |
| 12 | +import java.util.List; |
| 13 | +import java.util.Map; |
| 14 | +import java.util.UUID; |
| 15 | + |
| 16 | +public class EntityResolutionCdkStack extends Stack { |
| 17 | + public EntityResolutionCdkStack(final Construct scope, final String id) { |
| 18 | + this(scope, id, null); |
| 19 | + } |
| 20 | + |
| 21 | + public EntityResolutionCdkStack(final Construct scope, final String id, final StackProps props) { |
| 22 | + super(scope, id, props); |
| 23 | + |
| 24 | + // 1. Create an S3 bucket for the Glue Data Table |
| 25 | + String uniqueId = UUID.randomUUID().toString().replace("-", ""); // Remove dashes to ensure compatibility |
| 26 | + Bucket glueDataBucket = Bucket.Builder.create(this, "GlueDataBucket") |
| 27 | + .bucketName("glue-" + uniqueId) |
| 28 | + .versioned(true) |
| 29 | + .build(); |
| 30 | + |
| 31 | + // 2. Create a Glue database |
| 32 | + CfnDatabase glueDatabase = CfnDatabase.Builder.create(this, "GlueDatabase") |
| 33 | + .catalogId(this.getAccount()) |
| 34 | + .databaseInput(CfnDatabase.DatabaseInputProperty.builder() |
| 35 | + .name("entity_resolution_db") |
| 36 | + .build()) |
| 37 | + .build(); |
| 38 | + |
| 39 | + // 3. Create a Glue table referencing the S3 bucket |
| 40 | + CfnTable glueTable = CfnTable.Builder.create(this, "GlueTable") |
| 41 | + .catalogId(this.getAccount()) |
| 42 | + .databaseName(glueDatabase.getRef()) // Ensure Glue Table references the database correctly |
| 43 | + .tableInput(CfnTable.TableInputProperty.builder() |
| 44 | + .name("entity_resolution") // Fixed table name reference |
| 45 | + .tableType("EXTERNAL_TABLE") |
| 46 | + .storageDescriptor(CfnTable.StorageDescriptorProperty.builder() |
| 47 | + .columns(List.of( |
| 48 | + CfnTable.ColumnProperty.builder().name("id").type("string").build(), // Fixed: id is a string, |
| 49 | + CfnTable.ColumnProperty.builder().name("name").type("string").build(), |
| 50 | + CfnTable.ColumnProperty.builder().name("email").type("string").build() |
| 51 | + )) |
| 52 | + .location("s3://" + glueDataBucket.getBucketName() + "/data/") // Append subpath for data |
| 53 | + .inputFormat("org.apache.hadoop.mapred.TextInputFormat") |
| 54 | + .outputFormat("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat") |
| 55 | + .serdeInfo(CfnTable.SerdeInfoProperty.builder() |
| 56 | + .serializationLibrary("org.openx.data.jsonserde.JsonSerDe") // Set JSON SerDe |
| 57 | + .parameters(Map.of("serialization.format", "1")) // Optional: Set the format for JSON |
| 58 | + .build()) |
| 59 | + .build()) |
| 60 | + .build()) |
| 61 | + .build(); |
| 62 | + |
| 63 | + // Ensure Glue Table is created after the Database |
| 64 | + glueTable.addDependency(glueDatabase); |
| 65 | + |
| 66 | + // 4. Create an IAM Role for AWS Entity Resolution |
| 67 | + Role entityResolutionRole = Role.Builder.create(this, "EntityResolutionRole") |
| 68 | + .assumedBy(new ServicePrincipal("entityresolution.amazonaws.com")) // AWS Entity Resolution assumes this role |
| 69 | + .managedPolicies(List.of( |
| 70 | + ManagedPolicy.fromAwsManagedPolicyName("AmazonS3FullAccess"), |
| 71 | + ManagedPolicy.fromAwsManagedPolicyName("AWSEntityResolutionConsoleFullAccess"), |
| 72 | + ManagedPolicy.fromAwsManagedPolicyName("AWSGlueConsoleFullAccess"), |
| 73 | + ManagedPolicy.fromAwsManagedPolicyName("service-role/AWSGlueServiceRole") |
| 74 | + )) |
| 75 | + .build(); |
| 76 | + |
| 77 | + // Add custom permissions for Entity Resolution |
| 78 | + entityResolutionRole.addToPolicy(PolicyStatement.Builder.create() |
| 79 | + .actions(List.of( |
| 80 | + "entityresolution:StartMatchingWorkflow", |
| 81 | + "entityresolution:GetMatchingWorkflow" |
| 82 | + )) |
| 83 | + .resources(List.of("*")) // Adjust permissions if needed |
| 84 | + .build()); |
| 85 | + |
| 86 | + // 5. Create an S3 bucket for output data |
| 87 | + Bucket outputBucket = Bucket.Builder.create(this, "OutputBucket") |
| 88 | + .bucketName("entity-resolution-output-" + id.toLowerCase()) |
| 89 | + .versioned(true) |
| 90 | + .build(); |
| 91 | + |
| 92 | + // 6. Output the Role ARN |
| 93 | + new CfnOutput(this, "EntityResolutionArn", CfnOutputProps.builder() |
| 94 | + .value(entityResolutionRole.getRoleArn()) |
| 95 | + .description("The ARN of the Glue Role") |
| 96 | + .build()); |
| 97 | + |
| 98 | + // 7. Construct and output the Glue Table ARN |
| 99 | + String glueTableArn = String.format("arn:aws:glue:%s:%s:table/%s/%s", |
| 100 | + this.getRegion(), // Region where the stack is deployed |
| 101 | + this.getAccount(), // AWS account ID |
| 102 | + glueDatabase.getRef(), // Glue database name (resolved reference) |
| 103 | + "entity_resolution" // Corrected table name |
| 104 | + ); |
| 105 | + |
| 106 | + new CfnOutput(this, "GlueTableArn", CfnOutputProps.builder() |
| 107 | + .value(glueTableArn) |
| 108 | + .description("The ARN of the Glue Table") |
| 109 | + .build()); |
| 110 | + |
| 111 | + // 8. Output the name of the Glue Data Bucket |
| 112 | + new CfnOutput(this, "GlueDataBucketName", CfnOutputProps.builder() |
| 113 | + .value(glueDataBucket.getBucketName()) // Outputs the bucket name |
| 114 | + .description("The name of the Glue Data Bucket") |
| 115 | + .build()); |
| 116 | + } |
| 117 | +} |
0 commit comments