Skip to content

Commit 12bf0ce

Browse files
committed
add stack that grants LakeFormation Permissions on Glue Job
1 parent d252216 commit 12bf0ce

File tree

5 files changed

+68
-8
lines changed

5 files changed

+68
-8
lines changed

README.md

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ command.
114114
</pre>
115115
3. Define a schema for the streaming data
116116
<pre>
117-
(.venv) $ cdk deploy GlueStreamingSinkToIcebergJobRole GlueSchemaOnKinesisStream
117+
(.venv) $ cdk deploy GlueSchemaOnKinesisStream
118118
</pre>
119119

120120
Running the `cdk deploy GlueSchemaOnKinesisStream` command is equivalent to manually creating a schema in the AWS Glue Data Catalog with the following steps:
@@ -176,15 +176,17 @@ command.
176176
* (step 2) Provision the Glue Streaming Job
177177

178178
<pre>
179-
(.venv) $ cdk deploy GlueStreamingSinkToIceberg
179+
(.venv) $ cdk deploy GlueStreamingSinkToIcebergJobRole \
180+
GrantLFPermissionsOnGlueJobRole \
181+
GlueStreamingSinkToIceberg
180182
</pre>
181183
6. Make sure the Glue job can access the Kinesis Data Streams table in the Glue Catalog database; otherwise, grant the Glue job the required permissions
182184

183185
We can get the permissions by running the following command:
184186
<pre>
185187
(.venv) $ aws lakeformation list-permissions | jq -r '.PrincipalResourcePermissions[] | select(.Principal.DataLakePrincipalIdentifier | endswith(":role/GlueStreamingJobRole-Iceberg"))'
186188
</pre>
187-
Also, we can grant the glue job to required permissions by running the following command:
189+
If none are found, we need to manually grant the Glue job the required permissions by running the following command:
188190
<pre>
189191
(.venv) $ aws lakeformation grant-permissions \
190192
--principal DataLakePrincipalIdentifier=arn:aws:iam::<i>{account-id}</i>:role/<i>GlueStreamingJobRole-Iceberg</i> \

app.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@
77
KdsStack,
88
GlueJobRoleStack,
99
GlueStreamDataSchemaStack,
10-
GlueStreamingJobStack
10+
GlueStreamingJobStack,
11+
DataLakePermissionsStack
1112
)
1213

1314
APP_ENV = cdk.Environment(account=os.getenv('CDK_DEFAULT_ACCOUNT'),
@@ -21,14 +22,19 @@
2122
glue_job_role.add_dependency(kds_stack)
2223

2324
glue_stream_schema = GlueStreamDataSchemaStack(app, 'GlueSchemaOnKinesisStream',
24-
kds_stack.kinesis_stream,
25+
kds_stack.kinesis_stream
26+
)
27+
glue_stream_schema.add_dependency(kds_stack)
28+
29+
grant_lake_formation_permissions = DataLakePermissionsStack(app, 'GrantLFPermissionsOnGlueJobRole',
2530
glue_job_role.glue_job_role
2631
)
27-
glue_stream_schema.add_dependency(glue_job_role)
32+
grant_lake_formation_permissions.add_dependency(glue_job_role)
33+
grant_lake_formation_permissions.add_dependency(glue_stream_schema)
2834

2935
glue_streaming_job = GlueStreamingJobStack(app, 'GlueStreamingSinkToIceberg',
3036
glue_job_role.glue_job_role
3137
)
32-
glue_streaming_job.add_dependency(glue_stream_schema)
38+
glue_streaming_job.add_dependency(grant_lake_formation_permissions)
3339

3440
app.synth()

cdk_stacks/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,4 @@
22
from .glue_job_role import GlueJobRoleStack
33
from .glue_stream_data_schema import GlueStreamDataSchemaStack
44
from .glue_streaming_job import GlueStreamingJobStack
5+
from .lakeformation_permissions import DataLakePermissionsStack

cdk_stacks/glue_stream_data_schema.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
class GlueStreamDataSchemaStack(Stack):
1111

12-
def __init__(self, scope: Construct, construct_id: str, kinesis_stream, glue_job_role, **kwargs) -> None:
12+
def __init__(self, scope: Construct, construct_id: str, kinesis_stream, **kwargs) -> None:
1313
super().__init__(scope, construct_id, **kwargs)
1414

1515
glue_kinesis_table = self.node.try_get_context('glue_kinesis_table')
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
import aws_cdk as cdk
2+
3+
from aws_cdk import (
4+
Stack,
5+
aws_lakeformation
6+
)
7+
from constructs import Construct
8+
9+
class DataLakePermissionsStack(Stack):
    """Grant Lake Formation table permissions to the Glue job role.

    The stack registers the CDK CloudFormation execution role as a data lake
    administrator and then grants the given role a fixed set of permissions
    on the tables of the database named in the ``glue_kinesis_table``
    context entry.
    """

    def __init__(self, scope: Construct, construct_id: str, glue_job_role, **kwargs) -> None:
        """Create the data lake settings, the permission grant and an output.

        Args:
            scope: Parent construct of this stack.
            construct_id: Logical identifier of this stack.
            glue_job_role: Object exposing ``role_arn``; presumably the IAM
                role of the Glue streaming job -- confirm against the caller.
            **kwargs: Forwarded unchanged to ``Stack.__init__``.
        """
        super().__init__(scope, construct_id, **kwargs)

        # The database name comes from the 'glue_kinesis_table' context entry.
        kinesis_table_context = self.node.try_get_context('glue_kinesis_table')
        target_database = kinesis_table_context["database_name"]

        # The role assumed by cdk is not a data lake administrator, so
        # deploying PrincipalPermissions fails with an error such as:
        #   "Resource does not exist or requester is not authorized to access
        #    requested permissions."
        # To avoid that error, the cdk execution role is promoted to data lake
        # administrator here.
        # For example, https://github.com/aws-samples/data-lake-as-code/blob/mainline/lib/stacks/datalake-stack.ts#L68
        execution_role_admin = aws_lakeformation.CfnDataLakeSettings.DataLakePrincipalProperty(
            data_lake_principal_identifier=cdk.Fn.sub(
                self.synthesizer.cloud_formation_execution_role_arn
            )
        )
        lake_settings = aws_lakeformation.CfnDataLakeSettings(
            self,
            "CfnDataLakeSettings",
            admins=[execution_role_admin],
        )

        # Principal that receives the grant: the role passed in by the caller.
        grantee = aws_lakeformation.CfnPrincipalPermissions.DataLakePrincipalProperty(
            data_lake_principal_identifier=glue_job_role.role_arn
        )

        # A TableWithColumns resource and a Table resource can't both be
        # specified, so only the Table resource is used. The table is selected
        # through table_wildcard rather than by name.
        all_tables = aws_lakeformation.CfnPrincipalPermissions.TableResourceProperty(
            catalog_id=cdk.Aws.ACCOUNT_ID,
            database_name=target_database,
            table_wildcard={},
        )
        granted_resource = aws_lakeformation.CfnPrincipalPermissions.ResourceProperty(
            table=all_tables
        )

        table_grant = aws_lakeformation.CfnPrincipalPermissions(
            self,
            "CfnPrincipalPermissions",
            permissions=["SELECT", "INSERT", "DELETE", "DESCRIBE", "ALTER"],
            permissions_with_grant_option=[],
            principal=grantee,
            resource=granted_resource,
        )
        table_grant.apply_removal_policy(cdk.RemovalPolicy.DESTROY)

        # Keep the resource destruction order: the grant depends on the data
        # lake settings.
        table_grant.add_dependency(lake_settings)

        cdk.CfnOutput(
            self,
            f'{self.stack_name}_Principal',
            value=table_grant.attr_principal_identifier,
        )

0 commit comments

Comments
 (0)