Skip to content

Commit c8ae27c

Browse files
authored
DL-5-create-cloudtrail-table-with-partition-projection (#2367)
* DL-5 create cloudtrail table with partition projection * change the file * create the view and change table name * add all glue related events
1 parent 652553e commit c8ae27c

File tree

1 file changed

+317
-0
lines changed

1 file changed

+317
-0
lines changed
Lines changed: 317 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,317 @@
1+
# Use partition projection to create a table with partitions updated automatically, valid until 2045
2+
# Glue table over the CloudTrail management-event logs for eu-west-2.
# Partitions (year/month/day) are resolved by Athena partition projection from
# the S3 key layout, so no crawler or ALTER TABLE ADD PARTITION is needed.
resource "aws_glue_catalog_table" "cloudtrail_management_events" {
  name          = "cloudtrail_management_events"
  database_name = aws_glue_catalog_database.metastore.name

  table_type = "EXTERNAL_TABLE"

  parameters = {
    "projection.enabled"       = "true"
    # Years are projected for 2020..2045 inclusive; extend the upper bound
    # before it is reached or later data will not be visible to queries.
    "projection.year.type"     = "integer"
    "projection.year.range"    = "2020,2045"
    "projection.year.interval" = "1"
    # Month and day are zero-padded to two digits to match the S3 key layout.
    "projection.month.type"     = "integer"
    "projection.month.range"    = "1,12"
    "projection.month.interval" = "1"
    "projection.month.digits"   = "2"
    "projection.day.type"       = "integer"
    "projection.day.range"      = "1,31"
    "projection.day.interval"   = "1"
    "projection.day.digits"     = "2"
    # "$${...}" is the Terraform escape for a literal "${...}", which is the
    # placeholder syntax used by the projection template.
    "storage.location.template" = "s3://${local.identifier_prefix}-cloudtrail/management-events/AWSLogs/${data.aws_caller_identity.data_platform.account_id}/CloudTrail/eu-west-2/$${year}/$${month}/$${day}/"
    "compressionType"           = "gzip"
    "classification"            = "cloudtrail"
  }

  partition_keys {
    name = "year"
    type = "string"
  }

  partition_keys {
    name = "month"
    type = "string"
  }

  partition_keys {
    name = "day"
    type = "string"
  }

  storage_descriptor {
    location      = "s3://${local.identifier_prefix}-cloudtrail/management-events/AWSLogs/${data.aws_caller_identity.data_platform.account_id}/CloudTrail/"
    input_format  = "com.amazon.emr.cloudtrail.CloudTrailInputFormat"
    output_format = "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"

    ser_de_info {
      serialization_library = "org.apache.hive.hcatalog.data.JsonSerDe"
    }

    columns {
      name = "eventVersion"
      type = "string"
    }

    columns {
      name = "userIdentity"
      # Assembled with join() rather than a heredoc so the resulting type is a
      # single-line string with no embedded whitespace or trailing newline.
      type = join("", [
        "struct<",
        "type:string,",
        "principalId:string,",
        "arn:string,",
        "accountId:string,",
        "invokedBy:string,",
        "accessKeyId:string,",
        "userName:string,",
        "sessionContext:struct<",
        "attributes:struct<mfaAuthenticated:string,creationDate:string>,",
        "sessionIssuer:struct<type:string,principalId:string,arn:string,accountId:string,userName:string>,",
        "ec2RoleDelivery:string,",
        "webIdFederationData:struct<federatedprovider:string,attributes:map<string,string>>",
        ">",
        ">",
      ])
    }

    columns {
      name = "eventTime"
      type = "string"
    }

    columns {
      name = "eventSource"
      type = "string"
    }

    columns {
      name = "eventName"
      type = "string"
    }

    columns {
      name = "awsRegion"
      type = "string"
    }

    columns {
      name = "sourceIpAddress"
      type = "string"
    }

    columns {
      name = "userAgent"
      type = "string"
    }

    columns {
      name = "errorCode"
      type = "string"
    }

    columns {
      name = "errorMessage"
      type = "string"
    }

    columns {
      name = "requestParameters"
      type = "string"
    }

    columns {
      name = "responseElements"
      type = "string"
    }

    columns {
      name = "additionalEventData"
      type = "string"
    }

    columns {
      name = "requestId"
      type = "string"
    }

    columns {
      name = "eventId"
      type = "string"
    }

    columns {
      name = "resources"
      type = "array<struct<arn:string,accountid:string,type:string>>"
    }

    columns {
      name = "eventType"
      type = "string"
    }

    columns {
      name = "apiVersion"
      type = "string"
    }

    columns {
      name = "readOnly"
      type = "string"
    }

    columns {
      name = "recipientAccountId"
      type = "string"
    }

    columns {
      name = "serviceEventDetails"
      type = "string"
    }

    columns {
      name = "sharedEventID"
      type = "string"
    }

    columns {
      name = "vpcEndpointId"
      type = "string"
    }

    columns {
      name = "tlsDetails"
      # Single-line for the same reason as userIdentity above.
      type = "struct<tlsVersion:string,cipherSuite:string,clientProvidedHostHeader:string>"
    }
  }

  lifecycle {
    prevent_destroy = true
  }
}
206+
207+
locals {
  # Every column the view's SELECT produces, in SELECT order, with Hive type
  # names: the source table's data columns, its partition keys, then the six
  # derived columns. Whitespace is stripped from the types so a multi-line
  # type definition on the source table cannot leak into the view.
  glue_catalog_management_events_view_columns = concat(
    [
      for col in concat(
        aws_glue_catalog_table.cloudtrail_management_events.storage_descriptor[0].columns,
        aws_glue_catalog_table.cloudtrail_management_events.partition_keys
      ) : {
        name = col.name
        type = replace(col.type, "/\\s+/", "")
      }
    ],
    [
      for derived_name in [
        "import_year",
        "import_month",
        "import_day",
        "import_date",
        "database_name",
        "table_name",
      ] : {
        name = derived_name
        type = "string"
      }
    ]
  )

  # Definition document embedded (base64-encoded) in view_original_text.
  # Its column list must have one entry per SELECT output column.
  # NOTE(review): the Hive type names are rewritten into Presto type
  # signatures (struct<a:string> -> row(a varchar)) and names are lower-cased,
  # on the understanding that the Presto view format expects that form -
  # confirm by querying the view in Athena after apply.
  glue_catalog_management_events_view_definition = {
    originalSql = <<-EOF
      SELECT
        eventVersion,
        userIdentity,
        eventTime,
        eventSource,
        eventName,
        awsRegion,
        sourceIpAddress,
        userAgent,
        errorCode,
        errorMessage,
        requestParameters,
        responseElements,
        additionalEventData,
        requestId,
        eventId,
        resources,
        eventType,
        apiVersion,
        readOnly,
        recipientAccountId,
        serviceEventDetails,
        sharedEventID,
        vpcEndpointId,
        tlsDetails,
        year,
        month,
        day,
        year AS import_year,
        month AS import_month,
        day AS import_day,
        concat(year, month, day) AS import_date,
        json_extract_scalar(requestParameters, '$.databaseName') AS database_name,
        json_extract_scalar(requestParameters, '$.tableName') AS table_name
      FROM "${aws_glue_catalog_database.metastore.name}"."${aws_glue_catalog_table.cloudtrail_management_events.name}"
      WHERE eventSource = 'glue.amazonaws.com'
        AND eventName IN (
          -- Database operations
          'CreateDatabase', 'GetDatabase', 'GetDatabases', 'UpdateDatabase', 'DeleteDatabase',
          -- Table operations
          'CreateTable', 'GetTable', 'GetTables', 'UpdateTable', 'DeleteTable',
          'GetTableVersion', 'GetTableVersions', 'DeleteTableVersion',
          -- Partition operations
          'CreatePartition', 'GetPartition', 'GetPartitions', 'UpdatePartition', 'DeletePartition',
          'GetPartitionIndexes', 'CreatePartitionIndex', 'DeletePartitionIndex',
          'BatchCreatePartition', 'BatchDeletePartition'
        )
    EOF
    catalog     = "awsdatacatalog"
    schema      = aws_glue_catalog_database.metastore.name
    columns = [
      for col in local.glue_catalog_management_events_view_columns : {
        name = lower(col.name)
        type = replace(
          replace(
            replace(
              replace(
                replace(
                  replace(lower(col.type), "struct<", "row("),
                  "array<", "array("
                ),
                "map<", "map("
              ),
              ">", ")"
            ),
            ":", " "
          ),
          "string", "varchar"
        )
      }
    ]
  }
}

# Athena view over cloudtrail_management_events restricted to Glue Data
# Catalog API calls (database, table and partition operations). It adds
# import_* aliases for the partition columns and pulls databaseName/tableName
# out of the requestParameters JSON.
resource "aws_glue_catalog_table" "glue_catalog_management_events_view" {
  name          = "glue_catalog_management_events"
  database_name = aws_glue_catalog_database.metastore.name
  table_type    = "VIRTUAL_VIEW"

  # Marks the entry as a Presto view so the query engine reads the definition
  # from view_original_text.
  parameters = {
    presto_view = "true"
    comment     = "Presto View"
  }

  view_original_text = "/* Presto View: ${base64encode(jsonencode(local.glue_catalog_management_events_view_definition))} */"

  view_expanded_text = "/* Presto View */"

  storage_descriptor {
    # One entry per view output column, generated from the same list as the
    # view definition so the two cannot drift apart.
    dynamic "columns" {
      for_each = local.glue_catalog_management_events_view_columns
      content {
        name = columns.value.name
        type = columns.value.type
      }
    }
  }

  lifecycle {
    prevent_destroy = true
  }
}

0 commit comments

Comments
 (0)