Skip to content

Commit e762e93

Browse files
committed
Create firehose stream and glue DB for audit logs
1 parent 8826610 commit e762e93

File tree

3 files changed

+310
-0
lines changed

3 files changed

+310
-0
lines changed

terraform/modules/auditlog/main.tf

Lines changed: 259 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,259 @@
1+
# main.tf
2+
3+
# Looks up the identity (account) the AWS provider is currently authenticated as.
# NOTE(review): not referenced anywhere in the visible part of this module — confirm it is still needed.
data "aws_caller_identity" "current" {}
4+
# Looks up the region the AWS provider is currently configured for.
# NOTE(review): not referenced anywhere in the visible part of this module — confirm it is still needed.
data "aws_region" "current" {}
5+
6+
# Derived names shared by the resources in this module.
locals {
  # Firehose delivery stream name; also used as the stem for the IAM role/policy
  # names and the CloudWatch log group path.
  firehose_stream_name = format("%s-audit-log-stream", var.ProjectId)

  # Glue database name: the project id with dashes swapped for underscores.
  glue_db_name = format("%s_audit_logs", replace(var.ProjectId, "-", "_"))

  # Single Glue table that describes the audit log schema.
  glue_table_name = "logs"

  # Destination bucket for delivered audit logs.
  s3_bucket_name = join("-", [var.BucketPrefix, "audit-logs"])
}
12+
13+
# S3 bucket that stores the delivered audit logs, plus its versioning, lifecycle, and tiering configuration
14+
# Bucket that receives the audit log objects written by the Firehose stream.
resource "aws_s3_bucket" "this" {
  # Name is derived from var.BucketPrefix in locals.
  bucket = local.s3_bucket_name
}
17+
18+
# Turns on object versioning for the audit log bucket.
resource "aws_s3_bucket_versioning" "this" {
  bucket = aws_s3_bucket.this.id

  versioning_configuration {
    status = "Enabled"
  }
}
24+
25+
# Lifecycle rules for the audit log bucket:
#   * abort multipart uploads that were never completed,
#   * move objects to INTELLIGENT_TIERING after one day,
#   * expire noncurrent object versions after five days,
#   * delete audit logs after var.DataExpirationDays days.
resource "aws_s3_bucket_lifecycle_configuration" "this" {
  bucket = aws_s3_bucket.this.id

  # The noncurrent-version rule relies on versioning being enabled, so make the
  # ordering explicit instead of leaving it to chance.
  depends_on = [aws_s3_bucket_versioning.this]

  rule {
    id     = "AbortIncompleteMultipartUploads"
    status = "Enabled"

    # An empty filter applies the rule to every object, matching the other rules.
    # Omitting both `filter` and `prefix` triggers a provider warning that is
    # slated to become an error.
    filter {}

    abort_incomplete_multipart_upload {
      days_after_initiation = 1
    }
  }

  rule {
    id     = "intelligent-tiering-transition"
    status = "Enabled"

    filter {}

    transition {
      days          = 1
      storage_class = "INTELLIGENT_TIERING"
    }
  }

  rule {
    id     = "ExpireNoncurrentVersions"
    status = "Enabled"

    filter {}

    noncurrent_version_expiration {
      noncurrent_days = 5
    }
  }

  rule {
    id     = "DeleteAuditLogsAfterDays"
    status = "Enabled"

    filter {}

    expiration {
      days = var.DataExpirationDays
    }
  }
}
61+
62+
# Intelligent-Tiering configuration: objects move to the ARCHIVE_ACCESS tier
# after 180 days.
resource "aws_s3_bucket_intelligent_tiering_configuration" "this" {
  name   = "ArchiveAfterSixMonths"
  bucket = aws_s3_bucket.this.id
  status = "Enabled"

  tiering {
    days        = 180
    access_tier = "ARCHIVE_ACCESS"
  }
}
71+
72+
# CloudWatch log group the Firehose stream writes its delivery logs to.
resource "aws_cloudwatch_log_group" "firehose_logs" {
  name              = format("/aws/kinesisfirehose/%s", local.firehose_stream_name)
  retention_in_days = var.LogRetentionDays
}
76+
77+
# Log stream inside the Firehose log group; referenced by the delivery stream's
# cloudwatch_logging_options.
resource "aws_cloudwatch_log_stream" "firehose_logs_stream" {
  name           = "DataArchivalS3Delivery"
  log_group_name = aws_cloudwatch_log_group.firehose_logs.name
}
81+
82+
# AWS Glue catalog table that supplies the schema for Firehose's JSON-to-Parquet conversion
83+
# Glue table describing the audit log records. The Firehose delivery stream reads
# this schema to convert incoming JSON to Parquet, and the partition keys mirror
# the S3 prefix layout (module/year/month/day/hour) the stream writes.
resource "aws_glue_catalog_table" "this" {
  name          = local.glue_table_name
  database_name = aws_glue_catalog_database.this.name
  table_type    = "EXTERNAL_TABLE"

  parameters = {
    "EXTERNAL"            = "TRUE"
    "parquet.compression" = "SNAPPY"
  }

  storage_descriptor {
    location = "s3://${aws_s3_bucket.this.id}/"

    # The data files are Parquet, so both formats must be the Parquet ones; the
    # previous TextInputFormat did not match the Parquet SerDe/output format.
    input_format  = "org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat"
    output_format = "org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat"

    ser_de_info {
      name                  = "parquet-serde"
      serialization_library = "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe"
      parameters            = { "serialization.format" = "1" }
    }

    # `module` is intentionally NOT listed here: it is a partition key below, and
    # a name that appears both as a data column and as a partition key makes the
    # table metadata invalid for query engines (duplicate column).
    #
    # NOTE(review): the camelCase names (createdAt, requestId) rely on
    # case-insensitive matching in the Firehose JSON deserializer — confirm these
    # columns are populated and not null in the delivered Parquet files.
    columns {
      name = "createdAt"
      type = "bigint"
    }
    columns {
      name = "actor"
      type = "string"
    }
    columns {
      name = "message"
      type = "string"
    }
    columns {
      name = "requestId"
      type = "string"
    }
    columns {
      name = "target"
      type = "string"
    }
  }

  # Partition keys, in the same order as the S3 prefix segments.
  partition_keys {
    name = "module"
    type = "string"
  }
  partition_keys {
    name = "year"
    type = "string"
  }
  partition_keys {
    name = "month"
    type = "string"
  }
  partition_keys {
    name = "day"
    type = "string"
  }
  partition_keys {
    name = "hour"
    type = "string"
  }
}
147+
148+
# Glue database that holds the audit log table.
resource "aws_glue_catalog_database" "this" {
  # Dashes in the project id are replaced with underscores in locals.
  name = local.glue_db_name
}
151+
152+
153+
# Execution role assumed by the Firehose service for delivery and schema lookup.
resource "aws_iam_role" "firehose_role" {
  name = format("%s-exec-role", local.firehose_stream_name)

  # Trust policy: only the Firehose service principal may assume this role.
  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect = "Allow"
        Action = "sts:AssumeRole"
        Principal = {
          Service = "firehose.amazonaws.com"
        }
      }
    ]
  })
}
164+
165+
# Permissions for the Firehose execution role:
#   * read/write access to the audit log bucket,
#   * writing delivery logs to the module's CloudWatch log group,
#   * reading the Glue table schema used for Parquet conversion.
resource "aws_iam_policy" "firehose_policy" {
  name = "${local.firehose_stream_name}-s3-policy"

  policy = jsonencode({
    Version = "2012-10-17",
    Statement = [
      {
        Effect = "Allow",
        Action = [
          "s3:AbortMultipartUpload", "s3:GetBucketLocation", "s3:GetObject",
          "s3:ListBucket", "s3:ListBucketMultipartUploads", "s3:PutObject"
        ],
        # Bucket ARN for bucket-level actions, "/*" for object-level actions.
        Resource = [aws_s3_bucket.this.arn, "${aws_s3_bucket.this.arn}/*"]
      },
      {
        Effect = "Allow",
        Action = ["logs:CreateLogGroup", "logs:CreateLogStream", "logs:PutLogEvents"],
        # logs:PutLogEvents is authorized against log-stream ARNs
        # (<log-group-arn>:log-stream:<name>), which the bare log-group ARN does
        # not match; the ":*" form covers every stream in the group.
        Resource = [
          aws_cloudwatch_log_group.firehose_logs.arn,
          "${aws_cloudwatch_log_group.firehose_logs.arn}:*"
        ]
      },
      {
        Effect = "Allow",
        Action = ["glue:GetTable", "glue:GetTableVersion", "glue:GetTableVersions"],
        Resource = [
          # Glue table reads are authorized against the catalog, the database and
          # the table. The catalog ARN is built from the database ARN's
          # "arn:<partition>:glue:<region>:<account>" prefix so it stays correct
          # in any partition/region.
          "${join(":", slice(split(":", aws_glue_catalog_database.this.arn), 0, 5))}:catalog",
          aws_glue_catalog_database.this.arn,
          aws_glue_catalog_table.this.arn
        ]
      }
    ]
  })
}
194+
195+
# Binds the Firehose permissions policy to the Firehose execution role.
resource "aws_iam_role_policy_attachment" "firehose_attach" {
  policy_arn = aws_iam_policy.firehose_policy.arn
  role       = aws_iam_role.firehose_role.name
}
199+
200+
# Firehose delivery stream for audit logs. Incoming JSON records are converted to
# Parquet using the Glue table schema and written to S3 under dynamically
# extracted partitions (module/year/month/day/hour).
resource "aws_kinesis_firehose_delivery_stream" "dynamic_stream" {
  name        = local.firehose_stream_name
  destination = "extended_s3"

  # The stream only references the role ARN, so Terraform would not otherwise
  # wait for the permissions policy to be attached (or keep it attached until
  # the stream is destroyed).
  depends_on = [aws_iam_role_policy_attachment.firehose_attach]

  extended_s3_configuration {
    bucket_arn         = aws_s3_bucket.this.arn
    role_arn           = aws_iam_role.firehose_role.arn
    compression_format = "UNCOMPRESSED"
    buffering_interval = 60
    buffering_size     = 64

    # JSON in, Parquet out; the schema comes from the Glue table.
    data_format_conversion_configuration {
      enabled = true

      input_format_configuration {
        deserializer {
          open_x_json_ser_de {}
        }
      }

      output_format_configuration {
        serializer {
          parquet_ser_de {}
        }
      }

      schema_configuration {
        database_name = aws_glue_catalog_database.this.name
        table_name    = aws_glue_catalog_table.this.name
        role_arn      = aws_iam_role.firehose_role.arn
      }
    }

    # Extracts the partition keys from each record with a JQ query.
    processing_configuration {
      enabled = true

      processors {
        type = "MetadataExtraction"

        parameters {
          parameter_name = "MetadataExtractionQuery"
          # NOTE(review): strftime is applied directly to .createdAt, which
          # assumes it holds epoch seconds — confirm; a millisecond timestamp
          # would yield wrong year/month/day/hour partitions.
          parameter_value = "{module: .module, year: (.createdAt | strftime(\"%Y\")), month: (.createdAt | strftime(\"%m\")), day: (.createdAt | strftime(\"%d\")), hour: (.createdAt | strftime(\"%H\"))}"
        }

        parameters {
          parameter_name  = "JsonParsingEngine"
          parameter_value = "JQ-1.6"
        }
      }
    }

    dynamic_partitioning_configuration {
      enabled = true
    }

    cloudwatch_logging_options {
      enabled         = true
      log_group_name  = aws_cloudwatch_log_group.firehose_logs.name
      log_stream_name = aws_cloudwatch_log_stream.firehose_logs_stream.name
    }

    # The prefix segments must stay in sync with the Glue table's partition keys
    # (module, year, month, day, hour) and with the keys produced by the
    # MetadataExtractionQuery above.
    prefix              = "module=!{partitionKeyFromQuery:module}/year=!{partitionKeyFromQuery:year}/month=!{partitionKeyFromQuery:month}/day=!{partitionKeyFromQuery:day}/hour=!{partitionKeyFromQuery:hour}/"
    error_output_prefix = "firehose-errors/!{firehose:error-output-type}/!{timestamp:yyyy/MM/dd}/"
  }
}
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# Exposes the delivery stream name so callers can target it when putting records.
output "firehose_delivery_stream_name" {
  value       = aws_kinesis_firehose_delivery_stream.dynamic_stream.name
  description = "The name of the Kinesis Firehose delivery stream."
}
5+
6+
# Exposes the delivery stream ARN, e.g. for use in callers' IAM policies.
output "firehose_delivery_stream_arn" {
  value       = aws_kinesis_firehose_delivery_stream.dynamic_stream.arn
  description = "The ARN of the Kinesis Firehose delivery stream."
}
10+
11+
# Exposes the name of the bucket that holds the delivered audit logs.
output "s3_bucket_name" {
  value       = aws_s3_bucket.this.bucket
  description = "The name of the S3 bucket where data is stored."
}
15+
16+
# Exposes the Glue database name that contains the audit log table.
output "glue_database_name" {
  value       = aws_glue_catalog_database.this.name
  description = "The name of the AWS Glue database."
}
20+
21+
# Exposes the Glue table name that describes the audit log schema.
output "glue_table_name" {
  value       = aws_glue_catalog_table.this.name
  description = "The name of the AWS Glue table."
}
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Project identifier. It prefixes the Firehose stream name and, with dashes
# replaced by underscores, the Glue database name.
variable "ProjectId" {
  description = "Prefix before each resource"
  type        = string
}
5+
6+
# Prefix for the audit log bucket name; the bucket is named "<BucketPrefix>-audit-logs".
variable "BucketPrefix" {
  type        = string
  description = "Prefix of the S3 bucket name; the audit log bucket is named \"<BucketPrefix>-audit-logs\"."
}
9+
10+
11+
# Retention period for the Firehose CloudWatch log group.
variable "LogRetentionDays" {
  type        = number
  description = "Number of days to retain the Firehose delivery logs in CloudWatch Logs."
}
14+
15+
# Age, in days, at which the S3 lifecycle rule deletes audit log objects.
variable "DataExpirationDays" {
  type        = number
  description = "Number of days after which audit log objects in the S3 bucket expire."

  # Catch bad values at plan time instead of failing on apply: the lifecycle
  # expiration needs a positive whole number of days.
  validation {
    condition     = var.DataExpirationDays >= 1 && floor(var.DataExpirationDays) == var.DataExpirationDays
    error_message = "DataExpirationDays must be a positive whole number of days."
  }
}
18+
19+
20+
# Deployment environment selector; only "dev" and "prod" are accepted.
# NOTE(review): not referenced in the visible part of this module — confirm it is used.
variable "RunEnvironment" {
  type        = string
  description = "Deployment environment; must be \"dev\" or \"prod\"."

  validation {
    condition = contains(["dev", "prod"], var.RunEnvironment)
    # The previous message referred to a "lambda run environment", which does
    # not describe this audit log module.
    error_message = "RunEnvironment must be either \"dev\" or \"prod\"."
  }
}
27+

0 commit comments

Comments
 (0)