|
| 1 | +# main.tf |
| 2 | + |
# Identity (account ID) of the credentials Terraform is running with.
data "aws_caller_identity" "current" {
}

# Region the AWS provider is configured for.
data "aws_region" "current" {
}
| 5 | + |
# Names derived from the input variables and shared by the resources below.
locals {
  # S3 bucket that receives the delivered audit logs.
  s3_bucket_name = "${var.BucketPrefix}-audit-logs"

  # Firehose delivery stream; also reused as the prefix for IAM and log-group names.
  firehose_stream_name = "${var.ProjectId}-audit-log-stream"

  # Glue database name: hyphens in the project ID are swapped for underscores.
  glue_db_name    = "${replace(var.ProjectId, "-", "_")}_audit_logs"
  glue_table_name = "logs"
}
| 12 | + |
# S3 bucket that stores the audit logs delivered by Firehose.
# Versioning, lifecycle and tiering are configured in separate resources.
resource "aws_s3_bucket" "this" {
  bucket = local.s3_bucket_name
}
| 17 | + |
# Turn on object versioning for the audit-log bucket.
resource "aws_s3_bucket_versioning" "this" {
  bucket = aws_s3_bucket.this.id

  versioning_configuration {
    status = "Enabled"
  }
}
| 24 | + |
# Lifecycle rules for the audit-log bucket. Every rule uses an empty filter,
# i.e. it applies to all objects in the bucket.
resource "aws_s3_bucket_lifecycle_configuration" "this" {
  bucket = aws_s3_bucket.this.id

  # The noncurrent-version rule only makes sense on a versioned bucket, so make
  # sure versioning is enabled before the lifecycle configuration is applied.
  depends_on = [aws_s3_bucket_versioning.this]

  # Clean up multipart uploads that were started but never completed.
  rule {
    id     = "AbortIncompleteMultipartUploads"
    status = "Enabled"

    # Explicit empty filter, consistent with the other rules; the provider
    # warns when a rule specifies neither `filter` nor `prefix`.
    filter {}

    abort_incomplete_multipart_upload {
      days_after_initiation = 1
    }
  }

  # Move objects to Intelligent-Tiering one day after creation.
  rule {
    id     = "intelligent-tiering-transition"
    status = "Enabled"

    filter {}

    transition {
      days          = 1
      storage_class = "INTELLIGENT_TIERING"
    }
  }

  # Remove superseded object versions after five days.
  rule {
    id     = "ExpireNoncurrentVersions"
    status = "Enabled"

    filter {}

    noncurrent_version_expiration {
      noncurrent_days = 5
    }
  }

  # Expire audit logs once they reach the configured retention age.
  rule {
    id     = "DeleteAuditLogsAfterDays"
    status = "Enabled"

    filter {}

    expiration {
      days = var.DataExpirationDays
    }
  }
}
| 61 | + |
# Intelligent-Tiering configuration for the bucket: objects move to the
# Archive Access tier after 180 days.
resource "aws_s3_bucket_intelligent_tiering_configuration" "this" {
  name   = "ArchiveAfterSixMonths"
  bucket = aws_s3_bucket.this.id
  status = "Enabled"

  tiering {
    access_tier = "ARCHIVE_ACCESS"
    days        = 180
  }
}
| 71 | + |
# CloudWatch log group that receives the delivery stream's log output.
resource "aws_cloudwatch_log_group" "firehose_logs" {
  name              = "/aws/kinesisfirehose/${local.firehose_stream_name}"
  retention_in_days = var.LogRetentionDays
}
| 76 | + |
# Log stream inside the Firehose log group, referenced by the delivery
# stream's CloudWatch logging options.
resource "aws_cloudwatch_log_stream" "firehose_logs_stream" {
  name           = "DataArchivalS3Delivery"
  log_group_name = aws_cloudwatch_log_group.firehose_logs.name
}
| 81 | + |
# Glue Data Catalog table describing the audit-log records. Firehose reads this
# schema to convert incoming JSON to Parquet, and the partition keys mirror the
# key=value segments of the Firehose S3 prefix.
resource "aws_glue_catalog_table" "this" {
  name          = local.glue_table_name
  database_name = aws_glue_catalog_database.this.name
  table_type    = "EXTERNAL_TABLE"

  parameters = {
    "EXTERNAL"            = "TRUE"
    "parquet.compression" = "SNAPPY"
  }

  storage_descriptor {
    location = "s3://${aws_s3_bucket.this.id}/"

    # The objects are Parquet, so the table must declare the Parquet input
    # format (not the text input format) to match the output format and SerDe.
    input_format  = "org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat"
    output_format = "org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat"

    ser_de_info {
      name                  = "parquet-serde"
      serialization_library = "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe"
      parameters            = { "serialization.format" = "1" }
    }

    # `module` is intentionally NOT listed as a data column: it is a partition
    # key below, and a name that appears in both places makes query engines
    # such as Athena reject the table metadata as containing duplicate columns.
    #
    # NOTE(review): these names are camelCase. Firehose's OpenX JSON
    # deserializer lowercases JSON keys by default, and Hive-style catalogs
    # treat column names as lowercase - confirm these columns are populated
    # rather than coming back NULL.
    columns {
      name = "createdAt"
      type = "bigint"
    }
    columns {
      name = "actor"
      type = "string"
    }
    columns {
      name = "message"
      type = "string"
    }
    columns {
      name = "requestId"
      type = "string"
    }
    columns {
      name = "target"
      type = "string"
    }
  }

  # Partition columns, in the same order as the S3 prefix segments.
  partition_keys {
    name = "module"
    type = "string"
  }
  partition_keys {
    name = "year"
    type = "string"
  }
  partition_keys {
    name = "month"
    type = "string"
  }
  partition_keys {
    name = "day"
    type = "string"
  }
  partition_keys {
    name = "hour"
    type = "string"
  }
}
| 147 | + |
# Glue Data Catalog database that holds the audit-log table.
resource "aws_glue_catalog_database" "this" {
  name = local.glue_db_name
}
| 151 | + |
| 152 | + |
# Execution role for the delivery stream. The trust policy lets only the
# Firehose service principal assume it.
resource "aws_iam_role" "firehose_role" {
  name = "${local.firehose_stream_name}-exec-role"

  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect = "Allow"
        Action = "sts:AssumeRole"
        Principal = {
          Service = "firehose.amazonaws.com"
        }
      }
    ]
  })
}
| 164 | + |
# Permissions for the Firehose execution role: write to the audit-log bucket,
# emit delivery logs to CloudWatch, and read the Glue table schema used for
# JSON-to-Parquet conversion.
resource "aws_iam_policy" "firehose_policy" {
  name = "${local.firehose_stream_name}-s3-policy"

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      # S3 delivery: bucket-level actions need the bucket ARN, object-level
      # actions need the "/*" object ARN.
      {
        Effect = "Allow"
        Action = [
          "s3:AbortMultipartUpload",
          "s3:GetBucketLocation",
          "s3:GetObject",
          "s3:ListBucket",
          "s3:ListBucketMultipartUploads",
          "s3:PutObject",
        ]
        Resource = [
          aws_s3_bucket.this.arn,
          "${aws_s3_bucket.this.arn}/*",
        ]
      },
      # CloudWatch Logs: logs:CreateLogStream and logs:PutLogEvents are
      # evaluated against log-stream ARNs ("<log-group-arn>:log-stream:<name>"),
      # which the bare log-group ARN does not match, so the ":*" form is
      # required as well.
      {
        Effect = "Allow"
        Action = [
          "logs:CreateLogGroup",
          "logs:CreateLogStream",
          "logs:PutLogEvents",
        ]
        Resource = [
          aws_cloudwatch_log_group.firehose_logs.arn,
          "${aws_cloudwatch_log_group.firehose_logs.arn}:*",
        ]
      },
      # Glue schema lookup: table reads are authorized against the catalog,
      # the database and the table, so all three ARNs must be listed.
      # The catalog ARN assumes the standard "aws" partition.
      {
        Effect = "Allow"
        Action = [
          "glue:GetTable",
          "glue:GetTableVersion",
          "glue:GetTableVersions",
        ]
        Resource = [
          "arn:aws:glue:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:catalog",
          aws_glue_catalog_database.this.arn,
          aws_glue_catalog_table.this.arn,
        ]
      },
    ]
  })
}
| 194 | + |
# Attach the delivery policy to the Firehose execution role.
resource "aws_iam_role_policy_attachment" "firehose_attach" {
  policy_arn = aws_iam_policy.firehose_policy.arn
  role       = aws_iam_role.firehose_role.name
}
| 199 | + |
# Firehose delivery stream: takes JSON audit records, converts them to Parquet
# using the Glue table schema, and writes them to S3 under Hive-style
# partitions (module/year/month/day/hour) extracted from each record.
resource "aws_kinesis_firehose_delivery_stream" "dynamic_stream" {
  name        = local.firehose_stream_name
  destination = "extended_s3"

  # The stream only references the role, not its policy attachment. Without
  # this explicit dependency the stream can be created (and start delivering)
  # before the role has any permissions, and on destroy the policy can be
  # detached while the stream is still running.
  depends_on = [aws_iam_role_policy_attachment.firehose_attach]

  extended_s3_configuration {
    bucket_arn = aws_s3_bucket.this.arn
    role_arn   = aws_iam_role.firehose_role.arn

    # Compression is left to the Parquet serializer, so the S3-level
    # compression stays off.
    compression_format = "UNCOMPRESSED"
    buffering_interval = 60
    buffering_size     = 64

    # JSON in, Parquet out, with the schema taken from the Glue table.
    data_format_conversion_configuration {
      enabled = true

      input_format_configuration {
        deserializer {
          open_x_json_ser_de {}
        }
      }

      output_format_configuration {
        serializer {
          parquet_ser_de {}
        }
      }

      schema_configuration {
        database_name = aws_glue_catalog_database.this.name
        table_name    = aws_glue_catalog_table.this.name
        role_arn      = aws_iam_role.firehose_role.arn
      }
    }

    # Extract the partition keys from each record with a jq query.
    # NOTE(review): jq's strftime interprets its input as epoch *seconds* -
    # confirm `createdAt` is not sent in milliseconds.
    processing_configuration {
      enabled = true

      processors {
        type = "MetadataExtraction"

        parameters {
          parameter_name  = "MetadataExtractionQuery"
          parameter_value = "{module: .module, year: (.createdAt | strftime(\"%Y\")), month: (.createdAt | strftime(\"%m\")), day: (.createdAt | strftime(\"%d\")), hour: (.createdAt | strftime(\"%H\"))}"
        }
        parameters {
          parameter_name  = "JsonParsingEngine"
          parameter_value = "JQ-1.6"
        }
      }
    }

    dynamic_partitioning_configuration {
      enabled = true
    }

    cloudwatch_logging_options {
      enabled         = true
      log_group_name  = aws_cloudwatch_log_group.firehose_logs.name
      log_stream_name = aws_cloudwatch_log_stream.firehose_logs_stream.name
    }

    # One key=value path segment per Glue partition key, in the same order
    # (module, year, month, day, hour).
    prefix              = "module=!{partitionKeyFromQuery:module}/year=!{partitionKeyFromQuery:year}/month=!{partitionKeyFromQuery:month}/day=!{partitionKeyFromQuery:day}/hour=!{partitionKeyFromQuery:hour}/"
    error_output_prefix = "firehose-errors/!{firehose:error-output-type}/!{timestamp:yyyy/MM/dd}/"
  }
}
0 commit comments