Skip to content

Commit 9007dcc

Browse files
committed
NRL-1187 deal with timestamps
1 parent 65f7a96 commit 9007dcc

File tree

2 files changed

+8
-4
lines changed

2 files changed

+8
-4
lines changed

terraform/account-wide-infrastructure/modules/glue/src/main.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from awsglue.utils import getResolvedOptions
44
from pipeline import LogPipeline
55
from pyspark.context import SparkContext
6-
from transformations import flatten_df, logSchema
6+
from transformations import dtype_conversion, flatten_df, logSchema
77

88
# Get arguments from AWS Glue job
99
args = getResolvedOptions(sys.argv, ["job_name", "source_path", "target_path"])
@@ -20,7 +20,7 @@
2020
target_path=args["target_path"],
2121
schema=logSchema,
2222
partition_cols=partition_cols,
23-
transformations=[flatten_df],
23+
transformations=[flatten_df, dtype_conversion],
2424
)
2525

2626
# Run the job

terraform/account-wide-infrastructure/modules/glue/src/transformations.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from pyspark.sql.functions import to_timestamp
12
from pyspark.sql.types import (
23
BooleanType,
34
StringType,
@@ -19,7 +20,7 @@
1920
StructField("level", StringType(), True),
2021
StructField("location", StringType(), True),
2122
StructField("message", StringType(), True),
22-
StructField("timestamp", TimestampType(), True),
23+
StructField("timestamp", StringType(), True),
2324
StructField("service", StringType(), True),
2425
StructField("cold_start", BooleanType(), True),
2526
StructField("function_name", StringType(), True),
@@ -68,5 +69,8 @@ def flatten_df(df):
6869
return df.select(*cols, f"{nested_col}.*")
6970

7071

71-
def placeholder(df):
72+
def dtype_conversion(df):
73+
df = df.withColumn(
74+
"timestamp", to_timestamp(df["timestamp"], "yyyy-MM-dd HH:mm:ss,SSSXXX")
75+
)
7276
return df

0 commit comments

Comments
 (0)