Skip to content

Commit c8f24f6

Browse files
authored
Merge pull request #854 from NHSDigital/bug/jale13-nrl-1320-load-data-if-null
NRL 1320 Null df error fix
2 parents 0ee2fd4 + eee2f0a commit c8f24f6

File tree

1 file changed

+7
-4
lines changed
  • terraform/account-wide-infrastructure/modules/glue/src

1 file changed

+7
-4
lines changed

terraform/account-wide-infrastructure/modules/glue/src/pipeline.py

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -90,10 +90,13 @@ def load(self, data):
9090
"""Load transformed data into Parquet format"""
9191
self.logger.info(f"Loading data into {self.target_path} as Parquet")
9292
for name, dataframe in data.items():
93-
name = name.replace("--", "_")
94-
dataframe.write.mode("append").partitionBy(*self.partition_cols).parquet(
95-
f"{self.target_path}{name}"
96-
)
93+
if dataframe.na.drop().count() > 0:
94+
name = name.replace("--", "_")
95+
dataframe.write.mode("append").partitionBy(
96+
*self.partition_cols
97+
).parquet(f"{self.target_path}{name}")
98+
else:
99+
self.logger.info(f"Dataframe {name} is null, skipping")
97100

98101
def trigger_crawler(self):
99102
self.glue.start_crawler(Name=f"{self.name_prefix}-log-crawler")

0 commit comments

Comments
 (0)