Skip to content

Commit d879684

Browse files
committed
small tweak on shuffle and small files
1 parent d6c58bc commit d879684

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

scripts/jobs/planning/tascomi_create_daily_snapshot.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ def prepare_increments(increment_df):
8989

9090
# Check for residual duplicates - print and further de-duplicate
9191
duplicate_ids = increment_df.groupBy("id").count().filter("count > 1")
92-
if duplicate_ids.count() > 0:
92+
if duplicate_ids.limit(1).count() > 0:
9393
duplicate_ids.join(increment_df, "id").show(truncate=False)
9494
increment_df = deduplicate_by_id_and_last_updated(increment_df)
9595
else:
@@ -368,7 +368,7 @@ def purge_today_partition(
368368
verificationSuite.saveOrAppendResult(resultKey).run()
369369

370370
# if data quality tests succeed, write to S3
371-
371+
snapshot_df = snapshot_df.coalesce(300)
372372
resultDataFrame = DynamicFrame.fromDF(
373373
snapshot_df, glueContext, "resultDataFrame"
374374
)

0 commit comments

Comments
 (0)