[DOP-21450] Add compressions for file formats

Ilyas Gasanov · Ilyas Gasanov · commit a8a4bac2e69f · 2024-12-04T10:35:07.000+03:00
diff --git a/tests/test_integration/test_run_transfer/test_hdfs.py b/tests/test_integration/test_run_transfer/test_hdfs.py
@@ -184,7 +184,7 @@ async def test_run_transfer_hdfs_to_postgres(
     )
     df = reader.run()
 
-    # as these file formats do not support datetime values with precision greater than milliseconds
+    # truncate to seconds, as some of these file formats (e.g. Excel) do not support datetime values with precision greater than milliseconds
     if file_format in ("excel", "parquet", "orc"):
         df = df.withColumn("REGISTERED_AT", date_trunc("second", col("REGISTERED_AT")))
         init_df = init_df.withColumn("REGISTERED_AT", date_trunc("second", col("REGISTERED_AT")))
@@ -275,8 +275,8 @@ async def test_run_transfer_postgres_to_hdfs(
     )
     df = reader.run()
 
-    # as these file formats do not support datetime values with precision greater than milliseconds
-    if format_name in ("excel", "parquet", "orc"):
+    # limit to millisecond precision, as some of these file formats (e.g. Excel) do not support datetime values with precision greater than milliseconds
+    if format_name in ("excel", "parquet"):
         init_df = init_df.withColumn(
             "REGISTERED_AT",
             to_timestamp(date_format(col("REGISTERED_AT"), "yyyy-MM-dd HH:mm:ss.SSS")),
diff --git a/tests/test_integration/test_run_transfer/test_s3.py b/tests/test_integration/test_run_transfer/test_s3.py
@@ -185,7 +185,7 @@ async def test_run_transfer_s3_to_postgres(
     )
     df = reader.run()
 
-    # as these file formats do not support datetime values with precision greater than milliseconds
+    # truncate to seconds, as some of these file formats (e.g. Excel) do not support datetime values with precision greater than milliseconds
     if file_format in ("excel", "parquet", "orc"):
         df = df.withColumn("REGISTERED_AT", date_trunc("second", col("REGISTERED_AT")))
         init_df = init_df.withColumn("REGISTERED_AT", date_trunc("second", col("REGISTERED_AT")))
@@ -276,8 +276,8 @@ async def test_run_transfer_postgres_to_s3(
     )
     df = reader.run()
 
-    # as these file formats do not support datetime values with precision greater than milliseconds
-    if format_name in ("excel", "parquet", "orc"):
+    # limit to millisecond precision, as some of these file formats (e.g. Excel) do not support datetime values with precision greater than milliseconds
+    if format_name in ("excel", "parquet"):
         init_df = init_df.withColumn(
             "REGISTERED_AT",
             to_timestamp(date_format(col("REGISTERED_AT"), "yyyy-MM-dd HH:mm:ss.SSS")),
diff --git a/tests/test_unit/test_transfers/transfer_fixtures/transfers_fixture.py b/tests/test_unit/test_transfers/transfer_fixtures/transfers_fixture.py
@@ -61,6 +61,7 @@ async def group_transfers(
                 "line_sep": "\n",
                 "quote": '"',
                 "type": "csv",
+                "compression": "none",
             }
             common_params = {
                 "file_format": file_format,

Original file line number	Diff line number	Diff line change
`@@ -61,6 +61,7 @@ async def group_transfers(`
`61`	`61`	`"line_sep": "\n",`
`62`	`62`	`"quote": '"',`
`63`	`63`	`"type": "csv",`
	`64`	`+ "compression": "none",`
`64`	`65`	`}`
`65`	`66`	`common_params = {`
`66`	`67`	`"file_format": file_format,`