Skip to content

Commit 3e63d9b

Browse files
author
Ilyas Gasanov
committed
[DOP-21976] Add compression options for XML
1 parent beecd7a commit 3e63d9b

File tree

8 files changed: 30 additions and 7 deletions.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add compression options to XML file format

syncmaster/dto/transfers.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,17 +36,24 @@ class FileTransferDTO(TransferDTO):
3636

3737
def __post_init__(self):
3838
if isinstance(self.file_format, dict):
39-
self.file_format = self._get_format(self.file_format.copy())
39+
self.file_format = self._get_file_format(self.file_format.copy())
4040
if isinstance(self.df_schema, str):
4141
self.df_schema = json.loads(self.df_schema)
4242

43-
def _get_format(self, file_format: dict):
44-
file_type = file_format.pop("type", None)
43+
def _get_file_format(self, file_format: dict) -> CSV | JSONLine | JSON | Excel | XML | ORC | Parquet:
44+
file_type = self._prepare_file_format(file_format)
4545
parser_class = self._format_parsers.get(file_type)
4646
if parser_class is not None:
4747
return parser_class.parse_obj(file_format)
4848
raise ValueError(f"Unknown file type: {file_type}")
4949

50+
@staticmethod
51+
def _prepare_file_format(file_format: dict) -> str | None:
52+
file_type = file_format.pop("type", None)
53+
if file_type == "xml" and file_format.get("compression") == "none":
54+
file_format.pop("compression")
55+
return file_type
56+
5057

5158
@dataclass
5259
class PostgresTransferDTO(DBTransferDTO):

syncmaster/schemas/v1/transfers/file_format.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,14 @@ class CSVCompression(str, Enum):
4949
DEFLATE = "deflate"
5050

5151

52+
class XMLCompression(str, Enum):
53+
NONE = "none"
54+
BZIP2 = "bzip2"
55+
GZIP = "gzip"
56+
LZ4 = "lz4"
57+
SNAPPY = "snappy"
58+
59+
5260
class CSV(BaseModel):
5361
type: CSV_FORMAT
5462
delimiter: str = ","
@@ -84,6 +92,7 @@ class XML(BaseModel):
8492
type: XML_FORMAT
8593
root_tag: str
8694
row_tag: str
95+
compression: XMLCompression = XMLCompression.GZIP
8796

8897

8998
class ORC(BaseModel):

tests/test_integration/test_run_transfer/test_hdfs.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -229,8 +229,8 @@ async def test_run_transfer_hdfs_to_postgres(
229229
id="parquet",
230230
),
231231
pytest.param(
232-
("xml", {}),
233-
"without_compression",
232+
("xml", {"compression": "snappy"}),
233+
"with_compression",
234234
id="xml",
235235
),
236236
],

tests/test_integration/test_run_transfer/test_s3.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -230,8 +230,8 @@ async def test_run_transfer_s3_to_postgres(
230230
id="parquet",
231231
),
232232
pytest.param(
233-
("xml", {}),
234-
"without_compression",
233+
("xml", {"compression": "none"}),
234+
"with_compression",
235235
id="xml",
236236
),
237237
],

tests/test_unit/test_transfers/test_file_transfers/test_create_transfer.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@
5959
"type": "xml",
6060
"root_tag": "data",
6161
"row_tag": "record",
62+
"compression": "lz4",
6263
},
6364
"options": {
6465
"some": "option",
@@ -166,6 +167,7 @@ async def test_developer_plus_can_create_s3_transfer(
166167
"type": "xml",
167168
"root_tag": "data",
168169
"row_tag": "record",
170+
"compression": "lz4",
169171
},
170172
"orc": {
171173
"type": "orc",
@@ -221,6 +223,7 @@ async def test_developer_plus_can_create_s3_transfer(
221223
"type": "xml",
222224
"root_tag": "data",
223225
"row_tag": "record",
226+
"compression": "bzip2",
224227
},
225228
},
226229
{
@@ -320,6 +323,7 @@ async def test_developer_plus_can_create_hdfs_transfer(
320323
"type": "xml",
321324
"root_tag": "data",
322325
"row_tag": "record",
326+
"compression": "bzip2",
323327
},
324328
"orc": {
325329
"type": "orc",

tests/test_unit/test_transfers/test_file_transfers/test_read_transfer.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
"type": "xml",
4242
"root_tag": "data",
4343
"row_tag": "record",
44+
"compression": "bzip2",
4445
},
4546
"options": {},
4647
},

tests/test_unit/test_transfers/test_file_transfers/test_update_transfer.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
"type": "xml",
4242
"root_tag": "data",
4343
"row_tag": "record",
44+
"compression": "bzip2",
4445
},
4546
"options": {},
4647
},

0 commit comments

Comments
 (0)