Skip to content

Commit 455f99e

Browse files
[DOP-21442] Add Excel API schema (#140)
1 parent 32726c9 commit 455f99e

File tree

11 files changed

+137
-34
lines changed

11 files changed

+137
-34
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add Excel API schema

syncmaster/schemas/v1/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@
3636
PostgresReadTransferSourceAndTarget,
3737
ReadDBTransfer,
3838
)
39-
from syncmaster.schemas.v1.transfers.file_format import CSV, JSON, JSONLine
39+
from syncmaster.schemas.v1.transfers.file_format import CSV, JSON, Excel, JSONLine
4040
from syncmaster.schemas.v1.transfers.run import (
4141
CreateRunSchema,
4242
ReadRunSchema,

syncmaster/schemas/v1/file_formats.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,4 @@
55
CSV_FORMAT = Literal["csv"]
66
JSONLINE_FORMAT = Literal["jsonline"]
77
JSON_FORMAT = Literal["json"]
8+
EXCEL_FORMAT = Literal["excel"]

syncmaster/schemas/v1/transfers/file/base.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,28 +7,28 @@
77

88
from pydantic import BaseModel, Field, field_validator
99

10-
from syncmaster.schemas.v1.transfers.file_format import CSV, JSON, JSONLine
10+
from syncmaster.schemas.v1.transfers.file_format import CSV, JSON, Excel, JSONLine
1111

1212

1313
# At the moment the ReadFileTransferSource and ReadFileTransferTarget classes
1414
# are nearly identical (only the source accepts the JSON format) but may diverge further in the future
1515
class ReadFileTransferSource(BaseModel):
1616
directory_path: str
17-
file_format: CSV | JSONLine | JSON = Field(..., discriminator="type")
17+
file_format: CSV | JSONLine | JSON | Excel = Field(..., discriminator="type")
1818
options: dict[str, Any]
1919

2020

2121
class ReadFileTransferTarget(BaseModel):
2222
directory_path: str
23-
file_format: CSV | JSONLine = Field(..., discriminator="type") # JSON format is not supported for writing
23+
file_format: CSV | JSONLine | Excel = Field(..., discriminator="type") # JSON format is not supported for writing
2424
options: dict[str, Any]
2525

2626

2727
# At the moment the CreateFileTransferSource and CreateFileTransferTarget classes
2828
# are nearly identical (only the source accepts the JSON format) but may diverge further in the future
2929
class CreateFileTransferSource(BaseModel):
3030
directory_path: str
31-
file_format: CSV | JSONLine | JSON = Field(..., discriminator="type")
31+
file_format: CSV | JSONLine | JSON | Excel = Field(..., discriminator="type")
3232
options: dict[str, Any] = Field(default_factory=dict)
3333

3434
class Config:
@@ -44,7 +44,7 @@ def _directory_path_is_valid_path(cls, value):
4444

4545
class CreateFileTransferTarget(BaseModel):
4646
directory_path: str
47-
file_format: CSV | JSONLine = Field(..., discriminator="type") # JSON FORMAT IS NOT SUPPORTED AS A TARGET !
47+
file_format: CSV | JSONLine | Excel = Field(..., discriminator="type") # JSON FORMAT IS NOT SUPPORTED AS A TARGET !
4848
options: dict[str, Any] = Field(default_factory=dict)
4949

5050
class Config:

syncmaster/schemas/v1/transfers/file_format.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,12 @@
44

55
from pydantic import BaseModel
66

7-
from syncmaster.schemas.v1.file_formats import CSV_FORMAT, JSON_FORMAT, JSONLINE_FORMAT
7+
from syncmaster.schemas.v1.file_formats import (
8+
CSV_FORMAT,
9+
EXCEL_FORMAT,
10+
JSON_FORMAT,
11+
JSONLINE_FORMAT,
12+
)
813

914

1015
class CSV(BaseModel):
@@ -13,7 +18,7 @@ class CSV(BaseModel):
1318
encoding: str = "utf-8"
1419
quote: str = '"'
1520
escape: str = "\\"
16-
header: bool = False
21+
include_header: bool = False
1722
line_sep: str = "\n"
1823

1924

@@ -27,3 +32,9 @@ class JSON(BaseModel):
2732
type: JSON_FORMAT
2833
encoding: str = "utf-8"
2934
line_sep: str = "\n"
35+
36+
37+
class Excel(BaseModel):
38+
type: EXCEL_FORMAT
39+
include_header: bool = False
40+
start_cell: str | None = None

tests/resources/file_df_connection/generate_files.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -103,11 +103,11 @@ def _to_string(obj):
103103
return obj
104104

105105

106-
def _write_csv(data: list[dict], file: TextIO, header: bool = False, **kwargs) -> None:
106+
def _write_csv(data: list[dict], file: TextIO, include_header: bool = False, **kwargs) -> None:
107107
columns = list(data[0].keys())
108108
writer = csv.DictWriter(file, fieldnames=columns, lineterminator="\n", **kwargs)
109109

110-
if header:
110+
if include_header:
111111
writer.writeheader()
112112

113113
for row in data:
@@ -123,7 +123,7 @@ def save_as_csv_without_header(data: list[dict], path: Path) -> None:
123123
def save_as_csv_with_header(data: list[dict], path: Path) -> None:
124124
path.mkdir(parents=True, exist_ok=True)
125125
with open(path / "file.csv", "w", newline="") as file:
126-
_write_csv(data, file, header=True)
126+
_write_csv(data, file, include_header=True)
127127

128128

129129
def save_as_csv_with_delimiter(data: list[dict], path: Path) -> None:
@@ -403,12 +403,12 @@ def save_as_xlsx(data: list[dict], path: Path) -> None:
403403
shutil.rmtree(root, ignore_errors=True)
404404
root.mkdir(parents=True, exist_ok=True)
405405

406-
save_as_xlsx_with_options(data, root / "without_header", header=False)
407-
save_as_xlsx_with_options(data, root / "with_header", header=True)
406+
save_as_xlsx_with_options(data, root / "without_header", include_header=False)
407+
save_as_xlsx_with_options(data, root / "with_header", include_header=True)
408408
save_as_xlsx_with_options(
409409
data,
410410
root / "with_data_address",
411-
header=False,
411+
include_header=False,
412412
sheet_name="ABC",
413413
startcol=10,
414414
startrow=5,
@@ -420,12 +420,12 @@ def save_as_xls(data: list[dict], path: Path) -> None:
420420
shutil.rmtree(root, ignore_errors=True)
421421
root.mkdir(parents=True, exist_ok=True)
422422

423-
save_as_xls_with_options(data, root / "without_header", header=False)
424-
save_as_xls_with_options(data, root / "with_header", header=True)
423+
save_as_xls_with_options(data, root / "without_header", include_header=False)
424+
save_as_xls_with_options(data, root / "with_header", include_header=True)
425425
save_as_xls_with_options(
426426
data,
427427
root / "with_data_address",
428-
header=False,
428+
include_header=False,
429429
sheet_name="ABC",
430430
startcol=10,
431431
startrow=5,

tests/test_unit/test_transfers/test_create_transfer.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -633,10 +633,14 @@ async def test_developer_plus_can_not_create_transfer_with_target_format_json(
633633
"message": "Invalid request",
634634
"details": [
635635
{
636-
"context": {"discriminator": "'type'", "tag": "json", "expected_tags": "'csv', 'jsonline'"},
636+
"context": {
637+
"discriminator": "'type'",
638+
"tag": "json",
639+
"expected_tags": "'csv', 'jsonline', 'excel'",
640+
},
637641
"input": {"type": "json", "lineSep": "\n", "encoding": "utf-8"},
638642
"location": ["body", "target_params", "s3", "file_format"],
639-
"message": "Input tag 'json' found using 'type' does not match any of the expected tags: 'csv', 'jsonline'",
643+
"message": "Input tag 'json' found using 'type' does not match any of the expected tags: 'csv', 'jsonline', 'excel'",
640644
"code": "union_tag_invalid",
641645
},
642646
],

tests/test_unit/test_transfers/test_file_transfers/test_create_transfer.py

Lines changed: 75 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,24 @@
2828
"directory_path": "/some/pure/path",
2929
"file_format": {
3030
"type": "csv",
31+
"delimiter": ",",
32+
"encoding": "utf-8",
33+
"quote": '"',
34+
"escape": "\\",
35+
"include_header": False,
36+
"line_sep": "\n",
37+
},
38+
"options": {
39+
"some": "option",
40+
},
41+
},
42+
{
43+
"type": "s3",
44+
"directory_path": "/some/excel/path",
45+
"file_format": {
46+
"type": "excel",
47+
"include_header": True,
48+
"start_cell": "A1",
3149
},
3250
"options": {
3351
"some": "option",
@@ -94,11 +112,28 @@ async def test_developer_plus_can_create_s3_transfer(
94112
"queue_id": transfer.queue_id,
95113
}
96114

115+
expected_file_formats = {
116+
"csv": {
117+
"type": "csv",
118+
"delimiter": ",",
119+
"encoding": "utf-8",
120+
"quote": '"',
121+
"escape": "\\",
122+
"include_header": False,
123+
"line_sep": "\n",
124+
},
125+
"excel": {
126+
"type": "excel",
127+
"include_header": True,
128+
"start_cell": "A1",
129+
},
130+
}
131+
97132
for params in (transfer.source_params, transfer.target_params):
98-
assert params["type"] == "s3"
99-
assert params["directory_path"] == "/some/pure/path"
100-
assert params["file_format"]["type"] == "csv"
133+
assert params["type"] == target_source_params["type"]
134+
assert params["directory_path"] == target_source_params["directory_path"]
101135
assert params["options"] == {"some": "option"}
136+
assert params["file_format"] == expected_file_formats[params["file_format"]["type"]]
102137

103138

104139
@pytest.mark.parametrize(
@@ -121,6 +156,15 @@ async def test_developer_plus_can_create_s3_transfer(
121156
"type": "csv",
122157
},
123158
},
159+
{
160+
"type": "hdfs",
161+
"directory_path": "/some/excel/path",
162+
"file_format": {
163+
"type": "excel",
164+
"include_header": True,
165+
"start_cell": "A1",
166+
},
167+
},
124168
],
125169
)
126170
async def test_developer_plus_can_create_hdfs_transfer(
@@ -183,10 +227,27 @@ async def test_developer_plus_can_create_hdfs_transfer(
183227
"queue_id": transfer.queue_id,
184228
}
185229

230+
expected_file_formats = {
231+
"csv": {
232+
"type": "csv",
233+
"delimiter": ",",
234+
"encoding": "utf-8",
235+
"quote": '"',
236+
"escape": "\\",
237+
"include_header": False,
238+
"line_sep": "\n",
239+
},
240+
"excel": {
241+
"type": "excel",
242+
"include_header": True,
243+
"start_cell": "A1",
244+
},
245+
}
246+
186247
for params in (transfer.source_params, transfer.target_params):
187-
assert params["type"] == "hdfs"
188-
assert params["directory_path"] == "/some/pure/path"
189-
assert params["file_format"]["type"] == "csv"
248+
assert params["type"] == target_source_params["type"]
249+
assert params["directory_path"] == target_source_params["directory_path"]
250+
assert params["file_format"] == expected_file_formats[params["file_format"]["type"]]
190251
assert params["options"] == {}
191252

192253

@@ -211,6 +272,14 @@ async def test_developer_plus_can_create_hdfs_transfer(
211272
"type": "csv",
212273
},
213274
},
275+
{
276+
"type": "s3",
277+
"directory_path": "some/path",
278+
"file_format": {
279+
"type": "excel",
280+
"include_header": True,
281+
},
282+
},
214283
],
215284
)
216285
async def test_cannot_create_file_transfer_with_relative_path(

tests/test_unit/test_transfers/test_file_transfers/test_read_transfer.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,23 @@
1616
"delimiter": ",",
1717
"encoding": "utf-8",
1818
"escape": "\\",
19-
"header": False,
19+
"include_header": False,
2020
"line_sep": "\n",
2121
"quote": '"',
2222
"type": "csv",
2323
},
2424
"options": {},
2525
},
26+
{
27+
"type": "s3",
28+
"directory_path": "/some/excel/path",
29+
"file_format": {
30+
"type": "excel",
31+
"include_header": True,
32+
"start_cell": "A1",
33+
},
34+
"options": {},
35+
},
2636
],
2737
)
2838
@pytest.mark.parametrize(

tests/test_unit/test_transfers/test_file_transfers/test_update_transfer.py

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,23 @@
1616
"delimiter": ",",
1717
"encoding": "utf-8",
1818
"escape": "\\",
19-
"header": False,
19+
"include_header": False,
2020
"line_sep": "\n",
2121
"quote": '"',
2222
"type": "csv",
2323
},
2424
"options": {},
2525
},
26+
{
27+
"type": "s3",
28+
"directory_path": "/some/excel/path",
29+
"file_format": {
30+
"type": "excel",
31+
"include_header": True,
32+
"start_cell": "A1",
33+
},
34+
"options": {},
35+
},
2636
],
2737
)
2838
@pytest.mark.parametrize(
@@ -54,7 +64,7 @@ async def test_developer_plus_can_update_s3_transfer(
5464
"source_params": {
5565
"type": "s3",
5666
"directory_path": "/some/new/test/directory",
57-
"file_format": {"type": "jsonline"},
67+
"file_format": create_transfer_data["file_format"],
5868
"options": {"some": "option"},
5969
},
6070
},
@@ -65,14 +75,11 @@ async def test_developer_plus_can_update_s3_transfer(
6575
source_params.update(
6676
{
6777
"directory_path": "/some/new/test/directory",
68-
"file_format": {
69-
"encoding": "utf-8",
70-
"line_sep": "\n",
71-
"type": "jsonline",
72-
},
78+
"file_format": create_transfer_data["file_format"],
7379
"options": {"some": "option"},
7480
},
7581
)
82+
7683
# Assert
7784
assert result.status_code == 200
7885
assert result.json() == {

0 commit comments

Comments
 (0)