Skip to content

Commit 87b7bc3

Browse files
committed
windows tests
1 parent 8326a0a commit 87b7bc3

File tree

2 files changed

+20
-2
lines changed

2 files changed

+20
-2
lines changed

src/crawlee/_utils/file.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,13 @@ async def export_csv_to_stream(
163163
dst: TextIO,
164164
**kwargs: Unpack[ExportDataCsvKwargs],
165165
) -> None:
166+
# Set lineterminator to '\n' if not explicitly provided. This prevents double line endings on Windows.
167+
# The csv.writer default is '\r\n', which when written to a file in text mode on Windows gets converted
168+
# to '\r\r\n' due to newline translation. By using '\n', we let the platform handle the line ending
169+
# conversion: '\n' stays as '\n' on Unix, and becomes '\r\n' on Windows (provided the stream was opened in text mode with default newline translation).
170+
if 'lineterminator' not in kwargs:
171+
kwargs['lineterminator'] = '\n'
172+
166173
writer = csv.writer(dst, **kwargs) # type: ignore[arg-type]
167174
write_header = True
168175

tests/unit/crawlers/_basic/test_basic_crawler.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -732,7 +732,13 @@ async def test_crawler_push_and_export_data(tmp_path: Path) -> None:
732732
{'id': 1, 'test': 'test'},
733733
{'id': 2, 'test': 'test'},
734734
]
735-
assert (tmp_path / 'dataset.csv').read_bytes() == b'id,test\r\n0,test\r\n1,test\r\n2,test\r\n'
735+
736+
# On Windows, text mode file writes convert \n to \r\n, resulting in \r\n line endings.
737+
# On Unix/Linux, \n remains as \n.
738+
if sys.platform == 'win32':
739+
assert (tmp_path / 'dataset.csv').read_bytes() == b'id,test\r\n0,test\r\n1,test\r\n2,test\r\n'
740+
else:
741+
assert (tmp_path / 'dataset.csv').read_bytes() == b'id,test\n0,test\n1,test\n2,test\n'
736742

737743

738744
async def test_crawler_export_data_additional_kwargs(tmp_path: Path) -> None:
@@ -770,7 +776,12 @@ async def handler(context: BasicCrawlingContext) -> None:
770776
{'id': 2, 'test': 'test'},
771777
]
772778

773-
assert (tmp_path / 'dataset.csv').read_bytes() == b'id,test\r\n0,test\r\n1,test\r\n2,test\r\n'
779+
# On Windows, text mode file writes convert \n to \r\n, resulting in \r\n line endings.
780+
# On Unix/Linux, \n remains as \n.
781+
if sys.platform == 'win32':
782+
assert (tmp_path / 'dataset.csv').read_bytes() == b'id,test\r\n0,test\r\n1,test\r\n2,test\r\n'
783+
else:
784+
assert (tmp_path / 'dataset.csv').read_bytes() == b'id,test\n0,test\n1,test\n2,test\n'
774785

775786

776787
async def test_context_update_kv_store() -> None:

0 commit comments

Comments
 (0)