|
import io

import pytest

from prepdocslib.csvparser import CsvParser  # Adjust import to the correct module
| 4 | + |
@pytest.mark.asyncio
async def test_csvparser_single_row():
    """Check that a header line plus one data row parses into exactly one page."""
    # CSV payload supplied as a binary stream; the first line is the header.
    file = io.BytesIO(b"col1,col2,col3\nvalue1,value2,value3")
    file.name = "test.csv"
    csvparser = CsvParser()

    # parse() is consumed with `async for`, so drain it into a list first.
    pages = [page async for page in csvparser.parse(file)]

    # Only the data row is expected; the header text must not show up as a page.
    assert len(pages) == 1
    assert pages[0].page_num == 0
    assert pages[0].offset == 0
    assert pages[0].text == "value1,value2,value3"
| 20 | + |
| 21 | + |
@pytest.mark.asyncio
async def test_csvparser_multiple_rows():
    """Check that each data row becomes its own page with a running offset."""
    # CSV payload supplied as a binary stream: one header line, two data rows.
    file = io.BytesIO(b"col1,col2,col3\nvalue1,value2,value3\nvalue4,value5,value6")
    file.name = "test.csv"
    csvparser = CsvParser()

    # parse() is consumed with `async for`, so drain it into a list first.
    pages = [page async for page in csvparser.parse(file)]

    assert len(pages) == 2  # Expect only data rows, skipping the header

    first, second = pages
    assert first.page_num == 0
    assert first.offset == 0
    assert first.text == "value1,value2,value3"

    assert second.page_num == 1
    # The second offset is the first row's length plus one for the newline.
    assert second.offset == len(first.text) + 1
    assert second.text == "value4,value5,value6"
| 41 | + |
| 42 | + |
@pytest.mark.asyncio
async def test_csvparser_empty_file():
    """Check that an empty CSV stream yields no pages."""
    # Zero-length binary stream standing in for an empty file.
    file = io.BytesIO(b"")
    file.name = "test.csv"
    csvparser = CsvParser()

    # parse() is consumed with `async for`, so drain it into a list first.
    pages = [page async for page in csvparser.parse(file)]

    assert len(pages) == 0  # No rows should be parsed from an empty file
0 commit comments