1
1
import csv
2
2
from typing import IO , AsyncGenerator
3
+
3
4
from .page import Page
4
5
from .parser import Parser
5
6
@@ -10,11 +11,20 @@ class CsvParser(Parser):
10
11
"""
11
12
12
13
async def parse(self, content: IO) -> AsyncGenerator[Page, None]:
    """Yield one ``Page`` per CSV data row, skipping the header row.

    ``content`` may be a file-like object opened in binary or text mode,
    or raw ``bytes`` / ``bytearray`` / ``str`` data. Byte payloads are
    decoded as UTF-8; text payloads are used as-is.

    Each page is built as ``Page(i, offset, page_text)`` where ``i`` is the
    zero-based index of the data row (header excluded), ``page_text`` is
    the row's fields re-joined with commas, and ``offset`` is the running
    sum of ``len(page_text) + 1`` over the preceding data rows.

    Raises:
        UnicodeDecodeError: if a byte payload is not valid UTF-8.
    """
    # Read from file-like objects first, then decide whether decoding is
    # needed: a text-mode stream returns ``str`` from ``read()``, and
    # calling ``.decode`` on that would raise AttributeError.
    data = content.read() if hasattr(content, "read") else content
    if isinstance(data, (bytes, bytearray)):
        data = data.decode("utf-8")

    # Create a CSV reader from the text content
    reader = csv.reader(data.splitlines())

    # Skip the header row; the ``None`` default keeps empty input from
    # raising StopIteration inside the generator.
    next(reader, None)

    offset = 0
    for i, row in enumerate(reader):
        page_text = ",".join(row)
        yield Page(i, offset, page_text)
        offset += len(page_text) + 1  # Account for newline character
0 commit comments