Skip to content

Commit ced9813

Browse files
authored
Support for parsing csv (#1357)
Signed-off-by: Mandana Vaziri <[email protected]>
1 parent 4286f82 commit ced9813

File tree

5 files changed

+34
-3
lines changed

5 files changed

+34
-3
lines changed

docs/tutorial.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -378,7 +378,7 @@ Bob lives at the following address:
378378
## Parsing the output of a block
379379

380380
As we saw in the previous section, it is possible to use the `parser: json` setting to parse the result of a block as JSON.
381-
Other possible values for `parser` are `yaml`, `jsonl`, or `regex`.
381+
Other possible values for `parser` are `yaml`, `jsonl`, `regex`, or `csv`.
382382

383383
The following example uses a regular expression parser to extract the code between triple backticks generated by a model:
384384

src/pdl/pdl-schema.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3998,7 +3998,8 @@
39983998
"enum": [
39993999
"json",
40004000
"jsonl",
4001-
"yaml"
4001+
"yaml",
4002+
"csv"
40024003
],
40034004
"type": "string"
40044005
},

src/pdl/pdl_ast.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -301,7 +301,7 @@ class RegexParser(Parser):
301301

302302

303303
ParserType = TypeAliasType(
304-
"ParserType", Union[Literal["json", "jsonl", "yaml"], PdlParser, RegexParser]
304+
"ParserType", Union[Literal["json", "jsonl", "yaml", "csv"], PdlParser, RegexParser]
305305
)
306306
"""Different parsers."""
307307
OptionalParserType = TypeAliasType("OptionalParserType", Optional[ParserType])

src/pdl/pdl_interpreter.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# pylint: disable=import-outside-toplevel
2+
import csv
23
import json
34
import re
45
import shlex
@@ -13,6 +14,7 @@
1314
from abc import ABC, abstractmethod
1415
from concurrent.futures import ThreadPoolExecutor
1516
from functools import partial, reduce
17+
from io import StringIO
1618
from itertools import count
1719
from os import getenv
1820
from pathlib import Path
@@ -2647,6 +2649,18 @@ def parse_result(parser: ParserType, text: str) -> JSONReturnType:
26472649
raise PDLRuntimeParserError(
26482650
f"Attempted to parse ill-formed YAML: {repr(exc)}"
26492651
) from exc
2652+
case "csv":
2653+
try:
2654+
result = []
2655+
reader = csv.reader(StringIO(text))
2656+
for row in reader:
2657+
result.append(row)
2658+
except KeyboardInterrupt as exc:
2659+
raise exc from exc
2660+
except Exception as exc:
2661+
raise PDLRuntimeParserError(
2662+
f"Attempted to parse ill-formed CSV: {repr(exc)}"
2663+
) from exc
26502664
case PdlParser():
26512665
assert False, "TODO"
26522666
case RegexParser(mode="search" | "match" | "fullmatch"):

tests/test_parser.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,3 +172,19 @@ def test_parser_case2():
172172
"""
173173
result = exec_str(prog)
174174
assert result == ["1", "2", "3", "4"]
175+
176+
177+
def test_parser_csv():
178+
csv_parser = """
179+
text: |
180+
1,Apple,Red
181+
2,Orange,Orange
182+
3,Banana,Yellow
183+
parser: csv
184+
"""
185+
result = exec_str(csv_parser)
186+
assert result == [
187+
["1", "Apple", "Red"],
188+
["2", "Orange", "Orange"],
189+
["3", "Banana", "Yellow"],
190+
]

0 commit comments

Comments
 (0)