Skip to content

Commit 99c2633

Browse files
author
Oleksandr Bazarnov
committed
fix
1 parent 978be1b commit 99c2633

File tree

2 files changed

+14
-2
lines changed

2 files changed

+14
-2
lines changed

airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,16 @@ class CsvParser(Parser):
107107
encoding: Optional[str] = "utf-8"
108108
delimiter: Optional[str] = ","
109109

110+
def _get_delimiter(self) -> Optional[str]:
111+
"""
112+
Get delimiter from the configuration. Check for the escape character and decode it.
113+
"""
114+
if self.delimiter is not None:
115+
if self.delimiter.startswith("\\"):
116+
self.delimiter = self.delimiter.encode("utf-8").decode("unicode_escape")
117+
118+
return self.delimiter
119+
110120
def parse(
111121
self,
112122
data: BufferedIOBase,
@@ -115,7 +125,7 @@ def parse(
115125
Parse CSV data from decompressed bytes.
116126
"""
117127
text_data = TextIOWrapper(data, encoding=self.encoding) # type: ignore
118-
reader = csv.DictReader(text_data, delimiter=self.delimiter or ",")
128+
reader = csv.DictReader(text_data, delimiter=self._get_delimiter() or ",")
119129
yield from reader
120130

121131

unit_tests/sources/declarative/decoders/test_composite_decoder.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,9 @@ def test_composite_raw_decoder_gzip_csv_parser(requests_mock, encoding: str):
6262
)
6363
response = requests.get("https://airbyte.io/", stream=True)
6464

65-
parser = GzipParser(inner_parser=CsvParser(encoding=encoding, delimiter="\t"))
65+
# The delimiter is deliberately the escaped two-character string `\\t` (backslash + "t") to exercise CsvParser's escape-decoding of delimiters.
66+
parser = GzipParser(inner_parser=CsvParser(encoding=encoding, delimiter="\\t"))
67+
6668
composite_raw_decoder = CompositeRawDecoder(parser=parser)
6769
counter = 0
6870
for _ in composite_raw_decoder.decode(response):

0 commit comments

Comments
 (0)