Skip to content

Commit 5364059

Browse files
author
maxi297
committed
Improve documentation on the issue
1 parent f276a6e commit 5364059

File tree

2 files changed

+11
-2
lines changed

2 files changed

+11
-2
lines changed

airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,8 @@ def decode(
151151
self, response: requests.Response
152152
) -> Generator[MutableMapping[str, Any], None, None]:
153153
if self.is_stream_response():
154+
# urllib3 documents that some interfaces don't play nice with auto_close (see https://urllib3.readthedocs.io/en/stable/user-guide.html#using-io-wrappers-with-response-content)
155+
# We have indeed observed some issues with CSV parsing. Hence, we will manage the closing of the file ourselves until we find a better solution.
154156
response.raw.auto_close = False
155157
yield from self.parser.parse(data=response.raw) # type: ignore[arg-type]
156158
response.raw.close()

unit_tests/sources/declarative/decoders/test_composite_decoder.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,15 @@ def do_GET(self) -> None:
214214
def test_composite_raw_decoder_csv_parser_without_mocked_response():
215215
"""
216216
This test reproduces a `ValueError: I/O operation on closed file` error we had with CSV parsing. We could not catch this with other tests because the closing of the mocked response from requests_mock was not the same as the one in requests.
217+
218+
We first identified this issue while working with the sample defined https://people.sc.fsu.edu/~jburkardt/data/csv/addresses.csv.
219+
In theory, this should be reproducible by having the test server execute the `self.wfile.write` statement shown as a comment below. In practice, however, the error could not be reproduced that way.
220+
221+
Currently we use `self.wfile.write(bytes("col1,col2\nval1,val2", "utf-8"))` to reproduce it, which we know is not a valid CSV as it does not end with a newline character. However, this is the only way we were able to reproduce the error locally.
217222
"""
223+
# self.wfile.write(bytes('John,Doe,120 jefferson st.,Riverside, NJ, 08075\nJack,McGinnis,220 hobo Av.,Phila, PA,09119\n"John ""Da Man""",Repici,120 Jefferson St.,Riverside, NJ,08075\nStephen,Tyler,"7452 Terrace ""At the Plaza"" road",SomeTown,SD, 91234\n,Blankman,,SomeTown, SD, 00298\n"Joan ""the bone"", Anne",Jet,"9th, at Terrace plc",Desert City,CO,00123\n', "utf-8"))
224+
225+
218226
# start server
219227
httpd = HTTPServer(("localhost", 8080), TestServer)
220228
thread = Thread(target=httpd.serve_forever, args=())
@@ -224,8 +232,7 @@ def test_composite_raw_decoder_csv_parser_without_mocked_response():
224232
result = list(CompositeRawDecoder(parser=CsvParser()).decode(response))
225233

226234
assert len(result) == 1
227-
httpd.shutdown() # release port
228-
thread.join()
235+
httpd.shutdown() # release port and kill the thread
229236

230237

231238
def test_given_response_already_consumed_when_decode_then_no_data_is_returned(requests_mock):

0 commit comments

Comments
 (0)