@@ -30,35 +30,35 @@ def compress_with_gzip(data: str, encoding: str = "utf-8"):
3030 return buf .getvalue ()
3131
3232
33- def generate_csv (encoding : str ) -> bytes :
34- """
35- Generate CSV data with tab-separated values (\t ).
36- """
33+ def generate_csv (
34+ encoding : str = "utf-8" , delimiter : str = "," , should_compress : bool = False
35+ ) -> bytes :
3736 data = [
38- {"id" : 1 , "name" : "John" , "age" : 28 },
39- {"id" : 2 , "name" : "Alice" , "age" : 34 },
40- {"id" : 3 , "name" : "Bob" , "age" : 25 },
37+ {"id" : "1" , "name" : "John" , "age" : "28" },
38+ {"id" : "2" , "name" : "Alice" , "age" : "34" },
39+ {"id" : "3" , "name" : "Bob" , "age" : "25" },
4140 ]
4241
4342 output = StringIO ()
44- writer = csv .DictWriter (output , fieldnames = ["id" , "name" , "age" ], delimiter = " \t " )
43+ writer = csv .DictWriter (output , fieldnames = ["id" , "name" , "age" ], delimiter = delimiter )
4544 writer .writeheader ()
4645 for row in data :
4746 writer .writerow (row )
4847
49- # Ensure the pointer is at the beginning of the buffer before compressing
5048 output .seek (0 )
49+ csv_data = output .read ()
5150
52- # Compress the CSV data with Gzip
53- compressed_data = compress_with_gzip (output .read (), encoding = encoding )
54-
55- return compressed_data
51+ if should_compress :
52+ return compress_with_gzip (csv_data , encoding = encoding )
53+ return csv_data .encode (encoding )
5654
5755
5856@pytest .mark .parametrize ("encoding" , ["utf-8" , "utf" , "iso-8859-1" ])
5957def test_composite_raw_decoder_gzip_csv_parser (requests_mock , encoding : str ):
6058 requests_mock .register_uri (
61- "GET" , "https://airbyte.io/" , content = generate_csv (encoding = encoding )
59+ "GET" ,
60+ "https://airbyte.io/" ,
61+ content = generate_csv (encoding = encoding , delimiter = "\t " , should_compress = True ),
6262 )
6363 response = requests .get ("https://airbyte.io/" , stream = True )
6464
@@ -175,3 +175,26 @@ def test_composite_raw_decoder_raises_traced_exception_when_both_parsers_fail(re
175175 with patch ("json.loads" , side_effect = Exception ("test" )):
176176 with pytest .raises (AirbyteTracedException ):
177177 list (composite_raw_decoder .decode (response ))
178+
179+
180+ @pytest .mark .parametrize ("encoding" , ["utf-8" , "utf" , "iso-8859-1" ])
181+ @pytest .mark .parametrize ("delimiter" , ["," , "\t " , ";" ])
182+ def test_composite_raw_decoder_csv_parser_values (requests_mock , encoding : str , delimiter : str ):
183+ requests_mock .register_uri (
184+ "GET" ,
185+ "https://airbyte.io/" ,
186+ content = generate_csv (encoding = encoding , delimiter = delimiter , should_compress = False ),
187+ )
188+ response = requests .get ("https://airbyte.io/" , stream = True )
189+
190+ parser = CsvParser (encoding = encoding , delimiter = delimiter )
191+ composite_raw_decoder = CompositeRawDecoder (parser = parser )
192+
193+ expected_data = [
194+ {"id" : "1" , "name" : "John" , "age" : "28" },
195+ {"id" : "2" , "name" : "Alice" , "age" : "34" },
196+ {"id" : "3" , "name" : "Bob" , "age" : "25" },
197+ ]
198+
199+ parsed_records = list (composite_raw_decoder .decode (response ))
200+ assert parsed_records == expected_data
0 commit comments