1010from abc import ABC , abstractmethod
1111from dataclasses import dataclass
1212from io import BufferedIOBase , TextIOWrapper
13- from typing import Any , Generator , MutableMapping , Optional
13+ from typing import Any , Dict , Generator , List , MutableMapping , Optional , Set , Tuple
1414
1515import orjson
1616import requests
@@ -28,7 +28,6 @@ class Parser(ABC):
2828 def parse (
2929 self ,
3030 data : BufferedIOBase ,
31- compressed : Optional [bool ] = False ,
3231 ) -> Generator [MutableMapping [str , Any ], None , None ]:
3332 """
3433 Parse data and yield dictionaries.
@@ -43,7 +42,6 @@ class GzipParser(Parser):
4342 def parse (
4443 self ,
4544 data : BufferedIOBase ,
46- compressed : Optional [bool ] = False ,
4745 ) -> Generator [MutableMapping [str , Any ], None , None ]:
4846 """
4947 Decompress gzipped bytes and pass decompressed data to the inner parser.
@@ -55,11 +53,8 @@ def parse(
5553 - The data is not decoded by default.
5654 """
5755
58- if compressed :
59- with gzip .GzipFile (fileobj = data , mode = "rb" ) as gzipobj :
60- yield from self .inner_parser .parse (gzipobj )
61- else :
62- yield from self .inner_parser .parse (data )
56+ with gzip .GzipFile (fileobj = data , mode = "rb" ) as gzipobj :
57+ yield from self .inner_parser .parse (gzipobj )
6358
6459
6560@dataclass
@@ -69,7 +64,6 @@ class JsonParser(Parser):
6964 def parse (
7065 self ,
7166 data : BufferedIOBase ,
72- compressed : Optional [bool ] = False ,
7367 ) -> Generator [MutableMapping [str , Any ], None , None ]:
7468 """
7569 Attempts to deserialize data using orjson library. As an extra layer of safety we fallback on the json library to deserialize the data.
@@ -113,7 +107,6 @@ class JsonLineParser(Parser):
113107 def parse (
114108 self ,
115109 data : BufferedIOBase ,
116- compressed : Optional [bool ] = False ,
117110 ) -> Generator [MutableMapping [str , Any ], None , None ]:
118111 for line in data :
119112 try :
@@ -141,7 +134,6 @@ def _get_delimiter(self) -> Optional[str]:
141134 def parse (
142135 self ,
143136 data : BufferedIOBase ,
144- compressed : Optional [bool ] = False ,
145137 ) -> Generator [MutableMapping [str , Any ], None , None ]:
146138 """
147139 Parse CSV data from decompressed bytes.
@@ -152,7 +144,9 @@ def parse(
152144 yield row
153145
154146
155- @dataclass
# Alias for the name of an HTTP response header used when routing to a parser.
_HEADER = str
# Alias for the value of such a header; it is compared by exact equality.
_HEADER_VALUE = str
149+
156150class CompositeRawDecoder (Decoder ):
157151 """
158152 Decoder strategy to transform a requests.Response into a Generator[MutableMapping[str, Any], None, None]
@@ -168,26 +162,46 @@ class CompositeRawDecoder(Decoder):
168162 )
169163 """
170164
171- parser : Parser
172- stream_response : bool = True
@classmethod
def by_headers(
    cls,
    parsers: List[Tuple[Set[_HEADER], Set[_HEADER_VALUE], Parser]],
    stream_response: bool,
    fallback_parser: Parser,
) -> "CompositeRawDecoder":
    """
    Build a decoder that chooses its parser from the response headers.

    :param parsers: routing rules as (header names, header values, parser) tuples.
        Every header name in a tuple is mapped, for every header value in that
        tuple, to the tuple's parser.
    :param stream_response: whether the decoder reads the response as a stream.
    :param fallback_parser: parser used when no routing rule matches the response.
    :return: a decoder configured with the merged header routing table.
    """
    parsers_by_header: Dict[_HEADER, Dict[_HEADER_VALUE, Parser]] = {}
    for headers, header_values, parser in parsers:
        for header in headers:
            # Merge into any mapping already registered for this header: plain
            # assignment would discard the values contributed by an earlier
            # tuple that names the same header. On a duplicate (header, value)
            # pair the later tuple wins.
            parsers_by_header.setdefault(header, {}).update(
                dict.fromkeys(header_values, parser)
            )
    return cls(fallback_parser, stream_response, parsers_by_header)
172+
@classmethod
def from_parser(cls, parser: Parser, stream_response: bool) -> "CompositeRawDecoder":
    """
    Build a decoder that always uses `parser`, with no header-based routing.

    :param parser: the parser applied to every response.
    :param stream_response: whether the decoder reads the response as a stream.
    :return: a decoder whose header routing table is empty.
    """
    no_header_routing: Dict = {}
    return cls(parser, stream_response, no_header_routing)
176+
def __init__(
    self,
    parser: Parser,
    stream_response: bool = True,
    parsers_by_header: Optional[Dict[_HEADER, Dict[_HEADER_VALUE, Parser]]] = None,
) -> None:
    """
    Store the fallback parser, the streaming flag and the header routing table.

    :param parser: parser used when no header rule matches a response.
    :param stream_response: value later reported by `is_stream_response`.
    :param parsers_by_header: mapping of header name -> header value -> parser;
        a missing or empty mapping is replaced by a new empty dict.
    """
    self._fallback_parser = parser
    self._stream_response = stream_response
    self._parsers_by_header = parsers_by_header or {}
173181
def is_stream_response(self) -> bool:
    """Return the streaming flag this decoder was constructed with."""
    streaming = self._stream_response
    return streaming
176184
def decode(
    self,
    response: requests.Response,
) -> Generator[MutableMapping[str, Any], None, None]:
    """
    Parse `response` with the parser selected for it and yield the parsed records.

    In streaming mode the raw response stream is handed to the parser and is
    closed once parsing ends, including when the parser raises or the consumer
    stops iterating early. Otherwise the full response content is wrapped in an
    in-memory buffer and parsed from there.

    :param response: the HTTP response to decode.
    :return: a generator of the records produced by the selected parser.
    """
    parser = self._select_parser(response)
    if not self.is_stream_response():
        yield from parser.parse(data=io.BytesIO(response.content))
        return

    # urllib mentions that some interfaces don't play nice with auto_close
    # More info here: https://urllib3.readthedocs.io/en/stable/user-guide.html#using-io-wrappers-with-response-content
    # We have indeed observed some issues with CSV parsing.
    # Hence, we will manage the closing of the file ourselves until we find a better solution.
    response.raw.auto_close = False
    try:
        yield from parser.parse(
            data=response.raw,  # type: ignore[arg-type]
        )
    finally:
        # auto_close is disabled above, so nothing else closes the stream;
        # release it even if parsing fails or the generator is closed early.
        response.raw.close()
202+
def _select_parser(self, response: requests.Response) -> Parser:
    """
    Pick the parser registered for the headers of `response`.

    Configured headers are checked in the routing table's iteration order. The
    first header that is present on the response and whose value exactly equals
    a configured value determines the parser. If none matches, the fallback
    parser is returned.

    :param response: the HTTP response whose headers drive the selection.
    :return: the matching parser, or the fallback parser.
    """
    response_headers = response.headers
    for name, candidates in self._parsers_by_header.items():
        if name not in response_headers:
            continue
        observed = response_headers[name]
        if observed in candidates:
            return candidates[observed]
    return self._fallback_parser
0 commit comments