22import logging
33import urllib
44import xmlrpc .client
5+ from string import hexdigits
6+ from typing import List , Set
57
68from .util import read_body
79
10+ _HEXDIG_CODE_POINTS : Set [int ] = {ord (s .encode ("ascii" )) for s in hexdigits }
11+
812log = logging .getLogger (__name__ )
913
1014
@@ -49,11 +53,17 @@ def raw_body(r1, r2):
4953
5054
def body(r1, r2):
    """Assert that two requests carry equivalent bodies.

    The transformers selected for each request (via ``_get_transformers``)
    are applied, in order, to both bodies before comparing them. When the
    two requests do not select the same list of transformers, no
    transformer is applied and the raw bodies are compared instead.

    Raises:
        AssertionError: if the (possibly transformed) bodies differ.
    """
    pipeline = list(_get_transformers(r1))
    if pipeline != list(_get_transformers(r2)):
        # The requests disagree on how their bodies should be interpreted,
        # so fall back to comparing the bodies untransformed.
        pipeline = []

    left = read_body(r1)
    right = read_body(r2)
    for step in pipeline:
        left = step(left)
        right = step(right)

    if left != right:
        raise AssertionError
5868
5969
@@ -72,6 +82,62 @@ def checker(headers):
7282 return checker
7383
7484
85+ def _dechunk (body ):
86+ if isinstance (body , str ):
87+ body = body .encode ("utf-8" )
88+ elif isinstance (body , bytearray ):
89+ body = bytes (body )
90+ elif hasattr (body , "__iter__" ):
91+ body = list (body )
92+ if body :
93+ if isinstance (body [0 ], str ):
94+ body = ("" .join (body )).encode ("utf-8" )
95+ elif isinstance (body [0 ], bytes ):
96+ body = b"" .join (body )
97+ elif isinstance (body [0 ], int ):
98+ body = bytes (body )
99+ else :
100+ raise ValueError (f"Body chunk type { type (body [0 ])} not supported" )
101+ else :
102+ body = None
103+
104+ if not isinstance (body , bytes ):
105+ return body
106+
107+ # Now decode chunked data format (https://en.wikipedia.org/wiki/Chunked_transfer_encoding)
108+ # Example input: b"45\r\n<69 bytes>\r\n0\r\n\r\n" where int(b"45", 16) == 69.
109+ CHUNK_GAP = b"\r \n "
110+ BODY_LEN : int = len (body )
111+
112+ chunks : List [bytes ] = []
113+ pos : int = 0
114+
115+ while True :
116+ for i in range (pos , BODY_LEN ):
117+ if body [i ] not in _HEXDIG_CODE_POINTS :
118+ break
119+
120+ if i == 0 or body [i : i + len (CHUNK_GAP )] != CHUNK_GAP :
121+ if pos == 0 :
122+ return body # i.e. assume non-chunk data
123+ raise ValueError ("Malformed chunked data" )
124+
125+ size_bytes = int (body [pos :i ], 16 )
126+ if size_bytes == 0 : # i.e. well-formed ending
127+ return b"" .join (chunks )
128+
129+ chunk_data_first = i + len (CHUNK_GAP )
130+ chunk_data_after_last = chunk_data_first + size_bytes
131+
132+ if body [chunk_data_after_last : chunk_data_after_last + len (CHUNK_GAP )] != CHUNK_GAP :
133+ raise ValueError ("Malformed chunked data" )
134+
135+ chunk_data = body [chunk_data_first :chunk_data_after_last ]
136+ chunks .append (chunk_data )
137+
138+ pos = chunk_data_after_last + len (CHUNK_GAP )
139+
140+
75141def _transform_json (body ):
76142 if body :
77143 return json .loads (body )
@@ -80,6 +146,7 @@ def _transform_json(body):
80146_xml_header_checker = _header_checker ("text/xml" )
81147_xmlrpc_header_checker = _header_checker ("xmlrpc" , header = "User-Agent" )
82148_checker_transformer_pairs = (
149+ (_header_checker ("chunked" , header = "Transfer-Encoding" ), _dechunk ),
83150 (
84151 _header_checker ("application/x-www-form-urlencoded" ),
85152 lambda body : urllib .parse .parse_qs (body .decode ("ascii" )),
@@ -89,16 +156,10 @@ def _transform_json(body):
89156)
90157
91158
92- def _identity (x ):
93- return x
94-
95-
96- def _get_transformer (request ):
def _get_transformers(request):
    """Yield the body transformers that apply to ``request``.

    Walks ``_checker_transformer_pairs`` in order and yields the
    transformer of every pair whose checker accepts ``request.headers``.
    Nothing is yielded when no checker matches.
    """
    yield from (
        transformer
        for checker, transformer in _checker_transformer_pairs
        if checker(request.headers)
    )
102163
103164
104165def requests_match (r1 , r2 , matchers ):
0 commit comments