Commit e7c00a4

Merge pull request #739 from kevin1024/issue-734-fix-body-matcher-for-chunked-requests
Fix body matcher for chunked requests (fixes #734)
2 parents 92dd4d0 + e69b10c commit e7c00a4
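
For context, this is roughly what a chunked request body looks like on the wire: each chunk is prefixed with its length in hexadecimal and terminated by CRLF, and a zero-length chunk ends the stream. A minimal sketch, using the same payload as the new tests below:

    payload = b"123456789_123456"  # 16 bytes
    # each chunk is "<hex length>\r\n<data>\r\n"; a zero-length chunk terminates the body
    chunked = b"%x\r\n" % len(payload) + payload + b"\r\n" + b"0\r\n\r\n"
    assert chunked == b"10\r\n123456789_123456\r\n0\r\n\r\n"  # 0x10 == 16

Before this change, the body matcher compared bodies byte-for-byte without decoding this framing, so a request body recorded in chunked form could fail to match the same payload sent un-chunked.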

2 files changed: 107 additions & 13 deletions


tests/unit/test_matchers.py

Lines changed: 33 additions & 0 deletions
@@ -63,6 +63,9 @@ def test_uri_matcher():
     "Expect": b"100-continue",
     "Content-Length": "21",
 }
+chunked_headers = {
+    "Transfer-Encoding": "chunked",
+}
 
 
 @pytest.mark.parametrize(
@@ -151,6 +154,36 @@ def test_uri_matcher():
             request.Request("POST", "http://aws.custom.com/", b"123", boto3_bytes_headers),
             request.Request("POST", "http://aws.custom.com/", b"123", boto3_bytes_headers),
         ),
+        (
+            # chunked transfer encoding: decoded bytes versus encoded bytes
+            request.Request("POST", "scheme1://host1.test/", b"123456789_123456", chunked_headers),
+            request.Request(
+                "GET",
+                "scheme2://host2.test/",
+                b"10\r\n123456789_123456\r\n0\r\n\r\n",
+                chunked_headers,
+            ),
+        ),
+        (
+            # chunked transfer encoding: bytes iterator versus string iterator
+            request.Request(
+                "POST",
+                "scheme1://host1.test/",
+                iter([b"123456789_", b"123456"]),
+                chunked_headers,
+            ),
+            request.Request("GET", "scheme2://host2.test/", iter(["123456789_", "123456"]), chunked_headers),
+        ),
+        (
+            # chunked transfer encoding: bytes iterator versus single byte iterator
+            request.Request(
+                "POST",
+                "scheme1://host1.test/",
+                iter([b"123456789_", b"123456"]),
+                chunked_headers,
+            ),
+            request.Request("GET", "scheme2://host2.test/", iter(b"123456789_123456"), chunked_headers),
+        ),
     ],
 )
 def test_body_matcher_does_match(r1, r2):
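
The new parametrize cases above exercise the matcher through its public entry point. A minimal usage sketch of the same behaviour (the host name here is illustrative only):

    from vcr import matchers, request

    headers = {"Transfer-Encoding": "chunked"}
    r1 = request.Request("POST", "http://host.test/", b"123456789_123456", headers)
    r2 = request.Request("POST", "http://host.test/", b"10\r\n123456789_123456\r\n0\r\n\r\n", headers)

    # With this change the chunked framing is stripped before comparison, so the
    # two bodies are considered equal; a mismatch would raise AssertionError.
    matchers.body(r1, r2)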

vcr/matchers.py

Lines changed: 74 additions & 13 deletions
@@ -2,9 +2,13 @@
 import logging
 import urllib
 import xmlrpc.client
+from string import hexdigits
+from typing import List, Set
 
 from .util import read_body
 
+_HEXDIG_CODE_POINTS: Set[int] = {ord(s.encode("ascii")) for s in hexdigits}
+
 log = logging.getLogger(__name__)
 
 
@@ -49,11 +53,17 @@ def raw_body(r1, r2):
 
 
 def body(r1, r2):
-    transformer = _get_transformer(r1)
-    r2_transformer = _get_transformer(r2)
-    if transformer != r2_transformer:
-        transformer = _identity
-    if transformer(read_body(r1)) != transformer(read_body(r2)):
+    transformers = list(_get_transformers(r1))
+    if transformers != list(_get_transformers(r2)):
+        transformers = []
+
+    b1 = read_body(r1)
+    b2 = read_body(r2)
+    for transform in transformers:
+        b1 = transform(b1)
+        b2 = transform(b2)
+
+    if b1 != b2:
         raise AssertionError
 
 
@@ -72,6 +82,62 @@ def checker(headers):
     return checker
 
 
+def _dechunk(body):
+    if isinstance(body, str):
+        body = body.encode("utf-8")
+    elif isinstance(body, bytearray):
+        body = bytes(body)
+    elif hasattr(body, "__iter__"):
+        body = list(body)
+        if body:
+            if isinstance(body[0], str):
+                body = ("".join(body)).encode("utf-8")
+            elif isinstance(body[0], bytes):
+                body = b"".join(body)
+            elif isinstance(body[0], int):
+                body = bytes(body)
+            else:
+                raise ValueError(f"Body chunk type {type(body[0])} not supported")
+        else:
+            body = None
+
+    if not isinstance(body, bytes):
+        return body
+
+    # Now decode chunked data format (https://en.wikipedia.org/wiki/Chunked_transfer_encoding)
+    # Example input: b"45\r\n<69 bytes>\r\n0\r\n\r\n" where int(b"45", 16) == 69.
+    CHUNK_GAP = b"\r\n"
+    BODY_LEN: int = len(body)
+
+    chunks: List[bytes] = []
+    pos: int = 0
+
+    while True:
+        for i in range(pos, BODY_LEN):
+            if body[i] not in _HEXDIG_CODE_POINTS:
+                break
+
+        if i == 0 or body[i : i + len(CHUNK_GAP)] != CHUNK_GAP:
+            if pos == 0:
+                return body  # i.e. assume non-chunk data
+            raise ValueError("Malformed chunked data")
+
+        size_bytes = int(body[pos:i], 16)
+        if size_bytes == 0:  # i.e. well-formed ending
+            return b"".join(chunks)
+
+        chunk_data_first = i + len(CHUNK_GAP)
+        chunk_data_after_last = chunk_data_first + size_bytes
+
+        if body[chunk_data_after_last : chunk_data_after_last + len(CHUNK_GAP)] != CHUNK_GAP:
+            raise ValueError("Malformed chunked data")
+
+        chunk_data = body[chunk_data_first:chunk_data_after_last]
+        chunks.append(chunk_data)
+
+        pos = chunk_data_after_last + len(CHUNK_GAP)
+
+
 def _transform_json(body):
     if body:
         return json.loads(body)
@@ -80,6 +146,7 @@ def _transform_json(body):
 _xml_header_checker = _header_checker("text/xml")
 _xmlrpc_header_checker = _header_checker("xmlrpc", header="User-Agent")
 _checker_transformer_pairs = (
+    (_header_checker("chunked", header="Transfer-Encoding"), _dechunk),
     (
         _header_checker("application/x-www-form-urlencoded"),
         lambda body: urllib.parse.parse_qs(body.decode("ascii")),
@@ -89,16 +156,10 @@ def _transform_json(body):
 )
 
 
-def _identity(x):
-    return x
-
-
-def _get_transformer(request):
+def _get_transformers(request):
     for checker, transformer in _checker_transformer_pairs:
        if checker(request.headers):
-            return transformer
-    else:
-        return _identity
+            yield transformer
 
 
 def requests_match(r1, r2, matchers):
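
_dechunk is a private helper, but its behaviour is easy to illustrate directly; importing it here is for demonstration only (in normal use it is applied automatically when both requests carry a chunked Transfer-Encoding header):

    from vcr.matchers import _dechunk

    # a well-formed chunked body is decoded to the raw payload
    assert _dechunk(b"10\r\n123456789_123456\r\n0\r\n\r\n") == b"123456789_123456"

    # iterator bodies are joined first; data without chunk framing passes through unchanged
    assert _dechunk(iter(["123456789_", "123456"])) == b"123456789_123456"
    assert _dechunk(b"plain body") == b"plain body"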
