Skip to content

Commit 451c462

Browse files
committed
fix: replace 0 bytes instead of raising an exception
This factors out the handling of 0 bytes instead of leaving it up to every caller.
1 parent 59f2926 commit 451c462

File tree

5 files changed

+94
-13
lines changed

5 files changed

+94
-13
lines changed

pygitguardian/client.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,10 @@ def content_scan(
236236
extra_headers: Optional[Dict[str, str]] = None,
237237
) -> Union[Detail, ScanResult]:
238238
"""
239-
content_scan handles the /scan endpoint of the API
239+
content_scan handles the /scan endpoint of the API.
240+
241+
If document contains `0` bytes, they will be replaced with the Unicode
242+
replacement character.
240243
241244
:param filename: name of file, example: "intro.py"
242245
:param document: content of file
@@ -272,7 +275,10 @@ def multi_content_scan(
272275
extra_headers: Optional[Dict[str, str]] = None,
273276
) -> Union[Detail, MultiScanResult]:
274277
"""
275-
multi_content_scan handles the /multiscan endpoint of the API
278+
multi_content_scan handles the /multiscan endpoint of the API.
279+
280+
If documents contain `0` bytes, they will be replaced with the Unicode
281+
replacement character.
276282
277283
:param documents: List of dictionaries containing the keys document
278284
and, optionally, filename.

pygitguardian/models.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ class DocumentSchema(BaseSchema):
5252
document = fields.String(required=True)
5353

5454
@validates("document")
55-
def validate_document(self, document: str) -> str:
55+
def validate_document(self, document: str) -> None:
5656
"""
5757
validate that document is smaller than scan limit
5858
"""
@@ -64,10 +64,12 @@ def validate_document(self, document: str) -> str:
6464
)
6565
)
6666

67-
if "\x00" in document:
68-
raise ValidationError("document has null characters")
69-
70-
return document
67+
@post_load
68+
def replace_0_bytes(self, in_data: Dict[str, Any], **kwargs: Any) -> Dict[str, Any]:
69+
doc = in_data["document"]
70+
# Our API does not accept 0 bytes in documents, so replace them with the replacement character
71+
in_data["document"] = doc.replace("\0", "\uFFFD")
72+
return in_data
7173

7274

7375
class Document(Base):
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
interactions:
2+
- request:
3+
body: '{"document": "Hello World"}'
4+
headers:
5+
Accept:
6+
- '*/*'
7+
Accept-Encoding:
8+
- gzip, deflate
9+
Connection:
10+
- keep-alive
11+
Content-Length:
12+
- '27'
13+
Content-Type:
14+
- application/json
15+
User-Agent:
16+
- pygitguardian/1.3.4 (Linux;py3.8.10)
17+
method: POST
18+
uri: https://api.gitguardian.com/v1/scan
19+
response:
20+
body:
21+
string:
22+
'{"policy_break_count":0,"policies":["File extensions","Filenames","Secrets
23+
detection"],"policy_breaks":[]}'
24+
headers:
25+
Access-Control-Expose-Headers:
26+
- X-App-Version
27+
Allow:
28+
- POST, OPTIONS
29+
Connection:
30+
- keep-alive
31+
Content-Length:
32+
- '106'
33+
Content-Type:
34+
- application/json
35+
Date:
36+
- Fri, 24 Jun 2022 16:08:40 GMT
37+
Referrer-Policy:
38+
- strict-origin-when-cross-origin
39+
Server:
40+
- nginx
41+
Set-Cookie:
42+
- AWSALB=jzG+lNYQFwVa/HLEk17W6yiGRSKg6NTA2/1+uOmn+n5jG7J03MudYdFdbtJdN7+y9jwsoul66j7dHclQD7B8ZRa4FWTZJO3AeCHhfcZQxhwEb5uko4OvEhi9jD2o;
43+
Expires=Fri, 01 Jul 2022 16:08:40 GMT; Path=/
44+
- AWSALBCORS=jzG+lNYQFwVa/HLEk17W6yiGRSKg6NTA2/1+uOmn+n5jG7J03MudYdFdbtJdN7+y9jwsoul66j7dHclQD7B8ZRa4FWTZJO3AeCHhfcZQxhwEb5uko4OvEhi9jD2o;
45+
Expires=Fri, 01 Jul 2022 16:08:40 GMT; Path=/; SameSite=None; Secure
46+
Strict-Transport-Security:
47+
- max-age=31536000; includeSubDomains
48+
Vary:
49+
- Cookie
50+
X-App-Version:
51+
- v2.7.5
52+
X-Content-Type-Options:
53+
- nosniff
54+
- nosniff
55+
X-Frame-Options:
56+
- DENY
57+
- SAMEORIGIN
58+
X-Secrets-Engine-Version:
59+
- 2.69.0
60+
X-XSS-Protection:
61+
- 1; mode=block
62+
status:
63+
code: 200
64+
message: OK
65+
version: 1

tests/test_client.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -362,12 +362,6 @@ def test_multi_content_scan(
362362
r"file exceeds the maximum allowed size",
363363
id="too large file",
364364
),
365-
pytest.param(
366-
"dwhewe\x00ddw",
367-
ValidationError,
368-
r"document has null characters",
369-
id="invalid type",
370-
),
371365
],
372366
)
373367
def test_content_scan_exceptions(
@@ -437,6 +431,14 @@ def test_content_not_ok():
437431
True,
438432
id="secret with validity",
439433
),
434+
pytest.param(
435+
"document_with_0_bytes",
436+
{"document": "Hello\0World"},
437+
0,
438+
False,
439+
False,
440+
id="Document containing a 0 byte",
441+
),
440442
pytest.param(
441443
"filename",
442444
{"filename": FILENAME, "document": "normal"},

tests/test_models.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,12 @@ def test_document_model(self):
3232
assert isinstance(document.to_dict(), dict)
3333
assert isinstance(str(document), str)
3434

35+
def test_document_handle_0_bytes(self):
36+
document = Document.SCHEMA.load(
37+
{"filename": "name", "document": "hello\0world"}
38+
)
39+
assert document["document"] == "hello\uFFFDworld"
40+
3541
@pytest.mark.parametrize(
3642
"schema_klass, expected_klass, instance_data",
3743
[

0 commit comments

Comments
 (0)