Skip to content

Commit 0c54209

Browse files
authored
Merge pull request #55 from GitGuardian/agateau/read-metadata
Add method to read server metadata
2 parents aba7be3 + 6b193a1 commit 0c54209

File tree

3 files changed

+76
-23
lines changed

3 files changed

+76
-23
lines changed
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
### Added
2+
3+
- Added `GGClient.read_metadata()` to read metadata from the server. The secret scan preferences it contains are stored in a new `GGClient.secret_scan_preferences` attribute and are applied to subsequent secret scan calls.

pygitguardian/client.py

Lines changed: 36 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,7 @@
99
import requests
1010
from requests import Response, Session, codes
1111

12-
from .config import (
13-
DEFAULT_API_VERSION,
14-
DEFAULT_BASE_URI,
15-
DEFAULT_TIMEOUT,
16-
MULTI_DOCUMENT_LIMIT,
17-
)
12+
from .config import DEFAULT_API_VERSION, DEFAULT_BASE_URI, DEFAULT_TIMEOUT
1813
from .iac_models import (
1914
IaCScanParameters,
2015
IaCScanParametersSchema,
@@ -29,6 +24,8 @@
2924
MultiScanResult,
3025
QuotaResponse,
3126
ScanResult,
27+
SecretScanPreferences,
28+
ServerMetadata,
3229
)
3330

3431

@@ -121,6 +118,7 @@ class GGClient:
121118
timeout: Optional[float]
122119
user_agent: str
123120
extra_headers: Dict
121+
secret_scan_preferences: SecretScanPreferences
124122

125123
def __init__(
126124
self,
@@ -178,6 +176,7 @@ def __init__(
178176
"Authorization": f"Token {api_key}",
179177
},
180178
)
179+
self.secret_scan_preferences = SecretScanPreferences()
181180

182181
def request(
183182
self,
@@ -308,6 +307,9 @@ def content_scan(
308307
doc_dict["filename"] = filename
309308

310309
request_obj = Document.SCHEMA.load(doc_dict)
310+
Document.SCHEMA.validate_size(
311+
request_obj, self.secret_scan_preferences.maximum_document_size
312+
)
311313

312314
resp = self.post(
313315
endpoint="scan",
@@ -344,16 +346,22 @@ def multi_content_scan(
344346
:param ignore_known_secrets: indicates whether known secrets should be ignored
345347
:return: Detail or ScanResult response and status code
346348
"""
347-
if len(documents) > MULTI_DOCUMENT_LIMIT:
349+
max_documents = self.secret_scan_preferences.maximum_documents_per_scan
350+
if len(documents) > max_documents:
348351
raise ValueError(
349-
f"too many documents submitted for scan (max={MULTI_DOCUMENT_LIMIT})"
352+
f"too many documents submitted for scan (max={max_documents})"
350353
)
351354

352355
if all(isinstance(doc, dict) for doc in documents):
353356
request_obj = Document.SCHEMA.load(documents, many=True)
354357
else:
355358
raise TypeError("each document must be a dict")
356359

360+
for document in request_obj:
361+
Document.SCHEMA.validate_size(
362+
document, self.secret_scan_preferences.maximum_document_size
363+
)
364+
357365
params = (
358366
{"ignore_known_secrets": ignore_known_secrets}
359367
if ignore_known_secrets
@@ -472,3 +480,23 @@ def iac_directory_scan(
472480
result.status_code = resp.status_code
473481

474482
return result
483+
484+
def read_metadata(self) -> Optional[Detail]:
    """
    Query the `metadata` endpoint and remember the server's secret scan
    preferences.

    On success, the preferences carried by the response replace
    `self.secret_scan_preferences`, which the secret scan methods read to
    get their limits.

    Note that the call fails if the API key is not valid.

    :return: None on success, otherwise a Detail instance describing the error
    """
    response = self.get("metadata")

    if is_ok(response):
        server_metadata = ServerMetadata.SCHEMA.load(response.json())
        self.secret_scan_preferences = server_metadata.secret_scan_preferences
        return None

    # Error path: attach the HTTP status to the Detail built from the response.
    error = load_detail(response)
    error.status_code = response.status_code
    return error

pygitguardian/models.py

Lines changed: 37 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
1+
from dataclasses import dataclass, field
12
from datetime import date, datetime
23
from typing import Any, ClassVar, Dict, List, Optional, cast
34
from uuid import UUID
45

6+
import marshmallow_dataclass
57
from marshmallow import (
68
EXCLUDE,
79
Schema,
@@ -10,10 +12,9 @@
1012
post_load,
1113
pre_load,
1214
validate,
13-
validates,
1415
)
1516

16-
from .config import DOCUMENT_SIZE_THRESHOLD_BYTES
17+
from .config import DOCUMENT_SIZE_THRESHOLD_BYTES, MULTI_DOCUMENT_LIMIT
1718

1819

1920
class BaseSchema(Schema):
@@ -25,8 +26,8 @@ class Meta:
2526
class Base:
2627
SCHEMA: ClassVar[BaseSchema]
2728

28-
def __init__(self) -> None:
29-
self.status_code: Optional[int] = None
29+
def __init__(self, status_code: Optional[int] = None) -> None:
30+
self.status_code = status_code
3031

3132
def to_json(self) -> str:
3233
"""
@@ -52,17 +53,18 @@ class DocumentSchema(BaseSchema):
5253
filename = fields.String(validate=validate.Length(max=256), allow_none=True)
5354
document = fields.String(required=True)
5455

55-
@validates("document")
56-
def validate_document(self, document: str) -> None:
57-
"""
58-
validate that document is smaller than scan limit
56+
@staticmethod
57+
def validate_size(document: Dict[str, Any], maximum_size: int) -> None:
58+
"""Raises a ValidationError if the content of the document is longer than
59+
`maximum_size`.
60+
61+
This is not implemented as a Marshmallow validator because the maximum size can
62+
vary.
5963
"""
60-
encoded = document.encode("utf-8", errors="replace")
61-
if len(encoded) > DOCUMENT_SIZE_THRESHOLD_BYTES:
64+
encoded = document["document"].encode("utf-8", errors="replace")
65+
if len(encoded) > maximum_size:
6266
raise ValidationError(
63-
"file exceeds the maximum allowed size of {}B".format(
64-
DOCUMENT_SIZE_THRESHOLD_BYTES
65-
)
67+
f"file exceeds the maximum allowed size of {maximum_size}B"
6668
)
6769

6870
@post_load
@@ -130,8 +132,8 @@ class Detail(Base):
130132

131133
SCHEMA = DetailSchema()
132134

133-
def __init__(self, detail: str, **kwargs: Any) -> None:
134-
super().__init__()
135+
def __init__(self, detail: str, status_code: Optional[int] = None, **kwargs: Any) -> None:
136+
super().__init__(status_code=status_code)
135137
self.detail = detail
136138

137139
def __repr__(self) -> str:
@@ -620,3 +622,23 @@ def __repr__(self) -> str:
620622
self.secrets_engine_version or "",
621623
)
622624
)
625+
626+
627+
@dataclass
class SecretScanPreferences:
    """Limits the client enforces before submitting documents to a secret scan.

    Both fields default to the constants declared in the package configuration.
    """

    # Upper bound, in bytes, for the content of a single document.
    maximum_document_size: int = DOCUMENT_SIZE_THRESHOLD_BYTES

    # Upper bound on the number of documents sent in one scan request.
    maximum_documents_per_scan: int = MULTI_DOCUMENT_LIMIT
631+
632+
633+
@dataclass
class ServerMetadata(Base):
    """Metadata describing the server, loaded through `ServerMetadata.SCHEMA`.

    `secret_scan_preferences` falls back to a default `SecretScanPreferences`
    instance when no value is supplied.
    """

    version: str
    preferences: Dict[str, Any]
    secret_scan_preferences: SecretScanPreferences = field(
        default_factory=SecretScanPreferences
    )

    def __post_init__(self) -> None:
        # The __init__ generated by @dataclass does not call Base.__init__,
        # so run it here; otherwise `status_code` would never be set and
        # reading it on an instance would raise AttributeError.
        super().__init__()


# Build the Marshmallow schema from the dataclass definition and register an
# instance of it on the class, like the other models do with `SCHEMA`.
ServerMetadata.SCHEMA = marshmallow_dataclass.class_schema(
    ServerMetadata, base_schema=BaseSchema
)()

0 commit comments

Comments
 (0)