Skip to content

Commit 11a31ce

Browse files
authored
Merge pull request #612 from atlanhq/APP-6435
APP-6435: Added support for `vcrpy` test utilities to mock `HTTP` interactions with 3rd-party `APIs`
2 parents 09d5423 + a13768f commit 11a31ce

16 files changed

+753
-4
lines changed

.github/workflows/pyatlan-pr.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ jobs:
5656

5757
- name: Install dependencies
5858
run: |
59-
python -m pip install --no-cache-dir --upgrade pip
59+
python -m pip install --no-cache-dir --upgrade pip setuptools
6060
if [ -f requirements.txt ]; then pip install --no-cache-dir -r requirements.txt; fi
6161
if [ -f requirements-dev.txt ]; then pip install --no-cache-dir -r requirements-dev.txt; fi
6262
@@ -102,7 +102,7 @@ jobs:
102102

103103
- name: Install dependencies
104104
run: |
105-
python -m pip install --no-cache-dir --upgrade pip
105+
python -m pip install --no-cache-dir --upgrade pip setuptools
106106
if [ -f requirements.txt ]; then pip install --no-cache-dir -r requirements.txt; fi
107107
if [ -f requirements-dev.txt ]; then pip install --no-cache-dir -r requirements-dev.txt; fi
108108

pyatlan/errors.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,17 @@ def __str__(self):
5151
)
5252

5353

54+
class DependencyNotFoundError(Exception):
    """
    Signals that an external dependency required at runtime is not installed.

    Typically raised when a specific feature relies on an optional library
    or plugin that is missing from the current environment.
    """
63+
64+
5465
class ApiConnectionError(AtlanError):
5566
"""Error that occurs when there is an intermittent issue with the API, such as a network outage or an inability
5667
to connect due to an incorrect URL."""

pyatlan/test_utils/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# SPDX-License-Identifier: Apache-2.0
2-
# Copyright 2024 Atlan Pte. Ltd.
2+
# Copyright 2025 Atlan Pte. Ltd.
33
import logging
44
import random
55
from os import path

pyatlan/test_utils/base_vcr.py

Lines changed: 294 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,294 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
# Copyright 2025 Atlan Pte. Ltd.
3+
4+
# ``importlib.metadata`` (standard library since Python 3.8) is used instead of
# the deprecated ``pkg_resources`` API, so these import-time checks do not
# require setuptools to be installed at runtime.
from importlib import metadata as importlib_metadata

from pyatlan.errors import DependencyNotFoundError

# Check if pytest-vcr plugin is installed
try:
    importlib_metadata.version("pytest-vcr")
except importlib_metadata.PackageNotFoundError as exc:
    raise DependencyNotFoundError(
        "pytest-vcr plugin is not installed. Please install pytest-vcr."
    ) from exc

# Check if vcrpy is installed and ensure the version is 6.0.x
try:
    vcr_version = importlib_metadata.version("vcrpy")
except importlib_metadata.PackageNotFoundError as exc:
    raise DependencyNotFoundError(
        "vcrpy version 6.0.x is not installed. Please install vcrpy version 6.0.x."
    ) from exc

# The version comparison lives outside the ``try`` so that only the metadata
# lookup itself is guarded by the ``PackageNotFoundError`` handler.
if not vcr_version.startswith("6.0"):
    raise DependencyNotFoundError(
        f"vcrpy version 6.0.x is required, but found {vcr_version}. Please install the correct version."
    )
27+
28+
import json
29+
import os
30+
from typing import Any, Dict, Union
31+
32+
import pytest
33+
import yaml # type: ignore[import-untyped]
34+
35+
36+
class LiteralBlockScalar(str):
    """Marker ``str`` subclass for text that should be written as a literal
    block scalar: whitespace is preserved and escape characters are not
    interpreted."""
39+
40+
41+
def literal_block_scalar_presenter(dumper, data):
    """Emit ``data`` as a YAML string scalar using the literal block ('|') style"""
    yaml_string_tag = "tag:yaml.org,2002:str"
    return dumper.represent_scalar(yaml_string_tag, data, style="|")
44+
45+
46+
# Register the presenter with PyYAML so that LiteralBlockScalar values are
# dumped through literal_block_scalar_presenter (i.e. with the '|' style).
yaml.add_representer(LiteralBlockScalar, literal_block_scalar_presenter)
47+
48+
49+
def process_string_value(string_value):
    """
    Pretty-prints JSON or returns long strings as a LiteralBlockScalar.

    :param string_value: body content to process; expected to be a ``str``,
        but ``bytes`` and ``None`` are tolerated
    :returns: a ``LiteralBlockScalar`` holding the JSON re-dumped with a
        2-space indent when ``string_value`` parses as JSON; a
        ``LiteralBlockScalar`` wrapping the text when it is a non-JSON ``str``
        longer than 80 characters; otherwise ``string_value`` unchanged
    """
    try:
        json_data = json.loads(string_value)
    except (ValueError, TypeError):
        # Only real text may be wrapped: ``LiteralBlockScalar(bytes)`` would
        # store the ``"b'...'"`` repr instead of the content, and ``len(None)``
        # would raise. Anything that is not a ``str`` is passed through as-is.
        if isinstance(string_value, str) and len(string_value) > 80:
            return LiteralBlockScalar(string_value)
        return string_value
    return LiteralBlockScalar(json.dumps(json_data, indent=2))
58+
59+
60+
def convert_body_to_literal(data):
    """Walks the data recursively and reformats every body string it finds,
    pretty-printing it when it is JSON. Containers are updated in place and
    the same ``data`` object is returned."""
    if isinstance(data, list):
        data[:] = [convert_body_to_literal(element) for element in data]
        return data

    if not isinstance(data, dict):
        return data

    for field, payload in data.items():
        if field != "body":
            convert_body_to_literal(payload)
        elif isinstance(payload, dict) and "string" in payload:
            # Response body shape (e.g: response.body.string)
            payload["string"] = process_string_value(payload["string"])
        elif isinstance(payload, str):
            # Request body shape (e.g: request.body)
            data[field] = process_string_value(payload)
        else:
            convert_body_to_literal(payload)

    return data
80+
81+
82+
class VCRPrettyPrintYamlJSONBody:
    """YAML serializer whose request and response JSON bodies are recorded
    in a more readable form."""

    @staticmethod
    def serialize(cassette_dict):
        """Reformat the body strings in ``cassette_dict`` and dump it as YAML text."""
        readable_cassette = convert_body_to_literal(cassette_dict)
        dump_options = {"default_flow_style": False, "allow_unicode": True}
        return yaml.dump(readable_cassette, **dump_options)

    @staticmethod
    def deserialize(cassette_string):
        """Load the YAML text in ``cassette_string`` with the safe loader."""
        loaded_cassette = yaml.safe_load(cassette_string)
        return loaded_cassette
93+
94+
95+
class VCRPrettyPrintJSONBody:
    """
    Makes request and response JSON body recordings more readable.

    On ``serialize`` a body of the form ``{"string": "<json object>"}`` is
    rewritten as ``{"parsed_json": {...}}``; ``deserialize`` performs the
    reverse conversion. Only bodies that parse to a JSON *object* (``dict``)
    are rewritten; every other body is left untouched.
    """

    @staticmethod
    def _parse_json_body(
        body: Union[str, bytes, None],
    ) -> Union[Dict[str, Any], str, None, bytes]:
        """
        Parse JSON body if possible, otherwise return the original body.

        :param body: raw body; ``bytes`` are decoded as UTF-8 before parsing
        :returns: the parsed JSON value, or ``body`` itself when it is ``None``,
            cannot be decoded as UTF-8, or is not valid JSON
        """
        if body is None:
            return None

        # Convert bytes to string if needed
        if isinstance(body, bytes):
            try:
                body = body.decode("utf-8")
            except UnicodeDecodeError:
                return body  # Return original if can't decode

        # If it's a string, try to parse as JSON
        if isinstance(body, str):
            try:
                return json.loads(body)
            except json.JSONDecodeError:
                return body  # Return original if not valid JSON

        return body  # Return original for other types

    @staticmethod
    def _string_to_parsed_json(message: Dict[str, Any]) -> None:
        """Replace ``message["body"]["string"]`` with a ``parsed_json`` field
        when the string holds a JSON object. Mutates ``message`` in place."""
        body_container = message.get("body")
        if isinstance(body_container, dict) and "string" in body_container:
            parsed_body = VCRPrettyPrintJSONBody._parse_json_body(
                body_container["string"]
            )
            # Only JSON objects are expanded; arrays and scalars stay as strings
            if isinstance(parsed_body, dict):
                message["body"] = {"parsed_json": parsed_body}

    @staticmethod
    def _parsed_json_to_string(message: Dict[str, Any]) -> None:
        """Replace ``message["body"]["parsed_json"]`` with a ``string`` field
        holding the re-encoded JSON. Mutates ``message`` in place."""
        body_container = message.get("body")
        if isinstance(body_container, dict) and "parsed_json" in body_container:
            message["body"] = {"string": json.dumps(body_container["parsed_json"])}

    @staticmethod
    def _transform_bodies(cassette_dict: Dict[str, Any], transform) -> None:
        """Apply ``transform`` to the response and then the request of every
        non-empty interaction in ``cassette_dict``."""
        for interaction in cassette_dict.get("interactions", []) or []:
            if not interaction:
                continue
            for part in ("response", "request"):
                # A missing/empty part is replaced by a throwaway dict so the
                # transform can run unconditionally (and has nothing to change)
                transform(interaction.get(part) or {})

    @staticmethod
    def serialize(cassette_dict: dict) -> str:
        """
        Converts body strings to parsed JSON objects for better readability when possible.

        Note that the request/response dicts inside ``cassette_dict`` are
        modified in place.

        :param cassette_dict: cassette data; a falsy or non-dict value is
            treated as an empty cassette
        :returns: the cassette as a 2-space indented JSON document ending with a newline
        :raises TypeError: if the cassette contains values JSON cannot encode
            (for example binary bodies)
        """
        # Safety check for cassette_dict
        if not cassette_dict or not isinstance(cassette_dict, dict):
            cassette_dict = {}

        VCRPrettyPrintJSONBody._transform_bodies(
            cassette_dict, VCRPrettyPrintJSONBody._string_to_parsed_json
        )

        # Serialize the final dictionary into a JSON string with pretty formatting
        try:
            return json.dumps(cassette_dict, indent=2, ensure_ascii=False) + "\n"
        except TypeError as exc:
            raise TypeError(
                "Does this HTTP interaction contain binary data? "
                "If so, use a different serializer (like the YAML serializer)."
            ) from exc

    @staticmethod
    def deserialize(cassette_string: str) -> dict:
        """
        Deserializes a JSON string into a dictionary and converts
        parsed_json fields back to string fields.

        The body string is produced with ``json.dumps``, so its whitespace may
        differ from the body that was originally recorded.

        :param cassette_string: JSON text of a cassette
        :returns: the cassette dictionary, or ``{}`` when ``cassette_string``
            is empty or is not valid JSON
        """
        # Safety check for cassette_string
        if not cassette_string:
            return {}

        try:
            cassette_dict = json.loads(cassette_string)
        except json.JSONDecodeError:
            return {}

        # Convert parsed_json back to string format
        VCRPrettyPrintJSONBody._transform_bodies(
            cassette_dict, VCRPrettyPrintJSONBody._parsed_json_to_string
        )

        return cassette_dict
205+
206+
207+
class BaseVCR:
    """
    A base class for configuring VCR (Virtual Cassette Recorder)
    for HTTP request/response recording and replaying.

    This class provides pytest fixtures to set up the VCR configuration
    and custom serializers for JSON and YAML formats.
    It also handles cassette directory configuration.
    """

    class VCRRemoveAllHeaders:
        """
        A class responsible for removing all headers from requests and responses.
        This can be useful for scenarios where headers are not needed for matching or comparison
        in VCR (Virtual Cassette Recorder) interactions, such as when recording or replaying HTTP requests.
        """

        @staticmethod
        def remove_all_request_headers(request):
            """Clear every header on ``request`` and return the same object."""
            request.headers = {}
            return request

        @staticmethod
        def remove_all_response_headers(response):
            """Clear the ``"headers"`` entry of the ``response`` dict and return it."""
            response["headers"] = {}
            return response

    # When set to a truthy value, this is returned as the cassette directory
    # instead of the default path derived from the test module name.
    _CASSETTES_DIR = None
    _BASE_CONFIG = {
        # More config options can be found at:
        # https://vcrpy.readthedocs.io/en/latest/configuration.html#configuration
        "record_mode": "once",  # (default: "once"; other modes: "new_episodes", "none", "all")
        "serializer": "pretty-yaml",  # (default: "yaml")
        "decode_compressed_response": True,  # Decode compressed responses
        # (optional) Replace the Authorization request header with "**REDACTED**" in cassettes
        # "filter_headers": [("authorization", "**REDACTED**")],
        "before_record_request": VCRRemoveAllHeaders.remove_all_request_headers,
        "before_record_response": VCRRemoveAllHeaders.remove_all_response_headers,
    }

    @pytest.fixture(scope="module")
    def vcr(self, vcr):
        """
        Registers custom serializers for VCR and returns the VCR instance.

        The method registers two custom serializers:
        - "pretty-json" for pretty-printing JSON responses.
        - "pretty-yaml" for pretty-printing YAML responses.

        :param vcr: The VCR instance provided by the pytest-vcr plugin
        :returns: modified VCR instance with custom serializers registered
        """
        vcr.register_serializer("pretty-json", VCRPrettyPrintJSONBody)
        vcr.register_serializer("pretty-yaml", VCRPrettyPrintYamlJSONBody)
        return vcr

    @pytest.fixture(scope="module")
    def vcr_config(self):
        """
        Provides the VCR configuration dictionary.

        The configuration includes default options for the recording mode,
        serializer, response decoding, and filtering headers.
        This configuration is used to set up VCR behavior during tests.

        :returns: a dictionary with VCR configuration options
        """
        # Return a shallow copy so that a consumer mutating the returned dict
        # cannot alter the class-level defaults shared by all test modules.
        return dict(self._BASE_CONFIG)

    @pytest.fixture(scope="module")
    def vcr_cassette_dir(self, request):
        """
        Provides the directory path for storing VCR cassettes.

        If a custom cassette directory is set in the class, it is used;
        otherwise, the default path is "tests/vcr_cassettes" joined with
        the name of the test module.

        :param request: request object which provides metadata about the test

        :returns: directory path for storing cassettes
        """
        # Set self._CASSETTES_DIR or use the default directory path based on the test module name
        return self._CASSETTES_DIR or os.path.join(
            "tests/vcr_cassettes", request.module.__name__
        )

requirements-dev.txt

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,14 @@
11
mypy~=1.9.0
22
ruff~=0.9.9
3-
types-requests~=2.32.0.20241016
3+
# [PINNED] for Python 3.8 compatibility
4+
# higher versions require urllib3>=2.0
5+
types-requests~=2.31.0.6
6+
types-setuptools~=75.8.0.20250110
47
pytest~=8.3.4
8+
pytest-vcr~=1.0.2
9+
# [PINNED] to v6.0.x since vcrpy>=7.0 requires urllib3>=2.0
10+
# which breaks compatibility with Python 3.8
11+
vcrpy~=6.0.2
512
pytest-order~=1.3.0
613
pytest-timer[termcolor]~=1.0.0
714
pytest-sugar~=1.0.0

tests/unit/conftest.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,3 +31,19 @@ def mock_custom_metadata_cache():
3131
def mock_tag_cache():
3232
with patch("pyatlan.cache.atlan_tag_cache.AtlanTagCache") as cache:
3333
yield cache
34+
35+
36+
@pytest.fixture(autouse=True)
def patch_vcr_http_response_version_string():
    """
    Ensure the VCRHTTPResponse class exposes a version_string attribute.

    The attribute is added (as None) only when it is missing. This avoids
    bumping vcrpy to 7.0.0, which drops support for Python 3.8.
    """
    from vcr.stubs import VCRHTTPResponse  # type: ignore[import-untyped]

    attribute_name = "version_string"
    if not hasattr(VCRHTTPResponse, attribute_name):
        setattr(VCRHTTPResponse, attribute_name, None)

    yield

0 commit comments

Comments
 (0)