Skip to content

Commit db1e36b

Browse files
feat: Add HTTP proxy configuration modules to CDK
- Add airbyte_cdk.utils.http_proxy module with proxy configuration utilities - Add airbyte_cdk.models.http_proxy_config module with Pydantic model - Migrate functionality from airbyte PR #62451 source-file proxy.py - Include comprehensive unit tests for both modules - Update module exports in __init__.py files Co-Authored-By: AJ Steers <[email protected]>
1 parent 5824a5e commit db1e36b

File tree

6 files changed

+414
-1
lines changed

6 files changed

+414
-1
lines changed

airbyte_cdk/models/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@
5757
ConfiguredAirbyteStreamSerializer,
5858
ConnectorSpecificationSerializer,
5959
)
60+
from .http_proxy_config import HttpProxyConfig
6061
from .well_known_types import (
6162
BinaryData,
6263
Boolean,
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
2+
"""HTTP proxy configuration models."""
3+
4+
from typing import Optional
5+
6+
from pydantic.v1 import BaseModel, Field
7+
8+
9+
class HttpProxyConfig(BaseModel):
    """Configuration model for HTTP proxy settings."""

    # NOTE: the class docstring above is kept verbatim on purpose; pydantic v1
    # copies a model's docstring into the generated JSON schema.

    class Config:
        title = "HTTP Proxy Configuration"
        # NOTE(review): pydantic v1 appears to take the schema description from
        # the class docstring rather than from this attribute; confirm that
        # something actually consumes `Config.description`.
        description = "Configuration for routing HTTP requests through a proxy server"

    # Required: the address of the proxy that requests should be routed through.
    proxy_url: str = Field(
        default=...,
        title="Proxy URL",
        description="The URL of the HTTP proxy server to use for requests",
        examples=["http://proxy.example.com:8080", "https://proxy.example.com:8080"],
    )

    # Optional PEM text; flagged with `airbyte_secret` in the generated schema.
    proxy_ca_certificate: Optional[str] = Field(
        default=None,
        title="Proxy CA Certificate",
        description="Custom CA certificate for the proxy server in PEM format",
        airbyte_secret=True,
    )

airbyte_cdk/utils/__init__.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,16 @@
22
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
33
#
44

5+
from .http_proxy import configure_custom_http_proxy
56
from .is_cloud_environment import is_cloud_environment
67
from .print_buffer import PrintBuffer
78
from .schema_inferrer import SchemaInferrer
89
from .traced_exception import AirbyteTracedException
910

10-
__all__ = ["AirbyteTracedException", "SchemaInferrer", "is_cloud_environment", "PrintBuffer"]
11+
__all__ = [
12+
"AirbyteTracedException",
13+
"SchemaInferrer",
14+
"is_cloud_environment",
15+
"PrintBuffer",
16+
"configure_custom_http_proxy",
17+
]

airbyte_cdk/utils/http_proxy.py

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
2+
"""HTTP proxy configuration utilities."""
3+
4+
import os
5+
import tempfile
6+
from logging import Logger
7+
from pathlib import Path
8+
from typing import Optional
9+
10+
# Keys used to look up proxy settings in a configuration mapping.
# NOTE(review): PROXY_PARENT_CONFIG_KEY is presumably the key under which the
# proxy settings are nested in a connector config; it is not referenced by the
# code visible in this module.
PROXY_PARENT_CONFIG_KEY = "http_proxy"
PROXY_URL_CONFIG_KEY = "proxy_url"
PROXY_CA_CERTIFICATE_CONFIG_KEY = "proxy_ca_certificate"


# Hosts that must always bypass the custom proxy; merged into NO_PROXY.
#
# Each "*.domain" wildcard is paired with a leading-dot ".domain" entry:
# Python's urllib and requests compare NO_PROXY entries as plain host suffixes
# and do not expand "*", so the wildcard form alone never matches a subdomain
# there. The wildcard spellings are kept for tools that do understand them.
AIRBYTE_NO_PROXY_ENTRIES = [
    "localhost",
    "127.0.0.1",
    "*.local",
    ".local",
    "169.254.169.254",
    "metadata.google.internal",
    "*.airbyte.io",
    ".airbyte.io",
    "*.airbyte.com",
    ".airbyte.com",
    "connectors.airbyte.com",
    "sentry.io",
    "api.segment.io",
    "*.sentry.io",
    ".sentry.io",
    "*.datadoghq.com",
    ".datadoghq.com",
    "app.datadoghq.com",
]
30+
31+
32+
def _get_no_proxy_entries_from_env_var() -> list[str]:
    """Split the ``NO_PROXY`` environment variable into its entries.

    The value is split on commas; surrounding whitespace is stripped from each
    piece and empty pieces are discarded. An unset variable yields ``[]``.
    """
    raw_value = os.environ.get("NO_PROXY")
    if raw_value is None:
        return []

    trimmed = (piece.strip() for piece in raw_value.split(","))
    return [piece for piece in trimmed if piece]
38+
39+
40+
def _get_no_proxy_string() -> str:
    """Return the value to assign to the ``NO_PROXY`` environment variable.

    Entries already present in ``NO_PROXY`` come first, followed by the
    Airbyte defaults from ``AIRBYTE_NO_PROXY_ENTRIES``. Duplicates and empty
    entries are dropped, and the result is comma-joined.

    Requests to the listed hosts are meant to bypass the proxy.
    """
    combined = _get_no_proxy_entries_from_env_var() + AIRBYTE_NO_PROXY_ENTRIES
    # dict.fromkeys de-duplicates while preserving first-seen order, so the
    # resulting string is identical from run to run (a set's iteration order
    # for strings changes with hash randomisation).
    unique_entries = dict.fromkeys(entry for entry in combined if entry)
    return ",".join(unique_entries)
51+
52+
53+
def _install_ca_certificate(ca_cert_file_text: str) -> Path:
    """Write the proxy's CA certificate to disk and point TLS tooling at it.

    The PEM text is stored in a new temporary file that is deliberately not
    deleted on close, and ``REQUESTS_CA_BUNDLE``, ``CURL_CA_BUNDLE`` and
    ``SSL_CERT_FILE`` are all set to that file's name.

    NOTE(review): clients honouring these variables typically use the file
    instead of their default trust store - confirm the supplied PEM covers
    every host the connector needs to reach.

    Returns:
        Absolute path of the temporary certificate file.
    """
    cert_file = tempfile.NamedTemporaryFile(
        mode="w",
        encoding="utf-8",
        prefix="airbyte-custom-ca-cert-",
        suffix=".pem",
        delete=False,  # the file must outlive this function
    )
    with cert_file:
        cert_file.write(ca_cert_file_text)
        cert_file.flush()

    bundle_location = cert_file.name
    for env_var in ("REQUESTS_CA_BUNDLE", "CURL_CA_BUNDLE", "SSL_CERT_FILE"):
        os.environ[env_var] = bundle_location

    return Path(bundle_location).absolute()
76+
77+
78+
def _redact_proxy_credentials(proxy_url: str) -> str:
    """Return ``proxy_url`` with any ``user[:password]@`` prefix masked for logging."""
    scheme, separator, remainder = proxy_url.partition("://")
    if not separator:
        # No scheme present: treat the whole string as the authority part.
        scheme, remainder = "", proxy_url

    # Everything before the last "@" is treated as credentials.
    _, at_sign, host_and_rest = remainder.rpartition("@")
    if not at_sign:
        return proxy_url
    return f"{scheme}{separator}***@{host_and_rest}"


def configure_custom_http_proxy(
    http_proxy_config: Optional[dict[str, str]],
    *,
    logger: Logger,
    proxy_url: Optional[str] = None,
    ca_cert_file_text: Optional[str] = None,
) -> None:
    """Initialize the proxy environment variables.

    If http_proxy_config is provided and contains proxy configuration settings,
    this config will be used to configure the proxy.

    If proxy_url and/or ca_cert_file_text are provided, they will override the values in
    http_proxy_config.

    The function will no-op if neither input option provides a proxy URL.

    Args:
        http_proxy_config: Mapping that may contain the ``proxy_url`` and
            ``proxy_ca_certificate`` keys. ``None`` is treated as empty.
        logger: Logger that receives informational messages about the setup.
        proxy_url: Explicit proxy URL; takes precedence over the mapping.
        ca_cert_file_text: Explicit PEM text of the proxy's CA certificate;
            takes precedence over the mapping.

    Side effects (only when a proxy URL is available):
        Sets ``NO_PROXY``, ``HTTP_PROXY`` and ``HTTPS_PROXY`` in ``os.environ``
        and, when a certificate is supplied, installs it via
        ``_install_ca_certificate``.
    """
    config = http_proxy_config or {}
    proxy_url = proxy_url or config.get(PROXY_URL_CONFIG_KEY)
    ca_cert_file_text = ca_cert_file_text or config.get(PROXY_CA_CERTIFICATE_CONFIG_KEY)

    if not proxy_url:
        return

    # The URL may embed credentials; never write those to the log.
    logger.info(f"Using custom proxy URL: {_redact_proxy_credentials(proxy_url)}")

    if ca_cert_file_text:
        cert_file_path = _install_ca_certificate(ca_cert_file_text)
        logger.info(f"Using custom installed CA certificate: {cert_file_path!s}")

    # NOTE(review): only the upper-case variables are written. Python's urllib
    # prefers lower-case `http_proxy`/`https_proxy`/`no_proxy` when both are
    # set - confirm no lower-case values exist in the runtime environment.
    os.environ["NO_PROXY"] = _get_no_proxy_string()
    os.environ["HTTP_PROXY"] = proxy_url
    os.environ["HTTPS_PROXY"] = proxy_url
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
2+
3+
import pytest
4+
from pydantic.v1 import ValidationError
5+
6+
from airbyte_cdk.models.http_proxy_config import HttpProxyConfig
7+
8+
9+
class TestHttpProxyConfig:
    """Unit tests for the ``HttpProxyConfig`` pydantic model."""

    def test_valid_config_with_required_fields_only(self) -> None:
        """Only ``proxy_url`` is required; the certificate defaults to ``None``."""
        config = HttpProxyConfig(proxy_url="http://proxy.example.com:8080")

        assert config.proxy_url == "http://proxy.example.com:8080"
        assert config.proxy_ca_certificate is None

    def test_valid_config_with_all_fields(self) -> None:
        """Both fields are stored unchanged when supplied."""
        test_cert = "-----BEGIN CERTIFICATE-----\ntest certificate\n-----END CERTIFICATE-----"
        config = HttpProxyConfig(
            proxy_url="https://proxy.example.com:8080", proxy_ca_certificate=test_cert
        )

        assert config.proxy_url == "https://proxy.example.com:8080"
        assert config.proxy_ca_certificate == test_cert

    def test_missing_required_proxy_url(self) -> None:
        """Omitting ``proxy_url`` raises a single 'missing' validation error."""
        with pytest.raises(ValidationError) as exc_info:
            HttpProxyConfig()

        errors = exc_info.value.errors()
        assert len(errors) == 1
        assert errors[0]["loc"] == ("proxy_url",)
        assert errors[0]["type"] == "value_error.missing"

    def test_empty_proxy_url(self) -> None:
        """An empty string is accepted as-is; the model does not validate URLs."""
        config = HttpProxyConfig(proxy_url="")
        assert config.proxy_url == ""

    def test_serialization(self) -> None:
        """``dict()`` returns both fields."""
        test_cert = "-----BEGIN CERTIFICATE-----\ntest certificate\n-----END CERTIFICATE-----"
        config = HttpProxyConfig(
            proxy_url="https://proxy.example.com:8080", proxy_ca_certificate=test_cert
        )

        serialized = config.dict()
        expected = {
            "proxy_url": "https://proxy.example.com:8080",
            "proxy_ca_certificate": test_cert,
        }
        assert serialized == expected

    def test_serialization_exclude_none(self) -> None:
        """``exclude_none=True`` drops the unset certificate."""
        config = HttpProxyConfig(proxy_url="http://proxy.example.com:8080")

        serialized = config.dict(exclude_none=True)
        expected = {"proxy_url": "http://proxy.example.com:8080"}
        assert serialized == expected

    def test_json_serialization(self) -> None:
        """``json()`` emits both fields, with ``null`` for the unset certificate."""
        import json

        config = HttpProxyConfig(proxy_url="http://proxy.example.com:8080")

        # Parse the output rather than matching substrings, so the test does
        # not depend on the serializer's whitespace or key separators.
        parsed = json.loads(config.json())
        assert parsed == {
            "proxy_url": "http://proxy.example.com:8080",
            "proxy_ca_certificate": None,
        }

    def test_from_dict(self) -> None:
        """The model can be built by unpacking a plain dict."""
        data = {"proxy_url": "https://proxy.example.com:8080", "proxy_ca_certificate": "test-cert"}

        config = HttpProxyConfig(**data)
        assert config.proxy_url == "https://proxy.example.com:8080"
        assert config.proxy_ca_certificate == "test-cert"

    def test_schema_generation(self) -> None:
        """The JSON schema exposes both properties with their titles and flags."""
        schema = HttpProxyConfig.schema()

        assert schema["type"] == "object"
        assert "proxy_url" in schema["properties"]
        assert "proxy_ca_certificate" in schema["properties"]

        proxy_url_prop = schema["properties"]["proxy_url"]
        assert proxy_url_prop["type"] == "string"
        assert proxy_url_prop["title"] == "Proxy URL"

        ca_cert_prop = schema["properties"]["proxy_ca_certificate"]
        assert ca_cert_prop["type"] == "string"
        assert ca_cert_prop["title"] == "Proxy CA Certificate"
        assert ca_cert_prop.get("airbyte_secret") is True

    def test_config_class_attributes(self) -> None:
        """The nested ``Config`` class carries the expected title and description."""
        config_class = HttpProxyConfig.Config
        assert config_class.title == "HTTP Proxy Configuration"
        assert (
            config_class.description
            == "Configuration for routing HTTP requests through a proxy server"
        )

0 commit comments

Comments
 (0)