diff --git a/src/snowflake/connector/connection.py b/src/snowflake/connector/connection.py index ed8dca94f0..c904fa87a3 100644 --- a/src/snowflake/connector/connection.py +++ b/src/snowflake/connector/connection.py @@ -1082,6 +1082,15 @@ def connect(self, **kwargs) -> None: self._crl_config: CRLConfig = CRLConfig.from_connection(self) + no_proxy_csv_str = ( + ",".join(str(x) for x in self.no_proxy) + if ( + self.no_proxy is not None + and isinstance(self.no_proxy, Iterable) + and not isinstance(self.no_proxy, (str, bytes)) + ) + else self.no_proxy + ) self._http_config = HttpConfig( adapter_factory=ProxySupportAdapterFactory(), use_pooling=(not self.disable_request_pooling), @@ -1089,15 +1098,7 @@ def connect(self, **kwargs) -> None: proxy_port=self.proxy_port, proxy_user=self.proxy_user, proxy_password=self.proxy_password, - no_proxy=( - ",".join(str(x) for x in self.no_proxy) - if ( - self.no_proxy is not None - and isinstance(self.no_proxy, Iterable) - and not isinstance(self.no_proxy, (str, bytes)) - ) - else self.no_proxy - ), + no_proxy=no_proxy_csv_str, ) self._session_manager = SessionManagerFactory.get_manager(self._http_config) diff --git a/src/snowflake/connector/session_manager.py b/src/snowflake/connector/session_manager.py index 773d03461b..f8b9e96ca0 100644 --- a/src/snowflake/connector/session_manager.py +++ b/src/snowflake/connector/session_manager.py @@ -228,7 +228,7 @@ def close(self) -> None: self._idle_sessions.clear() -class _ConfigDirectAccessMixin(abc.ABC): +class _BaseConfigDirectAccessMixin(abc.ABC): @property @abc.abstractmethod def config(self) -> HttpConfig: ... @@ -245,14 +245,6 @@ def use_pooling(self) -> bool: def use_pooling(self, value: bool) -> None: self.config = self.config.copy_with(use_pooling=value) - @property - def adapter_factory(self) -> Callable[..., HTTPAdapter]: - return self.config.adapter_factory - - @adapter_factory.setter - def adapter_factory(self, value: Callable[..., HTTPAdapter]) -> None: - self.config = self.config.copy_with(adapter_factory=value) - @property def max_retries(self) -> Retry | int: return self.config.max_retries @@ -262,6 +254,16 @@ def max_retries(self, value: Retry | int) -> None: self.config = self.config.copy_with(max_retries=value) +class _HttpConfigDirectAccessMixin(_BaseConfigDirectAccessMixin): + @property + def adapter_factory(self) -> Callable[..., HTTPAdapter]: + return self.config.adapter_factory + + @adapter_factory.setter + def adapter_factory(self, value: Callable[..., HTTPAdapter]) -> None: + self.config = self.config.copy_with(adapter_factory=value) + + class _RequestVerbsUsingSessionMixin(abc.ABC): """ Mixin that provides HTTP methods (get, post, put, etc.) mirroring requests.Session, maintaining their default argument behavior (e.g., HEAD uses allow_redirects=False). @@ -372,7 +374,7 @@ def delete( return session.delete(url, headers=headers, timeout=timeout, **kwargs) -class SessionManager(_RequestVerbsUsingSessionMixin, _ConfigDirectAccessMixin): +class SessionManager(_RequestVerbsUsingSessionMixin, _HttpConfigDirectAccessMixin): """ Central HTTP session manager that handles all external requests from the Snowflake driver. @@ -562,7 +564,8 @@ def clone( Optional kwargs (e.g. *use_pooling* / *adapter_factory* / max_retries etc.) - overrides to create a modified copy of the HttpConfig before instantiation. """ - return SessionManager.from_config(self._cfg, **http_config_overrides) + # return SessionManager.from_config(self._cfg, **http_config_overrides) + return self.from_config(self._cfg, **http_config_overrides) def __getstate__(self): state = self.__dict__.copy() @@ -612,6 +615,10 @@ class ProxySessionManager(SessionManager): def make_session(self, *, url: str | None = None) -> Session: session = requests.Session() self._mount_adapters(session) + + # TODO: adding this makes all proxies "not see the requests" - but they actually go since we can see MATCHED-stub id + # session.trust_env = False + proxies = ( { "no_proxy": self._cfg.no_proxy, @@ -626,12 +633,6 @@ def make_session(self, *, url: str | None = None) -> Session: session.proxies = proxies return session - def clone( - self, - **http_config_overrides, - ) -> SessionManager: - return ProxySessionManager.from_config(self._cfg, **http_config_overrides) - class SessionManagerFactory: @staticmethod diff --git a/test/test_utils/cross_module_fixtures/wiremock_fixtures.py b/test/test_utils/cross_module_fixtures/wiremock_fixtures.py index d6330850e2..9421bf9280 100644 --- a/test/test_utils/cross_module_fixtures/wiremock_fixtures.py +++ b/test/test_utils/cross_module_fixtures/wiremock_fixtures.py @@ -12,6 +12,7 @@ WiremockClient, get_clients_for_proxy_and_target, get_clients_for_proxy_target_and_storage, + get_clients_for_two_proxies_and_target, ) @@ -102,3 +103,23 @@ def wiremock_backend_storage_proxy(wiremock_generic_mappings_dir): proxy_mapping_template=wiremock_proxy_mapping_path ) as triple: yield triple + + +@pytest.fixture +def wiremock_two_proxies_backend(wiremock_generic_mappings_dir): + """Starts backend (DB) and two proxy Wiremocks. + + Returns a tuple ``(backend_wm, proxy1_wm, proxy2_wm)`` to make roles explicit. + - proxy1_wm: Configured to forward to backend + - proxy2_wm: Configured to forward to backend + + Use when you need to test proxy selection logic with simple setup, + such as connection parameters taking precedence over environment variables. + """ + wiremock_proxy_mapping_path = ( + wiremock_generic_mappings_dir / "proxy_forward_all.json" + ) + with get_clients_for_two_proxies_and_target( + proxy_mapping_template=wiremock_proxy_mapping_path + ) as triple: + yield triple diff --git a/test/test_utils/wiremock/wiremock_utils.py b/test/test_utils/wiremock/wiremock_utils.py index 03fbe0adca..7a7b28bdce 100644 --- a/test/test_utils/wiremock/wiremock_utils.py +++ b/test/test_utils/wiremock/wiremock_utils.py @@ -388,3 +388,55 @@ def get_clients_for_proxy_target_and_storage( forbidden = [target_wm.wiremock_http_port, proxy_wm.wiremock_http_port] with WiremockClient(forbidden_ports=forbidden) as storage_wm: yield target_wm, storage_wm, proxy_wm + + +@contextmanager +def get_clients_for_two_proxies_and_target( + proxy_mapping_template: Union[str, dict, pathlib.Path, None] = None, + additional_proxy_placeholders: Optional[dict[str, object]] = None, + additional_proxy_args: Optional[Iterable[str]] = None, +): + """Context manager that starts three Wiremock instances – one *target* (DB) and two *proxies*. + + Both proxies are configured to forward all traffic to *target* using the same + mapping mechanism. This allows the test to verify which proxy was actually used + by checking the request history. + + Yields a tuple ``(target_wm, proxy1_wm, proxy2_wm)`` where: + - target_wm: The backend/DB Wiremock + - proxy1_wm: First proxy configured to forward to target + - proxy2_wm: Second proxy configured to forward to target + + All processes are shut down automatically on context exit. + + Note: + Use this helper for tests that need to verify proxy selection logic, + such as connection parameters taking precedence over environment variables. + """ + # Reuse existing helper to set up target+proxy1 + with get_clients_for_proxy_and_target( + proxy_mapping_template=proxy_mapping_template, + additional_proxy_placeholders=additional_proxy_placeholders, + additional_proxy_args=additional_proxy_args, + ) as (target_wm, proxy1_wm): + # Start second proxy and configure it to forward to target as well + forbidden = [target_wm.wiremock_http_port, proxy1_wm.wiremock_http_port] + with WiremockClient(forbidden_ports=forbidden) as proxy2_wm: + # Configure proxy2 to forward to target with the same mapping + if proxy_mapping_template is None: + proxy_mapping_template = ( + pathlib.Path(__file__).parent.parent.parent.parent + / "test" + / "data" + / "wiremock" + / "mappings" + / "generic" + / "proxy_forward_all.json" + ) + placeholders: dict[str, object] = { + "{{TARGET_HTTP_HOST_WITH_PORT}}": target_wm.http_host_with_port + } + if additional_proxy_placeholders: + placeholders.update(additional_proxy_placeholders) + proxy2_wm.add_mapping(proxy_mapping_template, placeholders=placeholders) + yield target_wm, proxy1_wm, proxy2_wm diff --git a/test/unit/test_proxies.py b/test/unit/test_proxies.py index 1624f6187d..ea1627e8d4 100644 --- a/test/unit/test_proxies.py +++ b/test/unit/test_proxies.py @@ -234,30 +234,14 @@ def _setup_backend_storage_mappings( wiremock_mapping_dir, wiremock_generic_mappings_dir, ): - password_mapping = wiremock_mapping_dir / "auth/password/successful_flow.json" - multi_chunk_request_mapping = ( - wiremock_mapping_dir / "queries/select_large_request_successful.json" - ) - chunk_1_mapping = wiremock_mapping_dir / "queries/chunk_1.json" - chunk_2_mapping = wiremock_mapping_dir / "queries/chunk_2.json" - disconnect_mapping = ( - wiremock_generic_mappings_dir / "snowflake_disconnect_successful.json" - ) - telemetry_mapping = wiremock_generic_mappings_dir / "telemetry.json" - - target_wm.import_mapping_with_default_placeholders(password_mapping) - target_wm.add_mapping(disconnect_mapping) - target_wm.add_mapping(telemetry_mapping) - target_wm.add_mapping( - multi_chunk_request_mapping, - placeholders={ - "{{STORAGE_WIREMOCK_HTTP_HOST_WITH_PORT}}": storage_wm.http_host_with_port - }, + """Setup backend, storage, and proxy mappings for large queries.""" + _set_mappings_for_common_backend(target_wm, wiremock_generic_mappings_dir) + _set_mappings_for_query_and_chunks( + target_wm, + wiremock_mapping_dir, + storage_or_target_wm=storage_wm, ) - storage_wm.add_mapping_with_default_placeholders(chunk_1_mapping) - storage_wm.add_mapping_with_default_placeholders(chunk_2_mapping) - proxy_wm.add_mapping( { "request": {"method": "ANY", "urlPathPattern": "/amazonaws/.*"}, @@ -318,13 +302,61 @@ def _apply_no_proxy(no_proxy_source, no_proxy_value, connect_kwargs): ) +def _set_mappings_for_common_backend(target_wm, wiremock_generic_mappings_dir): + """Set common backend mappings: auth, disconnect, and telemetry.""" + password_mapping = ( + wiremock_generic_mappings_dir.parent / "auth/password/successful_flow.json" + ) + disconnect_mapping = ( + wiremock_generic_mappings_dir / "snowflake_disconnect_successful.json" + ) + telemetry_mapping = wiremock_generic_mappings_dir / "telemetry.json" + + target_wm.import_mapping_with_default_placeholders(password_mapping) + target_wm.add_mapping(disconnect_mapping) + target_wm.add_mapping(telemetry_mapping) + + +def _set_mappings_for_query_and_chunks( + target_wm, + wiremock_mapping_dir, + storage_or_target_wm=None, +): + """Set multi-chunk query mapping and chunk mappings. + + Args: + target_wm: The target/backend Wiremock client + wiremock_mapping_dir: Path to wiremock mappings directory + storage_or_target_wm: Optional storage Wiremock client. If not provided, chunks are added to target_wm. + """ + if storage_or_target_wm is None: + storage_or_target_wm = target_wm + + multi_chunk_request_mapping = ( + wiremock_mapping_dir / "queries/select_large_request_successful.json" + ) + chunk_1_mapping = wiremock_mapping_dir / "queries/chunk_1.json" + chunk_2_mapping = wiremock_mapping_dir / "queries/chunk_2.json" + + target_wm.add_mapping( + multi_chunk_request_mapping, + placeholders={ + "{{STORAGE_WIREMOCK_HTTP_HOST_WITH_PORT}}": storage_or_target_wm.http_host_with_port + }, + ) + + storage_or_target_wm.add_mapping_with_default_placeholders(chunk_1_mapping) + storage_or_target_wm.add_mapping_with_default_placeholders(chunk_2_mapping) + + def _execute_large_query(connect_kwargs, row_count: int): with snowflake.connector.connect(**connect_kwargs) as conn: cursors = conn.execute_string( f"select seq4() as n from table(generator(rowcount => {row_count}));" ) assert len(cursors[0]._result_set.batches) > 1 - assert list(cursors[0]) + rs = list(cursors[0]) + assert rs @pytest.fixture @@ -408,6 +440,46 @@ def _collect_db_request_flags_only(proxy_wm, target_wm) -> DbRequestFlags: return DbRequestFlags(proxy_saw_db=proxy_saw_db, target_saw_db=target_saw_db) +class ProxyPrecedenceFlags(NamedTuple): + proxy1_saw_request: bool + proxy2_saw_request: bool + backend_saw_request: bool + + +def _collect_proxy_precedence_flags( + proxy1_wm, proxy2_wm, target_wm +) -> ProxyPrecedenceFlags: + """Collect flags for proxy precedence tests to see which proxy was used.""" + proxy1_reqs = requests.get( + f"{proxy1_wm.http_host_with_port}/__admin/requests" + ).json() + proxy2_reqs = requests.get( + f"{proxy2_wm.http_host_with_port}/__admin/requests" + ).json() + target_reqs = requests.get( + f"{target_wm.http_host_with_port}/__admin/requests" + ).json() + + proxy1_saw_request = any( + "/queries/v1/query-request" in r["request"]["url"] + for r in proxy1_reqs["requests"] + ) + proxy2_saw_request = any( + "/queries/v1/query-request" in r["request"]["url"] + for r in proxy2_reqs["requests"] + ) + backend_saw_request = any( + "/queries/v1/query-request" in r["request"]["url"] + for r in target_reqs["requests"] + ) + + return ProxyPrecedenceFlags( + proxy1_saw_request=proxy1_saw_request, + proxy2_saw_request=proxy2_saw_request, + backend_saw_request=backend_saw_request, + ) + + @pytest.mark.skipolddriver @pytest.mark.parametrize("no_proxy_source", ["param", "env"]) def test_no_proxy_bypass_storage( @@ -697,3 +769,70 @@ def test_no_proxy_bypass_backend_and_storage_param_only( assert flags.proxy_saw_db is False assert flags.storage_saw_storage assert flags.proxy_saw_storage is False + + +@pytest.mark.skipolddriver +def test_proxy_env_vars_take_precedence_over_connection_params( + wiremock_two_proxies_backend, + wiremock_mapping_dir, + wiremock_generic_mappings_dir, + proxy_env_vars, + monkeypatch, +): + """Verify that proxy_host/proxy_port connection parameters take precedence over env vars. + + Setup: + - Set HTTP_PROXY env var to point to proxy_from_env_vars + - Set proxy_host param to point to proxy_from_conn_params + + Expected outcome: + - proxy_from_conn_params should see the request (params take precedence) + - proxy_from_env_vars should NOT see the request + - backend should see the request + """ + target_wm, proxy_from_conn_params, proxy_from_env_vars = ( + wiremock_two_proxies_backend + ) + + # Setup backend mappings for large query with multiple chunks + _set_mappings_for_common_backend(target_wm, wiremock_generic_mappings_dir) + _set_mappings_for_query_and_chunks( + target_wm, + wiremock_mapping_dir, + ) + + # Set HTTP_PROXY env var AFTER Wiremock is running using monkeypatch + # This prevents Wiremock from inheriting it and forwarding through proxy2 + set_proxy_env_vars, clear_proxy_env_vars = proxy_env_vars + clear_proxy_env_vars() # Clear any existing ones first + + env_proxy_url = f"http://{proxy_from_env_vars.wiremock_host}:{proxy_from_env_vars.wiremock_http_port}" + # TODO: commenting out this makes conn_params behave normally and be used + monkeypatch.setenv("HTTP_PROXY", env_proxy_url) + monkeypatch.setenv("HTTPS_PROXY", env_proxy_url) + + # Set connection params to point to proxy1 (should take precedence) + connect_kwargs = _base_connect_kwargs(target_wm) + # TODO: commenting out this: connect_kwargs.update(... doesn't seem to change a thing - both proxies and the target still see all requests + connect_kwargs.update( + { + "proxy_host": proxy_from_conn_params.wiremock_host, + "proxy_port": str(proxy_from_conn_params.wiremock_http_port), + } + ) + + # Execute query + _execute_large_query(connect_kwargs, row_count=50_000) + + # Verify proxy selection using named tuple flags + flags = _collect_proxy_precedence_flags( + proxy_from_conn_params, proxy_from_env_vars, target_wm + ) + assert not ( + flags.proxy1_saw_request + ), "proxy_from_conn_params (connection param proxy) should NOT have seen the query request" + assert flags.proxy2_saw_request, ( + "proxy_from_env_vars (env var proxy) should have seen the request " + "since connection params take precedence" + ) + assert flags.backend_saw_request, "backend should have seen the query request"