diff --git a/CHANGELOG.md b/CHANGELOG.md index 3267858b..4faeaf12 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## Unreleased + +### Fixed + +- CloudInfo / CloudSettings now cached by authority (schema, host and port) instead of full URL + ## [5.0.3] - 2025-05-04 ### Fixed diff --git a/azure-kusto-data/azure/kusto/data/_cloud_settings.py b/azure-kusto-data/azure/kusto/data/_cloud_settings.py index 6c1ef637..7f1f34c9 100644 --- a/azure-kusto-data/azure/kusto/data/_cloud_settings.py +++ b/azure-kusto-data/azure/kusto/data/_cloud_settings.py @@ -54,22 +54,22 @@ class CloudSettings: first_party_authority_url=DEFAULT_FIRST_PARTY_AUTHORITY_URL, ) - @classmethod - @distributed_trace(name_of_span="CloudSettings.get_cloud_info", kind=SpanKind.CLIENT) - def get_cloud_info_for_cluster(cls, kusto_uri: str, proxies: Optional[Dict[str, str]] = None) -> CloudInfo: - kusto_uri = cls._normalize_uri(kusto_uri) - - # tracing attributes for cloud info - Span.set_cloud_info_attributes(kusto_uri) - - if kusto_uri in cls._cloud_cache: # Double-checked locking to avoid unnecessary lock access - return cls._cloud_cache[kusto_uri] - - with cls._cloud_cache_lock: - if kusto_uri in cls._cloud_cache: - return cls._cloud_cache[kusto_uri] - - url_parts = urlparse(kusto_uri) + @classmethod + @distributed_trace(name_of_span="CloudSettings.get_cloud_info", kind=SpanKind.CLIENT) + def get_cloud_info_for_cluster(cls, kusto_uri: str, proxies: Optional[Dict[str, str]] = None) -> CloudInfo: + normalized_authority = cls._normalize_uri(kusto_uri) + + # tracing attributes for cloud info + Span.set_cloud_info_attributes(kusto_uri) + + if normalized_authority in cls._cloud_cache: # Double-checked locking to avoid unnecessary lock access + return cls._cloud_cache[normalized_authority] + + with cls._cloud_cache_lock: + if normalized_authority in cls._cloud_cache: + return cls._cloud_cache[normalized_authority] + + url_parts = urlparse(kusto_uri) url = f"{url_parts.scheme}://{url_parts.netloc}/{METADATA_ENDPOINT}" try: @@ -87,31 +87,32 @@ def get_cloud_info_for_cluster(cls, kusto_uri: str, proxies: Optional[Dict[str, if content is None or content == {}: raise KustoServiceError("Kusto returned an invalid cloud metadata response", result) root = content["AzureAD"] - if root is not None: - cls._cloud_cache[kusto_uri] = CloudInfo( - login_endpoint=root["LoginEndpoint"], - login_mfa_required=root["LoginMfaRequired"], - kusto_client_app_id=root["KustoClientAppId"], - kusto_client_redirect_uri=root["KustoClientRedirectUri"], - kusto_service_resource_id=root["KustoServiceResourceId"], - first_party_authority_url=root["FirstPartyAuthorityUrl"], - ) - else: - cls._cloud_cache[kusto_uri] = cls.DEFAULT_CLOUD - elif result.status_code == 404: - # For now as long not all proxies implement the metadata endpoint, if no endpoint exists return public cloud data - cls._cloud_cache[kusto_uri] = cls.DEFAULT_CLOUD - else: - raise KustoServiceError("Kusto returned an invalid cloud metadata response", result) - return cls._cloud_cache[kusto_uri] + if root is not None: + cls._cloud_cache[normalized_authority] = CloudInfo( + login_endpoint=root["LoginEndpoint"], + login_mfa_required=root["LoginMfaRequired"], + kusto_client_app_id=root["KustoClientAppId"], + kusto_client_redirect_uri=root["KustoClientRedirectUri"], + kusto_service_resource_id=root["KustoServiceResourceId"], + first_party_authority_url=root["FirstPartyAuthorityUrl"], + ) + else: + cls._cloud_cache[normalized_authority] = cls.DEFAULT_CLOUD + elif result.status_code == 404: + # For now as long not all proxies implement the metadata endpoint, if no endpoint exists return public cloud data + cls._cloud_cache[normalized_authority] = cls.DEFAULT_CLOUD + else: + raise KustoServiceError("Kusto returned an invalid cloud metadata response", result) + return cls._cloud_cache[normalized_authority] @classmethod def add_to_cache(cls, url: str, cloud_info: CloudInfo): with cls._cloud_cache_lock: cls._cloud_cache[cls._normalize_uri(url)] = cloud_info - @classmethod - def _normalize_uri(cls, kusto_uri): - if not kusto_uri.endswith("/"): - kusto_uri += "/" - return kusto_uri + @classmethod + def _normalize_uri(cls, kusto_uri): + """Extracts and returns the authority part of the URI (schema, host, port)""" + url_parts = urlparse(kusto_uri) + # Return only the scheme and netloc (which contains host and port if present) + return f"{url_parts.scheme}://{url_parts.netloc}" diff --git a/azure-kusto-data/tests/test_cloud_settings.py b/azure-kusto-data/tests/test_cloud_settings.py new file mode 100644 index 00000000..dc98cb8a --- /dev/null +++ b/azure-kusto-data/tests/test_cloud_settings.py @@ -0,0 +1,105 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License +import pytest + +from azure.kusto.data._cloud_settings import CloudSettings, CloudInfo + + +@pytest.fixture +def clear_cache(): + """Fixture to clear the CloudSettings cache before each test""" + with CloudSettings._cloud_cache_lock: + CloudSettings._cloud_cache.clear() + yield + # Clean up after test if needed + with CloudSettings._cloud_cache_lock: + CloudSettings._cloud_cache.clear() + + +def test_normalize_uri_extracts_authority(): + """Test that _normalize_uri extracts only the authority part (schema, host, port) from a URI.""" + # Test with various URI formats + test_cases = [ + ("https://cluster.kusto.windows.net", "https://cluster.kusto.windows.net"), + ("https://cluster.kusto.windows.net/", "https://cluster.kusto.windows.net"), + ("https://cluster.kusto.windows.net/v1/rest", "https://cluster.kusto.windows.net"), + ("https://cluster.kusto.windows.net:443/v1/rest", "https://cluster.kusto.windows.net:443"), + ("http://localhost:8080/v1/rest/query", "http://localhost:8080"), + ("https://cluster.kusto.windows.net/database", "https://cluster.kusto.windows.net"), + ] + + for input_uri, expected_authority in test_cases: + assert CloudSettings._normalize_uri(input_uri) == expected_authority + + +def test_cloud_info_cached_by_authority(clear_cache): + """Test that CloudInfo is cached by authority part of the URI (schema, host, port).""" + # Create a test CloudInfo object + test_cloud_info = CloudInfo( + login_endpoint="https://login.test.com", + login_mfa_required=False, + kusto_client_app_id="test-app-id", + kusto_client_redirect_uri="http://localhost/redirect", + kusto_service_resource_id="https://test.kusto.windows.net", + first_party_authority_url="https://login.test.com/tenant-id", + ) + + # Add to cache with a specific URL + base_url = "https://cluster.kusto.windows.net" + CloudSettings.add_to_cache(base_url, test_cloud_info) + + # Test that it can be retrieved with different path variations but same authority + variations = [ + base_url + "/", + base_url + "/database", + base_url + "/v1/rest/query", + base_url + "/some/other/path", + ] + + for url in variations: + # Use the internal _normalize_uri to get the cache key + normalized_url = CloudSettings._normalize_uri(url) + assert normalized_url == "https://cluster.kusto.windows.net" + assert normalized_url in CloudSettings._cloud_cache + + # Verify the retrieved CloudInfo is the same instance + retrieved_info = CloudSettings._cloud_cache[normalized_url] + assert retrieved_info is test_cloud_info + + +def test_cloud_info_cached_with_port(clear_cache): + """Test that URIs with ports are cached separately from those without.""" + # Create two different CloudInfo objects + cloud_info_default = CloudInfo( + login_endpoint="https://login.default.com", + login_mfa_required=False, + kusto_client_app_id="default-app-id", + kusto_client_redirect_uri="http://localhost/redirect", + kusto_service_resource_id="https://default.kusto.windows.net", + first_party_authority_url="https://login.default.com/tenant-id", + ) + + cloud_info_with_port = CloudInfo( + login_endpoint="https://login.withport.com", + login_mfa_required=True, + kusto_client_app_id="port-app-id", + kusto_client_redirect_uri="http://localhost/redirect", + kusto_service_resource_id="https://port.kusto.windows.net", + first_party_authority_url="https://login.withport.com/tenant-id", + ) + + # Add both to cache with different authorities + CloudSettings.add_to_cache("https://cluster.kusto.windows.net", cloud_info_default) + CloudSettings.add_to_cache("https://cluster.kusto.windows.net:443", cloud_info_with_port) + + # Verify they are cached separately + assert "https://cluster.kusto.windows.net" in CloudSettings._cloud_cache + assert "https://cluster.kusto.windows.net:443" in CloudSettings._cloud_cache + + # Verify each URI gets the correct CloudInfo + assert CloudSettings._cloud_cache["https://cluster.kusto.windows.net"] is cloud_info_default + assert CloudSettings._cloud_cache["https://cluster.kusto.windows.net:443"] is cloud_info_with_port + + # Additional verification with variations + assert CloudSettings._cloud_cache[CloudSettings._normalize_uri("https://cluster.kusto.windows.net/database")] is cloud_info_default + assert CloudSettings._cloud_cache[CloudSettings._normalize_uri("https://cluster.kusto.windows.net:443/database")] is cloud_info_with_port