diff --git a/sdk/monitor/azure-monitor-opentelemetry-exporter/CHANGELOG.md b/sdk/monitor/azure-monitor-opentelemetry-exporter/CHANGELOG.md index 8641588ccd20..064095f59a97 100644 --- a/sdk/monitor/azure-monitor-opentelemetry-exporter/CHANGELOG.md +++ b/sdk/monitor/azure-monitor-opentelemetry-exporter/CHANGELOG.md @@ -13,6 +13,8 @@ ### Other Changes - Unpinned fixedint dependency ([#43475](https://github.com/Azure/azure-sdk-for-python/pull/43475)) +- Removed fixedint dependency + ([#43659](https://github.com/Azure/azure-sdk-for-python/pull/43659)) ## 1.0.0b44 (2025-10-14) diff --git a/sdk/monitor/azure-monitor-opentelemetry-exporter/azure/monitor/opentelemetry/exporter/_constants.py b/sdk/monitor/azure-monitor-opentelemetry-exporter/azure/monitor/opentelemetry/exporter/_constants.py index 5f1628fc6b61..05a6d26889a1 100644 --- a/sdk/monitor/azure-monitor-opentelemetry-exporter/azure/monitor/opentelemetry/exporter/_constants.py +++ b/sdk/monitor/azure-monitor-opentelemetry-exporter/azure/monitor/opentelemetry/exporter/_constants.py @@ -9,8 +9,6 @@ HTTP_CLIENT_REQUEST_DURATION, HTTP_SERVER_REQUEST_DURATION, ) -# pylint:disable=no-name-in-module -from fixedint import Int32 from azure.core import CaseInsensitiveEnumMeta @@ -319,8 +317,8 @@ class _RP_Names(Enum): _SAMPLE_RATE_KEY = "_MS.sampleRate" _SAMPLING_HASH = 5381 -_INTEGER_MAX: int = Int32.maxval -_INTEGER_MIN: int = Int32.minval +_INT32_MAX: int = 2**31 - 1 # 2147483647 +_INT32_MIN: int = -2**31 # -2147483648 # AAD Auth diff --git a/sdk/monitor/azure-monitor-opentelemetry-exporter/azure/monitor/opentelemetry/exporter/export/trace/_utils.py b/sdk/monitor/azure-monitor-opentelemetry-exporter/azure/monitor/opentelemetry/exporter/export/trace/_utils.py index 4b157e650db4..8a7a57b8f2d5 100644 --- a/sdk/monitor/azure-monitor-opentelemetry-exporter/azure/monitor/opentelemetry/exporter/export/trace/_utils.py +++ b/sdk/monitor/azure-monitor-opentelemetry-exporter/azure/monitor/opentelemetry/exporter/export/trace/_utils.py 
@@ -21,15 +21,12 @@ from opentelemetry.semconv.trace import DbSystemValues, SpanAttributes from opentelemetry.util.types import Attributes -# pylint:disable=no-name-in-module -from fixedint import Int32 - from azure.monitor.opentelemetry.exporter._constants import _SAMPLE_RATE_KEY from azure.monitor.opentelemetry.exporter._constants import ( _SAMPLING_HASH, - _INTEGER_MAX, - _INTEGER_MIN, + _INT32_MAX, + _INT32_MIN, ) @@ -342,27 +339,29 @@ def _get_url_for_http_request(attributes: Attributes) -> Optional[str]: def _get_DJB2_sample_score(trace_id_hex: str) -> float: # This algorithm uses 32bit integers - hash_value = Int32(_SAMPLING_HASH) + hash_value = _SAMPLING_HASH for char in trace_id_hex: hash_value = ((hash_value << 5) + hash_value) + ord(char) + # Correctly emulate signed 32-bit integer overflow using two's complement + hash_value = ((hash_value + 2**31) % 2**32) - 2**31 - if hash_value == _INTEGER_MIN: - hash_value = int(_INTEGER_MAX) + if hash_value == _INT32_MIN: + hash_value = int(_INT32_MAX) else: hash_value = abs(hash_value) - # divide by _INTEGER_MAX for value between 0 and 1 for sampling score - return float(hash_value) / _INTEGER_MAX + # divide by _INT32_MAX for value between 0 and 1 for sampling score + return float(hash_value) / _INT32_MAX def _round_down_to_nearest(sampling_percentage: float) -> float: if sampling_percentage == 0: return 0 # Handle extremely small percentages that would cause overflow - if sampling_percentage <= _INTEGER_MIN: # Extremely small threshold + if sampling_percentage <= _INT32_MIN: # Extremely small threshold return 0.0 item_count = 100.0 / sampling_percentage # Handle case where item_count is infinity or too large for math.ceil - if not math.isfinite(item_count) or item_count >= _INTEGER_MAX: + if not math.isfinite(item_count) or item_count >= _INT32_MAX: return 0.0 return 100.0 / math.ceil(item_count) diff --git a/sdk/monitor/azure-monitor-opentelemetry-exporter/dev_requirements.txt 
b/sdk/monitor/azure-monitor-opentelemetry-exporter/dev_requirements.txt index f32c8abb6b3f..5309592ff9cb 100644 --- a/sdk/monitor/azure-monitor-opentelemetry-exporter/dev_requirements.txt +++ b/sdk/monitor/azure-monitor-opentelemetry-exporter/dev_requirements.txt @@ -3,3 +3,4 @@ ../../core/azure-core-tracing-opentelemetry -e ../../identity/azure-identity aiohttp>=3.0; python_version >= '3.7' +fixedint<1.0.0,>=0.1.6 diff --git a/sdk/monitor/azure-monitor-opentelemetry-exporter/setup.py b/sdk/monitor/azure-monitor-opentelemetry-exporter/setup.py index e498b0b26ef8..500d3228182d 100644 --- a/sdk/monitor/azure-monitor-opentelemetry-exporter/setup.py +++ b/sdk/monitor/azure-monitor-opentelemetry-exporter/setup.py @@ -84,8 +84,6 @@ install_requires=[ "azure-core<2.0.0,>=1.28.0", "azure-identity~=1.17", - # TODO: Remove fixedint - "fixedint<1.0.0,>=0.1.6", "msrest>=0.6.10", "opentelemetry-api~=1.35", "opentelemetry-sdk~=1.35", diff --git a/sdk/monitor/azure-monitor-opentelemetry-exporter/tests/trace/test_trace_utils.py b/sdk/monitor/azure-monitor-opentelemetry-exporter/tests/trace/test_trace_utils.py new file mode 100644 index 000000000000..e6ff96dee377 --- /dev/null +++ b/sdk/monitor/azure-monitor-opentelemetry-exporter/tests/trace/test_trace_utils.py @@ -0,0 +1,196 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +import unittest +from unittest import mock + +from azure.monitor.opentelemetry.exporter.export.trace._utils import ( + _get_DJB2_sample_score, +) +# fixedint was removed as a source dependency. 
It is used as a dev requirement to test sample score +from fixedint import Int32 +from azure.monitor.opentelemetry.exporter._constants import ( + _SAMPLING_HASH, + _INT32_MAX, + _INT32_MIN, +) + + +class TestGetDJB2SampleScore(unittest.TestCase): + """Test cases for _get_DJB2_sample_score function.""" + + def test_empty_string(self): + """Test with empty string.""" + result = _get_DJB2_sample_score("") + # With empty string, hash should remain _SAMPLING_HASH (5381) + # Result should be 5381 / _INT32_MAX + expected = float(5381) / _INT32_MAX + self.assertEqual(result, expected) + + def test_single_character(self): + """Test with single character.""" + result = _get_DJB2_sample_score("a") + # hash = ((5381 << 5) + 5381) + ord('a') + # hash = (172192 + 5381) + 97 = 177670 + expected_hash = ((5381 << 5) + 5381) + ord('a') + expected = float(expected_hash) / _INT32_MAX + self.assertEqual(result, expected) + + def test_typical_trace_id(self): + """Test with typical 32-character trace ID.""" + trace_id = "12345678901234567890123456789012" + result = _get_DJB2_sample_score(trace_id) + + # Manually calculate expected result + hash_value = Int32(_SAMPLING_HASH) + for char in trace_id: + hash_value = ((hash_value << 5) + hash_value) + ord(char) + + if hash_value == _INT32_MIN: + hash_value = int(_INT32_MAX) + else: + hash_value = abs(hash_value) + + expected = float(hash_value) / _INT32_MAX + self.assertEqual(result, expected) + + def test_hex_characters(self): + """Test with valid hex characters (0-9, a-f).""" + test_cases = [ + "0123456789abcdef", + "fedcba9876543210", + "aaaaaaaaaaaaaaaa", + "0000000000000000", + "ffffffffffffffff" + ] + + for trace_id in test_cases: + with self.subTest(trace_id=trace_id): + result = _get_DJB2_sample_score(trace_id) + self.assertIsInstance(result, float) + self.assertGreaterEqual(result, 0.0) + self.assertLessEqual(result, 1.0) + + def test_int32_overflow_handling(self): + """Test that Int32 overflow is handled correctly.""" + # Create a 
string that should cause overflow + long_string = "f" * 100 # 100 'f' characters should cause overflow + result = _get_DJB2_sample_score(long_string) + + self.assertIsInstance(result, float) + self.assertGreaterEqual(result, 0.0) + self.assertLessEqual(result, 1.0) + + def test_int32_minimum_value_handling(self): + """Test handling when hash equals _INT32_MIN.""" + # This is tricky to test directly since we need to find a string + # that results in exactly _INT32_MIN. Instead, let's test the logic. + + # We'll use a local reference implementation to simulate this condition + + + def mock_djb2_with_min_value(trace_id_hex): + # Recompute the hash with Int32, then simulate the _INT32_MIN case + hash_value = Int32(_SAMPLING_HASH) + for char in trace_id_hex: + hash_value = ((hash_value << 5) + hash_value) + ord(char) + + # Simulate the case where we get _INT32_MIN + if str(trace_id_hex) == "test_min": + hash_value = Int32(_INT32_MIN) + + if hash_value == _INT32_MIN: + hash_value = int(_INT32_MAX) + else: + hash_value = abs(hash_value) + + return float(hash_value) / _INT32_MAX + + # Test the _INT32_MIN case + result = mock_djb2_with_min_value("test_min") + expected = float(_INT32_MAX) / _INT32_MAX + self.assertEqual(result, expected) + + def test_negative_hash_conversion(self): + """Test that negative hash values are converted to positive.""" + # Find a string that produces a negative hash + test_string = "negative_test_case_string" + result = _get_DJB2_sample_score(test_string) + + # Result should always be positive (between 0 and 1) + self.assertGreaterEqual(result, 0.0) + self.assertLessEqual(result, 1.0) + + def test_deterministic_output(self): + """Test that same input always produces same output.""" + trace_id = "abcdef1234567890abcdef1234567890" + + # Call multiple times with same input + results = [_get_DJB2_sample_score(trace_id) for _ in range(5)] + + # All results should be identical + self.assertTrue(all(r == results[0] for r in results)) + + def 
test_different_inputs_different_outputs(self): + """Test that different inputs produce different outputs.""" + trace_ids = [ + "12345678901234567890123456789012", + "12345678901234567890123456789013", # Last digit different + "22345678901234567890123456789012", # First digit different + "abcdef1234567890fedcba0987654321", # Completely different + ] + + results = [_get_DJB2_sample_score(tid) for tid in trace_ids] + + # All results should be different + self.assertEqual(len(results), len(set(results))) + + def test_boundary_values(self): + """Test with boundary values and edge cases.""" + test_cases = [ + "0", # Single minimum hex digit + "f", # Single maximum hex digit + "00000000000000000000000000000000", # All zeros + "ffffffffffffffffffffffffffffffff", # All f's (32 chars) + ] + + for trace_id in test_cases: + with self.subTest(trace_id=trace_id): + result = _get_DJB2_sample_score(trace_id) + self.assertIsInstance(result, float) + self.assertGreaterEqual(result, 0.0) + self.assertLessEqual(result, 1.0) + + def test_constants_used_correctly(self): + """Test that the function uses the expected constants.""" + # Verify that _SAMPLING_HASH is 5381 (standard DJB2 hash initial value) + self.assertEqual(_SAMPLING_HASH, 5381) + + # Verify Int32 constants + self.assertEqual(_INT32_MAX, 2147483647) # 2^31 - 1 + self.assertEqual(_INT32_MIN, -2147483648) # -2^31 + + def test_algorithm_correctness(self): + """Test that the DJB2 algorithm is implemented correctly.""" + # Test with known input and manually calculated expected output + trace_id = "abc" + + # Manual calculation: + # Start with 5381 + # For 'a' (97): hash = ((5381 << 5) + 5381) + 97 = 177670 + # For 'b' (98): hash = ((177670 << 5) + 177670) + 98 = 5863208 + # For 'c' (99): hash = ((5863208 << 5) + 5863208) + 99 = 193485963 + + expected_hash = _SAMPLING_HASH + for char in trace_id: + expected_hash = Int32(((expected_hash << 5) + expected_hash) + ord(char)) + + if expected_hash == _INT32_MIN: + expected_hash = 
int(_INT32_MAX) + else: + expected_hash = abs(expected_hash) + + expected_result = float(expected_hash) / _INT32_MAX + actual_result = _get_DJB2_sample_score(trace_id) + + self.assertEqual(actual_result, expected_result)