Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
### Other Changes
- Unpinned fixedint dependency
([#43475](https://github.com/Azure/azure-sdk-for-python/pull/43475))
- Removed fixedint dependency
([#43659](https://github.com/Azure/azure-sdk-for-python/pull/43659))

## 1.0.0b44 (2025-10-14)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@
HTTP_CLIENT_REQUEST_DURATION,
HTTP_SERVER_REQUEST_DURATION,
)
# pylint:disable=no-name-in-module
from fixedint import Int32
from azure.core import CaseInsensitiveEnumMeta


Expand Down Expand Up @@ -319,8 +317,8 @@ class _RP_Names(Enum):

_SAMPLE_RATE_KEY = "_MS.sampleRate"
_SAMPLING_HASH = 5381
_INTEGER_MAX: int = Int32.maxval
_INTEGER_MIN: int = Int32.minval
_INT32_MAX: int = 2**31 - 1 # 2147483647
_INT32_MIN: int = -2**31 # -2147483648

# AAD Auth

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,12 @@
from opentelemetry.semconv.trace import DbSystemValues, SpanAttributes
from opentelemetry.util.types import Attributes

# pylint:disable=no-name-in-module
from fixedint import Int32

from azure.monitor.opentelemetry.exporter._constants import _SAMPLE_RATE_KEY

from azure.monitor.opentelemetry.exporter._constants import (
_SAMPLING_HASH,
_INTEGER_MAX,
_INTEGER_MIN,
_INT32_MAX,
_INT32_MIN,
)


Expand Down Expand Up @@ -342,27 +339,29 @@ def _get_url_for_http_request(attributes: Attributes) -> Optional[str]:

def _get_DJB2_sample_score(trace_id_hex: str) -> float:
    """Return a deterministic sampling score between 0 and 1 for a trace ID.

    The score is the DJB2 hash of ``trace_id_hex`` seeded with
    ``_SAMPLING_HASH``, computed with signed 32-bit wrap-around, made
    non-negative and divided by ``_INT32_MAX``.

    :param trace_id_hex: Trace ID string to hash; every character contributes
        its code point via ``ord``.
    :return: The non-negative hash divided by ``_INT32_MAX``. An empty string
        yields ``_SAMPLING_HASH / _INT32_MAX``.
    """
    # This algorithm uses 32bit integers
    hash_value = _SAMPLING_HASH
    for char in trace_id_hex:
        hash_value = ((hash_value << 5) + hash_value) + ord(char)
        # Python ints are unbounded, so emulate signed 32-bit overflow
        # (two's complement) by wrapping back into range after every step.
        hash_value = ((hash_value + 2**31) % 2**32) - 2**31

    # abs(_INT32_MIN) does not fit in a signed 32-bit integer, so that single
    # value is clamped to _INT32_MAX instead of being negated.
    if hash_value == _INT32_MIN:
        hash_value = _INT32_MAX
    else:
        hash_value = abs(hash_value)

    # divide by _INT32_MAX for value between 0 and 1 for sampling score
    return float(hash_value) / _INT32_MAX

def _round_down_to_nearest(sampling_percentage: float) -> float:
    """Round a sampling percentage down to the nearest value of the form ``100 / N``.

    ``N`` is ``math.ceil(100 / sampling_percentage)``, i.e. the smallest whole
    item count for which ``100 / N`` does not exceed the requested percentage.

    :param sampling_percentage: Requested sampling percentage.
    :return: ``100.0 / N``; ``0.0`` when the percentage is zero, negative, NaN,
        or so small that ``N`` would reach ``_INT32_MAX``; ``100.0`` for any
        percentage of 100 or more (including infinity).
    """
    # Zero and negative percentages cannot select anything. Checking the sign
    # directly also keeps the return type a float and prevents a negative
    # percentage from producing a negative rate.
    if sampling_percentage <= 0:
        return 0.0
    item_count = 100.0 / sampling_percentage
    # Handle case where item_count is infinity or too large for math.ceil
    # (NaN input also lands here because 100.0 / nan is not finite).
    if not math.isfinite(item_count) or item_count >= _INT32_MAX:
        return 0.0
    # An infinite percentage gives item_count == 0.0; clamp to one item so the
    # division below cannot hit zero and the result caps at 100.0.
    return 100.0 / max(1, math.ceil(item_count))

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@
../../core/azure-core-tracing-opentelemetry
-e ../../identity/azure-identity
aiohttp>=3.0; python_version >= '3.7'
fixedint<1.0.0,>=0.1.6
2 changes: 0 additions & 2 deletions sdk/monitor/azure-monitor-opentelemetry-exporter/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,6 @@
install_requires=[
"azure-core<2.0.0,>=1.28.0",
"azure-identity~=1.17",
# TODO: Remove fixedint
"fixedint<1.0.0,>=0.1.6",
"msrest>=0.6.10",
"opentelemetry-api~=1.35",
"opentelemetry-sdk~=1.35",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

import unittest
from unittest import mock

from azure.monitor.opentelemetry.exporter.export.trace._utils import (
_get_DJB2_sample_score,
)
# fixedint is no longer a runtime dependency of the exporter. It is kept as a dev requirement and used here as a reference implementation for the sample score.
from fixedint import Int32
from azure.monitor.opentelemetry.exporter._constants import (
_SAMPLING_HASH,
_INT32_MAX,
_INT32_MIN,
)


class TestGetDJB2SampleScore(unittest.TestCase):
    """Test cases for _get_DJB2_sample_score function."""

    @staticmethod
    def _reference_signed_hash(trace_id_hex):
        """Return the signed 32-bit DJB2 hash computed with fixedint's Int32.

        Int32 wraps on overflow by itself, so this acts as a reference that is
        independent of the plain-int arithmetic in the production function.
        """
        hash_value = Int32(_SAMPLING_HASH)
        for char in trace_id_hex:
            hash_value = ((hash_value << 5) + hash_value) + ord(char)
        return int(hash_value)

    @staticmethod
    def _score_from_signed_hash(signed_hash):
        """Turn a signed 32-bit hash into the expected sampling score."""
        if signed_hash == _INT32_MIN:
            magnitude = _INT32_MAX
        else:
            magnitude = abs(signed_hash)
        return float(magnitude) / _INT32_MAX

    def _assert_score_in_unit_interval(self, score):
        """Assert that ``score`` is a float between 0.0 and 1.0 inclusive."""
        self.assertIsInstance(score, float)
        self.assertGreaterEqual(score, 0.0)
        self.assertLessEqual(score, 1.0)

    def test_empty_string(self):
        """Test with empty string."""
        result = _get_DJB2_sample_score("")
        # With empty string, hash should remain _SAMPLING_HASH (5381)
        # Result should be 5381 / _INT32_MAX
        expected = float(5381) / _INT32_MAX
        self.assertEqual(result, expected)

    def test_single_character(self):
        """Test with single character."""
        result = _get_DJB2_sample_score("a")
        # hash = ((5381 << 5) + 5381) + ord('a')
        # hash = (172192 + 5381) + 97 = 177670
        expected_hash = ((5381 << 5) + 5381) + ord("a")
        self.assertEqual(expected_hash, 177670)
        expected = float(expected_hash) / _INT32_MAX
        self.assertEqual(result, expected)

    def test_typical_trace_id(self):
        """Test with typical 32-character trace ID."""
        trace_id = "12345678901234567890123456789012"
        result = _get_DJB2_sample_score(trace_id)

        # Expected value comes from the Int32-based reference implementation
        expected = self._score_from_signed_hash(self._reference_signed_hash(trace_id))
        self.assertEqual(result, expected)

    def test_hex_characters(self):
        """Test with valid hex characters (0-9, a-f)."""
        test_cases = [
            "0123456789abcdef",
            "fedcba9876543210",
            "aaaaaaaaaaaaaaaa",
            "0000000000000000",
            "ffffffffffffffff",
        ]

        for trace_id in test_cases:
            with self.subTest(trace_id=trace_id):
                self._assert_score_in_unit_interval(_get_DJB2_sample_score(trace_id))

    def test_int32_overflow_handling(self):
        """Test that Int32 overflow is handled correctly."""
        # 100 characters push the unbounded hash far beyond 32 bits, so the
        # production code has to wrap many times.
        long_string = "f" * 100
        result = _get_DJB2_sample_score(long_string)

        self._assert_score_in_unit_interval(result)
        # The wrapped result must agree exactly with the Int32 reference.
        expected = self._score_from_signed_hash(self._reference_signed_hash(long_string))
        self.assertEqual(result, expected)

    def test_int32_minimum_value_handling(self):
        """Test handling when hash equals _INT32_MIN."""
        # Finding a string that hashes to exactly _INT32_MIN is impractical.
        # Instead, seed the production function with _INT32_MIN and hash the
        # empty string: the loop is skipped and the _INT32_MIN branch of the
        # real implementation is exercised.
        with mock.patch(
            "azure.monitor.opentelemetry.exporter.export.trace._utils._SAMPLING_HASH",
            _INT32_MIN,
        ):
            result = _get_DJB2_sample_score("")

        expected = float(_INT32_MAX) / _INT32_MAX
        self.assertEqual(result, expected)
        self.assertEqual(result, 1.0)

    def test_negative_hash_conversion(self):
        """Test that negative hash values are converted to positive."""
        # Smoke check on an arbitrary string: the score must stay in range.
        self._assert_score_in_unit_interval(_get_DJB2_sample_score("negative_test_case_string"))

        # Spread the candidates over the whole 128-bit range so that every hex
        # digit varies, then keep those whose reference hash is negative.
        multiplier = 0x9E3779B97F4A7C15F39CC0605CEDC835
        candidates = [format(seed * multiplier % 2**128, "032x") for seed in range(1, 65)]
        negative_ids = [
            trace_id
            for trace_id in candidates
            if _INT32_MIN < self._reference_signed_hash(trace_id) < 0
        ]
        # Guard against the loop below silently checking nothing.
        self.assertTrue(negative_ids, "no candidate produced a negative intermediate hash")

        for trace_id in negative_ids:
            with self.subTest(trace_id=trace_id):
                expected = float(abs(self._reference_signed_hash(trace_id))) / _INT32_MAX
                result = _get_DJB2_sample_score(trace_id)
                self.assertEqual(result, expected)
                self._assert_score_in_unit_interval(result)

    def test_deterministic_output(self):
        """Test that same input always produces same output."""
        trace_id = "abcdef1234567890abcdef1234567890"

        # Call multiple times with same input
        results = [_get_DJB2_sample_score(trace_id) for _ in range(5)]

        # All results should be identical
        self.assertTrue(all(r == results[0] for r in results))

    def test_different_inputs_different_outputs(self):
        """Test that different inputs produce different outputs."""
        trace_ids = [
            "12345678901234567890123456789012",
            "12345678901234567890123456789013",  # Last digit different
            "22345678901234567890123456789012",  # First digit different
            "abcdef1234567890fedcba0987654321",  # Completely different
        ]

        results = [_get_DJB2_sample_score(tid) for tid in trace_ids]

        # All results should be different
        self.assertEqual(len(results), len(set(results)))

    def test_boundary_values(self):
        """Test with boundary values and edge cases."""
        test_cases = [
            "0",  # Single minimum hex digit
            "f",  # Single maximum hex digit
            "00000000000000000000000000000000",  # All zeros
            "ffffffffffffffffffffffffffffffff",  # All f's (32 chars)
        ]

        for trace_id in test_cases:
            with self.subTest(trace_id=trace_id):
                self._assert_score_in_unit_interval(_get_DJB2_sample_score(trace_id))

    def test_constants_used_correctly(self):
        """Test that the function uses the expected constants."""
        # Verify that _SAMPLING_HASH is 5381 (standard DJB2 hash initial value)
        self.assertEqual(_SAMPLING_HASH, 5381)

        # Verify Int32 constants
        self.assertEqual(_INT32_MAX, 2147483647)  # 2^31 - 1
        self.assertEqual(_INT32_MIN, -2147483648)  # -2^31

    def test_algorithm_correctness(self):
        """Test that the DJB2 algorithm is implemented correctly."""
        # Test with known input and manually calculated expected output
        trace_id = "abc"

        # Manual calculation (no 32-bit overflow occurs for this input):
        # Start with 5381
        # For 'a' (97): hash = ((5381 << 5) + 5381) + 97 = 177670
        # For 'b' (98): hash = ((177670 << 5) + 177670) + 98 = 5863208
        # For 'c' (99): hash = ((5863208 << 5) + 5863208) + 99 = 193485963
        manual_hash = 193485963

        # The Int32 reference must agree with the hand-computed value.
        self.assertEqual(self._reference_signed_hash(trace_id), manual_hash)

        expected_result = float(manual_hash) / _INT32_MAX
        actual_result = _get_DJB2_sample_score(trace_id)

        self.assertEqual(actual_result, expected_result)
Loading