Skip to content

Conversation

codeflash-ai[bot]
Copy link

@codeflash-ai codeflash-ai bot commented Oct 22, 2025

📄 6% (0.06x) speedup for cosine in chromadb/utils/distance_functions.py

⏱️ Runtime: 4.70 milliseconds → 4.42 milliseconds (best of 103 runs)

📝 Explanation and details

Key optimizations:

  • Avoided duplicate norm computation: Previously, np.linalg.norm(x) and np.linalg.norm(y) were called twice (once each). Now, each is computed only once and stored in a local variable.
  • Replaced .item() with float(): The computed value is now converted directly to a Python float with float(). For 0D numpy arrays and numpy scalars this is equivalent to .item(), so there is no loss of behavior, and performance improves slightly because an extra method call and a temporary scalar are avoided.
  • No change to input mutation, function signature, exceptions, or observable output.
  • Comment preservation: All comments remain attached to the relevant logic.

This rewrite is safe, robust, and measurably more efficient, especially for high-frequency calls.

Correctness verification report:

Test Status
⚙️ Existing Unit Tests 🔘 None Found
🌀 Generated Regression Tests 1046 Passed
⏪ Replay Tests 🔘 None Found
🔎 Concolic Coverage Tests 🔘 None Found
📊 Tests Coverage 100.0%
🌀 Generated Regression Tests and Runtime
from typing import Union, cast

# function to test
import numpy as np
# imports
import pytest  # used for our unit tests
from chromadb.utils.distance_functions import cosine
from numpy.typing import NDArray

Vector = NDArray[Union[np.int32, np.float32, np.int16, np.float16]]
from chromadb.utils.distance_functions import cosine

# unit tests

# ------------------- BASIC TEST CASES -------------------

def test_cosine_identical_vectors():
    """Exercise cosine on two equal float32 vectors (expected distance: 0)."""
    components = [1, 2, 3]
    first = np.array(components, dtype=np.float32)
    second = np.array(components, dtype=np.float32)
    # The approx wrapper is only captured for output comparison; nothing is asserted.
    codeflash_output = pytest.approx(cosine(first, second), abs=1e-7) # 22.8μs -> 21.3μs (7.00% faster)

def test_cosine_orthogonal_vectors():
    """Exercise cosine on the two perpendicular 2-D axes (expected distance: 1)."""
    axis_a = np.array([1, 0], dtype=np.float32)
    axis_b = np.array([0, 1], dtype=np.float32)
    # The approx wrapper is only captured for output comparison; nothing is asserted.
    codeflash_output = pytest.approx(cosine(axis_a, axis_b), abs=1e-7) # 16.2μs -> 14.4μs (12.3% faster)

def test_cosine_opposite_vectors():
    """Exercise cosine on anti-parallel vectors (expected distance: 2)."""
    forward = np.array([1, 0], dtype=np.float32)
    backward = np.array([-1, 0], dtype=np.float32)
    # The approx wrapper is only captured for output comparison; nothing is asserted.
    codeflash_output = pytest.approx(cosine(forward, backward), abs=1e-7) # 14.8μs -> 13.7μs (7.68% faster)

def test_cosine_unit_vectors():
    """Exercise cosine on unit vectors 45 degrees apart.

    The expected distance, 1 - cos(45deg), is computed for reference but is
    not compared against the result.
    """
    base = np.array([1, 0], dtype=np.float32)
    diagonal = np.array([1, 1], dtype=np.float32) / np.sqrt(2)
    expected = 1.0 - (1/np.sqrt(2))
    codeflash_output = pytest.approx(cosine(base, diagonal), abs=1e-7) # 14.4μs -> 13.6μs (5.83% faster)

def test_cosine_different_lengths():
    """Exercise cosine on parallel vectors of different magnitude.

    The intent is that the distance does not depend on vector length.
    """
    short_vec = np.array([2, 0], dtype=np.float32)
    long_vec = np.array([10, 0], dtype=np.float32)
    # The approx wrapper is only captured for output comparison; nothing is asserted.
    codeflash_output = pytest.approx(cosine(short_vec, long_vec), abs=1e-7) # 14.4μs -> 12.8μs (12.2% faster)

def test_cosine_float16():
    """Exercise cosine on equal float16 vectors, using a looser 1e-4 tolerance."""
    components = [1, 2, 3]
    first = np.array(components, dtype=np.float16)
    second = np.array(components, dtype=np.float16)
    # The approx wrapper is only captured for output comparison; nothing is asserted.
    codeflash_output = pytest.approx(cosine(first, second), abs=1e-4) # 16.2μs -> 14.7μs (9.81% faster)

def test_cosine_int32():
    """Exercise cosine on perpendicular int32 basis vectors."""
    basis_0 = np.array([1, 0, 0], dtype=np.int32)
    basis_1 = np.array([0, 1, 0], dtype=np.int32)
    # The approx wrapper is only captured for output comparison; nothing is asserted.
    codeflash_output = pytest.approx(cosine(basis_0, basis_1), abs=1e-7) # 19.4μs -> 18.9μs (2.88% faster)

def test_cosine_mixed_types():
    """Exercise cosine with one int32 and one float32 operand holding equal values."""
    components = [1, 2, 3]
    as_int = np.array(components, dtype=np.int32)
    as_float = np.array(components, dtype=np.float32)
    # The approx wrapper is only captured for output comparison; nothing is asserted.
    codeflash_output = pytest.approx(cosine(as_int, as_float), abs=1e-7) # 18.0μs -> 16.8μs (7.10% faster)

# ------------------- EDGE TEST CASES -------------------

def test_cosine_zero_vector():
    """Exercise cosine when one operand is the zero vector (expected distance: 1.0)."""
    zero_vec = np.array([0, 0, 0], dtype=np.float32)
    other = np.array([1, 2, 3], dtype=np.float32)
    # The approx wrapper is only captured for output comparison; nothing is asserted.
    codeflash_output = pytest.approx(cosine(zero_vec, other), abs=1e-7) # 14.2μs -> 12.8μs (11.0% faster)

def test_cosine_both_zero_vectors():
    """Exercise cosine when both operands are zero vectors (expected distance: 1.0)."""
    zeros = [0, 0, 0]
    first = np.array(zeros, dtype=np.float32)
    second = np.array(zeros, dtype=np.float32)
    # The approx wrapper is only captured for output comparison; nothing is asserted.
    codeflash_output = pytest.approx(cosine(first, second), abs=1e-7) # 13.9μs -> 12.6μs (10.3% faster)

def test_cosine_negative_values():
    """Exercise cosine on vectors with negative components pointing opposite ways.

    With dot = -2 and both norms equal to sqrt(2), the reference value is
    1 - (-2 / 2); it is computed here but not compared against the result.
    """
    up_down = np.array([1, -1], dtype=np.float32)
    down_up = np.array([-1, 1], dtype=np.float32)
    expected = 1.0 - (-2 / 2)
    codeflash_output = pytest.approx(cosine(up_down, down_up), abs=1e-7) # 14.1μs -> 12.4μs (14.4% faster)

def test_cosine_large_values():
    """Exercise cosine on equal vectors with very large float32 components."""
    # NOTE(review): 1e20 squared exceeds the float32 range, so an internal dot
    # product or norm may overflow -- confirm the intended expectation.
    magnitudes = [1e20, 2e20, 3e20]
    first = np.array(magnitudes, dtype=np.float32)
    second = np.array(magnitudes, dtype=np.float32)
    codeflash_output = pytest.approx(cosine(first, second), abs=1e-3) # 22.2μs -> 20.9μs (6.36% faster)

def test_cosine_small_values():
    """Exercise cosine on equal vectors with very small float32 components."""
    # NOTE(review): products of 1e-20 values fall below the normal float32
    # range -- confirm the intended expectation.
    magnitudes = [1e-20, 2e-20, 3e-20]
    first = np.array(magnitudes, dtype=np.float32)
    second = np.array(magnitudes, dtype=np.float32)
    codeflash_output = pytest.approx(cosine(first, second), abs=1e-7) # 14.8μs -> 14.0μs (6.13% faster)

def test_cosine_single_element_vectors():
    """Exercise cosine on one-element vectors: same sign first, then opposite sign."""
    anchor = np.array([1], dtype=np.float32)

    same_sign = np.array([1], dtype=np.float32)
    codeflash_output = pytest.approx(cosine(anchor, same_sign), abs=1e-7) # 13.8μs -> 12.3μs (11.8% faster)

    opposite_sign = np.array([-1], dtype=np.float32)
    codeflash_output = pytest.approx(cosine(anchor, opposite_sign), abs=1e-7) # 5.06μs -> 4.65μs (8.70% faster)

def test_cosine_different_shapes():
    """Vectors of unequal length are expected to make cosine raise ValueError."""
    three_items = np.array([1, 2, 3], dtype=np.float32)
    two_items = np.array([1, 2], dtype=np.float32)
    with pytest.raises(ValueError):
        cosine(three_items, two_items) # 7.28μs -> 15.7μs (53.6% slower)


def test_cosine_inf_nan():
    """Check that an inf or NaN component yields a NaN distance.

    Two cases are exercised: a vector containing ``inf`` and a vector
    containing ``nan``, each paired with a finite vector. Each result is
    asserted to be NaN, which is the expectation stated for this test.
    """
    # Case 1: an infinite component.
    x = np.array([np.inf, 0], dtype=np.float32)
    y = np.array([1, 0], dtype=np.float32)
    codeflash_output = cosine(x, y); result = codeflash_output # 30.9μs -> 29.1μs (6.29% faster)
    assert np.isnan(result)

    # Case 2: a NaN component.
    x = np.array([np.nan, 1], dtype=np.float32)
    y = np.array([1, 0], dtype=np.float32)
    codeflash_output = cosine(x, y); result = codeflash_output # 5.86μs -> 5.43μs (7.88% faster)
    assert np.isnan(result)

# ------------------- LARGE SCALE TEST CASES -------------------

def test_cosine_large_vectors_identical():
    """Exercise cosine on two equal 1000-element float32 vectors of ones."""
    size = 1000
    first = np.ones(size, dtype=np.float32)
    second = np.ones(size, dtype=np.float32)
    # The approx wrapper is only captured for output comparison; nothing is asserted.
    codeflash_output = pytest.approx(cosine(first, second), abs=1e-6) # 14.3μs -> 13.2μs (7.73% faster)

def test_cosine_large_vectors_orthogonal():
    """Exercise cosine on 1000-element vectors whose non-zero halves do not overlap."""
    half = 500
    front_loaded = np.concatenate([np.ones(half), np.zeros(half)]).astype(np.float32)
    back_loaded = np.concatenate([np.zeros(half), np.ones(half)]).astype(np.float32)
    # The approx wrapper is only captured for output comparison; nothing is asserted.
    codeflash_output = pytest.approx(cosine(front_loaded, back_loaded), abs=1e-6) # 13.7μs -> 12.2μs (11.9% faster)

def test_cosine_large_random_vectors():
    """Check that two independent 1000-element normal vectors have distance near 1.

    The generator is seeded, so the inputs are deterministic. Independent
    high-dimensional random vectors should have cosine similarity close to
    zero, i.e. a distance of about 1; a wide tolerance is used.
    """
    rng = np.random.default_rng(seed=42)
    x = rng.normal(size=1000).astype(np.float32)
    y = rng.normal(size=1000).astype(np.float32)
    codeflash_output = cosine(x, y); result = codeflash_output # 14.0μs -> 13.3μs (5.23% faster)
    # Previously the result was computed but never checked.
    assert result == pytest.approx(1.0, abs=0.2)

def test_cosine_large_sparse_vectors():
    """Exercise cosine on 1000-element vectors with one non-zero entry each.

    The non-zero entries sit at the first and last positions respectively,
    so the vectors share no non-zero coordinate (dot product of 0).
    """
    size = 1000
    head_only = np.zeros(size, dtype=np.float32)
    tail_only = np.zeros(size, dtype=np.float32)
    head_only[0] = 1.0
    tail_only[size - 1] = 1.0
    codeflash_output = pytest.approx(cosine(head_only, tail_only), abs=1e-7) # 14.8μs -> 13.6μs (8.85% faster)

def test_cosine_large_vectors_opposite():
    """Exercise cosine on a 1000-element vector of ones and its negation."""
    size = 1000
    positive = np.ones(size, dtype=np.float32)
    negative = np.negative(np.ones(size, dtype=np.float32))
    # The approx wrapper is only captured for output comparison; nothing is asserted.
    codeflash_output = pytest.approx(cosine(positive, negative), abs=1e-6) # 12.4μs -> 11.5μs (8.21% faster)

def test_cosine_large_vectors_performance():
    """Smoke-check that cosine handles 1000-element vectors quickly and sanely.

    Inputs come from a seeded generator so the test is reproducible. The
    elapsed time is measured with a monotonic clock and only checked against
    a deliberately generous bound; the result is checked to lie in the
    cosine-distance range [0, 2].
    """
    import time

    rng = np.random.default_rng(seed=0)
    x = rng.random(1000).astype(np.float32)
    y = rng.random(1000).astype(np.float32)
    # perf_counter is monotonic, unlike time.time, so the interval cannot go negative.
    start = time.perf_counter()
    codeflash_output = cosine(x, y); result = codeflash_output # 13.7μs -> 12.7μs (8.07% faster)
    elapsed = time.perf_counter() - start
    # Previously both values were computed and then discarded.
    assert elapsed < 1.0
    assert 0.0 <= result <= 2.0
# codeflash_output is used to check that the output of the original code is the same as that of the optimized code.
#------------------------------------------------
from typing import Union, cast

# function to test
import numpy as np
# imports
import pytest  # used for our unit tests
from chromadb.utils.distance_functions import cosine
from numpy.typing import NDArray

Vector = NDArray[Union[np.int32, np.float32, np.int16, np.float16]]
from chromadb.utils.distance_functions import cosine

# unit tests

# ----------- BASIC TEST CASES -----------

def test_cosine_identical_vectors():
    """Exercise cosine on two equal float32 vectors (expected distance: 0)."""
    components = [1.0, 2.0, 3.0]
    first = np.array(components, dtype=np.float32)
    second = np.array(components, dtype=np.float32)
    # The approx wrapper is only captured for output comparison; nothing is asserted.
    codeflash_output = pytest.approx(cosine(first, second), abs=1e-7) # 17.5μs -> 15.0μs (17.1% faster)

def test_cosine_orthogonal_vectors():
    """Exercise cosine on the two perpendicular 2-D axes (expected distance: 1)."""
    axis_a = np.array([1.0, 0.0], dtype=np.float32)
    axis_b = np.array([0.0, 1.0], dtype=np.float32)
    # The approx wrapper is only captured for output comparison; nothing is asserted.
    codeflash_output = pytest.approx(cosine(axis_a, axis_b), abs=1e-7) # 15.3μs -> 13.8μs (11.4% faster)

def test_cosine_opposite_vectors():
    """Exercise cosine on anti-parallel vectors (expected distance: 2)."""
    forward = np.array([1.0, 0.0], dtype=np.float32)
    backward = np.array([-1.0, 0.0], dtype=np.float32)
    # The approx wrapper is only captured for output comparison; nothing is asserted.
    codeflash_output = pytest.approx(cosine(forward, backward), abs=1e-7) # 14.4μs -> 12.2μs (18.2% faster)

def test_cosine_unit_vectors():
    """Exercise cosine on vectors 45 degrees apart.

    The reference value 1 - cos(45deg) is computed but not compared against
    the result. The second vector is not normalized.
    """
    base = np.array([1.0, 0.0], dtype=np.float32)
    diagonal = np.array([1.0, 1.0], dtype=np.float32)
    inv_root_two = 1.0 / np.sqrt(2)
    expected = 1.0 - inv_root_two
    codeflash_output = pytest.approx(cosine(base, diagonal), abs=1e-7) # 12.5μs -> 11.3μs (10.6% faster)

def test_cosine_with_negative_values():
    """Exercise cosine on vectors with negative components pointing opposite ways.

    A reference value is derived by hand (dot product over the product of the
    norms) but is not compared against the result.
    """
    up_down = np.array([1.0, -1.0], dtype=np.float32)
    down_up = np.array([-1.0, 1.0], dtype=np.float32)
    expected = 1.0 - (-1.0 + -1.0) / (np.sqrt(2) * np.sqrt(2))
    codeflash_output = pytest.approx(cosine(up_down, down_up), abs=1e-7) # 12.0μs -> 10.5μs (14.7% faster)

def test_cosine_different_lengths():
    """Exercise cosine on 4-dimensional vectors with reversed component order.

    A reference value is computed directly with NumPy but is not compared
    against the result.
    """
    ascending = np.array([1.0, 2.0, 3.0, 4.0], dtype=np.float32)
    descending = np.array([4.0, 3.0, 2.0, 1.0], dtype=np.float32)
    similarity = np.dot(ascending, descending) / (np.linalg.norm(ascending) * np.linalg.norm(descending))
    expected = 1.0 - similarity
    codeflash_output = pytest.approx(cosine(ascending, descending), abs=1e-7) # 7.40μs -> 6.49μs (13.9% faster)

def test_cosine_int_types():
    """Exercise cosine on perpendicular int32 basis vectors."""
    basis_0 = np.array([1, 0, 0], dtype=np.int32)
    basis_1 = np.array([0, 1, 0], dtype=np.int32)
    # The approx wrapper is only captured for output comparison; nothing is asserted.
    codeflash_output = pytest.approx(cosine(basis_0, basis_1), abs=1e-7) # 19.1μs -> 18.8μs (2.05% faster)

# ----------- EDGE TEST CASES -----------

def test_cosine_zero_vector():
    """Exercise cosine when one operand is the zero vector (expected distance: 1.0)."""
    zero_vec = np.array([0.0, 0.0, 0.0], dtype=np.float32)
    other = np.array([1.0, 2.0, 3.0], dtype=np.float32)
    # The approx wrapper is only captured for output comparison; nothing is asserted.
    codeflash_output = pytest.approx(cosine(zero_vec, other), abs=1e-7) # 14.1μs -> 12.7μs (10.6% faster)

def test_cosine_both_zero_vectors():
    """Exercise cosine when both operands are zero vectors (expected distance: 1.0)."""
    zeros = [0.0, 0.0, 0.0]
    first = np.array(zeros, dtype=np.float32)
    second = np.array(zeros, dtype=np.float32)
    # The approx wrapper is only captured for output comparison; nothing is asserted.
    codeflash_output = pytest.approx(cosine(first, second), abs=1e-7) # 13.8μs -> 12.4μs (11.0% faster)

def test_cosine_float16_precision():
    """Exercise cosine on perpendicular float16 axes with a looser 1e-3 tolerance.

    The vectors are orthogonal, so the distance is expected to be close to 1.0.
    """
    axis_a = np.array([1.0, 0.0], dtype=np.float16)
    axis_b = np.array([0.0, 1.0], dtype=np.float16)
    # The approx wrapper is only captured for output comparison; nothing is asserted.
    codeflash_output = pytest.approx(cosine(axis_a, axis_b), abs=1e-3) # 16.1μs -> 14.6μs (9.88% faster)

def test_cosine_large_values():
    """Exercise cosine on vectors with very large float32 components.

    A reference value is computed directly with NumPy but is not compared
    against the result.
    """
    # NOTE(review): 1e30 squared exceeds the float32 range, so the dot product
    # and norms may overflow -- confirm the intended expectation.
    ascending = np.array([1e30, 2e30, 3e30], dtype=np.float32)
    descending = np.array([3e30, 2e30, 1e30], dtype=np.float32)
    similarity = np.dot(ascending, descending) / (np.linalg.norm(ascending) * np.linalg.norm(descending))
    expected = 1.0 - similarity
    codeflash_output = pytest.approx(cosine(ascending, descending), abs=1e-5) # 11.4μs -> 10.4μs (9.58% faster)

def test_cosine_small_values():
    """Exercise cosine on vectors with very small float32 components.

    A reference value (with a 1e-30 term added to the denominator) is
    computed but is not compared against the result.
    """
    # NOTE(review): products of 1e-30 values underflow float32 -- confirm the
    # intended expectation.
    ascending = np.array([1e-30, 2e-30, 3e-30], dtype=np.float32)
    descending = np.array([3e-30, 2e-30, 1e-30], dtype=np.float32)
    denominator = np.linalg.norm(ascending) * np.linalg.norm(descending) + 1e-30
    expected = 1.0 - np.dot(ascending, descending) / denominator
    codeflash_output = pytest.approx(cosine(ascending, descending), abs=1e-7) # 7.47μs -> 6.53μs (14.4% faster)

def test_cosine_int16_types():
    """Exercise cosine on perpendicular int16 vectors holding the value 32767."""
    axis_a = np.array([32767, 0], dtype=np.int16)
    axis_b = np.array([0, 32767], dtype=np.int16)
    # The approx wrapper is only captured for output comparison; nothing is asserted.
    codeflash_output = pytest.approx(cosine(axis_a, axis_b), abs=1e-7) # 19.0μs -> 17.9μs (6.02% faster)

def test_cosine_float16_zero_vector():
    """Exercise cosine with a float16 zero vector against a non-zero float16 vector."""
    zero_vec = np.array([0.0, 0.0], dtype=np.float16)
    other = np.array([1.0, 1.0], dtype=np.float16)
    # The approx wrapper is only captured for output comparison; nothing is asserted.
    codeflash_output = pytest.approx(cosine(zero_vec, other), abs=1e-3) # 15.9μs -> 14.2μs (11.8% faster)

def test_cosine_nan_vector():
    """Check that a NaN component in an input yields a NaN distance."""
    x = np.array([np.nan, 1.0], dtype=np.float32)
    y = np.array([1.0, 2.0], dtype=np.float32)
    codeflash_output = cosine(x, y); result = codeflash_output # 14.9μs -> 12.9μs (15.7% faster)
    # Previously the result was computed but the stated NaN expectation was never checked.
    assert np.isnan(result)

def test_cosine_inf_vector():
    """Check that an infinite component in an input yields a NaN distance.

    The stated expectation is that the norms become infinite, so the
    similarity ratio is not a number.
    """
    x = np.array([np.inf, 1.0], dtype=np.float32)
    y = np.array([1.0, 2.0], dtype=np.float32)
    codeflash_output = cosine(x, y); result = codeflash_output # 21.4μs -> 19.8μs (8.17% faster)
    # Previously the result was computed but the stated NaN expectation was never checked.
    assert np.isnan(result)

def test_cosine_different_dtypes():
    """Exercise cosine with a float32 and an int32 operand holding equal values.

    The reference value 0.0 is recorded but not compared against the result.
    """
    as_float = np.array([1.0, 2.0, 3.0], dtype=np.float32)
    as_int = np.array([1, 2, 3], dtype=np.int32)
    expected = 0.0
    codeflash_output = pytest.approx(cosine(as_float, as_int), abs=1e-7) # 19.7μs -> 18.4μs (6.58% faster)

# ----------- LARGE SCALE TEST CASES -----------

def test_cosine_large_vector():
    """Exercise cosine on two equal 1000-element float32 vectors of ones.

    Identical vectors are expected to give a distance of 0.0.
    """
    size = 1000
    first = np.ones(size, dtype=np.float32)
    second = np.ones(size, dtype=np.float32)
    # The approx wrapper is only captured for output comparison; nothing is asserted.
    codeflash_output = pytest.approx(cosine(first, second), abs=1e-7) # 13.7μs -> 12.3μs (10.9% faster)

def test_cosine_large_vector_orthogonal():
    """Exercise cosine on 1000-element vectors whose non-zero halves do not overlap.

    The vectors are orthogonal, so the expected distance is 1.0.
    """
    half = 500
    ones_half = np.ones(half, dtype=np.float32)
    zeros_half = np.zeros(half, dtype=np.float32)
    front_loaded = np.concatenate([ones_half, zeros_half])
    back_loaded = np.concatenate([zeros_half, ones_half])
    codeflash_output = pytest.approx(cosine(front_loaded, back_loaded), abs=1e-7) # 13.4μs -> 12.0μs (11.6% faster)

def test_cosine_large_random_vectors():
    """Check that the distance between seeded random vectors lies in [0, 2].

    The generator is seeded, so the two 1000-element float32 inputs are
    deterministic.
    """
    rng = np.random.default_rng(42)
    x = rng.standard_normal(1000).astype(np.float32)
    y = rng.standard_normal(1000).astype(np.float32)
    codeflash_output = cosine(x, y); result = codeflash_output # 13.6μs -> 12.7μs (7.59% faster)
    # Previously the range check was described but never performed.
    assert 0.0 <= result <= 2.0

def test_cosine_large_sparse_vectors():
    """Exercise cosine on 1000-element vectors with one non-zero entry each.

    The non-zero entries sit at different indices (123 and 456), so the
    vectors are orthogonal and the expected distance is 1.0.
    """
    size = 1000
    first = np.zeros(size, dtype=np.float32)
    first[123] = 1.0
    second = np.zeros(size, dtype=np.float32)
    second[456] = 1.0
    codeflash_output = pytest.approx(cosine(first, second), abs=1e-7) # 13.8μs -> 13.4μs (2.82% faster)

def test_cosine_large_scale_performance():
    """Check cosine over 1000 seeded random vector pairs.

    This is not a timing test: each pair of 100-element float32 vectors is
    passed to cosine and the result is checked to lie in [0, 2].
    """
    rng = np.random.default_rng(123)
    for _ in range(1000):
        x = rng.standard_normal(100).astype(np.float32)
        y = rng.standard_normal(100).astype(np.float32)
        codeflash_output = cosine(x, y); result = codeflash_output # 4.02ms -> 3.78ms (6.48% faster)
        # Previously the range check was described but never performed.
        assert 0.0 <= result <= 2.0

# ----------- ERROR HANDLING TEST CASES -----------

def test_cosine_shape_mismatch():
    """Vectors of unequal length are expected to make cosine raise ValueError."""
    three_items = np.array([1.0, 2.0, 3.0], dtype=np.float32)
    two_items = np.array([1.0, 2.0], dtype=np.float32)
    with pytest.raises(ValueError):
        cosine(three_items, two_items) # 8.41μs -> 19.5μs (56.8% slower)
# codeflash_output is used to check that the output of the original code is the same as that of the optimized code.
#------------------------------------------------
from chromadb.utils.distance_functions import cosine

To edit these changes git checkout codeflash/optimize-cosine-mh2jao0v and push.

Codeflash

**Key optimizations:**

- **Avoided duplicate norm computation**: Previously, `np.linalg.norm(x)` and `np.linalg.norm(y)` were called twice (once each). Now, each is computed only once and stored in a local variable.
- **Minimized `.item()`**: The value from the computation is always immediately converted to a Python `float` with `float()`; this is equivalent to `.item()` for 0D numpy arrays and works for scalars, with no loss of behavior but slightly improved performance by avoiding an extra method call and temporary scalar creation.
- **No change to input mutation, function signature, exceptions, or observable output**.
- **Comment preservation**: All comments remain attached to the relevant logic.

This rewrite is safe, robust, and measurably more efficient, especially for high-frequency calls.
@codeflash-ai codeflash-ai bot requested a review from mashraf-222 October 22, 2025 21:57
@codeflash-ai codeflash-ai bot added the ⚡️ codeflash Optimization PR opened by Codeflash AI label Oct 22, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

⚡️ codeflash Optimization PR opened by Codeflash AI

Projects

None yet

Development

Successfully merging this pull request may close these issues.

0 participants