Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions codeflash/optimization/function_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from rich.tree import Tree

from codeflash.api.aiservice import AiServiceClient, AIServiceRefinerRequest, LocalAiServiceClient
from codeflash.api.cfapi import add_code_context_hash, create_staging, mark_optimization_success
from codeflash.api.cfapi import CFAPI_BASE_URL, add_code_context_hash, create_staging, mark_optimization_success
from codeflash.benchmarking.utils import process_benchmark_data
from codeflash.cli_cmds.console import code_print, console, logger, lsp_log, progress_bar
from codeflash.code_utils import env_utils
Expand Down Expand Up @@ -1475,7 +1475,7 @@ def process_review(
elif staging_review:
response = create_staging(**data)
if response.status_code == 200:
staging_url = f"https://app.codeflash.ai/review-optimizations/{self.function_trace_id[:-4] + exp_type if self.experiment_id else self.function_trace_id}"
staging_url = f"{CFAPI_BASE_URL}/review-optimizations/{self.function_trace_id[:-4] + exp_type if self.experiment_id else self.function_trace_id}"
console.print(
Panel(
f"[bold green]✅ Staging created:[/bold green]\n[link={staging_url}]{staging_url}[/link]",
Expand Down
11 changes: 11 additions & 0 deletions codeflash/result/common_tags.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from __future__ import annotations


def find_common_tags(articles: list[dict[str, list[str]]]) -> set[str]:
    """Return the set of tags that appear in every article.

    Args:
        articles: Article mappings. Each article's tags are read from its
            ``"tags"`` key; an article without that key is treated as having
            no tags.

    Returns:
        The tags shared by all articles. Empty when ``articles`` is empty or
        when no tag is present in every article.

    Note:
        Tags are collected into a set, so they must be hashable.
    """
    if not articles:
        return set()

    common_tags = set(articles[0].get("tags", []))
    for article in articles[1:]:
        # Once nothing is shared, later articles cannot add anything back.
        if not common_tags:
            break
        # Hash-based intersection avoids rescanning the article's tag list
        # once per candidate tag.
        common_tags.intersection_update(article.get("tags", []))
    return common_tags
Comment on lines +8 to +11
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚡️Codeflash found 7,954% (79.54x) speedup for find_common_tags in codeflash/result/common_tags.py

⏱️ Runtime : 577 milliseconds 7.16 milliseconds (best of 93 runs)

📝 Explanation and details

The optimized version achieves a 79x speedup by making three key changes:

1. Using sets instead of list comprehensions: The original code used [tag for tag in common_tags if tag in article.get("tags", [])], which costs O(n×m) per article because each of the n remaining common tags is looked up with a linear scan of the article's m tags. The optimized version uses set.intersection_update(), which hashes each of the article's tags once and therefore runs in roughly O(m) average time per article - significantly faster than the nested scan.

2. Early termination: Added if not common_tags: break to exit the loop as soon as no common tags remain. This prevents unnecessary processing of remaining articles when the result is already determined to be empty.

3. Eliminating final set conversion: The original code maintained a list and converted to a set at the end with return set(common_tags). The optimized version works directly with sets throughout, avoiding the conversion overhead.

The performance gains are most dramatic for large datasets - the line profiler shows the bottleneck line (list comprehension) went from 99.6% of execution time to being eliminated entirely. Test cases with large tag sets see improvements of 5400%+ (like test_large_number_of_tags) and 11000%+ (like large-scale tests), while smaller datasets still benefit with 15-50% improvements. The early termination is particularly effective when articles have no common tags, as seen in the "no common tags" test cases showing 25%+ speedups.

Correctness verification report:

Test Status
⚙️ Existing Unit Tests 2 Passed
🌀 Generated Regression Tests 29 Passed
⏪ Replay Tests 🔘 None Found
🔎 Concolic Coverage Tests 2 Passed
📊 Tests Coverage 100.0%
⚙️ Existing Unit Tests and Runtime
Test File::Test Function Original ⏱️ Optimized ⏱️ Speedup
test_common_tags.py::test_common_tags_1 5.68μs 4.23μs 34.3%✅
🌀 Generated Regression Tests and Runtime
# imports
# function to test
from __future__ import annotations

import pytest  # used for our unit tests
from codeflash.result.common_tags import find_common_tags

# unit tests

def test_single_article():
    # Single article should return its tags
    articles = [{"tags": ["python", "coding", "tutorial"]}]
    codeflash_output = find_common_tags(articles) # 1.63μs -> 1.43μs (14.0% faster)
    # With only one article, every one of its tags is trivially "common"
    assert codeflash_output == {"python", "coding", "tutorial"}
    # Outputs were verified to be equal to the original implementation

def test_multiple_articles_with_common_tags():
    # Multiple articles with common tags should return the common tags
    articles = [
        {"tags": ["python", "coding"]},
        {"tags": ["python", "data"]},
        {"tags": ["python", "machine learning"]}
    ]
    codeflash_output = find_common_tags(articles) # 2.83μs -> 2.46μs (15.5% faster)
    # "python" is the only tag present in all three articles
    assert codeflash_output == {"python"}
    # Outputs were verified to be equal to the original implementation

def test_empty_list_of_articles():
    # Empty list of articles should return an empty set
    articles = []
    codeflash_output = find_common_tags(articles) # 601ns -> 491ns (22.4% faster)
    assert codeflash_output == set()
    # Outputs were verified to be equal to the original implementation

def test_articles_with_no_common_tags():
    # Articles with no common tags should return an empty set
    articles = [
        {"tags": ["python"]},
        {"tags": ["java"]},
        {"tags": ["c++"]}
    ]
    codeflash_output = find_common_tags(articles) # 2.37μs -> 1.89μs (25.4% faster)
    assert codeflash_output == set()
    # Outputs were verified to be equal to the original implementation

def test_articles_with_empty_tag_lists():
    # Articles with some empty tag lists should return an empty set
    articles = [
        {"tags": []},
        {"tags": ["python"]},
        {"tags": ["python", "java"]}
    ]
    codeflash_output = find_common_tags(articles) # 1.97μs -> 1.67μs (17.9% faster)
    # One article without tags means no tag can be shared by all articles
    assert codeflash_output == set()
    # Outputs were verified to be equal to the original implementation

def test_all_articles_with_empty_tag_lists():
    # All articles with empty tag lists should return an empty set
    articles = [
        {"tags": []},
        {"tags": []},
        {"tags": []}
    ]
    codeflash_output = find_common_tags(articles) # 1.92μs -> 1.59μs (20.7% faster)
    assert codeflash_output == set()
    # Outputs were verified to be equal to the original implementation

def test_tags_with_special_characters():
    # Tags with special characters should be handled correctly
    articles = [
        {"tags": ["python!", "coding"]},
        {"tags": ["python!", "data"]}
    ]
    codeflash_output = find_common_tags(articles) # 2.23μs -> 2.06μs (8.28% faster)
    # Punctuation is part of the tag; "python!" matches only itself
    assert codeflash_output == {"python!"}
    # Outputs were verified to be equal to the original implementation

def test_case_sensitivity():
    # Tags with different cases should not be considered the same
    articles = [
        {"tags": ["Python", "coding"]},
        {"tags": ["python", "data"]}
    ]
    codeflash_output = find_common_tags(articles) # 2.07μs -> 1.87μs (10.7% faster)
    # "Python" != "python", and "coding" is missing from the second article
    assert codeflash_output == set()
    # Outputs were verified to be equal to the original implementation

def test_large_number_of_articles():
    # Large number of articles with a common tag should return that tag
    articles = [{"tags": ["common_tag", f"tag{i}"]} for i in range(1000)]
    codeflash_output = find_common_tags(articles) # 229μs -> 154μs (48.2% faster)
    # Each "tag{i}" is unique to one article, so only "common_tag" survives
    assert codeflash_output == {"common_tag"}
    # Outputs were verified to be equal to the original implementation

def test_large_number_of_tags():
    # Large number of tags with some common tags should return the common tags
    articles = [
        {"tags": [f"tag{i}" for i in range(1000)]},
        {"tags": [f"tag{i}" for i in range(500, 1500)]}
    ]
    # The two ranges overlap on tag500..tag999
    expected = {f"tag{i}" for i in range(500, 1000)}
    codeflash_output = find_common_tags(articles) # 4.38ms -> 78.6μs (5474% faster)
    assert codeflash_output == expected
    # Outputs were verified to be equal to the original implementation

def test_mixed_length_of_tag_lists():
    # Articles with mixed length of tag lists should return the common tags
    articles = [
        {"tags": ["python", "coding"]},
        {"tags": ["python"]},
        {"tags": ["python", "coding", "tutorial"]}
    ]
    codeflash_output = find_common_tags(articles) # 2.65μs -> 2.22μs (19.4% faster)
    # "coding" is absent from the second article, leaving only "python"
    assert codeflash_output == {"python"}
    # Outputs were verified to be equal to the original implementation

def test_tags_with_different_data_types():
    # The integer 123 and the string "123" are distinct tags, so only
    # "python" is shared between the two articles
    articles = [
        {"tags": ["python", 123]},
        {"tags": ["python", "123"]}
    ]
    codeflash_output = find_common_tags(articles) # 2.23μs -> 1.97μs (13.2% faster)
    assert codeflash_output == {"python"}
    # Outputs were verified to be equal to the original implementation

def test_performance_with_large_data():
    # Performance with large data should return the common tag
    articles = [{"tags": ["common_tag", f"tag{i}"]} for i in range(10000)]
    codeflash_output = find_common_tags(articles) # 2.27ms -> 1.52ms (48.8% faster)
    assert codeflash_output == {"common_tag"}
    # Outputs were verified to be equal to the original implementation

def test_scalability_with_increasing_tags():
    # Article j carries "common_tag" plus tag0..tag{j-1}; since j starts at 1,
    # "tag0" is present in every article alongside "common_tag"
    articles = [{"tags": ["common_tag"] + [f"tag{i}" for i in range(j)]} for j in range(1, 1001)]
    codeflash_output = find_common_tags(articles) # 392μs -> 259μs (51.2% faster)
    assert codeflash_output == {"common_tag", "tag0"}
    # Outputs were verified to be equal to the original implementation
#------------------------------------------------
# imports
# function to test
from __future__ import annotations

import pytest  # used for our unit tests
from codeflash.result.common_tags import find_common_tags

# unit tests

def test_empty_input_list():
    # Test with an empty list
    codeflash_output = find_common_tags([]) # 561ns -> 551ns (1.81% faster)
    assert codeflash_output == set()
    # Outputs were verified to be equal to the original implementation

def test_single_article():
    # Test with a single article with tags
    codeflash_output = find_common_tags([{"tags": ["python", "coding", "development"]}]) # 1.44μs -> 1.28μs (12.4% faster)
    assert codeflash_output == {"python", "coding", "development"}
    # Test with a single article with no tags
    codeflash_output = find_common_tags([{"tags": []}]) # 591ns -> 510ns (15.9% faster)
    assert codeflash_output == set()
    # Outputs were verified to be equal to the original implementation

def test_multiple_articles_some_common_tags():
    # Test with multiple articles having some common tags
    articles = [
        {"tags": ["python", "coding", "development"]},
        {"tags": ["python", "development", "tutorial"]},
        {"tags": ["python", "development", "guide"]}
    ]
    codeflash_output = find_common_tags(articles) # 2.88μs -> 2.44μs (18.1% faster)
    assert codeflash_output == {"python", "development"}

    articles = [
        {"tags": ["tech", "news"]},
        {"tags": ["tech", "gadgets"]},
        {"tags": ["tech", "reviews"]}
    ]
    codeflash_output = find_common_tags(articles) # 1.57μs -> 1.15μs (36.5% faster)
    assert codeflash_output == {"tech"}
    # Outputs were verified to be equal to the original implementation

def test_multiple_articles_no_common_tags():
    # Test with multiple articles having no common tags
    articles = [
        {"tags": ["python", "coding"]},
        {"tags": ["development", "tutorial"]},
        {"tags": ["guide", "learning"]}
    ]
    codeflash_output = find_common_tags(articles) # 2.29μs -> 2.00μs (14.5% faster)
    assert codeflash_output == set()

    articles = [
        {"tags": ["apple", "banana"]},
        {"tags": ["orange", "grape"]},
        {"tags": ["melon", "kiwi"]}
    ]
    codeflash_output = find_common_tags(articles) # 1.23μs -> 972ns (26.7% faster)
    assert codeflash_output == set()
    # Outputs were verified to be equal to the original implementation

def test_articles_with_duplicate_tags():
    # Test with articles having duplicate tags
    articles = [
        {"tags": ["python", "python", "coding"]},
        {"tags": ["python", "development", "python"]},
        {"tags": ["python", "guide", "python"]}
    ]
    codeflash_output = find_common_tags(articles) # 2.83μs -> 2.41μs (17.0% faster)
    # The result is a set, so repeated tags collapse to a single entry
    assert codeflash_output == {"python"}

    articles = [
        {"tags": ["tech", "tech", "news"]},
        {"tags": ["tech", "tech", "gadgets"]},
        {"tags": ["tech", "tech", "reviews"]}
    ]
    codeflash_output = find_common_tags(articles) # 1.59μs -> 1.17μs (35.8% faster)
    assert codeflash_output == {"tech"}
    # Outputs were verified to be equal to the original implementation

def test_articles_with_mixed_case_tags():
    # Test with articles having mixed case tags
    articles = [
        {"tags": ["Python", "Coding"]},
        {"tags": ["python", "Development"]},
        {"tags": ["PYTHON", "Guide"]}
    ]
    codeflash_output = find_common_tags(articles) # 2.23μs -> 1.90μs (17.4% faster)
    # Comparison is case-sensitive, so no spelling is shared by all three
    assert codeflash_output == set()

    articles = [
        {"tags": ["Tech", "News"]},
        {"tags": ["tech", "Gadgets"]},
        {"tags": ["TECH", "Reviews"]}
    ]
    codeflash_output = find_common_tags(articles) # 1.06μs -> 901ns (17.9% faster)
    assert codeflash_output == set()
    # Outputs were verified to be equal to the original implementation

def test_articles_with_non_string_tags():
    # Test with articles having non-string tags
    articles = [
        {"tags": ["python", 123, "coding"]},
        {"tags": ["python", "development", 123]},
        {"tags": ["python", "guide", 123]}
    ]
    codeflash_output = find_common_tags(articles) # 2.85μs -> 2.52μs (13.1% faster)
    # Non-string tags are compared by equality just like strings
    assert codeflash_output == {"python", 123}

    articles = [
        {"tags": [None, "news"]},
        {"tags": ["tech", None]},
        {"tags": [None, "reviews"]}
    ]
    codeflash_output = find_common_tags(articles) # 1.62μs -> 1.20μs (35.0% faster)
    assert codeflash_output == {None}
    # Outputs were verified to be equal to the original implementation

def test_large_scale_test_cases():
    # Test with large scale input where all tags should be common
    articles = [
        {"tags": ["tag" + str(i) for i in range(1000)]} for _ in range(100)
    ]
    expected_output = {"tag" + str(i) for i in range(1000)}
    codeflash_output = find_common_tags(articles) # 380ms -> 3.44ms (10974% faster)
    assert codeflash_output == expected_output

    # Test with large scale input where no tags should be common
    articles = [
        {"tags": ["tag" + str(i) for i in range(1000)]} for _ in range(50)
    ] + [{"tags": ["unique_tag"]}]
    codeflash_output = find_common_tags(articles) # 188ms -> 1.66ms (11249% faster)
    # The final article shares nothing with the others
    assert codeflash_output == set()
    # Outputs were verified to be equal to the original implementation
#------------------------------------------------
from codeflash.result.common_tags import find_common_tags

def test_find_common_tags():
    # Articles without a "tags" key are treated as having no tags
    assert find_common_tags([{}, {}]) == set()

def test_find_common_tags_2():
    # No articles at all yields an empty set
    assert find_common_tags([]) == set()
🔎 Concolic Coverage Tests and Runtime
Test File::Test Function Original ⏱️ Optimized ⏱️ Speedup
codeflash_concolic_g9hfh7kd/tmp2gmm179f/test_concolic_coverage.py::test_find_common_tags 1.96μs 1.80μs 8.93%✅
codeflash_concolic_g9hfh7kd/tmp2gmm179f/test_concolic_coverage.py::test_find_common_tags_2 671ns 501ns 33.9%✅

To test or edit this optimization locally git merge codeflash/optimize-pr827-2025-10-16T18.41.45

Suggested change
common_tags = articles[0].get("tags", [])
for article in articles[1:]:
common_tags = [tag for tag in common_tags if tag in article.get("tags", [])]
return set(common_tags)
common_tags = set(articles[0].get("tags", []))
for article in articles[1:]:
common_tags.intersection_update(article.get("tags", []))
if not common_tags:
break
return common_tags

22 changes: 22 additions & 0 deletions tests/test_common_tags.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from codeflash.result.common_tags import find_common_tags


def test_common_tags_1() -> None:
    """Check that only tags present in every article are returned."""
    shared = {"Python", "AI"}

    three_articles = [
        {"title": "Article 1", "tags": ["Python", "AI", "ML"]},
        {"title": "Article 2", "tags": ["Python", "Data Science", "AI"]},
        {"title": "Article 3", "tags": ["Python", "AI", "Big Data"]},
    ]
    assert find_common_tags(three_articles) == shared

    # A fourth article repeating the first one's tags leaves the result as is.
    four_articles = [
        *three_articles,
        {"title": "Article 4", "tags": ["Python", "AI", "ML"]},
    ]
    assert find_common_tags(four_articles) == shared
Loading