Skip to content

Commit 1cc39e3

Browse files
committed
merge
2 parents 4cb823e + 87fe086 commit 1cc39e3

File tree

2 files changed

+37
-54
lines changed

2 files changed

+37
-54
lines changed

codeflash/api/cfapi.py

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,11 @@
11
from __future__ import annotations
22

3-
import hashlib
43
import json
54
import os
65
import sys
76
from functools import lru_cache
87
from pathlib import Path
9-
from typing import TYPE_CHECKING, Any, Optional, Dict
8+
from typing import TYPE_CHECKING, Any, Dict, Optional
109

1110
import requests
1211
import sentry_sdk
@@ -15,7 +14,6 @@
1514
from codeflash.cli_cmds.console import console, logger
1615
from codeflash.code_utils.env_utils import ensure_codeflash_api_key, get_codeflash_api_key, get_pr_number
1716
from codeflash.code_utils.git_utils import get_repo_owner_and_name
18-
from codeflash.models.models import CodeOptimizationContext
1917
from codeflash.version import __version__
2018

2119
if TYPE_CHECKING:
@@ -200,12 +198,7 @@ def is_function_being_optimized_again(owner: str, repo: str, pr_number: int, cod
200198
response = make_cfapi_request(
201199
"/is-already-optimized",
202200
"POST",
203-
{
204-
"owner": owner,
205-
"repo": repo,
206-
"pr_number": pr_number,
207-
"code_contexts": code_contexts
208-
}
201+
{"owner": owner, "repo": repo, "pr_number": pr_number, "code_contexts": code_contexts},
209202
)
210203
response.raise_for_status()
211204
return response.json()

codeflash/discovery/functions_to_optimize.py

Lines changed: 35 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,13 @@
2222
module_name_from_file_path,
2323
path_belongs_to_site_packages,
2424
)
25+
from codeflash.code_utils.config_consts import REPEAT_OPTIMIZATION_PROBABILITY
2526
from codeflash.code_utils.env_utils import get_pr_number
2627
from codeflash.code_utils.git_utils import get_git_diff, get_repo_owner_and_name
2728
from codeflash.code_utils.time_utils import humanize_runtime
2829
from codeflash.discovery.discover_unit_tests import discover_unit_tests
2930
from codeflash.models.models import FunctionParent
3031
from codeflash.telemetry.posthog_cf import ph
31-
from codeflash.code_utils.config_consts import REPEAT_OPTIMIZATION_PROBABILITY
3232

3333
if TYPE_CHECKING:
3434
from libcst import CSTNode
@@ -460,9 +460,7 @@ def inspect_top_level_functions_or_methods(
460460
)
461461

462462

463-
def check_optimization_status(
464-
functions_by_file: dict[Path, list[FunctionToOptimize]], owner: str, repo: str, pr_number: int
465-
) -> tuple[dict[Path, list[FunctionToOptimize]], int]:
463+
def check_optimization_status(functions_by_file: dict[Path, list[FunctionToOptimize]]) -> list[tuple[str, str]]:
466464
"""Check which functions have already been optimized and filter them out.
467465
468466
This function calls the optimization API to:
@@ -480,7 +478,19 @@ def check_optimization_status(
480478
Tuple of (filtered_functions_dict, remaining_count)
481479
482480
"""
483-
# Build the code_contexts dictionary for the API call
481+
# Check optimization status if repository info is provided
482+
# already_optimized_count = 0
483+
try:
484+
repository = git.Repo(search_parent_directories=True)
485+
owner, repo = get_repo_owner_and_name(repository)
486+
except git.exc.InvalidGitRepositoryError:
487+
logger.warning("No git repository found")
488+
owner, repo = None, None
489+
pr_number = get_pr_number()
490+
491+
if not owner or not repo or pr_number is None:
492+
return []
493+
484494
code_contexts = {}
485495
path_to_function_map = {}
486496

@@ -497,29 +507,13 @@ def check_optimization_status(
497507

498508
try:
499509
result = is_function_being_optimized_again(owner, repo, pr_number, code_contexts)
500-
already_optimized_paths = set(result.get("already_optimized_paths", []))
501-
502-
# Filter out already optimized functions
503-
filtered_functions = defaultdict(list)
504-
remaining_count = 0
505-
506-
for path_key, (file_path, func) in path_to_function_map.items():
507-
if path_key not in already_optimized_paths:
508-
filtered_functions[file_path].append(func)
509-
remaining_count += 1
510-
else:
511-
if random.random() < REPEAT_OPTIMIZATION_PROBABILITY:
512-
logger.info(f"Attempting more optimization on {path_key}")
513-
filtered_functions[file_path].append(func)
514-
remaining_count += 1
515-
516-
return dict(filtered_functions), remaining_count
510+
already_optimized_paths: list[tuple[str, str]] = result.get("already_optimized_paths", [])
511+
return already_optimized_paths
517512

518513
except Exception as e:
519514
logger.warning(f"Failed to check optimization status: {e}")
520515
# If the API call fails, report nothing as already optimized so no function is filtered out
521-
total_count = sum(len(funcs) for funcs in functions_by_file.values())
522-
return functions_by_file, total_count
516+
return []
523517

524518

525519
def filter_functions(
@@ -531,33 +525,25 @@ def filter_functions(
531525
previous_checkpoint_functions: dict[Path, dict[str, Any]] | None = None,
532526
disable_logs: bool = False, # noqa: FBT001, FBT002
533527
) -> tuple[dict[Path, list[FunctionToOptimize]], int]:
528+
filtered_modified_functions: dict[str, list[FunctionToOptimize]] = {}
534529
blocklist_funcs = get_blocklisted_functions()
535-
already_optimized_count = 0
536-
path_based_functions = {Path(k): v for k, v in modified_functions.items() if v}
537-
try:
538-
repository = git.Repo(Path.cwd(), search_parent_directories=True)
539-
owner, repo = get_repo_owner_and_name(repository)
540-
except git.exc.InvalidGitRepositoryError:
541-
logger.warning("No git repository found")
542-
owner, repo = None, None
543-
pr_number = get_pr_number()
544-
if owner and repo and pr_number is not None:
545-
path_based_functions, functions_count = check_optimization_status(path_based_functions, owner, repo, pr_number)
546530
logger.debug(f"Blocklisted functions: {blocklist_funcs}")
547531
# Remove any function that we don't want to optimize
532+
already_optimized_paths = check_optimization_status(modified_functions)
548533

549534
# Ignore files with submodule path, cache the submodule paths
550535
submodule_paths = ignored_submodule_paths(module_root)
551536

552-
filtered_modified_functions: dict[str, list[FunctionToOptimize]] = {}
553537
functions_count: int = 0
554538
test_functions_removed_count: int = 0
555539
non_modules_removed_count: int = 0
556540
site_packages_removed_count: int = 0
557541
ignore_paths_removed_count: int = 0
558542
malformed_paths_count: int = 0
543+
already_optimized_count: int = 0
559544
submodule_ignored_paths_count: int = 0
560545
blocklist_funcs_removed_count: int = 0
546+
already_optimized_paths_removed_count: int = 0
561547
previous_checkpoint_functions_removed_count: int = 0
562548
tests_root_str = str(tests_root)
563549
module_root_str = str(module_root)
@@ -590,6 +576,7 @@ def filter_functions(
590576
except SyntaxError:
591577
malformed_paths_count += 1
592578
continue
579+
593580
if blocklist_funcs:
594581
functions_tmp = []
595582
for function in _functions:
@@ -603,6 +590,17 @@ def filter_functions(
603590
# This function is NOT in blocklist. we can keep it
604591
functions_tmp.append(function)
605592
_functions = functions_tmp
593+
functions_tmp = []
594+
for function in _functions:
595+
if (
596+
function.file_path.name,
597+
function.qualified_name,
598+
) in already_optimized_paths and random.random() > REPEAT_OPTIMIZATION_PROBABILITY:
599+
# This function was already optimized; skip it unless it is randomly selected for a repeat attempt
600+
already_optimized_paths_removed_count += 1
601+
continue
602+
functions_tmp.append(function)
603+
_functions = functions_tmp
606604

607605
if previous_checkpoint_functions:
608606
functions_tmp = []
@@ -616,14 +614,6 @@ def filter_functions(
616614
filtered_modified_functions[file_path] = _functions
617615
functions_count += len(_functions)
618616

619-
# Convert to Path keys for optimization check
620-
621-
622-
# Check optimization status if repository info is provided
623-
624-
initial_count = sum(len(funcs) for funcs in filtered_modified_functions.values())
625-
already_optimized_count = initial_count - functions_count
626-
627617
if not disable_logs:
628618
log_info = {
629619
f"{test_functions_removed_count} test function{'s' if test_functions_removed_count != 1 else ''}": test_functions_removed_count,
@@ -641,7 +631,7 @@ def filter_functions(
641631
logger.info(f"Ignoring: {log_string}")
642632
console.rule()
643633

644-
return path_based_functions, functions_count
634+
return {Path(k): v for k, v in filtered_modified_functions.items() if v}, functions_count
645635

646636

647637
def filter_files_optimized(file_path: Path, tests_root: Path, ignore_paths: list[Path], module_root: Path) -> bool:

0 commit comments

Comments
 (0)