Skip to content

Commit 2686682

Browse files
committed
Might work?
1 parent 5c0a028 commit 2686682

File tree

3 files changed

+26
-78
lines changed

3 files changed

+26
-78
lines changed

codeflash/code_utils/git_utils.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import sys
66
import tempfile
77
import time
8+
from functools import cache
89
from io import StringIO
910
from pathlib import Path
1011
from typing import TYPE_CHECKING
@@ -79,6 +80,7 @@ def get_git_remotes(repo: Repo) -> list[str]:
7980
return [remote.name for remote in repository.remotes]
8081

8182

83+
@cache
8284
def get_repo_owner_and_name(repo: Repo | None = None, git_remote: str | None = "origin") -> tuple[str, str]:
8385
remote_url = get_remote_url(repo, git_remote) # call only once
8486
remote_url = remote_url.removesuffix(".git") if remote_url.endswith(".git") else remote_url

codeflash/discovery/functions_to_optimize.py

Lines changed: 20 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
module_name_from_file_path,
2323
path_belongs_to_site_packages,
2424
)
25-
from codeflash.code_utils.config_consts import REPEAT_OPTIMIZATION_PROBABILITY
2625
from codeflash.code_utils.env_utils import get_pr_number
2726
from codeflash.code_utils.git_utils import get_git_diff, get_repo_owner_and_name
2827
from codeflash.code_utils.time_utils import humanize_runtime
@@ -34,6 +33,7 @@
3433
from libcst import CSTNode
3534
from libcst.metadata import CodeRange
3635

36+
from codeflash.models.models import CodeOptimizationContext
3737
from codeflash.verification.verification_utils import TestConfig
3838

3939

@@ -127,8 +127,8 @@ class FunctionToOptimize:
127127
function_name: str
128128
file_path: Path
129129
parents: list[FunctionParent] # list[ClassDef | FunctionDef | AsyncFunctionDef]
130-
starting_line: Optional[int] = None
131-
ending_line: Optional[int] = None
130+
starting_line: int | None = None
131+
ending_line: int | None = None
132132

133133
@property
134134
def top_level_parent_name(self) -> str:
@@ -147,47 +147,6 @@ def qualified_name(self) -> str:
147147
def qualified_name_with_modules_from_root(self, project_root_path: Path) -> str:
148148
return f"{module_name_from_file_path(self.file_path, project_root_path)}.{self.qualified_name}"
149149

150-
def get_code_context_hash(self) -> str:
151-
"""Generate a SHA-256 hash representing the code context of this function.
152-
153-
This hash includes the function's code content, file path, and qualified name
154-
to uniquely identify the function for optimization tracking.
155-
"""
156-
try:
157-
with open(self.file_path, encoding="utf-8") as f:
158-
file_content = f.read()
159-
160-
# Extract the function's code content
161-
lines = file_content.splitlines()
162-
print("starting and ending line ", self.starting_line, self.ending_line)
163-
if self.starting_line is not None and self.ending_line is not None:
164-
# Use line numbers if available (1-indexed to 0-indexed)
165-
function_content = "\n".join(lines[self.starting_line - 1 : self.ending_line])
166-
else:
167-
# Fallback: use the entire file content if line numbers aren't available
168-
function_content = file_content
169-
170-
# Create a context string that includes:
171-
# - File path (relative to make it portable)
172-
# - Qualified function name
173-
# - Function code content
174-
context_parts = [
175-
str(self.file_path.name), # Just filename for portability
176-
self.qualified_name,
177-
function_content.strip(),
178-
]
179-
180-
context_string = "\n---\n".join(context_parts)
181-
182-
# Generate SHA-256 hash
183-
return hashlib.sha256(context_string.encode("utf-8")).hexdigest()
184-
185-
except OSError as e:
186-
logger.warning(f"Could not read file {self.file_path} for hashing: {e}")
187-
# Fallback hash using available metadata
188-
fallback_string = f"{self.file_path.name}:{self.qualified_name}"
189-
return hashlib.sha256(fallback_string.encode("utf-8")).hexdigest()
190-
191150

192151
def get_functions_to_optimize(
193152
optimize_all: str | None,
@@ -461,58 +420,55 @@ def inspect_top_level_functions_or_methods(
461420
)
462421

463422

464-
def check_optimization_status(functions_by_file: dict[Path, list[FunctionToOptimize]], project_root_path: Path) -> set[tuple[str, str]]:
423+
def check_optimization_status(function_to_optimize: FunctionToOptimize, code_context: CodeOptimizationContext) -> bool:
465424
"""Check which functions have already been optimized and filter them out.
466425
467426
This function calls the optimization API to:
468427
1. Check which functions are already optimized
469428
2. Log new function hashes to the database
470429
3. Report whether the given function has already been optimized
471430
472-
Args:
473-
functions_by_file: Dictionary mapping file paths to lists of functions
474-
owner: Repository owner
475-
repo: Repository name
476-
pr_number: Pull request number
477-
478431
Returns:
479432
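True if the status check returns any already-optimized entries for this function; False otherwise, including when the repository owner/name or PR number is unavailable or the API call fails.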
480433
481434
"""
482435
# Check optimization status if repository info is provided
483436
# already_optimized_count = 0
484437
try:
485-
repository = git.Repo(search_parent_directories=True)
486-
owner, repo = get_repo_owner_and_name(repository)
438+
owner, repo = get_repo_owner_and_name()
487439
except git.exc.InvalidGitRepositoryError:
488440
logger.warning("No git repository found")
489441
owner, repo = None, None
490442
pr_number = get_pr_number()
491443

492444
if not owner or not repo or pr_number is None:
493-
return []
445+
return False
494446

495447
code_contexts = []
496448

497-
for file_path, functions in functions_by_file.items():
498-
for func in functions:
499-
func_hash = func.get_code_context_hash()
500-
# Use a unique path identifier that includes function info
501-
code_contexts.append({"file_path": Path(file_path).relative_to(project_root_path),
502-
"function_name": func.qualified_name, "code_hash": func_hash})
449+
func_hash = hashlib.sha256(code_context.hashing_code_context.encode("utf-8")).hexdigest()
450+
# Use a unique path identifier that includes function info
451+
452+
code_contexts.append(
453+
{
454+
"file_path": function_to_optimize.file_path,
455+
"function_name": function_to_optimize.qualified_name,
456+
"code_hash": func_hash,
457+
}
458+
)
503459

504460
if not code_contexts:
505-
return set(tuple())
461+
return False
506462

507463
try:
508464
result = is_function_being_optimized_again(owner, repo, pr_number, code_contexts)
509465
already_optimized_paths: list[tuple[str, str]] = result.get("already_optimized_tuples", [])
510-
return set(( project_root_path / Path(path[0]), path[1]) for path in already_optimized_paths)
466+
return len(already_optimized_paths) > 0
511467

512468
except Exception as e:
513469
logger.warning(f"Failed to check optimization status: {e}")
514470
# Treat the function as not already optimized if the API call fails
515-
return set(tuple())
471+
return False
516472

517473

518474
def filter_functions(
@@ -528,8 +484,7 @@ def filter_functions(
528484
blocklist_funcs = get_blocklisted_functions()
529485
logger.debug(f"Blocklisted functions: {blocklist_funcs}")
530486
# Remove any function that we don't want to optimize
531-
already_optimized_paths = check_optimization_status(modified_functions, project_root)
532-
487+
# already_optimized_paths = check_optimization_status(modified_functions, project_root)
533488

534489
# Ignore files with submodule path, cache the submodule paths
535490
submodule_paths = ignored_submodule_paths(module_root)
@@ -543,7 +498,6 @@ def filter_functions(
543498
already_optimized_count: int = 0
544499
submodule_ignored_paths_count: int = 0
545500
blocklist_funcs_removed_count: int = 0
546-
already_optimized_paths_removed_count: int = 0
547501
previous_checkpoint_functions_removed_count: int = 0
548502
tests_root_str = str(tests_root)
549503
module_root_str = str(module_root)
@@ -590,17 +544,6 @@ def filter_functions(
590544
# This function is NOT in blocklist. we can keep it
591545
functions_tmp.append(function)
592546
_functions = functions_tmp
593-
functions_tmp = []
594-
for function in _functions:
595-
if (
596-
function.file_path,
597-
function.qualified_name,
598-
) in already_optimized_paths and random.random() > REPEAT_OPTIMIZATION_PROBABILITY:
599-
# This function is in blocklist, we can skip it with a probability
600-
already_optimized_paths_removed_count += 1
601-
continue
602-
functions_tmp.append(function)
603-
_functions = functions_tmp
604547

605548
if previous_checkpoint_functions:
606549
functions_tmp = []
@@ -625,7 +568,6 @@ def filter_functions(
625568
f"{already_optimized_count} already optimized function{'s' if already_optimized_count != 1 else ''}": already_optimized_count,
626569
f"{blocklist_funcs_removed_count} function{'s' if blocklist_funcs_removed_count != 1 else ''} as previously optimized": blocklist_funcs_removed_count,
627570
f"{previous_checkpoint_functions_removed_count} function{'s' if previous_checkpoint_functions_removed_count != 1 else ''} skipped from checkpoint": previous_checkpoint_functions_removed_count,
628-
f"{already_optimized_paths_removed_count} function{'s' if already_optimized_paths_removed_count != 1 else ''} as previously attempted optimization": already_optimized_paths_removed_count,
629571
}
630572
log_string = "\n".join([k for k, v in log_info.items() if v > 0])
631573
if log_string:

codeflash/optimization/function_optimizer.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353
from codeflash.code_utils.time_utils import humanize_runtime
5454
from codeflash.context import code_context_extractor
5555
from codeflash.context.unused_definition_remover import detect_unused_helper_functions, revert_unused_helper_functions
56+
from codeflash.discovery.functions_to_optimize import check_optimization_status
5657
from codeflash.either import Failure, Success, is_successful
5758
from codeflash.models.ExperimentMetadata import ExperimentMetadata
5859
from codeflash.models.models import (
@@ -151,8 +152,11 @@ def optimize_function(self) -> Result[BestOptimization, str]: # noqa: PLR0911
151152
with helper_function_path.open(encoding="utf8") as f:
152153
helper_code = f.read()
153154
original_helper_code[helper_function_path] = helper_code
155+
154156
if has_any_async_functions(code_context.read_writable_code):
155157
return Failure("Codeflash does not support async functions in the code to optimize.")
158+
if check_optimization_status(self.function_to_optimize, code_context):
159+
return Failure("This function has already been optimized, skipping.")
156160

157161
code_print(code_context.read_writable_code)
158162
generated_test_paths = [

0 commit comments

Comments
 (0)