End-to-end test that proves PicklePatcher works. The example is a socket (which is unpicklable) that the traced function either uses or leaves unused.

alvin-r · alvin-r · commit 3158f9cc1cf9 · 2025-04-10T21:43:56.000-04:00
diff --git a/code_to_optimize/bubble_sort_picklepatch_test_unused_socket.py b/code_to_optimize/bubble_sort_picklepatch_test_unused_socket.py
@@ -0,0 +1,18 @@
+
+from codeflash.benchmarking.codeflash_trace import codeflash_trace
+
+
+@codeflash_trace
+def bubble_sort_with_unused_socket(data_container):
+    # Only the 'numbers' entry is read; the 'socket' entry is never looked up.
+    # The list is copied first, so the caller's list is not the one being sorted.
+    values = data_container.get('numbers', []).copy()
+    return sorted(values)
+
+@codeflash_trace
+def bubble_sort_with_used_socket(data_container):
+    # Copy the list to sort, then write a message to the socket held in the container
+    numbers = data_container.get('numbers', []).copy()
+    sock = data_container.get('socket')
+    sock.send(b"Hello from the optimized function!")  # send() accepts bytes-like data only
+    return sorted(numbers)
diff --git a/code_to_optimize/bubble_sort_picklepatch_test_used_socket.py b/code_to_optimize/bubble_sort_picklepatch_test_used_socket.py
@@ -1,38 +1,6 @@
-def bubble_sort_with_unused_socket(data_container):
-    """
-    Performs a bubble sort on a list within the data_container. The data container has the following schema:
-    - 'numbers' (list): The list to be sorted.
-    - 'socket' (socket): A socket
-
-    Args:
-        data_container: A dictionary with at least 'numbers' (list) and 'socket' keys
-
-    Returns:
-        list: The sorted list of numbers
-    """
-    # Extract the list to sort, leaving the socket untouched
-    numbers = data_container.get('numbers', []).copy()
-
-    # Classic bubble sort implementation
-    n = len(numbers)
-    for i in range(n):
-        # Flag to optimize by detecting if no swaps occurred
-        swapped = False
-
-        # Last i elements are already in place
-        for j in range(0, n - i - 1):
-            # Swap if the element is greater than the next element
-            if numbers[j] > numbers[j + 1]:
-                numbers[j], numbers[j + 1] = numbers[j + 1], numbers[j]
-                swapped = True
-
-        # If no swapping occurred in this pass, the list is sorted
-        if not swapped:
-            break
-
-    return numbers
-
+from codeflash.benchmarking.codeflash_trace import codeflash_trace
 
+@codeflash_trace
 def bubble_sort_with_used_socket(data_container):
     """
     Performs a bubble sort on a list within the data_container. The data container has the following schema:
diff --git a/code_to_optimize/tests/pytest/benchmarks_socket_test/test_socket.py b/code_to_optimize/tests/pytest/benchmarks_socket_test/test_socket.py
@@ -0,0 +1,20 @@
+import socket
+
+from code_to_optimize.bubble_sort_picklepatch_test_unused_socket import bubble_sort_with_unused_socket
+from code_to_optimize.bubble_sort_picklepatch_test_used_socket import bubble_sort_with_used_socket
+
+def test_socket_picklepatch(benchmark):
+    """Benchmark a function whose input dict carries a socket next to the numbers."""
+    s1, s2 = socket.socketpair()
+    with s1, s2:  # close both ends of the pair even if the benchmark raises
+        numbers = list(reversed(range(500)))
+        data = {"numbers": numbers, "socket": s1}
+        benchmark(bubble_sort_with_unused_socket, data)
+
+def test_used_socket_picklepatch(benchmark):
+    """Benchmark a function that is handed a live socket in its input dict."""
+    s1, s2 = socket.socketpair()
+    with s1, s2:  # close both ends of the pair even if the benchmark raises
+        numbers = list(reversed(range(500)))
+        data = {"numbers": numbers, "socket": s1}
+        benchmark(bubble_sort_with_used_socket, data)
diff --git a/code_to_optimize/tests/pytest/test_bubble_sort_picklepatch.py b/code_to_optimize/tests/pytest/test_bubble_sort_picklepatch.py
diff --git a/codeflash/benchmarking/codeflash_trace.py b/codeflash/benchmarking/codeflash_trace.py
@@ -2,12 +2,11 @@
 import os
 import pickle
 import sqlite3
-import sys
 import threading
 import time
 from typing import Callable
 
-import dill
+from codeflash.picklepatch.pickle_patcher import PicklePatcher
 
 
 class CodeflashTrace:
@@ -147,34 +146,20 @@ def wrapper(*args, **kwargs):
                 return result
 
             try:
-                original_recursion_limit = sys.getrecursionlimit()
-                sys.setrecursionlimit(10000)
-                # args = dict(args.items())
-                # if class_name and func.__name__ == "__init__" and "self" in args:
-                #     del args["self"]
                 # Pickle the arguments
-                pickled_args = pickle.dumps(args, protocol=pickle.HIGHEST_PROTOCOL)
-                pickled_kwargs = pickle.dumps(kwargs, protocol=pickle.HIGHEST_PROTOCOL)
-                sys.setrecursionlimit(original_recursion_limit)
-            except (TypeError, pickle.PicklingError, AttributeError, RecursionError, OSError):
-                # Retry with dill if pickle fails. It's slower but more comprehensive
-                try:
-                    pickled_args = dill.dumps(args, protocol=pickle.HIGHEST_PROTOCOL)
-                    pickled_kwargs = dill.dumps(kwargs, protocol=pickle.HIGHEST_PROTOCOL)
-                    sys.setrecursionlimit(original_recursion_limit)
-
-                except (TypeError, dill.PicklingError, AttributeError, RecursionError, OSError) as e:
-                    print(f"Error pickling arguments for function {func.__name__}: {e}")
-                    # Add to the list of function calls without pickled args. Used for timing info only
-                    self._thread_local.active_functions.remove(func_id)
-                    overhead_time = time.thread_time_ns() - end_time
-                    self.function_calls_data.append(
-                        (func.__name__, class_name, func.__module__, func.__code__.co_filename,
-                         benchmark_function_name, benchmark_module_path, benchmark_line_number, execution_time,
-                         overhead_time, None, None)
-                    )
-                    return result
-
+                pickled_args = PicklePatcher.dumps(args, protocol=pickle.HIGHEST_PROTOCOL)
+                pickled_kwargs = PicklePatcher.dumps(kwargs, protocol=pickle.HIGHEST_PROTOCOL)
+            except Exception as e:
+                print(f"Error pickling arguments for function {func.__name__}: {e}")
+                # Add to the list of function calls without pickled args. Used for timing info only
+                self._thread_local.active_functions.remove(func_id)
+                overhead_time = time.thread_time_ns() - end_time
+                self.function_calls_data.append(
+                    (func.__name__, class_name, func.__module__, func.__code__.co_filename,
+                     benchmark_function_name, benchmark_module_path, benchmark_line_number, execution_time,
+                     overhead_time, None, None)
+                )
+                return result
             # Flush to database every 1000 calls
             if len(self.function_calls_data) > 1000:
                 self.write_function_timings()
diff --git a/codeflash/benchmarking/plugin/plugin.py b/codeflash/benchmarking/plugin/plugin.py
@@ -175,7 +175,6 @@ def get_benchmark_timings(trace_path: Path) -> dict[BenchmarkKey, int]:
                 benchmark_key = BenchmarkKey(module_path=benchmark_file, function_name=benchmark_func)
                 # Subtract overhead from total time
                 overhead = overhead_by_benchmark.get(benchmark_key, 0)
-                print("benchmark_func:", benchmark_func, "Total time:", time_ns, "Overhead:", overhead, "Result:", time_ns - overhead)
                 result[benchmark_key] = time_ns - overhead
 
         finally:
@@ -267,9 +266,9 @@ def _run_benchmark(self, func, *args, **kwargs):
             os.environ["CODEFLASH_BENCHMARK_LINE_NUMBER"] = str(line_number)
             os.environ["CODEFLASH_BENCHMARKING"] = "True"
             # Run the function
-            start = time.thread_time_ns()
+            start = time.time_ns()
             result = func(*args, **kwargs)
-            end = time.thread_time_ns()
+            end = time.time_ns()
             # Reset the environment variable
             os.environ["CODEFLASH_BENCHMARKING"] = "False"
 
diff --git a/codeflash/benchmarking/replay_test.py b/codeflash/benchmarking/replay_test.py
@@ -62,7 +62,7 @@ def create_trace_replay_test_code(
     assert test_framework in ["pytest", "unittest"]
 
     # Create Imports
-    imports = f"""import dill as pickle 
+    imports = f"""from codeflash.picklepatch.pickle_patcher import PicklePatcher as pickle
 {"import unittest" if test_framework == "unittest" else ""}
 from codeflash.benchmarking.replay_test import get_next_arg_and_return
 """
diff --git a/codeflash/models/models.py b/codeflash/models/models.py
@@ -16,7 +16,7 @@
 from enum import Enum, IntEnum
 from pathlib import Path
 from re import Pattern
-from typing import Annotated, Any, Optional, Union, cast
+from typing import Annotated, Optional, cast
 
 from jedi.api.classes import Name
 from pydantic import AfterValidator, BaseModel, ConfigDict, Field
@@ -362,6 +362,7 @@ class FunctionCoverage:
 class TestingMode(enum.Enum):
     BEHAVIOR = "behavior"
     PERFORMANCE = "performance"
+    LINE_PROFILE = "line_profile"
 
 
 class VerificationType(str, Enum):
@@ -533,7 +534,7 @@ def report_to_tree(report: dict[TestType, dict[str, int]], title: str) -> Tree:
             tree.add(
                 f"{test_type.to_name()} - Passed: {report[test_type]['passed']}, Failed: {report[test_type]['failed']}"
             )
-        return
+        return tree
 
     def usable_runtime_data_by_test_case(self) -> dict[InvocationId, list[int]]:
 
@@ -606,4 +607,4 @@ def __eq__(self, other: object) -> bool:
                 sys.setrecursionlimit(original_recursion_limit)
                 return False
         sys.setrecursionlimit(original_recursion_limit)
-        return True
+        return True
diff --git a/codeflash/picklepatch/pickle_placeholder.py b/codeflash/picklepatch/pickle_placeholder.py
@@ -1,3 +1,8 @@
+class PicklePlaceholderAccessError(Exception):
+    """Raised by PicklePlaceholder's __getattr__, __setattr__ and __call__ when the placeholder is used."""
+
+
+
 class PicklePlaceholder:
     """A placeholder for an object that couldn't be pickled.
 
@@ -22,22 +27,22 @@ def __init__(self, obj_type, obj_str, error_msg, path=None):
         self.__dict__["path"] = path if path is not None else []
 
     def __getattr__(self, name):
-        """Raise an error when any attribute is accessed."""
+        """Raise a custom error when any attribute is accessed."""
         path_str = ".".join(self.__dict__["path"]) if self.__dict__["path"] else "root object"
-        raise AttributeError(
-            f"Cannot access attribute '{name}' on unpicklable object at {path_str}. "
+        raise PicklePlaceholderAccessError(
+            f"Attempt to access unpickleable object: Cannot access attribute '{name}' on unpicklable object at {path_str}. "
             f"Original type: {self.__dict__['obj_type']}. Error: {self.__dict__['error_msg']}"
         )
 
     def __setattr__(self, name, value):
         """Prevent setting attributes."""
-        self.__getattr__(name)  # This will raise an AttributeError
+        self.__getattr__(name)  # This will raise our custom error
 
     def __call__(self, *args, **kwargs):
-        """Raise an error when the object is called."""
+        """Raise a custom error when the object is called."""
         path_str = ".".join(self.__dict__["path"]) if self.__dict__["path"] else "root object"
-        raise TypeError(
-            f"Cannot call unpicklable object at {path_str}. "
+        raise PicklePlaceholderAccessError(
+            f"Attempt to access unpickleable object: Cannot call unpicklable object at {path_str}. "
             f"Original type: {self.__dict__['obj_type']}. Error: {self.__dict__['error_msg']}"
         )
 
diff --git a/codeflash/verification/comparator.py b/codeflash/verification/comparator.py
@@ -10,6 +10,7 @@
 import sentry_sdk
 
 from codeflash.cli_cmds.console import logger
+from codeflash.picklepatch.pickle_placeholder import PicklePlaceholderAccessError
 
 try:
     import numpy as np
@@ -64,7 +65,11 @@ def comparator(orig: Any, new: Any, superset_obj=False) -> bool:
             if len(orig) != len(new):
                 return False
             return all(comparator(elem1, elem2, superset_obj) for elem1, elem2 in zip(orig, new))
-
+        if isinstance(orig, PicklePlaceholderAccessError) or isinstance(new, PicklePlaceholderAccessError):
+            # If this error was raised, there was an attempt to access the PicklePlaceholder, which represents an unpickleable object.
+            # The test results should be rejected as the behavior of the unpickleable object is unknown.
+            logger.debug("Unable to verify behavior of unpickleable object in replay test")
+            return False
         if isinstance(
             orig,
             (
diff --git a/codeflash/verification/parse_test_output.py b/codeflash/verification/parse_test_output.py
@@ -8,6 +8,7 @@
 from pathlib import Path
 from typing import TYPE_CHECKING
 
+import dill as pickle
 from junitparser.xunit2 import JUnitXml
 from lxml.etree import XMLParser, parse
 
@@ -20,7 +21,6 @@
 )
 from codeflash.discovery.discover_unit_tests import discover_parameters_unittest
 from codeflash.models.models import FunctionTestInvocation, InvocationId, TestResults, TestType, VerificationType
-from codeflash.picklepatch.pickle_patcher import PicklePatcher
 from codeflash.verification.coverage_utils import CoverageUtils
 
 if TYPE_CHECKING:
@@ -75,7 +75,7 @@ def parse_test_return_values_bin(file_location: Path, test_files: TestFiles, tes
 
                 test_type = test_files.get_test_type_by_instrumented_file_path(test_file_path)
                 try:
-                    test_pickle = PicklePatcher.loads(test_pickle_bin) if loop_index == 1 else None
+                    test_pickle = pickle.loads(test_pickle_bin) if loop_index == 1 else None
                 except Exception as e:
                     if DEBUG_MODE:
                         logger.exception(f"Failed to load pickle file for {encoded_test_name} Exception: {e}")
@@ -133,7 +133,7 @@ def parse_sqlite_test_results(sqlite_file_path: Path, test_files: TestFiles, tes
                 # TODO : this is because sqlite writes original file module path. Should make it consistent
                 test_type = test_files.get_test_type_by_original_file_path(test_file_path)
             try:
-                ret_val = (PicklePatcher.loads(val[7]) if loop_index == 1 else None,)
+                ret_val = (pickle.loads(val[7]) if loop_index == 1 else None,)
             except Exception:
                 continue
             test_results.add(
diff --git a/tests/test_pickle_patcher.py b/tests/test_pickle_patcher.py
diff --git a/tests/test_trace_benchmarks.py b/tests/test_trace_benchmarks.py