fix: reduce __init__ overhead by 7% (#14689)

AlexsanderHamir · web-flow · commit 59409429d4af · 2025-09-18T17:18:05.000-07:00
* fix: avoid redundant __init__ calls on hot path

Previously, imports on the request hot path caused __init__ to run
excessively for every request. This change ensures initialization
happens once, reducing CPU overhead.

* fix: remove redundant __init__ import

The current implementation no longer requires an import at the top of the function.

* fix: place cached imports in core utils for future reuse

* test: add coverage & remove inline import

A general import-checking tool across all endpoints would be a large PR.
This commit focuses on a smaller, targeted fix for the discussed case.

* added import check to CI
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -1458,6 +1458,7 @@ jobs:
       # - run: python ./tests/documentation_tests/test_general_setting_keys.py
       - run: python ./tests/code_coverage_tests/check_licenses.py
       - run: python ./tests/code_coverage_tests/router_code_coverage.py
+      - run: python ./tests/code_coverage_tests/test_chat_completion_imports.py
       - run: python ./tests/code_coverage_tests/info_log_check.py
       - run: python ./tests/code_coverage_tests/test_ban_set_verbose.py
       - run: python ./tests/code_coverage_tests/code_qa_check_tests.py
diff --git a/litellm/litellm_core_utils/cached_imports.py b/litellm/litellm_core_utils/cached_imports.py
@@ -0,0 +1,56 @@
+"""
+Cached imports module for LiteLLM.
+
+This module provides cached import functionality to avoid repeated imports
+inside functions that are critical to performance.
+"""
+
+from typing import TYPE_CHECKING, Callable, Optional, Type
+
+# Type annotations for cached imports
+if TYPE_CHECKING:
+    from litellm.litellm_core_utils.litellm_logging import Logging
+    from litellm.litellm_core_utils.coroutine_checker import CoroutineChecker
+
+# Global cache variables: None until the matching get_* function fills them (clear_cached_imports resets them)
+_LiteLLMLogging: Optional[Type["Logging"]] = None
+_coroutine_checker: Optional["CoroutineChecker"] = None
+_set_callbacks: Optional[Callable] = None
+
+
+def get_litellm_logging_class() -> Type["Logging"]:
+    """Return the LiteLLM Logging class, importing it on first use only."""
+    global _LiteLLMLogging
+    if _LiteLLMLogging is None:
+        # First call: import lazily and keep the class in the module cache.
+        from litellm.litellm_core_utils.litellm_logging import Logging
+        _LiteLLMLogging = Logging
+    return _LiteLLMLogging
+
+
+def get_coroutine_checker() -> "CoroutineChecker":
+    """Return the coroutine checker instance, importing it on first use only."""
+    global _coroutine_checker
+    if _coroutine_checker is None:
+        # First call: import lazily and keep the instance in the module cache.
+        from litellm.litellm_core_utils.coroutine_checker import coroutine_checker
+        _coroutine_checker = coroutine_checker
+    return _coroutine_checker
+
+
+def get_set_callbacks() -> Callable:
+    """Return the set_callbacks function, importing it on first use only."""
+    global _set_callbacks
+    if _set_callbacks is None:
+        # First call: import lazily and keep the function in the module cache.
+        from litellm.litellm_core_utils.litellm_logging import set_callbacks
+        _set_callbacks = set_callbacks
+    return _set_callbacks
+
+
+def clear_cached_imports() -> None:
+    """Reset every cached import to None so the getters resolve them again."""
+    global _LiteLLMLogging, _coroutine_checker, _set_callbacks
+    # Chained assignment drops all three cached references in one statement;
+    # the next call to each get_* function repopulates its own slot.
+    _LiteLLMLogging = _coroutine_checker = _set_callbacks = None
diff --git a/litellm/proxy/common_utils/http_parsing_utils.py b/litellm/proxy/common_utils/http_parsing_utils.py
@@ -1,4 +1,5 @@
 import json
+import re
 from typing import Any, Dict, List, Optional
 
 import orjson
@@ -51,8 +52,6 @@ async def _read_request_body(request: Optional[Request]) -> Dict:
                     body_str = body.decode("utf-8") if isinstance(body, bytes) else body
 
                     # Replace invalid surrogate pairs
-                    import re
-
                     # This regex finds incomplete surrogate pairs
                     body_str = re.sub(
                         r"[\uD800-\uDBFF](?![\uDC00-\uDFFF])", "", body_str
diff --git a/litellm/utils.py b/litellm/utils.py
@@ -59,6 +59,12 @@
 import litellm.litellm_core_utils.json_validation_rule
 import litellm.llms
 import litellm.llms.gemini
+# Import cached imports utilities
+from litellm.litellm_core_utils.cached_imports import (
+    get_coroutine_checker,
+    get_litellm_logging_class,
+    get_set_callbacks,
+)
 from litellm.caching._internal_lru_cache import lru_cache_wrapper
 from litellm.caching.caching import DualCache
 from litellm.caching.caching_handler import CachingHandlerResponse, LLMCachingHandler
@@ -222,6 +228,7 @@
     get_args,
 )
 
+
 from openai import OpenAIError as OriginalError
 
 from litellm.litellm_core_utils.thread_pool_executor import executor
@@ -521,16 +528,12 @@ def get_dynamic_callbacks(
 
 
 
-from litellm.litellm_core_utils.coroutine_checker import coroutine_checker
 
 
 def function_setup(  # noqa: PLR0915
     original_function: str, rules_obj, start_time, *args, **kwargs
 ):  # just run once to check if user wants to send their data anywhere - PostHog/Sentry/Slack/etc.
     ### NOTICES ###
-    from litellm import Logging as LiteLLMLogging
-    from litellm.litellm_core_utils.litellm_logging import set_callbacks
-
     if litellm.set_verbose is True:
         verbose_logger.warning(
             "`litellm.set_verbose` is deprecated. Please set `os.environ['LITELLM_LOG'] = 'DEBUG'` for debug logs."
@@ -593,12 +596,12 @@ def function_setup(  # noqa: PLR0915
                     + litellm.failure_callback
                 )
             )
-            set_callbacks(callback_list=callback_list, function_id=function_id)
+            get_set_callbacks()(callback_list=callback_list, function_id=function_id)
         ## ASYNC CALLBACKS
         if len(litellm.input_callback) > 0:
             removed_async_items = []
             for index, callback in enumerate(litellm.input_callback):  # type: ignore
-                if coroutine_checker.is_async_callable(callback):
+                if get_coroutine_checker().is_async_callable(callback):
                     litellm._async_input_callback.append(callback)
                     removed_async_items.append(index)
 
@@ -608,7 +611,7 @@ def function_setup(  # noqa: PLR0915
         if len(litellm.success_callback) > 0:
             removed_async_items = []
             for index, callback in enumerate(litellm.success_callback):  # type: ignore
-                if coroutine_checker.is_async_callable(callback):
+                if get_coroutine_checker().is_async_callable(callback):
                     litellm.logging_callback_manager.add_litellm_async_success_callback(
                         callback
                     )
@@ -633,7 +636,7 @@ def function_setup(  # noqa: PLR0915
         if len(litellm.failure_callback) > 0:
             removed_async_items = []
             for index, callback in enumerate(litellm.failure_callback):  # type: ignore
-                if coroutine_checker.is_async_callable(callback):
+                if get_coroutine_checker().is_async_callable(callback):
                     litellm.logging_callback_manager.add_litellm_async_failure_callback(
                         callback
                     )
@@ -666,7 +669,7 @@ def function_setup(  # noqa: PLR0915
             removed_async_items = []
             for index, callback in enumerate(kwargs["success_callback"]):
                 if (
-                    coroutine_checker.is_async_callable(callback)
+                    get_coroutine_checker().is_async_callable(callback)
                     or callback == "dynamodb"
                     or callback == "s3"
                 ):
@@ -790,7 +793,7 @@ def function_setup(  # noqa: PLR0915
             call_type=call_type,
         ):
             stream = True
-        logging_obj = LiteLLMLogging(
+        logging_obj = get_litellm_logging_class()( # Victim for object pool
             model=model,  # type: ignore
             messages=messages,
             stream=stream,
@@ -903,7 +906,7 @@ def client(original_function):  # noqa: PLR0915
     rules_obj = Rules()
 
     def check_coroutine(value) -> bool:
-        return coroutine_checker.is_async_callable(value)
+        return get_coroutine_checker().is_async_callable(value)
 
     async def async_pre_call_deployment_hook(kwargs: Dict[str, Any], call_type: str):
         """
@@ -1597,7 +1600,7 @@ async def wrapper_async(*args, **kwargs):  # noqa: PLR0915
             setattr(e, "timeout", timeout)
             raise e
 
-    is_coroutine = coroutine_checker.is_async_callable(original_function)
+    is_coroutine = get_coroutine_checker().is_async_callable(original_function)
 
     # Return the appropriate wrapper based on the original function type
     if is_coroutine:
diff --git a/tests/code_coverage_tests/test_chat_completion_imports.py b/tests/code_coverage_tests/test_chat_completion_imports.py
@@ -0,0 +1,43 @@
+## Tests that chat_completion endpoint has no imports inside function bodies
+## This is critical for performance optimization in the hot path
+
+import ast
+from pathlib import Path
+
+
+def test_chat_completion_no_imports():
+    """Fail if the proxy's chat_completion endpoint contains an import.
+
+    Parses litellm/proxy/proxy_server.py with ``ast`` (nothing is executed)
+    and reports any import statement inside ``async def chat_completion``.
+    Raises ``Exception`` when at least one such statement is found.
+    """
+    repo_root = Path(__file__).parent.parent.parent
+    proxy_server_path = repo_root / "litellm" / "proxy" / "proxy_server.py"
+    # Explicit encoding so parsing does not depend on the runner's locale.
+    tree = ast.parse(proxy_server_path.read_text(encoding="utf-8"))
+
+    chat_completion_func = None
+    for node in ast.walk(tree):
+        if isinstance(node, ast.AsyncFunctionDef) and node.name == "chat_completion":
+            chat_completion_func = node
+            break
+    assert chat_completion_func is not None, "chat_completion function not found"
+
+    # Line numbers of every import statement nested anywhere in the endpoint.
+    import_violations = [
+        node.lineno
+        for node in ast.walk(chat_completion_func)
+        if isinstance(node, (ast.Import, ast.ImportFrom))
+    ]
+    if import_violations:
+        print(f"Found {len(import_violations)} import violations in chat_completion endpoint:")
+        for line_num in import_violations:
+            print(f"  - Line {line_num}: Import statement found")
+        print("\nchat_completion endpoint should not contain imports for optimal performance.")
+        raise Exception("Import violations found in chat_completion endpoint")
+
+
+if __name__ == "__main__":
+    # CI runs this file with plain `python`; without this call it checks nothing.
+    test_chat_completion_no_imports()