 from .llms.custom_llm import CustomLLM, custom_chat_llm_router
 from .llms.databricks.embed.handler import DatabricksEmbeddingHandler
 from .llms.deprecated_providers import aleph_alpha, palm
+from .llms.gemini.common_utils import get_api_key_from_env
 from .llms.groq.chat.handler import GroqChatCompletion
 from .llms.heroku.chat.transformation import HerokuChatConfig
-from .llms.gemini.common_utils import get_api_key_from_env
 from .llms.huggingface.embedding.handler import HuggingFaceEmbedding
 from .llms.nlp_cloud.chat.handler import completion as nlp_cloud_chat_completion
 from .llms.oci.chat.transformation import OCIChatConfig
@@ -358,7 +358,9 @@ async def acompletion(
     logprobs: Optional[bool] = None,
     top_logprobs: Optional[int] = None,
     deployment_id=None,
-    reasoning_effort: Optional[Literal["none", "minimal", "low", "medium", "high", "default"]] = None,
+    reasoning_effort: Optional[
+        Literal["none", "minimal", "low", "medium", "high", "default"]
+    ] = None,
     safety_identifier: Optional[str] = None,
     # set api_base, api_version, api_key
     base_url: Optional[str] = None,
@@ -504,7 +506,9 @@ async def acompletion(
     }
     if custom_llm_provider is None:
         _, custom_llm_provider, _, _ = get_llm_provider(
-            model=model, custom_llm_provider=custom_llm_provider, api_base=completion_kwargs.get("base_url", None)
+            model=model,
+            custom_llm_provider=custom_llm_provider,
+            api_base=completion_kwargs.get("base_url", None),
         )

     fallbacks = fallbacks or litellm.model_fallbacks
@@ -899,7 +903,9 @@ def completion( # type: ignore # noqa: PLR0915
     logit_bias: Optional[dict] = None,
     user: Optional[str] = None,
     # openai v1.0+ new params
-    reasoning_effort: Optional[Literal["none", "minimal", "low", "medium", "high", "default"]] = None,
+    reasoning_effort: Optional[
+        Literal["none", "minimal", "low", "medium", "high", "default"]
+    ] = None,
     response_format: Optional[Union[dict, Type[BaseModel]]] = None,
     seed: Optional[int] = None,
     tools: Optional[List] = None,
@@ -1116,10 +1122,12 @@ def completion( # type: ignore # noqa: PLR0915
         )

     if provider_specific_header is not None:
-        headers.update(ProviderSpecificHeaderUtils.get_provider_specific_headers(
-            provider_specific_header=provider_specific_header,
-            custom_llm_provider=custom_llm_provider,
-        ))
+        headers.update(
+            ProviderSpecificHeaderUtils.get_provider_specific_headers(
+                provider_specific_header=provider_specific_header,
+                custom_llm_provider=custom_llm_provider,
+            )
+        )

     if model_response is not None and hasattr(model_response, "_hidden_params"):
         model_response._hidden_params["custom_llm_provider"] = custom_llm_provider
@@ -1325,6 +1333,7 @@ def completion( # type: ignore # noqa: PLR0915
         azure_scope=kwargs.get("azure_scope"),
         max_retries=max_retries,
         timeout=timeout,
+        litellm_request_debug=kwargs.get("litellm_request_debug", False),
     )
     cast(LiteLLMLoggingObj, logging).update_environment_variables(
         model=model,
@@ -2712,9 +2721,7 @@ def completion( # type: ignore # noqa: PLR0915
         )

         api_key = (
-            api_key
-            or litellm.api_key
-            or get_secret("VERCEL_AI_GATEWAY_API_KEY")
+            api_key or litellm.api_key or get_secret("VERCEL_AI_GATEWAY_API_KEY")
         )

         vercel_site_url = get_secret("VERCEL_SITE_URL") or "https://litellm.ai"
@@ -2730,7 +2737,7 @@ def completion( # type: ignore # noqa: PLR0915
             vercel_headers.update(_headers)

         headers = vercel_headers
-
+
         ## Load Config
         config = litellm.VercelAIGatewayConfig.get_config()
         for k, v in config.items():
@@ -3712,7 +3719,9 @@ async def aembedding(*args, **kwargs) -> EmbeddingResponse:
     func_with_context = partial(ctx.run, func)

     _, custom_llm_provider, _, _ = get_llm_provider(
-        model=model, custom_llm_provider=custom_llm_provider, api_base=kwargs.get("api_base", None)
+        model=model,
+        custom_llm_provider=custom_llm_provider,
+        api_base=kwargs.get("api_base", None),
     )

     # Await normally
@@ -5780,7 +5789,14 @@ async def ahealth_check(
             input=input or ["test"],
         ),
         "audio_speech": lambda: litellm.aspeech(
-            **{**_filter_model_params(model_params), **({"voice": "alloy"} if "voice" not in _filter_model_params(model_params) else {})},
+            **{
+                **_filter_model_params(model_params),
+                **(
+                    {"voice": "alloy"}
+                    if "voice" not in _filter_model_params(model_params)
+                    else {}
+                ),
+            },
             input=prompt or "test",
         ),
         "audio_transcription": lambda: litellm.atranscription(