
Commit de8cf40

Merge pull request #14482 from BerriAI/litellm_dev_09_11_2025_p3
Feature - new `litellm_request_debug=true` flag, enables emitting the raw request/response log for a single request
2 parents 663dbc6 + c802c47 commit de8cf40

File tree

5 files changed: +128, -45 lines changed

docs/my-website/docs/proxy/debugging.md

Lines changed: 53 additions & 15 deletions
@@ -11,65 +11,103 @@ The proxy also supports json logs. [See here](#json-logs)
 
 **via cli**
 
-```bash
+```bash showLineNumbers
 $ litellm --debug
 ```
 
 **via env**
 
-```python
+```python showLineNumbers
 os.environ["LITELLM_LOG"] = "INFO"
 ```
 
 ## `detailed debug`
 
 **via cli**
 
-```bash
+```bash showLineNumbers
 $ litellm --detailed_debug
 ```
 
 **via env**
 
-```python
+```python showLineNumbers
 os.environ["LITELLM_LOG"] = "DEBUG"
 ```
 
 ### Debug Logs
 
 Run the proxy with `--detailed_debug` to view detailed debug logs
-```shell
+```shell showLineNumbers
 litellm --config /path/to/config.yaml --detailed_debug
 ```
 
 When making requests you should see the POST request sent by LiteLLM to the LLM on the Terminal output
-```shell
+```shell showLineNumbers
 POST Request Sent from LiteLLM:
 curl -X POST \
 https://api.openai.com/v1/chat/completions \
 -H 'content-type: application/json' -H 'Authorization: Bearer sk-qnWGUIW9****************************************' \
 -d '{"model": "gpt-3.5-turbo", "messages": [{"role": "user", "content": "this is a test request, write a short poem"}]}'
 ```
 
+## Debug single request
+
+Pass in `litellm_request_debug=True` in the request body
+
+```bash showLineNumbers
+curl -L -X POST 'http://0.0.0.0:4000/chat/completions' \
+-H 'Content-Type: application/json' \
+-H 'Authorization: Bearer sk-1234' \
+-d '{
+    "model":"fake-openai-endpoint",
+    "messages": [{"role": "user","content": "How many r in the word strawberry?"}],
+    "litellm_request_debug": true
+}'
+```
+
+This will emit the raw request sent by LiteLLM to the API Provider and raw response received from the API Provider for **just** this request in the logs.
+
+```bash showLineNumbers
+INFO:     Uvicorn running on http://0.0.0.0:4000 (Press CTRL+C to quit)
+20:14:06 - LiteLLM:WARNING: litellm_logging.py:938 -
+
+POST Request Sent from LiteLLM:
+curl -X POST \
+https://exampleopenaiendpoint-production.up.railway.app/chat/completions \
+-H 'Authorization: Be****ey' -H 'Content-Type: application/json' \
+-d '{'model': 'fake', 'messages': [{'role': 'user', 'content': 'How many r in the word strawberry?'}], 'stream': False}'
+
+20:14:06 - LiteLLM:WARNING: litellm_logging.py:1015 - RAW RESPONSE:
+{"id":"chatcmpl-817fc08f0d6c451485d571dab39b26a1","object":"chat.completion","created":1677652288,"model":"gpt-3.5-turbo-0301","system_fingerprint":"fp_44709d6fcb","choices":[{"index":0,"message":{"role":"assistant","content":"\n\nHello there, how may I assist you today?"},"logprobs":null,"finish_reason":"stop"}],"usage":{"prompt_tokens":9,"completion_tokens":12,"total_tokens":21}}
+
+INFO:     127.0.0.1:56155 - "POST /chat/completions HTTP/1.1" 200 OK
+```
+
 ## JSON LOGS
 
 Set `JSON_LOGS="True"` in your env:
 
-```bash
+```bash showLineNumbers
 export JSON_LOGS="True"
 ```
 **OR**
 
 Set `json_logs: true` in your yaml:
 
-```yaml
+```yaml showLineNumbers
 litellm_settings:
   json_logs: true
 ```
 
 Start proxy
 
-```bash
+```bash showLineNumbers
 $ litellm
 ```
 
@@ -80,7 +118,7 @@ The proxy will now all logs in json format.
 Turn off fastapi's default 'INFO' logs
 
 1. Turn on 'json logs'
-```yaml
+```yaml showLineNumbers
 litellm_settings:
   json_logs: true
 ```
@@ -89,20 +127,20 @@ litellm_settings:
 
 Only get logs if an error occurs.
 
-```bash
+```bash showLineNumbers
 LITELLM_LOG="ERROR"
 ```
 
 3. Start proxy
 
-```bash
+```bash showLineNumbers
 $ litellm
 ```
 
 Expected Output:
 
-```bash
+```bash showLineNumbers
 # no info statements
 ```
 
@@ -119,14 +157,14 @@ This can be caused due to all your models hitting rate limit errors, causing the
 How to control this?
 - Adjust the cooldown time
 
-```yaml
+```yaml showLineNumbers
 router_settings:
   cooldown_time: 0 # 👈 KEY CHANGE
 ```
 
 - Disable Cooldowns [NOT RECOMMENDED]
 
-```yaml
+```yaml showLineNumbers
 router_settings:
   disable_cooldowns: True
 ```

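For reference, the same per-request flag can be sent from the OpenAI Python client instead of curl. This is a minimal sketch, not part of the diff: it reuses the base URL, key, and model from the docs example above and relies on the client's `extra_body` parameter to place `litellm_request_debug` in the JSON request body.

```python
# Hedged sketch: equivalent of the curl example above using the OpenAI Python SDK.
# base_url / api_key / model mirror the docs example; adjust to your proxy setup.
from openai import OpenAI

client = OpenAI(base_url="http://0.0.0.0:4000", api_key="sk-1234")

response = client.chat.completions.create(
    model="fake-openai-endpoint",
    messages=[{"role": "user", "content": "How many r in the word strawberry?"}],
    extra_body={"litellm_request_debug": True},  # emit raw request/response logs for this call only
)
print(response.choices[0].message.content)
```
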
litellm/litellm_core_utils/get_litellm_params.py

Lines changed: 2 additions & 0 deletions
@@ -62,6 +62,7 @@ def get_litellm_params(
     use_litellm_proxy: Optional[bool] = None,
     api_version: Optional[str] = None,
     max_retries: Optional[int] = None,
+    litellm_request_debug: Optional[bool] = None,
     **kwargs,
 ) -> dict:
     litellm_params = {
@@ -118,5 +119,6 @@ def get_litellm_params(
         "vertex_credentials": kwargs.get("vertex_credentials"),
         "vertex_project": kwargs.get("vertex_project"),
         "use_litellm_proxy": use_litellm_proxy,
+        "litellm_request_debug": litellm_request_debug,
     }
     return litellm_params

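The change above plumbs the new flag into the `litellm_params` dict that the logging layer later reads. A hedged sketch of how this could look from the Python SDK, assuming `completion()` forwards the keyword into `get_litellm_params()` as the new parameter suggests (the remaining files in this commit, not shown here, would carry that wiring); the model name is illustrative.

```python
# Hedged sketch, not part of this diff: per-request debug from the SDK,
# assuming litellm.completion() passes litellm_request_debug through to
# get_litellm_params() so it lands in litellm_params for this call only.
import litellm

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "write a short poem"}],
    litellm_request_debug=True,  # raw request/response logs for just this call
)
```
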
litellm/litellm_core_utils/litellm_logging.py

Lines changed: 41 additions & 15 deletions
@@ -245,6 +245,7 @@ class Logging(LiteLLMLoggingBaseClass):
     global supabaseClient, promptLayerLogger, weightsBiasesLogger, logfireLogger, capture_exception, add_breadcrumb, lunaryLogger, logfireLogger, prometheusLogger, slack_app
     custom_pricing: bool = False
     stream_options = None
+    litellm_request_debug: bool = False
 
     def __init__(
         self,
@@ -470,6 +471,7 @@ def update_environment_variables(
            **self.litellm_params,
            **scrub_sensitive_keys_in_metadata(litellm_params),
        }
+       self.litellm_request_debug = litellm_params.get("litellm_request_debug", False)
        self.logger_fn = litellm_params.get("logger_fn", None)
        verbose_logger.debug(f"self.optional_params: {self.optional_params}")
 
@@ -907,13 +909,19 @@ def _print_llm_call_debugging_log(
 
        Prints the RAW curl command sent from LiteLLM
        """
-       if _is_debugging_on():
+       if _is_debugging_on() or self.litellm_request_debug:
            if json_logs:
                masked_headers = self._get_masked_headers(headers)
-               verbose_logger.debug(
-                   "POST Request Sent from LiteLLM",
-                   extra={"api_base": {api_base}, **masked_headers},
-               )
+               if self.litellm_request_debug:
+                   verbose_logger.warning(  # .warning ensures this shows up in all environments
+                       "POST Request Sent from LiteLLM",
+                       extra={"api_base": {api_base}, **masked_headers},
+                   )
+               else:
+                   verbose_logger.debug(
+                       "POST Request Sent from LiteLLM",
+                       extra={"api_base": {api_base}, **masked_headers},
+                   )
            else:
                headers = additional_args.get("headers", {})
                if headers is None:
@@ -926,7 +934,12 @@ def _print_llm_call_debugging_log(
                    additional_args=additional_args,
                    data=data,
                )
-               verbose_logger.debug(f"\033[92m{curl_command}\033[0m\n")
+               if self.litellm_request_debug:
+                   verbose_logger.warning(
+                       f"\033[92m{curl_command}\033[0m\n"
+                   )  # .warning ensures this shows up in all environments
+               else:
+                   verbose_logger.debug(f"\033[92m{curl_command}\033[0m\n")
 
    def _get_request_body(self, data: dict) -> str:
        return str(data)
@@ -983,16 +996,23 @@ def post_call(
        self.model_call_details["additional_args"] = additional_args
        self.model_call_details["log_event_type"] = "post_api_call"
 
+       if self.litellm_request_debug:
+           attr = "warning"
+       else:
+           attr = "debug"
+
        if json_logs:
-           verbose_logger.debug(
+           callattr = getattr(verbose_logger, attr)
+           callattr(
                "RAW RESPONSE:\n{}\n\n".format(
                    self.model_call_details.get(
                        "original_response", self.model_call_details
                    )
                ),
            )
        else:
-           print_verbose(
+           callattr = getattr(verbose_logger, attr)
+           callattr(
                "RAW RESPONSE:\n{}\n\n".format(
                    self.model_call_details.get(
                        "original_response", self.model_call_details
@@ -1714,12 +1734,16 @@ def success_handler(  # noqa: PLR0915
                        response_obj=result,
                        start_time=start_time,
                        end_time=end_time,
-                       litellm_call_id=current_call_id
-                       if (
-                           current_call_id := litellm_params.get("litellm_call_id")
-                       )
-                       is not None
-                       else str(uuid.uuid4()),
+                       litellm_call_id=(
+                           current_call_id
+                           if (
+                               current_call_id := litellm_params.get(
+                                   "litellm_call_id"
+                               )
+                           )
+                           is not None
+                           else str(uuid.uuid4())
+                       ),
                        print_verbose=print_verbose,
                    )
                if callback == "wandb" and weightsBiasesLogger is not None:
@@ -3367,6 +3391,7 @@ def _init_custom_logger_compatible_class(  # noqa: PLR0915
        return galileo_logger  # type: ignore
    elif logging_integration == "cloudzero":
        from litellm.integrations.cloudzero.cloudzero import CloudZeroLogger
+
        for callback in _in_memory_loggers:
            if isinstance(callback, CloudZeroLogger):
                return callback  # type: ignore
@@ -3594,6 +3619,7 @@ def get_custom_logger_compatible_class(  # noqa: PLR0915
        return callback
    elif logging_integration == "cloudzero":
        from litellm.integrations.cloudzero.cloudzero import CloudZeroLogger
+
        for callback in _in_memory_loggers:
            if isinstance(callback, CloudZeroLogger):
                return callback
@@ -4504,7 +4530,7 @@ def get_standard_logging_object_payload(
 
 def emit_standard_logging_payload(payload: StandardLoggingPayload):
     if os.getenv("LITELLM_PRINT_STANDARD_LOGGING_PAYLOAD"):
-        print(json.dumps(payload, indent=4)) # noqa
+        print(json.dumps(payload, indent=4))  # noqa
 
 
 def get_standard_logging_metadata(

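The `post_call` hunk above selects the logger method at runtime so the same "RAW RESPONSE" message is emitted at WARNING when per-request debugging is on (visible at default log levels) and at DEBUG otherwise. A small self-contained sketch of that pattern; the logger name and function here are illustrative, not the library's API.

```python
# Illustrative sketch of the runtime log-level selection used in post_call.
import logging

verbose_logger = logging.getLogger("LiteLLM")
logging.basicConfig(level=logging.WARNING)  # DEBUG messages are hidden by default

def log_raw_response(raw_response: str, litellm_request_debug: bool = False) -> None:
    # Escalate to WARNING only when per-request debugging is enabled,
    # so the raw response shows up without restarting with --detailed_debug.
    attr = "warning" if litellm_request_debug else "debug"
    callattr = getattr(verbose_logger, attr)
    callattr("RAW RESPONSE:\n{}\n\n".format(raw_response))

log_raw_response('{"id": "chatcmpl-123", "choices": []}', litellm_request_debug=True)
```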