
Commit 8352201

feat(langsmith.py): add per request sampling_rate support
Allows setting the Langsmith sampling rate per team / per key. Closes LIT-879
1 parent e5c1d09 commit 8352201

4 files changed: +66, -69 lines changed
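For context before the diffs: a hedged sketch of what per-request sampling enables. Assumption: `langsmith_sampling_rate` is forwarded into `standard_callback_dynamic_params` (which the langsmith.py diff below reads); the exact request-level plumbing is not shown in this commit and may differ.

# Hypothetical usage sketch -- assumes `langsmith_sampling_rate` is accepted
# as a per-request dynamic callback param; names here are illustrative.
import litellm

litellm.success_callback = ["langsmith"]

response = litellm.completion(
    model="gpt-4o-mini",  # any configured model; illustrative only
    messages=[{"role": "user", "content": "hello"}],
    langsmith_sampling_rate=0.1,  # log roughly 10% of these requests
)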

litellm/integrations/langsmith.py

Lines changed: 28 additions & 20 deletions
@@ -78,26 +78,14 @@ def get_credentials_from_env(
         langsmith_base_url: Optional[str] = None,
     ) -> LangsmithCredentialsObject:
         _credentials_api_key = langsmith_api_key or os.getenv("LANGSMITH_API_KEY")
-        if _credentials_api_key is None:
-            raise Exception(
-                "Invalid Langsmith API Key given. _credentials_api_key=None."
-            )
         _credentials_project = (
             langsmith_project or os.getenv("LANGSMITH_PROJECT") or "litellm-completion"
         )
-        if _credentials_project is None:
-            raise Exception(
-                "Invalid Langsmith API Key given. _credentials_project=None."
-            )
         _credentials_base_url = (
             langsmith_base_url
             or os.getenv("LANGSMITH_BASE_URL")
             or "https://api.smith.langchain.com"
         )
-        if _credentials_base_url is None:
-            raise Exception(
-                "Invalid Langsmith API Key given. _credentials_base_url=None."
-            )
 
         return LangsmithCredentialsObject(
             LANGSMITH_API_KEY=_credentials_api_key,
@@ -202,12 +190,7 @@ def _prepare_log_data(
 
     def log_success_event(self, kwargs, response_obj, start_time, end_time):
         try:
-            sampling_rate = (
-                float(os.getenv("LANGSMITH_SAMPLING_RATE"))  # type: ignore
-                if os.getenv("LANGSMITH_SAMPLING_RATE") is not None
-                and os.getenv("LANGSMITH_SAMPLING_RATE").strip().isdigit()  # type: ignore
-                else 1.0
-            )
+            sampling_rate = self._get_sampling_rate_to_use_for_request(kwargs=kwargs)
             random_sample = random.random()
             if random_sample > sampling_rate:
                 verbose_logger.info(
@@ -221,6 +204,7 @@ def log_success_event(self, kwargs, response_obj, start_time, end_time):
                 kwargs,
                 response_obj,
             )
+
             credentials = self._get_credentials_to_use_for_request(kwargs=kwargs)
             data = self._prepare_log_data(
                 kwargs=kwargs,
@@ -247,7 +231,7 @@ def log_success_event(self, kwargs, response_obj, start_time, end_time):
 
     async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
         try:
-            sampling_rate = self.sampling_rate
+            sampling_rate = self._get_sampling_rate_to_use_for_request(kwargs=kwargs)
             random_sample = random.random()
             if random_sample > sampling_rate:
                 verbose_logger.info(
@@ -288,7 +272,7 @@ async def async_log_success_event(self, kwargs, response_obj, start_time, end_ti
             )
 
     async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
-        sampling_rate = self.sampling_rate
+        sampling_rate = self._get_sampling_rate_to_use_for_request(kwargs=kwargs)
         random_sample = random.random()
         if random_sample > sampling_rate:
             verbose_logger.info(
@@ -419,6 +403,17 @@ def _group_batches_by_credentials(self) -> Dict[CredentialsKey, BatchGroup]:
 
         for queue_object in self.log_queue:
             credentials = queue_object["credentials"]
+            # if credential missing, skip - log warning
+            if (
+                credentials["LANGSMITH_API_KEY"] is None
+                or credentials["LANGSMITH_PROJECT"] is None
+            ):
+                verbose_logger.warning(
+                    "Langsmith Logging - credentials missing - api_key: %s, project: %s",
+                    credentials["LANGSMITH_API_KEY"],
+                    credentials["LANGSMITH_PROJECT"],
+                )
+                continue
             key = CredentialsKey(
                 api_key=credentials["LANGSMITH_API_KEY"],
                 project=credentials["LANGSMITH_PROJECT"],
@@ -434,6 +429,19 @@ def _group_batches_by_credentials(self) -> Dict[CredentialsKey, BatchGroup]:
 
         return log_queue_by_credentials
 
+    def _get_sampling_rate_to_use_for_request(self, kwargs: Dict[str, Any]) -> float:
+        standard_callback_dynamic_params: Optional[StandardCallbackDynamicParams] = (
+            kwargs.get("standard_callback_dynamic_params", None)
+        )
+        sampling_rate: float = self.sampling_rate
+        if standard_callback_dynamic_params is not None:
+            _sampling_rate = standard_callback_dynamic_params.get(
+                "langsmith_sampling_rate"
+            )
+            if _sampling_rate is not None:
+                sampling_rate = float(_sampling_rate)
+        return sampling_rate
+
     def _get_credentials_to_use_for_request(
         self, kwargs: Dict[str, Any]
     ) -> LangsmithCredentialsObject:
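The new `_get_sampling_rate_to_use_for_request` helper gives a per-request `langsmith_sampling_rate` (carried in `standard_callback_dynamic_params`) precedence over the logger-wide `self.sampling_rate`. A minimal standalone sketch of that resolution order, with plain dicts standing in for the real types:

# Standalone sketch of the precedence implemented above; not the actual class.
from typing import Any, Dict, Optional

def resolve_sampling_rate(default_rate: float, kwargs: Dict[str, Any]) -> float:
    # Per-request value from standard_callback_dynamic_params wins;
    # otherwise fall back to the logger-wide default.
    dynamic_params: Optional[Dict[str, Any]] = kwargs.get(
        "standard_callback_dynamic_params"
    )
    if dynamic_params is not None:
        per_request = dynamic_params.get("langsmith_sampling_rate")
        if per_request is not None:
            return float(per_request)
    return default_rate

assert resolve_sampling_rate(1.0, {}) == 1.0
assert resolve_sampling_rate(
    1.0, {"standard_callback_dynamic_params": {"langsmith_sampling_rate": 0.25}}
) == 0.25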

litellm/proxy/_new_secret_config.yaml

Lines changed: 0 additions & 14 deletions
@@ -16,17 +16,3 @@ model_list:
       api_base: "https://webhook.site/2f385e05-00aa-402b-86d1-efc9261471a5"
       api_key: dummy
 
-
-
-
-guardrails:
-  - guardrail_name: "intel-bedrock-guard-cfg"
-    litellm_params:
-      guardrail: bedrock
-      mode: [pre_call, post_call]
-      guardrailIdentifier: "1234"
-      guardrailVersion: "1"
-      aws_access_key_id: "os.environ/AWS_ACCESS_KEY_ID"
-      aws_secret_access_key: "os.environ/AWS_SECRET_ACCESS_KEY"
-      aws_bedrock_runtime_endpoint: "os.environ/AWS_BEDROCK_RUNTIME_ENDPOINT"
-      default_on: true

litellm/router.py

Lines changed: 36 additions & 33 deletions
@@ -359,9 +359,9 @@ def __init__( # noqa: PLR0915
         )  # names of models under litellm_params. ex. azure/chatgpt-v-2
         self.deployment_latency_map = {}
         ### CACHING ###
-        cache_type: Literal[
-            "local", "redis", "redis-semantic", "s3", "disk"
-        ] = "local"  # default to an in-memory cache
+        cache_type: Literal["local", "redis", "redis-semantic", "s3", "disk"] = (
+            "local"  # default to an in-memory cache
+        )
         redis_cache = None
         cache_config: Dict[str, Any] = {}
 
@@ -403,9 +403,9 @@ def __init__( # noqa: PLR0915
         self.default_max_parallel_requests = default_max_parallel_requests
         self.provider_default_deployment_ids: List[str] = []
         self.pattern_router = PatternMatchRouter()
-        self.team_pattern_routers: Dict[
-            str, PatternMatchRouter
-        ] = {}  # {"TEAM_ID": PatternMatchRouter}
+        self.team_pattern_routers: Dict[str, PatternMatchRouter] = (
+            {}
+        )  # {"TEAM_ID": PatternMatchRouter}
         self.auto_routers: Dict[str, "AutoRouter"] = {}
 
         if model_list is not None:
@@ -587,9 +587,9 @@ def __init__( # noqa: PLR0915
             )
         )
 
-        self.model_group_retry_policy: Optional[
-            Dict[str, RetryPolicy]
-        ] = model_group_retry_policy
+        self.model_group_retry_policy: Optional[Dict[str, RetryPolicy]] = (
+            model_group_retry_policy
+        )
 
         self.allowed_fails_policy: Optional[AllowedFailsPolicy] = None
         if allowed_fails_policy is not None:
@@ -1211,7 +1211,10 @@ async def stream_with_fallbacks():
 
     async def _acompletion(
         self, model: str, messages: List[Dict[str, str]], **kwargs
-    ) -> Union[ModelResponse, CustomStreamWrapper,]:
+    ) -> Union[
+        ModelResponse,
+        CustomStreamWrapper,
+    ]:
         """
         - Get an available deployment
         - call it with a semaphore over the call
@@ -3155,9 +3158,9 @@ async def create_file_for_deployment(deployment: dict) -> OpenAIFileObject:
                 healthy_deployments=healthy_deployments, responses=responses
             )
             returned_response = cast(OpenAIFileObject, responses[0])
-            returned_response._hidden_params[
-                "model_file_id_mapping"
-            ] = model_file_id_mapping
+            returned_response._hidden_params["model_file_id_mapping"] = (
+                model_file_id_mapping
+            )
             return returned_response
         except Exception as e:
             verbose_router_logger.exception(
@@ -3720,11 +3723,11 @@ async def async_function_with_fallbacks_common_utils( # noqa: PLR0915
 
         if isinstance(e, litellm.ContextWindowExceededError):
             if context_window_fallbacks is not None:
-                context_window_fallback_model_group: Optional[
-                    List[str]
-                ] = self._get_fallback_model_group_from_fallbacks(
-                    fallbacks=context_window_fallbacks,
-                    model_group=model_group,
+                context_window_fallback_model_group: Optional[List[str]] = (
+                    self._get_fallback_model_group_from_fallbacks(
+                        fallbacks=context_window_fallbacks,
+                        model_group=model_group,
+                    )
                 )
                 if context_window_fallback_model_group is None:
                     raise original_exception
@@ -3756,11 +3759,11 @@ async def async_function_with_fallbacks_common_utils( # noqa: PLR0915
                 e.message += "\n{}".format(error_message)
         elif isinstance(e, litellm.ContentPolicyViolationError):
             if content_policy_fallbacks is not None:
-                content_policy_fallback_model_group: Optional[
-                    List[str]
-                ] = self._get_fallback_model_group_from_fallbacks(
-                    fallbacks=content_policy_fallbacks,
-                    model_group=model_group,
+                content_policy_fallback_model_group: Optional[List[str]] = (
+                    self._get_fallback_model_group_from_fallbacks(
+                        fallbacks=content_policy_fallbacks,
+                        model_group=model_group,
+                    )
                 )
                 if content_policy_fallback_model_group is None:
                     raise original_exception
@@ -4414,7 +4417,7 @@ async def deployment_callback_on_success(
             return tpm_key
 
         except Exception as e:
-            verbose_router_logger.exception(
+            verbose_router_logger.debug(
                 "litellm.router.Router::deployment_callback_on_success(): Exception occured - {}".format(
                     str(e)
                 )
@@ -4992,26 +4995,26 @@ def init_auto_router_deployment(self, deployment: Deployment):
         """
         from litellm.router_strategy.auto_router.auto_router import AutoRouter
 
-        auto_router_config_path: Optional[
-            str
-        ] = deployment.litellm_params.auto_router_config_path
+        auto_router_config_path: Optional[str] = (
+            deployment.litellm_params.auto_router_config_path
+        )
         auto_router_config: Optional[str] = deployment.litellm_params.auto_router_config
         if auto_router_config_path is None and auto_router_config is None:
             raise ValueError(
                 "auto_router_config_path or auto_router_config is required for auto-router deployments. Please set it in the litellm_params"
             )
 
-        default_model: Optional[
-            str
-        ] = deployment.litellm_params.auto_router_default_model
+        default_model: Optional[str] = (
+            deployment.litellm_params.auto_router_default_model
+        )
         if default_model is None:
             raise ValueError(
                 "auto_router_default_model is required for auto-router deployments. Please set it in the litellm_params"
             )
 
-        embedding_model: Optional[
-            str
-        ] = deployment.litellm_params.auto_router_embedding_model
+        embedding_model: Optional[str] = (
+            deployment.litellm_params.auto_router_embedding_model
+        )
         if embedding_model is None:
             raise ValueError(
                 "auto_router_embedding_model is required for auto-router deployments. Please set it in the litellm_params"

litellm/types/integrations/langsmith.py

Lines changed: 2 additions & 2 deletions
@@ -28,8 +28,8 @@ class LangsmithInputs(BaseModel):
 
 
 class LangsmithCredentialsObject(TypedDict):
-    LANGSMITH_API_KEY: str
-    LANGSMITH_PROJECT: str
+    LANGSMITH_API_KEY: Optional[str]
+    LANGSMITH_PROJECT: Optional[str]
     LANGSMITH_BASE_URL: str
 
 
