Skip to content

Commit dfd7a0a

Browse files
authored
chore(llmobs): refactor log/llmobs writer startup, configuration options (#8041)
This PR refactors some startup/configuration logic from the langchain/openai integrations to the base LLM Integration class, including: - Add helper properties to the Base LLM class to check if metrics/logs/llmobs features are enabled (currently just checks the integration config, but later we'll be adding enabled feature flags to the global config). - The `LogWriter` and `LLMObsWriter` instances are not created if the corresponding feature configuration is not enabled. This way we can avoid starting up separate threads unnecessarily as well as making the `log/llmobs_prompt_completion_sample_rate` config options optional rather than required. - Move the DD_API_KEY/DD_APP_KEY checks from integrations to the LLM integration constructor method. No functionality has been changed by this PR. The unused placeholder config `llmobs_prompt_completion_sample_rate` has been removed from the Langchain integration until it is later reintroduced when necessary. ## Checklist - [x] Change(s) are motivated and described in the PR description. - [x] Testing strategy is described if automated tests are not included in the PR. - [x] Risk is outlined (performance impact, potential for breakage, maintainability, etc). - [x] Change is maintainable (easy to change, telemetry, documentation). - [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed. If no release note is required, add label `changelog/no-changelog`. - [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)). - [x] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [ ] Title is accurate. - [ ] No unnecessary changes are introduced. - [ ] Description motivates each change. - [ ] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes unless absolutely necessary. 
- [ ] Testing strategy adequately addresses listed risk(s). - [ ] Change is maintainable (easy to change, telemetry, documentation). - [ ] Release note makes sense to a user of the library. - [ ] Reviewer has explicitly acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment. - [ ] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) - [ ] If this PR touches code that signs or publishes builds or packages, or handles credentials of any kind, I've requested a review from `@DataDog/security-design-and-guidance`. - [ ] This PR doesn't touch any of that.
1 parent 83480a0 commit dfd7a0a

File tree

3 files changed

+85
-78
lines changed

3 files changed

+85
-78
lines changed

ddtrace/contrib/_trace_utils_llm.py

Lines changed: 79 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -14,35 +14,89 @@
1414
from ddtrace.internal.hostname import get_hostname
1515
from ddtrace.internal.llmobs import LLMObsWriter
1616
from ddtrace.internal.log_writer import V2LogWriter
17+
from ddtrace.internal.utils.formats import asbool
1718
from ddtrace.sampler import RateSampler
1819

1920

2021
class BaseLLMIntegration:
2122
_integration_name = "baseLLM"
2223

23-
def __init__(self, config, stats_url, site, api_key, app_key=None):
24+
def __init__(self, config, stats_url):
2425
# FIXME: this currently does not consider if the tracer is configured to
2526
# use a different hostname. eg. tracer.configure(host="new-hostname")
2627
# Ideally the metrics client should live on the tracer or some other core
2728
# object that is strongly linked with configuration.
28-
self._statsd = get_dogstatsd_client(stats_url, namespace=self._integration_name)
29+
self._log_writer = None
30+
self._llmobs_writer = None
31+
self._statsd = None
2932
self._config = config
30-
self._log_writer = V2LogWriter(
31-
site=site,
32-
api_key=api_key,
33-
interval=float(os.getenv("_DD_%s_LOG_WRITER_INTERVAL" % self._integration_name.upper(), "1.0")),
34-
timeout=float(os.getenv("_DD_%s_LOG_WRITER_TIMEOUT" % self._integration_name.upper(), "2.0")),
35-
)
36-
self._llmobs_writer = LLMObsWriter(
37-
site=site,
38-
api_key=api_key,
39-
app_key=app_key,
40-
interval=float(os.getenv("_DD_%s_LLM_WRITER_INTERVAL" % self._integration_name.upper(), "1.0")),
41-
timeout=float(os.getenv("_DD_%s_LLM_WRITER_TIMEOUT" % self._integration_name.upper(), "2.0")),
42-
)
4333
self._span_pc_sampler = RateSampler(sample_rate=config.span_prompt_completion_sample_rate)
44-
self._log_pc_sampler = RateSampler(sample_rate=config.log_prompt_completion_sample_rate)
45-
self._llmobs_pc_sampler = RateSampler(sample_rate=config.llmobs_prompt_completion_sample_rate)
34+
35+
_dd_api_key = os.getenv("DD_API_KEY", config.get("_api_key"))
36+
_dd_app_key = os.getenv("DD_APP_KEY", config.get("_app_key"))
37+
_dd_site = os.getenv("DD_SITE", "datadoghq.com")
38+
39+
if self.metrics_enabled:
40+
self._statsd = get_dogstatsd_client(stats_url, namespace=self._integration_name)
41+
if self.logs_enabled:
42+
if not _dd_api_key:
43+
raise ValueError(
44+
f"DD_API_KEY is required for sending logs from the {self._integration_name} integration. "
45+
f"To use the {self._integration_name} integration without logs, "
46+
f"set `DD_{self._integration_name.upper()}_LOGS_ENABLED=false`."
47+
)
48+
self._log_writer = V2LogWriter(
49+
site=_dd_site,
50+
api_key=_dd_api_key,
51+
interval=float(os.getenv("_DD_%s_LOG_WRITER_INTERVAL" % self._integration_name.upper(), "1.0")),
52+
timeout=float(os.getenv("_DD_%s_LOG_WRITER_TIMEOUT" % self._integration_name.upper(), "2.0")),
53+
)
54+
self._log_pc_sampler = RateSampler(sample_rate=config.log_prompt_completion_sample_rate)
55+
self.start_log_writer()
56+
57+
if self.llmobs_enabled:
58+
if not _dd_api_key:
59+
raise ValueError(
60+
f"DD_API_KEY is required for sending LLMObs data from the {self._integration_name} integration. "
61+
f"To use the {self._integration_name} integration without LLMObs, "
62+
f"set `DD_{self._integration_name.upper()}_LLMOBS_ENABLED=false`."
63+
)
64+
if not _dd_app_key:
65+
raise ValueError(
66+
f"DD_APP_KEY is required for sending LLMObs payloads from the {self._integration_name} integration."
67+
f" To use the {self._integration_name} integration without LLMObs, "
68+
f"set `DD_{self._integration_name.upper()}_LLMOBS_ENABLED=false`."
69+
)
70+
self._llmobs_writer = LLMObsWriter(
71+
site=_dd_site,
72+
api_key=_dd_api_key,
73+
app_key=_dd_app_key,
74+
interval=float(os.getenv("_DD_%s_LLM_WRITER_INTERVAL" % self._integration_name.upper(), "1.0")),
75+
timeout=float(os.getenv("_DD_%s_LLM_WRITER_TIMEOUT" % self._integration_name.upper(), "2.0")),
76+
)
77+
self._llmobs_pc_sampler = RateSampler(sample_rate=config.llmobs_prompt_completion_sample_rate)
78+
self.start_llm_writer()
79+
80+
@property
81+
def metrics_enabled(self) -> bool:
82+
"""Return whether submitting metrics is enabled for this integration, or global config if not set."""
83+
if hasattr(self._config, "metrics_enabled"):
84+
return asbool(self._config.metrics_enabled)
85+
return False
86+
87+
@property
88+
def logs_enabled(self) -> bool:
89+
"""Return whether submitting logs is enabled for this integration, or global config if not set."""
90+
if hasattr(self._config, "logs_enabled"):
91+
return asbool(self._config.logs_enabled)
92+
return False
93+
94+
@property
95+
def llmobs_enabled(self) -> bool:
96+
"""Return whether submitting llmobs payloads is enabled for this integration, or global config if not set."""
97+
if hasattr(self._config, "llmobs_enabled"):
98+
return asbool(self._config.llmobs_enabled)
99+
return False
46100

47101
def is_pc_sampled_span(self, span: Span) -> bool:
48102
if not span.sampled:
@@ -61,10 +115,13 @@ def is_pc_sampled_llmobs(self, span: Span) -> bool:
61115
return self._llmobs_pc_sampler.sample(span)
62116

63117
def start_log_writer(self) -> None:
118+
if not self._config.logs_enabled:
119+
return
64120
self._log_writer.start()
65121

66-
def start_llm_writer(self):
67-
# type: (...) -> None
122+
def start_llm_writer(self) -> None:
123+
if not self._config.llmobs_enabled:
124+
return
68125
self._llmobs_writer.start()
69126

70127
@abc.abstractmethod
@@ -96,7 +153,7 @@ def _logs_tags(cls, span):
96153

97154
def log(self, span, level, msg, attrs):
98155
# type: (Span, str, str, Dict[str, Any]) -> None
99-
if not self._config.logs_enabled:
156+
if not self.logs_enabled:
100157
return
101158
tags = self._logs_tags(span)
102159
log = {
@@ -124,7 +181,7 @@ def _metrics_tags(cls, span):
124181
def metric(self, span, kind, name, val, tags=None):
125182
# type: (Span, str, str, Any, Optional[List[str]]) -> None
126183
"""Set a metric using the context from the given span."""
127-
if not self._config.metrics_enabled:
184+
if not self.metrics_enabled:
128185
return
129186
metric_tags = self._metrics_tags(span)
130187
if tags:
@@ -154,7 +211,7 @@ def trunc(self, text):
154211
def llm_record(self, span, attrs):
155212
# type: (Span, Dict[str, Any]) -> None
156213
"""Create a LLM record to send to the LLM Obs intake."""
157-
if not self._config.llmobs_enabled:
214+
if not self.llmobs_enabled:
158215
return
159216
llm_record = {}
160217
if span is not None:

ddtrace/contrib/langchain/patch.py

Lines changed: 1 addition & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -58,11 +58,6 @@ def get_version():
5858
"logs_enabled": asbool(os.getenv("DD_LANGCHAIN_LOGS_ENABLED", False)),
5959
"metrics_enabled": asbool(os.getenv("DD_LANGCHAIN_METRICS_ENABLED", True)),
6060
"span_prompt_completion_sample_rate": float(os.getenv("DD_LANGCHAIN_SPAN_PROMPT_COMPLETION_SAMPLE_RATE", 1.0)),
61-
# FIXME: llmobs_prompt_completion_sample_rate does not currently work as the langchain integration doesn't
62-
# send LLMObs payloads. This is a placeholder for when we do.
63-
"llmobs_prompt_completion_sample_rate": float(
64-
os.getenv("DD_LANGCHAIN_LLMOBS_PROMPT_COMPLETION_SAMPLE_RATE", 1.0)
65-
),
6661
"log_prompt_completion_sample_rate": float(os.getenv("DD_LANGCHAIN_LOG_PROMPT_COMPLETION_SAMPLE_RATE", 0.1)),
6762
"span_char_limit": int(os.getenv("DD_LANGCHAIN_SPAN_CHAR_LIMIT", 128)),
6863
"_api_key": os.getenv("DD_API_KEY"),
@@ -73,9 +68,6 @@ def get_version():
7368
class _LangChainIntegration(BaseLLMIntegration):
7469
_integration_name = "langchain"
7570

76-
def __init__(self, config, stats_url, site, api_key):
77-
super().__init__(config, stats_url, site, api_key)
78-
7971
def _set_base_span_tags(self, span, interface_type="", provider=None, model=None, api_key=None):
8072
# type: (Span, str, Optional[str], Optional[str], Optional[str]) -> None
8173
"""Set base level tags that should be present on all LangChain spans (if they are not None)."""
@@ -130,7 +122,7 @@ def _metrics_tags(cls, span):
130122

131123
def record_usage(self, span, usage):
132124
# type: (Span, Dict[str, Any]) -> None
133-
if not usage or self._config.metrics_enabled is False:
125+
if not usage or self.metrics_enabled is False:
134126
return
135127
for token_type in ("prompt", "completion", "total"):
136128
num_tokens = usage.get("token_usage", {}).get(token_type + "_tokens")
@@ -748,28 +740,13 @@ def patch():
748740
return
749741
langchain._datadog_patch = True
750742

751-
# TODO: How do we test this? Can we mock out the metric/logger/sampler?
752-
ddsite = os.getenv("DD_SITE", "datadoghq.com")
753-
ddapikey = os.getenv("DD_API_KEY", config.langchain._api_key)
754-
755743
Pin().onto(langchain)
756744
integration = _LangChainIntegration(
757745
config=config.langchain,
758746
stats_url=get_stats_url(),
759-
site=ddsite,
760-
api_key=ddapikey,
761747
)
762748
langchain._datadog_integration = integration
763749

764-
if config.langchain.logs_enabled:
765-
if not ddapikey:
766-
raise ValueError(
767-
"DD_API_KEY is required for sending logs from the LangChain integration."
768-
" The LangChain integration can be disabled by setting the ``DD_TRACE_LANGCHAIN_ENABLED``"
769-
" environment variable to False."
770-
)
771-
integration.start_log_writer()
772-
773750
# Langchain doesn't allow wrapping directly from root, so we have to import the base classes first before wrapping.
774751
# ref: https://github.com/DataDog/dd-trace-py/issues/7123
775752
from langchain import embeddings # noqa:F401

ddtrace/contrib/openai/patch.py

Lines changed: 5 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -154,12 +154,12 @@ def get_version():
154154
class _OpenAIIntegration(BaseLLMIntegration):
155155
_integration_name = "openai"
156156

157-
def __init__(self, config, openai, stats_url, site, api_key, app_key=None):
157+
def __init__(self, config, openai, stats_url):
158158
# FIXME: this currently does not consider if the tracer is configured to
159159
# use a different hostname. eg. tracer.configure(host="new-hostname")
160160
# Ideally the metrics client should live on the tracer or some other core
161161
# object that is strongly linked with configuration.
162-
super().__init__(config, stats_url, site, api_key, app_key=app_key)
162+
super().__init__(config, stats_url)
163163
self._openai = openai
164164
self._user_api_key = None
165165
self._client = None
@@ -237,7 +237,7 @@ def _metrics_tags(cls, span):
237237
return tags
238238

239239
def record_usage(self, span, usage):
240-
if not usage or not self._config.metrics_enabled:
240+
if not usage or not self.metrics_enabled:
241241
return
242242
tags = self._metrics_tags(span)
243243
tags.append("openai.estimated:false")
@@ -251,7 +251,7 @@ def record_usage(self, span, usage):
251251
def generate_completion_llm_records(self, resp, span, args, kwargs):
252252
# type: (Any, Span, List[Any], Dict[str, Any]) -> None
253253
"""Generate payloads for the LLM Obs API from a completion."""
254-
if not self._config.llmobs_enabled:
254+
if not self.llmobs_enabled:
255255
return
256256
choices = resp.choices
257257
n = kwargs.get("n", 1)
@@ -284,7 +284,7 @@ def generate_completion_llm_records(self, resp, span, args, kwargs):
284284
def generate_chat_llm_records(self, resp, span, args, kwargs):
285285
# type: (Any, Span, List[Any], Dict[str, Any]) -> None
286286
"""Generate payloads for the LLM Obs API from a chat completion."""
287-
if not self._config.llmobs_enabled:
287+
if not self.llmobs_enabled:
288288
return
289289
choices = resp.choices
290290
now = time.time()
@@ -332,40 +332,13 @@ def patch():
332332
if getattr(openai, "__datadog_patch", False):
333333
return
334334

335-
ddsite = os.getenv("DD_SITE", "datadoghq.com")
336-
ddapikey = os.getenv("DD_API_KEY", config.openai._api_key)
337-
ddappkey = os.getenv("DD_APP_KEY", config.openai._app_key)
338-
339335
Pin().onto(openai)
340336
integration = _OpenAIIntegration(
341337
config=config.openai,
342338
openai=openai,
343339
stats_url=get_stats_url(),
344-
site=ddsite,
345-
api_key=ddapikey,
346-
app_key=ddappkey,
347340
)
348341

349-
if config.openai.logs_enabled:
350-
if not ddapikey:
351-
raise ValueError(
352-
"DD_API_KEY is required for sending logs from the OpenAI integration."
353-
"To use the OpenAI integration without logs, set `DD_OPENAI_LOGS_ENABLED=false`."
354-
)
355-
integration.start_log_writer()
356-
if config.openai.llmobs_enabled:
357-
if not ddapikey:
358-
raise ValueError(
359-
"DD_API_KEY is required for sending LLMObs data from the OpenAI integration."
360-
"To use the OpenAI integration without LLMObs, set `DD_OPENAI_LLMOBS_ENABLED=false`."
361-
)
362-
if not ddappkey:
363-
raise ValueError(
364-
"DD_APP_KEY is required for sending LLMObs payloads from the OpenAI integration."
365-
"To use the OpenAI integration without LLMObs, set `DD_OPENAI_LLMOBS_ENABLED=false`."
366-
)
367-
integration.start_llm_writer()
368-
369342
if OPENAI_VERSION >= (1, 0, 0):
370343
wrap(openai._base_client.BaseClient._process_response, _patched_convert(openai, integration))
371344
wrap(openai.OpenAI.__init__, _patched_client_init(openai, integration))

0 commit comments

Comments
 (0)