inclusionAI
diff --git a/‎.gitignore‎
Lines changed: 4 additions & 0 deletions b/‎.gitignore‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎areal/api/cli_args.py‎
Lines changed: 46 additions & 2 deletions b/‎areal/api/cli_args.py‎
Lines changed: 46 additions & 2 deletions
diff --git a/‎areal/experimental/openai/client.py‎
Lines changed: 20 additions & 4 deletions b/‎areal/experimental/openai/client.py‎
Lines changed: 20 additions & 4 deletions
@@ -3,6 +3,10 @@
 .data/
 .idea/
 
+
+# Ruff
+.ruff_cache/
+
 # Mac
 .DS_Store
 
 
@@ -1,7 +1,8 @@
 import argparse
 import json
 import os
-from dataclasses import asdict, dataclass, field
+from dataclasses import MISSING as dataclass_missing
+from dataclasses import asdict, dataclass, field, fields
 from pathlib import Path
 from typing import Any
 
@@ -13,11 +14,13 @@
 from omegaconf import MISSING, DictConfig, OmegaConf
 
 from areal.platforms import current_platform
-from areal.utils import name_resolve, pkg_version
+from areal.utils import logging, name_resolve, pkg_version
 from areal.utils.pkg_version import is_version_less
 
 uvloop.install()
 
+logger = logging.getLogger("CLI args")
+
 
 @dataclass
 class NormConfig:
@@ -160,6 +163,47 @@ def new(self, **kwargs):
         args.update(kwargs)
         return GenerationHyperparameters(**args)
 
+    def to_openai_args_dict(
+        self, exclude_args: list[str] | None = None
+    ) -> dict[str, Any]:
+        """Build the keyword arguments to pass to an OpenAI client.
+
+        Field values are taken from ``asdict(self)``. ``n_samples`` is emitted as
+        ``n`` and ``max_new_tokens`` as ``max_completion_tokens``; every other
+        field keeps its own name.
+
+        A warning is logged for each dropped field whose current value differs
+        from its declared default (or from a fresh ``default_factory()`` result).
+        Dropped fields that declare no default never warn.
+
+        Args:
+            exclude_args: Extra field names to leave out, on top of the fields
+                that are always dropped (``min_new_tokens``, ``greedy``,
+                ``top_k``, ``stop_token_ids``).
+
+        Returns:
+            A new dict holding the fields that were not dropped.
+        """
+        dropped = set() if exclude_args is None else set(exclude_args)
+        dropped |= {
+            "min_new_tokens",  # Not supported by OpenAI
+            "greedy",  # Not directly supported by OpenAI
+            "top_k",  # Not supported by OpenAI
+            "stop_token_ids",  # Not supported by OpenAI
+        }
+
+        # Field names whose OpenAI spelling differs from ours.
+        renamed = {
+            "n_samples": "n",
+            "max_new_tokens": "max_completion_tokens",
+        }
+
+        snapshot = asdict(self)
+        # One lookup table instead of scanning ``fields(self)`` per dropped key.
+        specs = {spec.name: spec for spec in fields(self)}
+
+        openai_args = {}
+        for name, value in snapshot.items():
+            if name not in dropped:
+                openai_args[renamed.get(name, name)] = value
+                continue
+
+            # The field is dropped: warn only if the user moved it off its default,
+            # since that setting is silently lost in the OpenAI call.
+            current = getattr(self, name)
+            spec = specs[name]
+            if spec.default is not dataclass_missing:
+                overridden = current != spec.default
+            elif spec.default_factory is not dataclass_missing:
+                overridden = current != spec.default_factory()
+            else:
+                # No declared default to compare against.
+                overridden = False
+
+            if overridden:
+                logger.warning(f"Unsupported arg for openai format: `{name}`")
+
+        return openai_args
+
 
 # Train Engine Configs
 
 
@@ -49,8 +49,8 @@
     from areal.api.engine_api import InferenceEngine
 
 # reset OpenAI keys when using the wrapped client.
-os.environ["OPENAI_API_KEY"] = "none"
-os.environ["OPENAI_BASE_URL"] = "none"
+os.environ["OPENAI_API_KEY"] = os.environ.get("OPENAI_API_KEY", "none")
+os.environ["OPENAI_BASE_URL"] = os.environ.get("OPENAI_BASE_URL", "none")
 
 logger = logging.getLogger("AReaLOpenAI Client")
 
@@ -97,6 +97,7 @@ async def create(
         tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
         top_p: float | None | NotGiven = NOT_GIVEN,
         extra_body: Body | None = None,
+        areal_completion_cache: dict[str, InteractionWithTokenLogpReward] | None = None,
         **kwargs: Any,
     ) -> ChatCompletion:
         """Override create method to use AReaL engine and cache responses."""
@@ -218,7 +219,15 @@ async def create(
 
         if is_omitted(store) or store:
             # Cache the completion with its input messages
-            self._cache[completion_id] = InteractionWithTokenLogpReward(
+            cache = (
+                areal_completion_cache
+                if areal_completion_cache is not None
+                else self._cache
+            )
+            if completion_id in cache:
+                raise ValueError(f"Completion {completion_id} already exists in cache")
+
+            cache[completion_id] = InteractionWithTokenLogpReward(
                 completion=deepcopy(chat_completion),
                 model_response=response,  # Should not deepcopy response because of tokenizer
                 messages=deepcopy(messages_list),  # Store a copy of the input messages
@@ -262,6 +271,7 @@ async def create(
         temperature: float | None | NotGiven = NOT_GIVEN,
         top_p: float | None | NotGiven = NOT_GIVEN,
         extra_body: Body | None = None,
+        areal_response_cache: dict[str, InteractionWithTokenLogpReward] | None = None,
         **kwargs: Any,
     ) -> Response:
         """Override create method to use AReaL engine"""
@@ -490,7 +500,13 @@ def _build_messages_list(item: ResponseInputItemParam) -> list[dict]:
         )
 
         # Cache the response with its input data
-        self._cache[resp_id] = InteractionWithTokenLogpReward(
+        cache = (
+            areal_response_cache if areal_response_cache is not None else self._cache
+        )
+        if resp_id in cache:
+            raise ValueError(f"Response {resp_id} already exists in cache")
+
+        cache[resp_id] = InteractionWithTokenLogpReward(
             response=deepcopy(response),
             model_response=engine_resp,  # Should not deepcopy because of tokenizer
             input_data=(