diff --git a/mellea/backends/litellm.py b/mellea/backends/litellm.py
new file mode 100644
index 00000000..34d8f2f6
--- /dev/null
+++ b/mellea/backends/litellm.py
@@ -0,0 +1,354 @@
+"""A generic LiteLLM compatible backend that wraps around the openai python sdk."""
+
+import datetime
+import json
+from collections.abc import Callable
+from typing import Any
+
+import litellm
+import litellm.litellm_core_utils
+import litellm.litellm_core_utils.get_supported_openai_params
+
+import mellea.backends.model_ids as model_ids
+from mellea.backends import BaseModelSubclass
+from mellea.backends.formatter import Formatter, FormatterBackend, TemplateFormatter
+from mellea.backends.tools import (
+    add_tools_from_context_actions,
+    add_tools_from_model_options,
+    convert_tools_to_json,
+)
+from mellea.backends.types import ModelOption
+from mellea.helpers.fancy_logger import FancyLogger
+from mellea.stdlib.base import (
+    CBlock,
+    Component,
+    Context,
+    GenerateLog,
+    ModelOutputThunk,
+    ModelToolCall,
+)
+from mellea.stdlib.chat import Message
+from mellea.stdlib.requirement import ALoraRequirement
+
+
+class LiteLLMBackend(FormatterBackend):
+    """A generic LiteLLM compatible backend."""
+
+    def __init__(
+        self,
+        model_id: str = "ollama/" + str(model_ids.IBM_GRANITE_3_3_8B.ollama_name),
+        formatter: Formatter | None = None,
+        base_url: str | None = "http://localhost:11434",
+        model_options: dict | None = None,
+    ):
+        """Initialize a LiteLLM compatible backend. Any additional provider-specific options can be passed via `model_options`; they are forwarded to the completion call.
+
+        Args:
+            model_id: The LiteLLM model identifier. Make sure that all necessary credentials are in OS environment variables.
+            formatter: A custom formatter for this backend. If None, defaults to TemplateFormatter.
+            base_url: Base url for the LLM API. Defaults to the local Ollama endpoint.
+            model_options: Generation options to pass to the LLM. Defaults to None.
+        """
+        super().__init__(
+            model_id=model_id,
+            formatter=(
+                formatter
+                if formatter is not None
+                else TemplateFormatter(model_id=model_id)
+            ),
+            model_options=model_options,
+        )
+
+        assert isinstance(model_id, str), "Model ID must be a string."
+        self._model_id = model_id
+
+        if base_url is None:
+            self._base_url = "http://localhost:11434/v1"  # ollama
+        else:
+            self._base_url = base_url
+
+        # A mapping of common options for this backend mapped to their Mellea ModelOptions equivalent.
+        # These are usually values that must be extracted beforehand or that are common among backend providers.
+        # OpenAI has some deprecated parameters. Those map to the same mellea parameter, but
+        # users should only be specifying a single one in their request.
+        self.to_mellea_model_opts_map = {
+            "system": ModelOption.SYSTEM_PROMPT,
+            "reasoning_effort": ModelOption.THINKING,  # TODO: JAL; see which of these are actually extracted...
+            "seed": ModelOption.SEED,
+            "max_completion_tokens": ModelOption.MAX_NEW_TOKENS,
+            "max_tokens": ModelOption.MAX_NEW_TOKENS,
+            "tools": ModelOption.TOOLS,
+            "functions": ModelOption.TOOLS,
+        }
+
+        # A mapping of Mellea specific ModelOptions to the specific names for this backend.
+        # These options should almost always be a subset of those specified in the `to_mellea_model_opts_map`.
+        # Usually, values that are intentionally extracted while prepping for the backend generate call
+        # will be omitted here so that they will be removed when model_options are processed
+        # for the call to the model.
+        self.from_mellea_model_opts_map = {
+            ModelOption.SEED: "seed",
+            ModelOption.MAX_NEW_TOKENS: "max_completion_tokens",
+        }
+
+    def generate_from_context(
+        self,
+        action: Component | CBlock,
+        ctx: Context,
+        *,
+        format: type[BaseModelSubclass] | None = None,
+        model_options: dict | None = None,
+        generate_logs: list[GenerateLog] | None = None,
+        tool_calls: bool = False,
+    ):
+        """See `generate_from_chat_context`."""
+        assert ctx.is_chat_context, NotImplementedError(
+            "The LiteLLM backend only supports chat-like contexts."
+        )
+        return self._generate_from_chat_context_standard(
+            action,
+            ctx,
+            format=format,
+            model_options=model_options,
+            generate_logs=generate_logs,
+            tool_calls=tool_calls,
+        )
+
+    def _simplify_and_merge(
+        self, model_options: dict[str, Any] | None
+    ) -> dict[str, Any]:
+        """Simplifies model_options to use the Mellea specific ModelOption.Option and merges the backend's model_options with those passed into this call.
+
+        Rules:
+        - Within a model_options dict, existing keys take precedence. This means remapping to mellea specific keys will maintain the value of the mellea specific key if one already exists.
+        - When merging, the keys/values from the dictionary passed into this function take precedence.
+
+        Because this function simplifies and then merges, non-Mellea keys from the passed-in model_options will replace
+        Mellea specific keys from the backend's model_options.
+
+        Args:
+            model_options: the model_options for this call
+
+        Returns:
+            a new dict
+        """
+        backend_model_opts = ModelOption.replace_keys(
+            self.model_options, self.to_mellea_model_opts_map
+        )
+
+        if model_options is None:
+            return backend_model_opts
+
+        generate_call_model_opts = ModelOption.replace_keys(
+            model_options, self.to_mellea_model_opts_map
+        )
+        return ModelOption.merge_model_options(
+            backend_model_opts, generate_call_model_opts
+        )
+
+    def _make_backend_specific_and_remove(
+        self, model_options: dict[str, Any]
+    ) -> dict[str, Any]:
+        """Maps specified Mellea specific keys to their backend specific version and removes any remaining Mellea keys.
+
+        Additionally, logs any params unknown to litellm and any params that are openai specific but not supported by this model/provider.
+
+        Args:
+            model_options: the model_options for this call
+
+        Returns:
+            a new dict
+        """
+        backend_specific = ModelOption.replace_keys(
+            model_options, self.from_mellea_model_opts_map
+        )
+        backend_specific = ModelOption.remove_special_keys(backend_specific)
+
+        # We set `drop_params=True` which will drop non-supported openai params; check for non-openai
+        # params that might cause errors and log which openai params aren't supported here.
+        # See https://docs.litellm.ai/docs/completion/input.
+        # standard_openai_subset = litellm.get_standard_openai_params(backend_specific)
+        supported_params_list = litellm.litellm_core_utils.get_supported_openai_params.get_supported_openai_params(
+            self._model_id
+        )
+        supported_params = (
+            set(supported_params_list) if supported_params_list is not None else set()
+        )
+
+        # unknown_keys = []  # keys that are unknown to litellm
+        unsupported_openai_params = []  # openai params that are known to litellm but not supported for this model/provider
+        for key in backend_specific.keys():
+            if key not in supported_params:
+                unsupported_openai_params.append(key)
+
+        # if len(unknown_keys) > 0:
+        #     FancyLogger.get_logger().warning(
+        #         f"litellm allows for unknown / non-openai input params; mellea won't validate the following params that may cause issues: {', '.join(unknown_keys)}"
+        #     )
+
+        if len(unsupported_openai_params) > 0:
+            FancyLogger.get_logger().warning(
+                f"litellm will automatically drop the following openai keys that aren't supported by the current model/provider: {', '.join(unsupported_openai_params)}"
+            )
+            for key in unsupported_openai_params:
+                del backend_specific[key]
+
+        return backend_specific
+
+    def _generate_from_chat_context_standard(
+        self,
+        action: Component | CBlock,
+        ctx: Context,
+        *,
+        format: type[BaseModelSubclass]
+        | None = None,  # Type[BaseModelSubclass] is a class object of a subclass of BaseModel
+        model_options: dict | None = None,
+        generate_logs: list[GenerateLog] | None = None,
+        tool_calls: bool = False,
+    ) -> ModelOutputThunk:
+        model_opts = self._simplify_and_merge(model_options)
+        linearized_context = ctx.render_for_generation()
+        assert linearized_context is not None, (
+            "Cannot generate from a non-linear context in a FormatterBackend."
+        )
+        # Convert our linearized context into a sequence of chat messages. Template formatters have a standard way of doing this.
+        messages: list[Message] = self.formatter.to_chat_messages(linearized_context)
+        # Add the final message.
+        match action:
+            case ALoraRequirement():
+                raise Exception("The LiteLLM backend does not support activated LoRAs.")
+            case _:
+                messages.extend(self.formatter.to_chat_messages([action]))
+
+        conversation: list[dict] = []
+        system_prompt = model_opts.get(ModelOption.SYSTEM_PROMPT, "")
+        if system_prompt != "":
+            conversation.append({"role": "system", "content": system_prompt})
+        conversation.extend([{"role": m.role, "content": m.content} for m in messages])
+
+        if format is not None:
+            response_format = {
+                "type": "json_schema",
+                "json_schema": {
+                    "name": format.__name__,
+                    "schema": format.model_json_schema(),
+                    "strict": True,
+                },
+            }
+        else:
+            response_format = {"type": "text"}
+
+        thinking = model_opts.get(ModelOption.THINKING, None)
+        if type(thinking) is bool and thinking:
+            # OpenAI uses strings for its reasoning levels.
+            thinking = "medium"
+
+        # Append tool call information if applicable.
+        tools = self._extract_tools(action, format, model_opts, tool_calls, ctx)
+        formatted_tools = convert_tools_to_json(tools) if len(tools) > 0 else None
+
+        model_specific_options = self._make_backend_specific_and_remove(model_opts)
+
+        chat_response: litellm.ModelResponse = litellm.completion(
+            model=self._model_id,
+            messages=conversation,
+            tools=formatted_tools,
+            response_format=response_format,
+            reasoning_effort=thinking,  # type: ignore
+            drop_params=True,  # See note in `_make_backend_specific_and_remove`.
+            **model_specific_options,
+        )
+
+        choice_0 = chat_response.choices[0]
+        assert isinstance(choice_0, litellm.utils.Choices), (
+            "Only works for non-streaming response for now"
+        )
+        result = ModelOutputThunk(
+            value=choice_0.message.content,
+            meta={
+                "litellm_chat_response": chat_response.choices[0].model_dump()
+            },  # NOTE: Using model dump here to comply with `TemplateFormatter`
+            tool_calls=self._extract_model_tool_requests(tools, chat_response),
+        )
+
+        parsed_result = self.formatter.parse(source_component=action, result=result)
+
+        if generate_logs is not None:
+            assert isinstance(generate_logs, list)
+            generate_log = GenerateLog()
+            generate_log.prompt = conversation
+            generate_log.backend = f"litellm::{self.model_id!s}"
+            generate_log.model_options = model_specific_options
+            generate_log.date = datetime.datetime.now()
+            generate_log.model_output = chat_response
+            generate_log.extra = {
+                "format": format,
+                "tools_available": tools,
+                "tools_called": result.tool_calls,
+                "seed": model_opts.get("seed", None),
+            }
+            generate_log.action = action
+            generate_log.result = parsed_result
+            generate_logs.append(generate_log)
+
+        return parsed_result
+
+    @staticmethod
+    def _extract_tools(
+        action, format, model_opts, tool_calls, ctx
+    ) -> dict[str, Callable]:
+        tools: dict[str, Callable] = dict()
+        if tool_calls:
+            if format:
+                FancyLogger.get_logger().warning(
+                    f"Tool calling typically uses constrained generation, but you have specified a `format` in your generate call. NB: tool calling is superseded by format; we will NOT call tools for your request: {action}"
+                )
+            else:
+                add_tools_from_model_options(tools, model_opts)
+                add_tools_from_context_actions(tools, ctx.actions_for_available_tools())
+
+                # Add the tools from the action for this generation last so that
+                # they overwrite conflicting names.
+                add_tools_from_context_actions(tools, [action])
+            FancyLogger.get_logger().info(f"Tools for call: {tools.keys()}")
+        return tools
+
+    def _generate_from_raw(
+        self,
+        actions: list[Component | CBlock],
+        *,
+        format: type[BaseModelSubclass] | None = None,
+        model_options: dict | None = None,
+        generate_logs: list[GenerateLog] | None = None,
+    ) -> list[ModelOutputThunk]:
+        """Generate using the completions api. Gives the input provided to the model without templating."""
+        raise NotImplementedError("This method is not implemented yet.")
+
+    def _extract_model_tool_requests(
+        self, tools: dict[str, Callable], chat_response: litellm.ModelResponse
+    ) -> dict[str, ModelToolCall] | None:
+        model_tool_calls: dict[str, ModelToolCall] = {}
+        choice_0 = chat_response.choices[0]
+        assert isinstance(choice_0, litellm.utils.Choices), (
+            "Only works for non-streaming response for now"
+        )
+        calls = choice_0.message.tool_calls
+        if calls:
+            for tool_call in calls:
+                tool_name = str(tool_call.function.name)
+                tool_args = tool_call.function.arguments
+
+                func = tools.get(tool_name)
+                if func is None:
+                    FancyLogger.get_logger().warning(
+                        f"model attempted to call a non-existing function: {tool_name}"
+                    )
+                    continue  # skip this function if we can't find it.
+
+                # Returns the args as a string. Parse it here.
+                args = json.loads(tool_args)
+                model_tool_calls[tool_name] = ModelToolCall(tool_name, func, args)
+
+        if len(model_tool_calls) > 0:
+            return model_tool_calls
+        return None
diff --git a/pyproject.toml b/pyproject.toml
index a4adf89f..2aeddd56 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -62,6 +62,10 @@ hf = [
     "trl>=0.19.0",
 ]
 
+litellm = [
+    "litellm>=1.76"
+]
+
 watsonx = [
     "ibm-watsonx-ai>=1.3.31",
 ]
@@ -69,7 +73,7 @@ docling = [
     "docling>=2.45.0",
 ]
 
-all = ["mellea[watsonx,docling,hf]"]
+all = ["mellea[watsonx,docling,hf,litellm]"]
 
 [dependency-groups]
 # Use these like:
@@ -140,7 +144,7 @@ ignore = [
     # "UP006", # List vs list, etc
     # "UP007", # Option and Union
     # "UP035", # `typing.Set` is deprecated, use `set` instead"
-    "PD901", # Avoid using the generic variable name `df` for DataFrames
+    "PD901", # Avoid using the generic variable name `df` for DataFrames
 ]
 
 [tool.ruff.lint.pydocstyle]
diff --git a/test/backends/test_litellm_ollama.py b/test/backends/test_litellm_ollama.py
new file mode 100644
index 00000000..782debff
--- /dev/null
+++ b/test/backends/test_litellm_ollama.py
@@ -0,0 +1,73 @@
+import pytest
+
+from mellea import MelleaSession, generative
+from mellea.backends import ModelOption
+from mellea.backends.litellm import LiteLLMBackend
+from mellea.stdlib.chat import Message
+from mellea.stdlib.sampling import RejectionSamplingStrategy
+
+
+@pytest.fixture(scope="function")
+def session():
+    """Fresh Ollama session for each test."""
+    session = MelleaSession(LiteLLMBackend())
+    yield session
+    session.reset()
+
+
+@pytest.mark.qualitative
+def test_litellm_ollama_chat(session):
+    res = session.chat("hello world")
+    assert res is not None
+    assert isinstance(res, Message)
+
+
+@pytest.mark.qualitative
+def test_litellm_ollama_instruct(session):
+    res = session.instruct(
+        "Write an email to the interns.",
+        requirements=["be funny"],
+        strategy=RejectionSamplingStrategy(loop_budget=3),
+    )
+    assert res is not None
+    assert isinstance(res.value, str)
+
+
+@pytest.mark.qualitative
+def test_litellm_ollama_instruct_options(session):
+    res = session.instruct(
+        "Write an email to the interns.",
+        requirements=["be funny"],
+        model_options={
+            ModelOption.SEED: 123,
+            ModelOption.TEMPERATURE: 0.5,
+            ModelOption.THINKING: True,
+            ModelOption.MAX_NEW_TOKENS: 100,
+            "reasoning_effort": True,
+            "stream": False,
+            "homer_simpson": "option should be kicked out",
+        },
+    )
+    assert res is not None
+    assert isinstance(res.value, str)
+    # make sure that homer_simpson is ignored for generation
+    assert "homer_simpson" not in session.ctx.last_output_and_logs()[1].model_options
+
+
+@pytest.mark.qualitative
+def test_gen_slot(session):
+    @generative
+    def is_happy(text: str) -> bool:
+        """Determine if text is of happy mood."""
+
+    h = is_happy(session, text="I'm enjoying life.")
+
+    assert isinstance(h, bool)
+    # should yield True - but, of course, this is model dependent
+    assert h is True
+
+
+if __name__ == "__main__":
+    import pytest
+
+    pytest.main([__file__])
diff --git a/uv.lock b/uv.lock
index 5f5ffd6e..26da4829 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1,5 +1,5 @@
 version = 1
-revision = 3
+revision = 2
 requires-python = ">=3.10"
 resolution-markers = [
     "python_full_version >= '3.14' and sys_platform == 'darwin'",
@@ -1012,6 +1012,30 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/cb/a8/20d0723294217e47de6d9e2e40fd4a9d2f7c4b6ef974babd482a59743694/fastjsonschema-2.21.2-py3-none-any.whl", hash = "sha256:1c797122d0a86c5cace2e54bf4e819c36223b552017172f32c5c024a6b77e463", size
= 24024, upload-time = "2025-08-14T18:49:34.776Z" }, ] +[[package]] +name = "fastuuid" +version = "0.12.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/19/17/13146a1e916bd2971d0a58db5e0a4ad23efdd49f78f33ac871c161f8007b/fastuuid-0.12.0.tar.gz", hash = "sha256:d0bd4e5b35aad2826403f4411937c89e7c88857b1513fe10f696544c03e9bd8e", size = 19180, upload-time = "2025-01-27T18:04:14.387Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/97/c3/9db9aee6f34e6dfd1f909d3d7432ac26e491a0471f8bb8b676c44b625b3f/fastuuid-0.12.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:22a900ef0956aacf862b460e20541fdae2d7c340594fe1bd6fdcb10d5f0791a9", size = 247356, upload-time = "2025-01-27T18:04:45.397Z" }, + { url = "https://files.pythonhosted.org/packages/14/a5/999e6e017af3d85841ce1e172d32fd27c8700804c125f496f71bfddc1a9f/fastuuid-0.12.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0302f5acf54dc75de30103025c5a95db06d6c2be36829043a0aa16fc170076bc", size = 258384, upload-time = "2025-01-27T18:04:03.562Z" }, + { url = "https://files.pythonhosted.org/packages/c4/e6/beae8411cac5b3b0b9d59ee08405eb39c3abe81dad459114363eff55c14a/fastuuid-0.12.0-cp310-cp310-manylinux_2_34_x86_64.whl", hash = "sha256:7946b4a310cfc2d597dcba658019d72a2851612a2cebb949d809c0e2474cf0a6", size = 278480, upload-time = "2025-01-27T18:04:05.663Z" }, + { url = "https://files.pythonhosted.org/packages/f1/f6/c598b9a052435716fc5a084ef17049edd35ca2c8241161269bfea4905ab4/fastuuid-0.12.0-cp310-cp310-win_amd64.whl", hash = "sha256:a1b6764dd42bf0c46c858fb5ade7b7a3d93b7a27485a7a5c184909026694cd88", size = 156799, upload-time = "2025-01-27T18:05:41.867Z" }, + { url = "https://files.pythonhosted.org/packages/d4/99/555eab31381c7912103d4c8654082611e5e82a7bb88ad5ab067e36b622d7/fastuuid-0.12.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2bced35269315d16fe0c41003f8c9d63f2ee16a59295d90922cad5e6a67d0418", size = 247249, upload-time = "2025-01-27T18:03:23.092Z" }, + { url = "https://files.pythonhosted.org/packages/6d/3b/d62ce7f2af3d50a8e787603d44809770f43a3f2ff708bf10c252bf479109/fastuuid-0.12.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:82106e4b0a24f4f2f73c88f89dadbc1533bb808900740ca5db9bbb17d3b0c824", size = 258369, upload-time = "2025-01-27T18:04:08.903Z" }, + { url = "https://files.pythonhosted.org/packages/86/23/33ec5355036745cf83ea9ca7576d2e0750ff8d268c03b4af40ed26f1a303/fastuuid-0.12.0-cp311-cp311-manylinux_2_34_x86_64.whl", hash = "sha256:4db1bc7b8caa1d7412e1bea29b016d23a8d219131cff825b933eb3428f044dca", size = 278316, upload-time = "2025-01-27T18:04:12.74Z" }, + { url = "https://files.pythonhosted.org/packages/40/91/32ce82a14650148b6979ccd1a0089fd63d92505a90fb7156d2acc3245cbd/fastuuid-0.12.0-cp311-cp311-win_amd64.whl", hash = "sha256:07afc8e674e67ac3d35a608c68f6809da5fab470fb4ef4469094fdb32ba36c51", size = 156643, upload-time = "2025-01-27T18:05:59.266Z" }, + { url = "https://files.pythonhosted.org/packages/f6/28/442e79d6219b90208cb243ac01db05d89cc4fdf8ecd563fb89476baf7122/fastuuid-0.12.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:328694a573fe9dce556b0b70c9d03776786801e028d82f0b6d9db1cb0521b4d1", size = 247372, upload-time = "2025-01-27T18:03:40.967Z" }, + { url = "https://files.pythonhosted.org/packages/40/eb/e0fd56890970ca7a9ec0d116844580988b692b1a749ac38e0c39e1dbdf23/fastuuid-0.12.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:02acaea2c955bb2035a7d8e7b3fba8bd623b03746ae278e5fa932ef54c702f9f", size = 258200, upload-time = "2025-01-27T18:04:12.138Z" }, + { url = "https://files.pythonhosted.org/packages/f5/3c/4b30e376e65597a51a3dc929461a0dec77c8aec5d41d930f482b8f43e781/fastuuid-0.12.0-cp312-cp312-manylinux_2_34_x86_64.whl", hash = "sha256:ed9f449cba8cf16cced252521aee06e633d50ec48c807683f21cc1d89e193eb0", size = 278446, upload-time = "2025-01-27T18:04:15.877Z" }, + { url = "https://files.pythonhosted.org/packages/fe/96/cc5975fd23d2197b3e29f650a7a9beddce8993eaf934fa4ac595b77bb71f/fastuuid-0.12.0-cp312-cp312-win_amd64.whl", hash = "sha256:0df2ea4c9db96fd8f4fa38d0e88e309b3e56f8fd03675a2f6958a5b082a0c1e4", size = 157185, upload-time = "2025-01-27T18:06:19.21Z" }, + { url = "https://files.pythonhosted.org/packages/a9/e8/d2bb4f19e5ee15f6f8e3192a54a897678314151aa17d0fb766d2c2cbc03d/fastuuid-0.12.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7fe2407316a04ee8f06d3dbc7eae396d0a86591d92bafe2ca32fce23b1145786", size = 247512, upload-time = "2025-01-27T18:04:08.115Z" }, + { url = "https://files.pythonhosted.org/packages/bc/53/25e811d92fd60f5c65e098c3b68bd8f1a35e4abb6b77a153025115b680de/fastuuid-0.12.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b9b31dd488d0778c36f8279b306dc92a42f16904cba54acca71e107d65b60b0c", size = 258257, upload-time = "2025-01-27T18:03:56.408Z" }, + { url = "https://files.pythonhosted.org/packages/10/23/73618e7793ea0b619caae2accd9e93e60da38dd78dd425002d319152ef2f/fastuuid-0.12.0-cp313-cp313-manylinux_2_34_x86_64.whl", hash = "sha256:b19361ee649365eefc717ec08005972d3d1eb9ee39908022d98e3bfa9da59e37", size = 278559, upload-time = "2025-01-27T18:03:58.661Z" }, + { url = "https://files.pythonhosted.org/packages/e4/41/6317ecfc4757d5f2a604e5d3993f353ba7aee85fa75ad8b86fce6fc2fa40/fastuuid-0.12.0-cp313-cp313-win_amd64.whl", hash = "sha256:8fc66b11423e6f3e1937385f655bedd67aebe56a3dcec0cb835351cfe7d358c9", size = 157276, upload-time = "2025-01-27T18:06:39.245Z" }, +] + [[package]] name = "filelock" version = "3.19.1" @@ -2058,6 +2082,29 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/83/60/d497a310bde3f01cb805196ac61b7ad6dc5dcf8dce66634dc34364b20b4f/lazy_loader-0.4-py3-none-any.whl", hash = "sha256:342aa8e14d543a154047afb4ba8ef17f5563baad3fc610d7b15b213b0f119efc", size = 12097, upload-time = "2024-04-05T13:03:10.514Z" }, ] +[[package]] +name = "litellm" +version = "1.76.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "aiohttp" }, + { name = "click" }, + { name = "fastuuid" }, + { name = "httpx" }, + { name = "importlib-metadata" }, + { name = "jinja2" }, + { name = "jsonschema" }, + { name = "openai" }, + { name = "pydantic" }, + { name = "python-dotenv" }, + { name = "tiktoken" }, + { name = "tokenizers" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/72/46/57b6539365616452bb6f4401487448ce62e62755738fce55d8222d7a557e/litellm-1.76.3.tar.gz", hash = "sha256:fc81219c59b17b26cc81276ce32582f3715612877ab11c1ea2c26e4853ac67e8", size = 10210403, upload-time = "2025-09-07T01:59:19.55Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d0/d9/5f8ed27241b487f51f04573b8ba06d4460ebed9f792ff5cc148649fbf862/litellm-1.76.3-py3-none-any.whl", hash = "sha256:d62e3ff2a80ec5e551c6d7a0fe199ffe718ecb6cbaa43fc9250dd8d7c0944352", size = 9000797, upload-time = "2025-09-07T01:59:16.261Z" }, +] + [[package]] name = "lomond" version = "0.3.3" @@ -2290,6 +2337,7 @@ all = [ { name = "datasets" }, { name = "docling" }, { 
name = "ibm-watsonx-ai" }, + { name = "litellm" }, { name = "outlines" }, { name = "peft" }, { name = "transformers" }, @@ -2307,6 +2355,9 @@ hf = [ { name = "transformers" }, { name = "trl" }, ] +litellm = [ + { name = "litellm" }, +] watsonx = [ { name = "ibm-watsonx-ai" }, ] @@ -2348,7 +2399,8 @@ requires-dist = [ { name = "ibm-watsonx-ai", marker = "extra == 'watsonx'", specifier = ">=1.3.31" }, { name = "jinja2" }, { name = "json5" }, - { name = "mellea", extras = ["watsonx", "docling", "hf"], marker = "extra == 'all'" }, + { name = "litellm", marker = "extra == 'litellm'", specifier = ">=1.76" }, + { name = "mellea", extras = ["watsonx", "docling", "hf", "litellm"], marker = "extra == 'all'" }, { name = "mistletoe", specifier = ">=1.4.0" }, { name = "ollama", specifier = ">=0.5.1" }, { name = "openai" }, @@ -2363,7 +2415,7 @@ requires-dist = [ { name = "types-tqdm" }, { name = "uvicorn" }, ] -provides-extras = ["hf", "watsonx", "docling", "all"] +provides-extras = ["hf", "litellm", "watsonx", "docling", "all"] [package.metadata.requires-dev] dev = [ @@ -3009,7 +3061,7 @@ name = "nvidia-cudnn-cu12" version = "9.10.2.21" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-cublas-cu12" }, + { name = "nvidia-cublas-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/ba/51/e123d997aa098c61d029f76663dedbfb9bc8dcf8c60cbd6adbe42f76d049/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8", size = 706758467, upload-time = "2025-06-06T21:54:08.597Z" }, @@ -3020,7 +3072,7 @@ name = "nvidia-cufft-cu12" version = "11.3.3.83" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-nvjitlink-cu12" }, + { name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/1f/13/ee4e00f30e676b66ae65b4f08cb5bcbb8392c03f54f2d5413ea99a5d1c80/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74", size = 193118695, upload-time = "2025-03-07T01:45:27.821Z" }, @@ -3047,9 +3099,9 @@ name = "nvidia-cusolver-cu12" version = "11.7.3.90" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-cublas-cu12" }, - { name = "nvidia-cusparse-cu12" }, - { name = "nvidia-nvjitlink-cu12" }, + { name = "nvidia-cublas-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, + { name = "nvidia-cusparse-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, + { name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/85/48/9a13d2975803e8cf2777d5ed57b87a0b6ca2cc795f9a4f59796a910bfb80/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450", size = 267506905, upload-time = "2025-03-07T01:47:16.273Z" }, @@ -3060,7 
+3112,7 @@ name = "nvidia-cusparse-cu12" version = "12.5.8.93" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-nvjitlink-cu12" }, + { name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/c2/f5/e1854cb2f2bcd4280c44736c93550cc300ff4b8c95ebe370d0aa7d2b473d/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b", size = 288216466, upload-time = "2025-03-07T01:48:13.779Z" }, @@ -5386,6 +5438,42 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/56/b3/23eec760215910609914dd99aba23ce1c72a3bcbe046ee44f45adf740452/tifffile-2025.8.28-py3-none-any.whl", hash = "sha256:b274a6d9eeba65177cf7320af25ef38ecf910b3369ac6bc494a94a3f6bd99c78", size = 231049, upload-time = "2025-08-27T19:47:33.909Z" }, ] +[[package]] +name = "tiktoken" +version = "0.11.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "regex" }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a7/86/ad0155a37c4f310935d5ac0b1ccf9bdb635dcb906e0a9a26b616dd55825a/tiktoken-0.11.0.tar.gz", hash = "sha256:3c518641aee1c52247c2b97e74d8d07d780092af79d5911a6ab5e79359d9b06a", size = 37648, upload-time = "2025-08-08T23:58:08.495Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8b/4d/c6a2e7dca2b4f2e9e0bfd62b3fe4f114322e2c028cfba905a72bc76ce479/tiktoken-0.11.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:8a9b517d6331d7103f8bef29ef93b3cca95fa766e293147fe7bacddf310d5917", size = 1059937, upload-time = "2025-08-08T23:57:28.57Z" }, + { url = "https://files.pythonhosted.org/packages/41/54/3739d35b9f94cb8dc7b0db2edca7192d5571606aa2369a664fa27e811804/tiktoken-0.11.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b4ddb1849e6bf0afa6cc1c5d809fb980ca240a5fffe585a04e119519758788c0", size = 999230, upload-time = "2025-08-08T23:57:30.241Z" }, + { url = "https://files.pythonhosted.org/packages/dd/f4/ec8d43338d28d53513004ebf4cd83732a135d11011433c58bf045890cc10/tiktoken-0.11.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:10331d08b5ecf7a780b4fe4d0281328b23ab22cdb4ff65e68d56caeda9940ecc", size = 1130076, upload-time = "2025-08-08T23:57:31.706Z" }, + { url = "https://files.pythonhosted.org/packages/94/80/fb0ada0a882cb453caf519a4bf0d117c2a3ee2e852c88775abff5413c176/tiktoken-0.11.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b062c82300341dc87e0258c69f79bed725f87e753c21887aea90d272816be882", size = 1183942, upload-time = "2025-08-08T23:57:33.142Z" }, + { url = "https://files.pythonhosted.org/packages/2f/e9/6c104355b463601719582823f3ea658bc3aa7c73d1b3b7553ebdc48468ce/tiktoken-0.11.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:195d84bec46169af3b1349a1495c151d37a0ff4cba73fd08282736be7f92cc6c", size = 1244705, upload-time = "2025-08-08T23:57:34.594Z" }, + { url = "https://files.pythonhosted.org/packages/94/75/eaa6068f47e8b3f0aab9e05177cce2cf5aa2cc0ca93981792e620d4d4117/tiktoken-0.11.0-cp310-cp310-win_amd64.whl", hash = "sha256:fe91581b0ecdd8783ce8cb6e3178f2260a3912e8724d2f2d49552b98714641a1", size = 884152, upload-time = "2025-08-08T23:57:36.18Z" }, + { url = 
"https://files.pythonhosted.org/packages/8a/91/912b459799a025d2842566fe1e902f7f50d54a1ce8a0f236ab36b5bd5846/tiktoken-0.11.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:4ae374c46afadad0f501046db3da1b36cd4dfbfa52af23c998773682446097cf", size = 1059743, upload-time = "2025-08-08T23:57:37.516Z" }, + { url = "https://files.pythonhosted.org/packages/8c/e9/6faa6870489ce64f5f75dcf91512bf35af5864583aee8fcb0dcb593121f5/tiktoken-0.11.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:25a512ff25dc6c85b58f5dd4f3d8c674dc05f96b02d66cdacf628d26a4e4866b", size = 999334, upload-time = "2025-08-08T23:57:38.595Z" }, + { url = "https://files.pythonhosted.org/packages/a1/3e/a05d1547cf7db9dc75d1461cfa7b556a3b48e0516ec29dfc81d984a145f6/tiktoken-0.11.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2130127471e293d385179c1f3f9cd445070c0772be73cdafb7cec9a3684c0458", size = 1129402, upload-time = "2025-08-08T23:57:39.627Z" }, + { url = "https://files.pythonhosted.org/packages/34/9a/db7a86b829e05a01fd4daa492086f708e0a8b53952e1dbc9d380d2b03677/tiktoken-0.11.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21e43022bf2c33f733ea9b54f6a3f6b4354b909f5a73388fb1b9347ca54a069c", size = 1184046, upload-time = "2025-08-08T23:57:40.689Z" }, + { url = "https://files.pythonhosted.org/packages/9d/bb/52edc8e078cf062ed749248f1454e9e5cfd09979baadb830b3940e522015/tiktoken-0.11.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:adb4e308eb64380dc70fa30493e21c93475eaa11669dea313b6bbf8210bfd013", size = 1244691, upload-time = "2025-08-08T23:57:42.251Z" }, + { url = "https://files.pythonhosted.org/packages/60/d9/884b6cd7ae2570ecdcaffa02b528522b18fef1cbbfdbcaa73799807d0d3b/tiktoken-0.11.0-cp311-cp311-win_amd64.whl", hash = "sha256:ece6b76bfeeb61a125c44bbefdfccc279b5288e6007fbedc0d32bfec602df2f2", size = 884392, upload-time = "2025-08-08T23:57:43.628Z" }, + { url = "https://files.pythonhosted.org/packages/e7/9e/eceddeffc169fc75fe0fd4f38471309f11cb1906f9b8aa39be4f5817df65/tiktoken-0.11.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:fd9e6b23e860973cf9526544e220b223c60badf5b62e80a33509d6d40e6c8f5d", size = 1055199, upload-time = "2025-08-08T23:57:45.076Z" }, + { url = "https://files.pythonhosted.org/packages/4f/cf/5f02bfefffdc6b54e5094d2897bc80efd43050e5b09b576fd85936ee54bf/tiktoken-0.11.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6a76d53cee2da71ee2731c9caa747398762bda19d7f92665e882fef229cb0b5b", size = 996655, upload-time = "2025-08-08T23:57:46.304Z" }, + { url = "https://files.pythonhosted.org/packages/65/8e/c769b45ef379bc360c9978c4f6914c79fd432400a6733a8afc7ed7b0726a/tiktoken-0.11.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ef72aab3ea240646e642413cb363b73869fed4e604dcfd69eec63dc54d603e8", size = 1128867, upload-time = "2025-08-08T23:57:47.438Z" }, + { url = "https://files.pythonhosted.org/packages/d5/2d/4d77f6feb9292bfdd23d5813e442b3bba883f42d0ac78ef5fdc56873f756/tiktoken-0.11.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f929255c705efec7a28bf515e29dc74220b2f07544a8c81b8d69e8efc4578bd", size = 1183308, upload-time = "2025-08-08T23:57:48.566Z" }, + { url = "https://files.pythonhosted.org/packages/7a/65/7ff0a65d3bb0fc5a1fb6cc71b03e0f6e71a68c5eea230d1ff1ba3fd6df49/tiktoken-0.11.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:61f1d15822e4404953d499fd1dcc62817a12ae9fb1e4898033ec8fe3915fdf8e", size = 1244301, upload-time = "2025-08-08T23:57:49.642Z" }, + { url = 
"https://files.pythonhosted.org/packages/f5/6e/5b71578799b72e5bdcef206a214c3ce860d999d579a3b56e74a6c8989ee2/tiktoken-0.11.0-cp312-cp312-win_amd64.whl", hash = "sha256:45927a71ab6643dfd3ef57d515a5db3d199137adf551f66453be098502838b0f", size = 884282, upload-time = "2025-08-08T23:57:50.759Z" }, + { url = "https://files.pythonhosted.org/packages/cc/cd/a9034bcee638716d9310443818d73c6387a6a96db93cbcb0819b77f5b206/tiktoken-0.11.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a5f3f25ffb152ee7fec78e90a5e5ea5b03b4ea240beed03305615847f7a6ace2", size = 1055339, upload-time = "2025-08-08T23:57:51.802Z" }, + { url = "https://files.pythonhosted.org/packages/f1/91/9922b345f611b4e92581f234e64e9661e1c524875c8eadd513c4b2088472/tiktoken-0.11.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7dc6e9ad16a2a75b4c4be7208055a1f707c9510541d94d9cc31f7fbdc8db41d8", size = 997080, upload-time = "2025-08-08T23:57:53.442Z" }, + { url = "https://files.pythonhosted.org/packages/d0/9d/49cd047c71336bc4b4af460ac213ec1c457da67712bde59b892e84f1859f/tiktoken-0.11.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5a0517634d67a8a48fd4a4ad73930c3022629a85a217d256a6e9b8b47439d1e4", size = 1128501, upload-time = "2025-08-08T23:57:54.808Z" }, + { url = "https://files.pythonhosted.org/packages/52/d5/a0dcdb40dd2ea357e83cb36258967f0ae96f5dd40c722d6e382ceee6bba9/tiktoken-0.11.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7fb4effe60574675118b73c6fbfd3b5868e5d7a1f570d6cc0d18724b09ecf318", size = 1182743, upload-time = "2025-08-08T23:57:56.307Z" }, + { url = "https://files.pythonhosted.org/packages/3b/17/a0fc51aefb66b7b5261ca1314afa83df0106b033f783f9a7bcbe8e741494/tiktoken-0.11.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:94f984c9831fd32688aef4348803b0905d4ae9c432303087bae370dc1381a2b8", size = 1244057, upload-time = "2025-08-08T23:57:57.628Z" }, + { url = "https://files.pythonhosted.org/packages/50/79/bcf350609f3a10f09fe4fc207f132085e497fdd3612f3925ab24d86a0ca0/tiktoken-0.11.0-cp313-cp313-win_amd64.whl", hash = "sha256:2177ffda31dec4023356a441793fed82f7af5291120751dee4d696414f54db0c", size = 883901, upload-time = "2025-08-08T23:57:59.359Z" }, +] + [[package]] name = "tinycss2" version = "1.4.0" @@ -5623,7 +5711,7 @@ name = "triton" version = "3.4.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "setuptools" }, + { name = "setuptools", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/62/ee/0ee5f64a87eeda19bbad9bc54ae5ca5b98186ed00055281fd40fb4beb10e/triton-3.4.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7ff2785de9bc02f500e085420273bb5cc9c9bb767584a4aa28d6e360cec70128", size = 155430069, upload-time = "2025-07-30T19:58:21.715Z" },