diff --git a/mellea/backends/litellm.py b/mellea/backends/litellm.py
new file mode 100644
index 00000000..34d8f2f6
--- /dev/null
+++ b/mellea/backends/litellm.py
@@ -0,0 +1,354 @@
+"""A generic LiteLLM compatible backend that wraps around the openai python sdk."""
+
+import datetime
+import json
+from collections.abc import Callable
+from typing import Any
+
+import litellm
+import litellm.litellm_core_utils
+import litellm.litellm_core_utils.get_supported_openai_params
+
+import mellea.backends.model_ids as model_ids
+from mellea.backends import BaseModelSubclass
+from mellea.backends.formatter import Formatter, FormatterBackend, TemplateFormatter
+from mellea.backends.tools import (
+    add_tools_from_context_actions,
+    add_tools_from_model_options,
+    convert_tools_to_json,
+)
+from mellea.backends.types import ModelOption
+from mellea.helpers.fancy_logger import FancyLogger
+from mellea.stdlib.base import (
+    CBlock,
+    Component,
+    Context,
+    GenerateLog,
+    ModelOutputThunk,
+    ModelToolCall,
+)
+from mellea.stdlib.chat import Message
+from mellea.stdlib.requirement import ALoraRequirement
+
+
+class LiteLLMBackend(FormatterBackend):
+    """A generic LiteLLM compatible backend."""
+
+    def __init__(
+        self,
+        model_id: str = "ollama/" + str(model_ids.IBM_GRANITE_3_3_8B.ollama_name),
+        formatter: Formatter | None = None,
+        base_url: str | None = "http://localhost:11434",
+        model_options: dict | None = None,
+    ):
+        """Initialize a LiteLLM compatible backend. Any additional provider-specific options can be passed via `model_options`; they are forwarded to the completion call.
+
+        Args:
+            model_id: The LiteLLM model identifier. Make sure that all necessary credentials are in OS environment variables.
+            formatter: A custom formatter for this backend. If None, defaults to TemplateFormatter.
+            base_url: Base url for the LLM API. Defaults to the local Ollama endpoint.
+            model_options: Generation options to pass to the LLM. Defaults to None.
+        """
+        super().__init__(
+            model_id=model_id,
+            formatter=(
+                formatter
+                if formatter is not None
+                else TemplateFormatter(model_id=model_id)
+            ),
+            model_options=model_options,
+        )
+
+        assert isinstance(model_id, str), "Model ID must be a string."
+        self._model_id = model_id
+
+        if base_url is None:
+            self._base_url = "http://localhost:11434/v1"  # ollama
+        else:
+            self._base_url = base_url
+
+        # A mapping of common options for this backend mapped to their Mellea ModelOptions equivalent.
+        # These are usually values that must be extracted beforehand or that are common among backend providers.
+        # OpenAI has some deprecated parameters. Those map to the same mellea parameter, but
+        # users should only be specifying a single one in their request.
+        self.to_mellea_model_opts_map = {
+            "system": ModelOption.SYSTEM_PROMPT,
+            "reasoning_effort": ModelOption.THINKING,  # TODO: JAL; see which of these are actually extracted...
+            "seed": ModelOption.SEED,
+            "max_completion_tokens": ModelOption.MAX_NEW_TOKENS,
+            "max_tokens": ModelOption.MAX_NEW_TOKENS,
+            "tools": ModelOption.TOOLS,
+            "functions": ModelOption.TOOLS,
+        }
+
+        # A mapping of Mellea specific ModelOptions to the specific names for this backend.
+        # These options should almost always be a subset of those specified in the `to_mellea_model_opts_map`.
+        # Usually, values that are intentionally extracted while prepping for the backend generate call
+        # will be omitted here so that they will be removed when model_options are processed
+        # for the call to the model.
+        self.from_mellea_model_opts_map = {
+            ModelOption.SEED: "seed",
+            ModelOption.MAX_NEW_TOKENS: "max_completion_tokens",
+        }
+
+    def generate_from_context(
+        self,
+        action: Component | CBlock,
+        ctx: Context,
+        *,
+        format: type[BaseModelSubclass] | None = None,
+        model_options: dict | None = None,
+        generate_logs: list[GenerateLog] | None = None,
+        tool_calls: bool = False,
+    ):
+        """See `generate_from_chat_context`."""
+        assert ctx.is_chat_context, NotImplementedError(
+            "The LiteLLM backend only supports chat-like contexts."
+        )
+        return self._generate_from_chat_context_standard(
+            action,
+            ctx,
+            format=format,
+            model_options=model_options,
+            generate_logs=generate_logs,
+            tool_calls=tool_calls,
+        )
+
+    def _simplify_and_merge(
+        self, model_options: dict[str, Any] | None
+    ) -> dict[str, Any]:
+        """Simplifies model_options to use the Mellea specific ModelOption.Option and merges the backend's model_options with those passed into this call.
+
+        Rules:
+        - Within a model_options dict, existing keys take precedence. This means remapping to mellea specific keys will maintain the value of the mellea specific key if one already exists.
+        - When merging, the keys/values from the dictionary passed into this function take precedence.
+
+        Because this function simplifies and then merges, non-Mellea keys from the passed-in model_options will replace
+        Mellea specific keys from the backend's model_options.
+
+        Args:
+            model_options: the model_options for this call
+
+        Returns:
+            a new dict
+        """
+        backend_model_opts = ModelOption.replace_keys(
+            self.model_options, self.to_mellea_model_opts_map
+        )
+
+        if model_options is None:
+            return backend_model_opts
+
+        generate_call_model_opts = ModelOption.replace_keys(
+            model_options, self.to_mellea_model_opts_map
+        )
+        return ModelOption.merge_model_options(
+            backend_model_opts, generate_call_model_opts
+        )
+
+    def _make_backend_specific_and_remove(
+        self, model_options: dict[str, Any]
+    ) -> dict[str, Any]:
+        """Maps specified Mellea specific keys to their backend specific version and removes any remaining Mellea keys.
+
+        Additionally, logs any params unknown to litellm and any params that are openai specific but not supported by this model/provider.
+
+        Args:
+            model_options: the model_options for this call
+
+        Returns:
+            a new dict
+        """
+        backend_specific = ModelOption.replace_keys(
+            model_options, self.from_mellea_model_opts_map
+        )
+        backend_specific = ModelOption.remove_special_keys(backend_specific)
+
+        # We set `drop_params=True` which will drop non-supported openai params; check for non-openai
+        # params that might cause errors and log which openai params aren't supported here.
+        # See https://docs.litellm.ai/docs/completion/input.
+        # standard_openai_subset = litellm.get_standard_openai_params(backend_specific)
+        supported_params_list = litellm.litellm_core_utils.get_supported_openai_params.get_supported_openai_params(
+            self._model_id
+        )
+        supported_params = (
+            set(supported_params_list) if supported_params_list is not None else set()
+        )
+
+        # unknown_keys = []  # keys that are unknown to litellm
+        unsupported_openai_params = []  # openai params that are known to litellm but not supported for this model/provider
+        for key in backend_specific.keys():
+            if key not in supported_params:
+                unsupported_openai_params.append(key)
+
+        # if len(unknown_keys) > 0:
+        #     FancyLogger.get_logger().warning(
+        #         f"litellm allows for unknown / non-openai input params; mellea won't validate the following params that may cause issues: {', '.join(unknown_keys)}"
+        #     )
+
+        if len(unsupported_openai_params) > 0:
+            FancyLogger.get_logger().warning(
+                f"litellm will automatically drop the following openai keys that aren't supported by the current model/provider: {', '.join(unsupported_openai_params)}"
+            )
+            for key in unsupported_openai_params:
+                del backend_specific[key]
+
+        return backend_specific
+
+    def _generate_from_chat_context_standard(
+        self,
+        action: Component | CBlock,
+        ctx: Context,
+        *,
+        format: type[BaseModelSubclass]
+        | None = None,  # Type[BaseModelSubclass] is a class object of a subclass of BaseModel
+        model_options: dict | None = None,
+        generate_logs: list[GenerateLog] | None = None,
+        tool_calls: bool = False,
+    ) -> ModelOutputThunk:
+        model_opts = self._simplify_and_merge(model_options)
+        linearized_context = ctx.render_for_generation()
+        assert linearized_context is not None, (
+            "Cannot generate from a non-linear context in a FormatterBackend."
+        )
+        # Convert our linearized context into a sequence of chat messages. Template formatters have a standard way of doing this.
+        messages: list[Message] = self.formatter.to_chat_messages(linearized_context)
+        # Add the final message.
+        match action:
+            case ALoraRequirement():
+                raise Exception("The LiteLLM backend does not support activated LoRAs.")
+            case _:
+                messages.extend(self.formatter.to_chat_messages([action]))
+
+        conversation: list[dict] = []
+        system_prompt = model_opts.get(ModelOption.SYSTEM_PROMPT, "")
+        if system_prompt != "":
+            conversation.append({"role": "system", "content": system_prompt})
+        conversation.extend([{"role": m.role, "content": m.content} for m in messages])
+
+        if format is not None:
+            response_format = {
+                "type": "json_schema",
+                "json_schema": {
+                    "name": format.__name__,
+                    "schema": format.model_json_schema(),
+                    "strict": True,
+                },
+            }
+        else:
+            response_format = {"type": "text"}
+
+        thinking = model_opts.get(ModelOption.THINKING, None)
+        if type(thinking) is bool and thinking:
+            # OpenAI uses strings for its reasoning levels.
+            thinking = "medium"
+
+        # Append tool call information if applicable.
+        tools = self._extract_tools(action, format, model_opts, tool_calls, ctx)
+        formatted_tools = convert_tools_to_json(tools) if len(tools) > 0 else None
+
+        model_specific_options = self._make_backend_specific_and_remove(model_opts)
+
+        chat_response: litellm.ModelResponse = litellm.completion(
+            model=self._model_id,
+            messages=conversation,
+            tools=formatted_tools,
+            response_format=response_format,
+            reasoning_effort=thinking,  # type: ignore
+            drop_params=True,  # See note in `_make_backend_specific_and_remove`.
+            **model_specific_options,
+        )
+
+        choice_0 = chat_response.choices[0]
+        assert isinstance(choice_0, litellm.utils.Choices), (
+            "Only works for non-streaming response for now"
+        )
+        result = ModelOutputThunk(
+            value=choice_0.message.content,
+            meta={
+                "litellm_chat_response": chat_response.choices[0].model_dump()
+            },  # NOTE: Using model dump here to comply with `TemplateFormatter`
+            tool_calls=self._extract_model_tool_requests(tools, chat_response),
+        )
+
+        parsed_result = self.formatter.parse(source_component=action, result=result)
+
+        if generate_logs is not None:
+            assert isinstance(generate_logs, list)
+            generate_log = GenerateLog()
+            generate_log.prompt = conversation
+            generate_log.backend = f"litellm::{self.model_id!s}"
+            generate_log.model_options = model_specific_options
+            generate_log.date = datetime.datetime.now()
+            generate_log.model_output = chat_response
+            generate_log.extra = {
+                "format": format,
+                "tools_available": tools,
+                "tools_called": result.tool_calls,
+                "seed": model_opts.get("seed", None),
+            }
+            generate_log.action = action
+            generate_log.result = parsed_result
+            generate_logs.append(generate_log)
+
+        return parsed_result
+
+    @staticmethod
+    def _extract_tools(
+        action, format, model_opts, tool_calls, ctx
+    ) -> dict[str, Callable]:
+        tools: dict[str, Callable] = dict()
+        if tool_calls:
+            if format:
+                FancyLogger.get_logger().warning(
+                    f"Tool calling typically uses constrained generation, but you have specified a `format` in your generate call. NB: tool calling is superseded by format; we will NOT call tools for your request: {action}"
+                )
+            else:
+                add_tools_from_model_options(tools, model_opts)
+                add_tools_from_context_actions(tools, ctx.actions_for_available_tools())
+
+                # Add the tools from the action for this generation last so that
+                # they overwrite conflicting names.
+                add_tools_from_context_actions(tools, [action])
+            FancyLogger.get_logger().info(f"Tools for call: {tools.keys()}")
+        return tools
+
+    def _generate_from_raw(
+        self,
+        actions: list[Component | CBlock],
+        *,
+        format: type[BaseModelSubclass] | None = None,
+        model_options: dict | None = None,
+        generate_logs: list[GenerateLog] | None = None,
+    ) -> list[ModelOutputThunk]:
+        """Generate using the completions api. Gives the input provided to the model without templating."""
+        raise NotImplementedError("This method is not implemented yet.")
+
+    def _extract_model_tool_requests(
+        self, tools: dict[str, Callable], chat_response: litellm.ModelResponse
+    ) -> dict[str, ModelToolCall] | None:
+        model_tool_calls: dict[str, ModelToolCall] = {}
+        choice_0 = chat_response.choices[0]
+        assert isinstance(choice_0, litellm.utils.Choices), (
+            "Only works for non-streaming response for now"
+        )
+        calls = choice_0.message.tool_calls
+        if calls:
+            for tool_call in calls:
+                tool_name = str(tool_call.function.name)
+                tool_args = tool_call.function.arguments
+
+                func = tools.get(tool_name)
+                if func is None:
+                    FancyLogger.get_logger().warning(
+                        f"model attempted to call a non-existing function: {tool_name}"
+                    )
+                    continue  # skip this function if we can't find it.
+
+                # Returns the args as a string. Parse it here.
+                args = json.loads(tool_args)
+                model_tool_calls[tool_name] = ModelToolCall(tool_name, func, args)
+
+        if len(model_tool_calls) > 0:
+            return model_tool_calls
+        return None
diff --git a/pyproject.toml b/pyproject.toml
index a4adf89f..2aeddd56 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -62,6 +62,10 @@ hf = [
     "trl>=0.19.0",
 ]
 
+litellm = [
+    "litellm>=1.76"
+]
+
 watsonx = [
     "ibm-watsonx-ai>=1.3.31",
 ]
@@ -69,7 +73,7 @@ docling = [
     "docling>=2.45.0",
 ]
 
-all = ["mellea[watsonx,docling,hf]"]
+all = ["mellea[watsonx,docling,hf,litellm]"]
 
 [dependency-groups]
 # Use these like:
@@ -140,7 +144,7 @@ ignore = [
     # "UP006", # List vs list, etc
     # "UP007", # Option and Union
     # "UP035", # `typing.Set` is deprecated, use `set` instead"
-    "PD901", # Avoid using the generic variable name `df` for DataFrames
+    "PD901", # Avoid using the generic variable name `df` for DataFrames
 ]
 
 [tool.ruff.lint.pydocstyle]
diff --git a/test/backends/test_litellm_ollama.py b/test/backends/test_litellm_ollama.py
new file mode 100644
index 00000000..782debff
--- /dev/null
+++ b/test/backends/test_litellm_ollama.py
@@ -0,0 +1,73 @@
+import pytest
+
+from mellea import MelleaSession, generative
+from mellea.backends import ModelOption
+from mellea.backends.litellm import LiteLLMBackend
+from mellea.stdlib.chat import Message
+from mellea.stdlib.sampling import RejectionSamplingStrategy
+
+
+@pytest.fixture(scope="function")
+def session():
+    """Fresh Ollama session for each test."""
+    session = MelleaSession(LiteLLMBackend())
+    yield session
+    session.reset()
+
+
+@pytest.mark.qualitative
+def test_litellm_ollama_chat(session):
+    res = session.chat("hello world")
+    assert res is not None
+    assert isinstance(res, Message)
+
+
+@pytest.mark.qualitative
+def test_litellm_ollama_instruct(session):
+    res = session.instruct(
+        "Write an email to the interns.",
+        requirements=["be funny"],
+        strategy=RejectionSamplingStrategy(loop_budget=3),
+    )
+    assert res is not None
+    assert isinstance(res.value, str)
+
+
+@pytest.mark.qualitative
+def test_litellm_ollama_instruct_options(session):
+    res = session.instruct(
+        "Write an email to the interns.",
+        requirements=["be funny"],
+        model_options={
+            ModelOption.SEED: 123,
+            ModelOption.TEMPERATURE: 0.5,
+            ModelOption.THINKING: True,
+            ModelOption.MAX_NEW_TOKENS: 100,
+            "reasoning_effort": True,
+            "stream": False,
+            "homer_simpson": "option should be kicked out",
+        },
+    )
+    assert res is not None
+    assert isinstance(res.value, str)
+    # make sure that homer_simpson is ignored for generation
+    assert "homer_simpson" not in session.ctx.last_output_and_logs()[1].model_options
+
+
+@pytest.mark.qualitative
+def test_gen_slot(session):
+    @generative
+    def is_happy(text: str) -> bool:
+        """Determine if text is of happy mood."""
+
+    h = is_happy(session, text="I'm enjoying life.")
+
+    assert isinstance(h, bool)
+    # should yield True - but, of course, this is model dependent
+    assert h is True
+
+
+if __name__ == "__main__":
+    import pytest
+
+    pytest.main([__file__])
diff --git a/uv.lock b/uv.lock
index 5f5ffd6e..26da4829 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1,5 +1,5 @@
 version = 1
-revision = 3
+revision = 2
 requires-python = ">=3.10"
 resolution-markers = [
     "python_full_version >= '3.14' and sys_platform == 'darwin'",
@@ -1012,6 +1012,30 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/cb/a8/20d0723294217e47de6d9e2e40fd4a9d2f7c4b6ef974babd482a59743694/fastjsonschema-2.21.2-py3-none-any.whl", hash = "sha256:1c797122d0a86c5cace2e54bf4e819c36223b552017172f32c5c024a6b77e463", size
= 24024, upload-time = "2025-08-14T18:49:34.776Z" }, ] +[[package]] +name = "fastuuid" +version = "0.12.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/19/17/13146a1e916bd2971d0a58db5e0a4ad23efdd49f78f33ac871c161f8007b/fastuuid-0.12.0.tar.gz", hash = "sha256:d0bd4e5b35aad2826403f4411937c89e7c88857b1513fe10f696544c03e9bd8e", size = 19180, upload-time = "2025-01-27T18:04:14.387Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/97/c3/9db9aee6f34e6dfd1f909d3d7432ac26e491a0471f8bb8b676c44b625b3f/fastuuid-0.12.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:22a900ef0956aacf862b460e20541fdae2d7c340594fe1bd6fdcb10d5f0791a9", size = 247356, upload-time = "2025-01-27T18:04:45.397Z" }, + { url = "https://files.pythonhosted.org/packages/14/a5/999e6e017af3d85841ce1e172d32fd27c8700804c125f496f71bfddc1a9f/fastuuid-0.12.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0302f5acf54dc75de30103025c5a95db06d6c2be36829043a0aa16fc170076bc", size = 258384, upload-time = "2025-01-27T18:04:03.562Z" }, + { url = "https://files.pythonhosted.org/packages/c4/e6/beae8411cac5b3b0b9d59ee08405eb39c3abe81dad459114363eff55c14a/fastuuid-0.12.0-cp310-cp310-manylinux_2_34_x86_64.whl", hash = "sha256:7946b4a310cfc2d597dcba658019d72a2851612a2cebb949d809c0e2474cf0a6", size = 278480, upload-time = "2025-01-27T18:04:05.663Z" }, + { url = "https://files.pythonhosted.org/packages/f1/f6/c598b9a052435716fc5a084ef17049edd35ca2c8241161269bfea4905ab4/fastuuid-0.12.0-cp310-cp310-win_amd64.whl", hash = "sha256:a1b6764dd42bf0c46c858fb5ade7b7a3d93b7a27485a7a5c184909026694cd88", size = 156799, upload-time = "2025-01-27T18:05:41.867Z" }, + { url = "https://files.pythonhosted.org/packages/d4/99/555eab31381c7912103d4c8654082611e5e82a7bb88ad5ab067e36b622d7/fastuuid-0.12.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2bced35269315d16fe0c41003f8c9d63f2ee16a59295d90922cad5e6a67d0418", size = 247249, upload-time = "2025-01-27T18:03:23.092Z" }, + { url = "https://files.pythonhosted.org/packages/6d/3b/d62ce7f2af3d50a8e787603d44809770f43a3f2ff708bf10c252bf479109/fastuuid-0.12.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:82106e4b0a24f4f2f73c88f89dadbc1533bb808900740ca5db9bbb17d3b0c824", size = 258369, upload-time = "2025-01-27T18:04:08.903Z" }, + { url = "https://files.pythonhosted.org/packages/86/23/33ec5355036745cf83ea9ca7576d2e0750ff8d268c03b4af40ed26f1a303/fastuuid-0.12.0-cp311-cp311-manylinux_2_34_x86_64.whl", hash = "sha256:4db1bc7b8caa1d7412e1bea29b016d23a8d219131cff825b933eb3428f044dca", size = 278316, upload-time = "2025-01-27T18:04:12.74Z" }, + { url = "https://files.pythonhosted.org/packages/40/91/32ce82a14650148b6979ccd1a0089fd63d92505a90fb7156d2acc3245cbd/fastuuid-0.12.0-cp311-cp311-win_amd64.whl", hash = "sha256:07afc8e674e67ac3d35a608c68f6809da5fab470fb4ef4469094fdb32ba36c51", size = 156643, upload-time = "2025-01-27T18:05:59.266Z" }, + { url = "https://files.pythonhosted.org/packages/f6/28/442e79d6219b90208cb243ac01db05d89cc4fdf8ecd563fb89476baf7122/fastuuid-0.12.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:328694a573fe9dce556b0b70c9d03776786801e028d82f0b6d9db1cb0521b4d1", size = 247372, upload-time = "2025-01-27T18:03:40.967Z" }, + { url = "https://files.pythonhosted.org/packages/40/eb/e0fd56890970ca7a9ec0d116844580988b692b1a749ac38e0c39e1dbdf23/fastuuid-0.12.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:02acaea2c955bb2035a7d8e7b3fba8bd623b03746ae278e5fa932ef54c702f9f", size = 258200, upload-time = "2025-01-27T18:04:12.138Z" }, + { url = "https://files.pythonhosted.org/packages/f5/3c/4b30e376e65597a51a3dc929461a0dec77c8aec5d41d930f482b8f43e781/fastuuid-0.12.0-cp312-cp312-manylinux_2_34_x86_64.whl", hash = "sha256:ed9f449cba8cf16cced252521aee06e633d50ec48c807683f21cc1d89e193eb0", size = 278446, upload-time = "2025-01-27T18:04:15.877Z" }, + { url = "https://files.pythonhosted.org/packages/fe/96/cc5975fd23d2197b3e29f650a7a9beddce8993eaf934fa4ac595b77bb71f/fastuuid-0.12.0-cp312-cp312-win_amd64.whl", hash = "sha256:0df2ea4c9db96fd8f4fa38d0e88e309b3e56f8fd03675a2f6958a5b082a0c1e4", size = 157185, upload-time = "2025-01-27T18:06:19.21Z" }, + { url = "https://files.pythonhosted.org/packages/a9/e8/d2bb4f19e5ee15f6f8e3192a54a897678314151aa17d0fb766d2c2cbc03d/fastuuid-0.12.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7fe2407316a04ee8f06d3dbc7eae396d0a86591d92bafe2ca32fce23b1145786", size = 247512, upload-time = "2025-01-27T18:04:08.115Z" }, + { url = "https://files.pythonhosted.org/packages/bc/53/25e811d92fd60f5c65e098c3b68bd8f1a35e4abb6b77a153025115b680de/fastuuid-0.12.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b9b31dd488d0778c36f8279b306dc92a42f16904cba54acca71e107d65b60b0c", size = 258257, upload-time = "2025-01-27T18:03:56.408Z" }, + { url = "https://files.pythonhosted.org/packages/10/23/73618e7793ea0b619caae2accd9e93e60da38dd78dd425002d319152ef2f/fastuuid-0.12.0-cp313-cp313-manylinux_2_34_x86_64.whl", hash = "sha256:b19361ee649365eefc717ec08005972d3d1eb9ee39908022d98e3bfa9da59e37", size = 278559, upload-time = "2025-01-27T18:03:58.661Z" }, + { url = "https://files.pythonhosted.org/packages/e4/41/6317ecfc4757d5f2a604e5d3993f353ba7aee85fa75ad8b86fce6fc2fa40/fastuuid-0.12.0-cp313-cp313-win_amd64.whl", hash = "sha256:8fc66b11423e6f3e1937385f655bedd67aebe56a3dcec0cb835351cfe7d358c9", size = 157276, upload-time = "2025-01-27T18:06:39.245Z" }, +] + [[package]] name = "filelock" version = "3.19.1" @@ -2058,6 +2082,29 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/83/60/d497a310bde3f01cb805196ac61b7ad6dc5dcf8dce66634dc34364b20b4f/lazy_loader-0.4-py3-none-any.whl", hash = "sha256:342aa8e14d543a154047afb4ba8ef17f5563baad3fc610d7b15b213b0f119efc", size = 12097, upload-time = "2024-04-05T13:03:10.514Z" }, ] +[[package]] +name = "litellm" +version = "1.76.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "aiohttp" }, + { name = "click" }, + { name = "fastuuid" }, + { name = "httpx" }, + { name = "importlib-metadata" }, + { name = "jinja2" }, + { name = "jsonschema" }, + { name = "openai" }, + { name = "pydantic" }, + { name = "python-dotenv" }, + { name = "tiktoken" }, + { name = "tokenizers" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/72/46/57b6539365616452bb6f4401487448ce62e62755738fce55d8222d7a557e/litellm-1.76.3.tar.gz", hash = "sha256:fc81219c59b17b26cc81276ce32582f3715612877ab11c1ea2c26e4853ac67e8", size = 10210403, upload-time = "2025-09-07T01:59:19.55Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d0/d9/5f8ed27241b487f51f04573b8ba06d4460ebed9f792ff5cc148649fbf862/litellm-1.76.3-py3-none-any.whl", hash = "sha256:d62e3ff2a80ec5e551c6d7a0fe199ffe718ecb6cbaa43fc9250dd8d7c0944352", size = 9000797, upload-time = "2025-09-07T01:59:16.261Z" }, +] + [[package]] name = "lomond" version = "0.3.3" @@ -2290,6 +2337,7 @@ all = [ { name = "datasets" }, { name = "docling" }, { 
name = "ibm-watsonx-ai" }, + { name = "litellm" }, { name = "outlines" }, { name = "peft" }, { name = "transformers" }, @@ -2307,6 +2355,9 @@ hf = [ { name = "transformers" }, { name = "trl" }, ] +litellm = [ + { name = "litellm" }, +] watsonx = [ { name = "ibm-watsonx-ai" }, ] @@ -2348,7 +2399,8 @@ requires-dist = [ { name = "ibm-watsonx-ai", marker = "extra == 'watsonx'", specifier = ">=1.3.31" }, { name = "jinja2" }, { name = "json5" }, - { name = "mellea", extras = ["watsonx", "docling", "hf"], marker = "extra == 'all'" }, + { name = "litellm", marker = "extra == 'litellm'", specifier = ">=1.76" }, + { name = "mellea", extras = ["watsonx", "docling", "hf", "litellm"], marker = "extra == 'all'" }, { name = "mistletoe", specifier = ">=1.4.0" }, { name = "ollama", specifier = ">=0.5.1" }, { name = "openai" }, @@ -2363,7 +2415,7 @@ requires-dist = [ { name = "types-tqdm" }, { name = "uvicorn" }, ] -provides-extras = ["hf", "watsonx", "docling", "all"] +provides-extras = ["hf", "litellm", "watsonx", "docling", "all"] [package.metadata.requires-dev] dev = [ @@ -3009,7 +3061,7 @@ name = "nvidia-cudnn-cu12" version = "9.10.2.21" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-cublas-cu12" }, + { name = "nvidia-cublas-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/ba/51/e123d997aa098c61d029f76663dedbfb9bc8dcf8c60cbd6adbe42f76d049/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8", size = 706758467, upload-time = "2025-06-06T21:54:08.597Z" }, @@ -3020,7 +3072,7 @@ name = "nvidia-cufft-cu12" version = "11.3.3.83" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-nvjitlink-cu12" }, + { name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/1f/13/ee4e00f30e676b66ae65b4f08cb5bcbb8392c03f54f2d5413ea99a5d1c80/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74", size = 193118695, upload-time = "2025-03-07T01:45:27.821Z" }, @@ -3047,9 +3099,9 @@ name = "nvidia-cusolver-cu12" version = "11.7.3.90" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-cublas-cu12" }, - { name = "nvidia-cusparse-cu12" }, - { name = "nvidia-nvjitlink-cu12" }, + { name = "nvidia-cublas-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, + { name = "nvidia-cusparse-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, + { name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/85/48/9a13d2975803e8cf2777d5ed57b87a0b6ca2cc795f9a4f59796a910bfb80/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450", size = 267506905, upload-time = "2025-03-07T01:47:16.273Z" }, @@ -3060,7 
+3112,7 @@ name = "nvidia-cusparse-cu12" version = "12.5.8.93" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-nvjitlink-cu12" }, + { name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/c2/f5/e1854cb2f2bcd4280c44736c93550cc300ff4b8c95ebe370d0aa7d2b473d/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b", size = 288216466, upload-time = "2025-03-07T01:48:13.779Z" }, @@ -5386,6 +5438,42 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/56/b3/23eec760215910609914dd99aba23ce1c72a3bcbe046ee44f45adf740452/tifffile-2025.8.28-py3-none-any.whl", hash = "sha256:b274a6d9eeba65177cf7320af25ef38ecf910b3369ac6bc494a94a3f6bd99c78", size = 231049, upload-time = "2025-08-27T19:47:33.909Z" }, ] +[[package]] +name = "tiktoken" +version = "0.11.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "regex" }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a7/86/ad0155a37c4f310935d5ac0b1ccf9bdb635dcb906e0a9a26b616dd55825a/tiktoken-0.11.0.tar.gz", hash = "sha256:3c518641aee1c52247c2b97e74d8d07d780092af79d5911a6ab5e79359d9b06a", size = 37648, upload-time = "2025-08-08T23:58:08.495Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8b/4d/c6a2e7dca2b4f2e9e0bfd62b3fe4f114322e2c028cfba905a72bc76ce479/tiktoken-0.11.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:8a9b517d6331d7103f8bef29ef93b3cca95fa766e293147fe7bacddf310d5917", size = 1059937, upload-time = "2025-08-08T23:57:28.57Z" }, + { url = "https://files.pythonhosted.org/packages/41/54/3739d35b9f94cb8dc7b0db2edca7192d5571606aa2369a664fa27e811804/tiktoken-0.11.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b4ddb1849e6bf0afa6cc1c5d809fb980ca240a5fffe585a04e119519758788c0", size = 999230, upload-time = "2025-08-08T23:57:30.241Z" }, + { url = "https://files.pythonhosted.org/packages/dd/f4/ec8d43338d28d53513004ebf4cd83732a135d11011433c58bf045890cc10/tiktoken-0.11.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:10331d08b5ecf7a780b4fe4d0281328b23ab22cdb4ff65e68d56caeda9940ecc", size = 1130076, upload-time = "2025-08-08T23:57:31.706Z" }, + { url = "https://files.pythonhosted.org/packages/94/80/fb0ada0a882cb453caf519a4bf0d117c2a3ee2e852c88775abff5413c176/tiktoken-0.11.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b062c82300341dc87e0258c69f79bed725f87e753c21887aea90d272816be882", size = 1183942, upload-time = "2025-08-08T23:57:33.142Z" }, + { url = "https://files.pythonhosted.org/packages/2f/e9/6c104355b463601719582823f3ea658bc3aa7c73d1b3b7553ebdc48468ce/tiktoken-0.11.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:195d84bec46169af3b1349a1495c151d37a0ff4cba73fd08282736be7f92cc6c", size = 1244705, upload-time = "2025-08-08T23:57:34.594Z" }, + { url = "https://files.pythonhosted.org/packages/94/75/eaa6068f47e8b3f0aab9e05177cce2cf5aa2cc0ca93981792e620d4d4117/tiktoken-0.11.0-cp310-cp310-win_amd64.whl", hash = "sha256:fe91581b0ecdd8783ce8cb6e3178f2260a3912e8724d2f2d49552b98714641a1", size = 884152, upload-time = "2025-08-08T23:57:36.18Z" }, + { url = 
"https://files.pythonhosted.org/packages/8a/91/912b459799a025d2842566fe1e902f7f50d54a1ce8a0f236ab36b5bd5846/tiktoken-0.11.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:4ae374c46afadad0f501046db3da1b36cd4dfbfa52af23c998773682446097cf", size = 1059743, upload-time = "2025-08-08T23:57:37.516Z" }, + { url = "https://files.pythonhosted.org/packages/8c/e9/6faa6870489ce64f5f75dcf91512bf35af5864583aee8fcb0dcb593121f5/tiktoken-0.11.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:25a512ff25dc6c85b58f5dd4f3d8c674dc05f96b02d66cdacf628d26a4e4866b", size = 999334, upload-time = "2025-08-08T23:57:38.595Z" }, + { url = "https://files.pythonhosted.org/packages/a1/3e/a05d1547cf7db9dc75d1461cfa7b556a3b48e0516ec29dfc81d984a145f6/tiktoken-0.11.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2130127471e293d385179c1f3f9cd445070c0772be73cdafb7cec9a3684c0458", size = 1129402, upload-time = "2025-08-08T23:57:39.627Z" }, + { url = "https://files.pythonhosted.org/packages/34/9a/db7a86b829e05a01fd4daa492086f708e0a8b53952e1dbc9d380d2b03677/tiktoken-0.11.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21e43022bf2c33f733ea9b54f6a3f6b4354b909f5a73388fb1b9347ca54a069c", size = 1184046, upload-time = "2025-08-08T23:57:40.689Z" }, + { url = "https://files.pythonhosted.org/packages/9d/bb/52edc8e078cf062ed749248f1454e9e5cfd09979baadb830b3940e522015/tiktoken-0.11.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:adb4e308eb64380dc70fa30493e21c93475eaa11669dea313b6bbf8210bfd013", size = 1244691, upload-time = "2025-08-08T23:57:42.251Z" }, + { url = "https://files.pythonhosted.org/packages/60/d9/884b6cd7ae2570ecdcaffa02b528522b18fef1cbbfdbcaa73799807d0d3b/tiktoken-0.11.0-cp311-cp311-win_amd64.whl", hash = "sha256:ece6b76bfeeb61a125c44bbefdfccc279b5288e6007fbedc0d32bfec602df2f2", size = 884392, upload-time = "2025-08-08T23:57:43.628Z" }, + { url = "https://files.pythonhosted.org/packages/e7/9e/eceddeffc169fc75fe0fd4f38471309f11cb1906f9b8aa39be4f5817df65/tiktoken-0.11.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:fd9e6b23e860973cf9526544e220b223c60badf5b62e80a33509d6d40e6c8f5d", size = 1055199, upload-time = "2025-08-08T23:57:45.076Z" }, + { url = "https://files.pythonhosted.org/packages/4f/cf/5f02bfefffdc6b54e5094d2897bc80efd43050e5b09b576fd85936ee54bf/tiktoken-0.11.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6a76d53cee2da71ee2731c9caa747398762bda19d7f92665e882fef229cb0b5b", size = 996655, upload-time = "2025-08-08T23:57:46.304Z" }, + { url = "https://files.pythonhosted.org/packages/65/8e/c769b45ef379bc360c9978c4f6914c79fd432400a6733a8afc7ed7b0726a/tiktoken-0.11.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ef72aab3ea240646e642413cb363b73869fed4e604dcfd69eec63dc54d603e8", size = 1128867, upload-time = "2025-08-08T23:57:47.438Z" }, + { url = "https://files.pythonhosted.org/packages/d5/2d/4d77f6feb9292bfdd23d5813e442b3bba883f42d0ac78ef5fdc56873f756/tiktoken-0.11.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f929255c705efec7a28bf515e29dc74220b2f07544a8c81b8d69e8efc4578bd", size = 1183308, upload-time = "2025-08-08T23:57:48.566Z" }, + { url = "https://files.pythonhosted.org/packages/7a/65/7ff0a65d3bb0fc5a1fb6cc71b03e0f6e71a68c5eea230d1ff1ba3fd6df49/tiktoken-0.11.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:61f1d15822e4404953d499fd1dcc62817a12ae9fb1e4898033ec8fe3915fdf8e", size = 1244301, upload-time = "2025-08-08T23:57:49.642Z" }, + { url = 
"https://files.pythonhosted.org/packages/f5/6e/5b71578799b72e5bdcef206a214c3ce860d999d579a3b56e74a6c8989ee2/tiktoken-0.11.0-cp312-cp312-win_amd64.whl", hash = "sha256:45927a71ab6643dfd3ef57d515a5db3d199137adf551f66453be098502838b0f", size = 884282, upload-time = "2025-08-08T23:57:50.759Z" }, + { url = "https://files.pythonhosted.org/packages/cc/cd/a9034bcee638716d9310443818d73c6387a6a96db93cbcb0819b77f5b206/tiktoken-0.11.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a5f3f25ffb152ee7fec78e90a5e5ea5b03b4ea240beed03305615847f7a6ace2", size = 1055339, upload-time = "2025-08-08T23:57:51.802Z" }, + { url = "https://files.pythonhosted.org/packages/f1/91/9922b345f611b4e92581f234e64e9661e1c524875c8eadd513c4b2088472/tiktoken-0.11.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7dc6e9ad16a2a75b4c4be7208055a1f707c9510541d94d9cc31f7fbdc8db41d8", size = 997080, upload-time = "2025-08-08T23:57:53.442Z" }, + { url = "https://files.pythonhosted.org/packages/d0/9d/49cd047c71336bc4b4af460ac213ec1c457da67712bde59b892e84f1859f/tiktoken-0.11.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5a0517634d67a8a48fd4a4ad73930c3022629a85a217d256a6e9b8b47439d1e4", size = 1128501, upload-time = "2025-08-08T23:57:54.808Z" }, + { url = "https://files.pythonhosted.org/packages/52/d5/a0dcdb40dd2ea357e83cb36258967f0ae96f5dd40c722d6e382ceee6bba9/tiktoken-0.11.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7fb4effe60574675118b73c6fbfd3b5868e5d7a1f570d6cc0d18724b09ecf318", size = 1182743, upload-time = "2025-08-08T23:57:56.307Z" }, + { url = "https://files.pythonhosted.org/packages/3b/17/a0fc51aefb66b7b5261ca1314afa83df0106b033f783f9a7bcbe8e741494/tiktoken-0.11.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:94f984c9831fd32688aef4348803b0905d4ae9c432303087bae370dc1381a2b8", size = 1244057, upload-time = "2025-08-08T23:57:57.628Z" }, + { url = "https://files.pythonhosted.org/packages/50/79/bcf350609f3a10f09fe4fc207f132085e497fdd3612f3925ab24d86a0ca0/tiktoken-0.11.0-cp313-cp313-win_amd64.whl", hash = "sha256:2177ffda31dec4023356a441793fed82f7af5291120751dee4d696414f54db0c", size = 883901, upload-time = "2025-08-08T23:57:59.359Z" }, +] + [[package]] name = "tinycss2" version = "1.4.0" @@ -5623,7 +5711,7 @@ name = "triton" version = "3.4.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "setuptools" }, + { name = "setuptools", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/62/ee/0ee5f64a87eeda19bbad9bc54ae5ca5b98186ed00055281fd40fb4beb10e/triton-3.4.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7ff2785de9bc02f500e085420273bb5cc9c9bb767584a4aa28d6e360cec70128", size = 155430069, upload-time = "2025-07-30T19:58:21.715Z" },