diff --git a/examples/core.py b/examples/core.py
index 1eab6c81..71fb081c 100644
--- a/examples/core.py
+++ b/examples/core.py
@@ -12,12 +12,11 @@ def run_provider(provider, model, api_key=None, **kwargs):
     print(f"\n\n###RUNNING for <{provider}>, <{model}> ###")
     llm = LLMCore(provider=provider, api_key=api_key, **kwargs)
-    latencies = {}
-
+    latencies = {}
     print("\nAsync Non-Stream")
-    chat_request = build_chat_request(model, chat_input="Hello, my name is Jason Json", is_stream=False)
+    chat_request = build_chat_request(model, chat_input="Hello, my name is Jason", is_stream=False)
 
     string = """
-What is Lorem Ipsum? json
+What is Lorem Ipsum?
 Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum.
 
 Why do we use it?
@@ -27,7 +26,7 @@ def run_provider(provider, model, api_key=None, **kwargs):
 Where does it come from?
 Contrary to popular belief, Lorem Ipsum is not simply random text. It has roots in a piece of classical Latin literature from 45 BC, making it over 2000 years old. Richard McClintock, a Latin professor at Hampden-Sydney College in Virginia, looked up one of the more obscure Latin words, consectetur, from a Lorem Ipsum passage, and going through the cites of the word in classical literature, discovered the undoubtable source. Lorem Ipsum comes from sections 1.10.32 and 1.10.33 of "de Finibus Bonorum et Malorum" (The Extremes of Good and Evil) by Cicero, written in 45 BC. This book is a treatise on the theory of ethics, very popular during the Renaissance. The first line of Lorem Ipsum, "Lorem ipsum dolor sit amet..", comes from a line in section 1.10.32.
 
-What is Lorem Ipsum? json
+What is Lorem Ipsum?
 Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum.
 
 Why do we use it?
@@ -37,7 +36,7 @@ def run_provider(provider, model, api_key=None, **kwargs):
 Where does it come from?
 Contrary to popular belief, Lorem Ipsum is not simply random text. It has roots in a piece of classical Latin literature from 45 BC, making it over 2000 years old. Richard McClintock, a Latin professor at Hampden-Sydney College in Virginia, looked up one of the more obscure Latin words, consectetur, from a Lorem Ipsum passage, and going through the cites of the word in classical literature, discovered the undoubtable source. Lorem Ipsum comes from sections 1.10.32 and 1.10.33 of "de Finibus Bonorum et Malorum" (The Extremes of Good and Evil) by Cicero, written in 45 BC. This book is a treatise on the theory of ethics, very popular during the Renaissance. The first line of Lorem Ipsum, "Lorem ipsum dolor sit amet..", comes from a line in section 1.10.32.
 
-What is Lorem Ipsum? json
+What is Lorem Ipsum?
 Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum.
 
 Why do we use it?
@@ -50,7 +49,6 @@ def run_provider(provider, model, api_key=None, **kwargs):
 """
     #chat_request = build_chat_request(model, chat_input=string, is_stream=False)
-
     response_async = asyncio.run(llm.achat(**chat_request))
     pprint(response_async)
     latencies["async (ms)"]= response_async.metrics["latency_s"]*1000
@@ -58,7 +56,7 @@ def run_provider(provider, model, api_key=None, **kwargs):
     print("\nAsync Stream")
 
     async def async_stream():
-        chat_request = build_chat_request(model, chat_input="Hello, my name is Tom Json", is_stream=True)
+        chat_request = build_chat_request(model, chat_input="Hello, my name is Tom", is_stream=True)
 
         response_async = await llm.achat(**chat_request)
         async for p in response_async:
@@ -74,7 +72,7 @@ async def async_stream():
     print("\nSync Non-Stream")
-    chat_request = build_chat_request(model, chat_input="Hello, my name is Alice Json", is_stream=False)
+    chat_request = build_chat_request(model, chat_input="Hello, my name is Alice", is_stream=False)
 
     response_sync = llm.chat(**chat_request)
     pprint(response_sync)
@@ -82,7 +80,8 @@ async def async_stream():
     print("\nSync Stream")
-    chat_request = build_chat_request(model, chat_input="Hello, my name is Mary Json", is_stream=True)
+    chat_request = build_chat_request(model, chat_input="Hello, my name is Mary", is_stream=True)
+
     response_sync_stream = llm.chat(**chat_request)
     for p in response_sync_stream:
@@ -126,7 +125,6 @@ def build_chat_request(model: str, chat_input: str, is_stream: bool, max_tokens:
         "parameters": {
             "temperature": 0,
             "max_tokens": max_tokens,
-            "response_format": {"type": "json_object"},
             "functions": None,
         }
     }
@@ -137,30 +135,83 @@ def multiple_provider_runs(provider:str, model:str, num_runs:int, api_key:str, **kwargs):
     for _ in range(num_runs):
         latencies = run_provider(provider=provider, model=model, api_key=api_key, **kwargs)
         pprint(latencies)
-
-
-# OpenAI
-multiple_provider_runs(provider="openai", model="gpt-4o-mini", api_key=os.environ["OPENAI_API_KEY"], num_runs=1)
-multiple_provider_runs(provider="openai", model="o3-mini", api_key=os.environ["OPENAI_API_KEY"], num_runs=1)
-#multiple_provider_runs(provider="openai", model="o1-preview", api_key=os.environ["OPENAI_API_KEY"], num_runs=1)
+
+def run_chat_all_providers():
+    # OpenAI
+    multiple_provider_runs(provider="openai", model="gpt-4o-mini", api_key=os.environ["OPENAI_API_KEY"], num_runs=1)
+    multiple_provider_runs(provider="openai", model="o3-mini", api_key=os.environ["OPENAI_API_KEY"], num_runs=1)
+    #multiple_provider_runs(provider="openai", model="o1-preview", api_key=os.environ["OPENAI_API_KEY"], num_runs=1)
+
+    # Azure
+    multiple_provider_runs(provider="azure", model="gpt-4o-mini", num_runs=1, api_key=os.environ["AZURE_API_KEY"], api_version=os.environ["AZURE_API_VERSION"], api_endpoint=os.environ["AZURE_API_ENDPOINT"])
+    #multiple_provider_runs(provider="azure", model="gpt-4o", num_runs=1, api_key=os.environ["AZURE_API_KEY"], api_version=os.environ["AZURE_API_VERSION"], api_endpoint=os.environ["AZURE_API_ENDPOINT"])
+    #multiple_provider_runs(provider="azure", model="o1-mini", num_runs=1, api_key=os.environ["AZURE_API_KEY"], api_version=os.environ["AZURE_API_VERSION"], api_endpoint=os.environ["AZURE_API_ENDPOINT"])
+    #multiple_provider_runs(provider="azure", model="o1-preview", num_runs=1, api_key=os.environ["AZURE_API_KEY"], api_version=os.environ["AZURE_API_VERSION"], api_endpoint=os.environ["AZURE_API_ENDPOINT"])
+
-# Azure
-multiple_provider_runs(provider="azure", model="gpt-4o-mini", num_runs=1, api_key=os.environ["AZURE_API_KEY"], api_version=os.environ["AZURE_API_VERSION"], api_endpoint=os.environ["AZURE_API_ENDPOINT"])
-#multiple_provider_runs(provider="azure", model="gpt-4o", num_runs=1, api_key=os.environ["AZURE_API_KEY"], api_version=os.environ["AZURE_API_VERSION"], api_endpoint=os.environ["AZURE_API_ENDPOINT"])
-#multiple_provider_runs(provider="azure", model="o1-mini", num_runs=1, api_key=os.environ["AZURE_API_KEY"], api_version=os.environ["AZURE_API_VERSION"], api_endpoint=os.environ["AZURE_API_ENDPOINT"])
-#multiple_provider_runs(provider="azure", model="o1-preview", num_runs=1, api_key=os.environ["AZURE_API_KEY"], api_version=os.environ["AZURE_API_VERSION"], api_endpoint=os.environ["AZURE_API_ENDPOINT"])
+    #multiple_provider_runs(provider="anthropic", model="claude-3-opus-20240229", num_runs=1, api_key=os.environ["ANTHROPIC_API_KEY"])
+    #multiple_provider_runs(provider="azure", model="o1-preview", num_runs=1, api_key=os.environ["AZURE_API_KEY"], api_version=os.environ["AZURE_API_VERSION"], api_endpoint=os.environ["AZURE_API_ENDPOINT"])
+    #multiple_provider_runs(provider="azure", model="o1-mini", num_runs=1, api_key=os.environ["AZURE_API_KEY"], api_version=os.environ["AZURE_API_VERSION"], api_endpoint=os.environ["AZURE_API_ENDPOINT"])
-#multiple_provider_runs(provider="anthropic", model="claude-3-opus-20240229", num_runs=1, api_key=os.environ["ANTHROPIC_API_KEY"])
-#multiple_provider_runs(provider="azure", model="o1-preview", num_runs=1, api_key=os.environ["AZURE_API_KEY"], api_version=os.environ["AZURE_API_VERSION"], api_endpoint=os.environ["AZURE_API_ENDPOINT"])
-#multiple_provider_runs(provider="azure", model="o1-mini", num_runs=1, api_key=os.environ["AZURE_API_KEY"], api_version=os.environ["AZURE_API_VERSION"], api_endpoint=os.environ["AZURE_API_ENDPOINT"])
+    multiple_provider_runs(provider="vertexai", model="gemini-1.5-flash", num_runs=1, api_key=os.environ["GOOGLE_API_KEY"])
+
+    # Bedrock
+    multiple_provider_runs(provider="bedrock", model="us.amazon.nova-lite-v1:0", num_runs=1, api_key=None, region=os.environ["BEDROCK_REGION"], secret_key=os.environ["BEDROCK_SECRET_KEY"], access_key=os.environ["BEDROCK_ACCESS_KEY"])
+    #multiple_provider_runs(provider="bedrock", model="anthropic.claude-3-5-sonnet-20241022-v2:0", num_runs=1, api_key=None, region=os.environ["BEDROCK_REGION"], secret_key=os.environ["BEDROCK_SECRET_KEY"], access_key=os.environ["BEDROCK_ACCESS_KEY"])
-multiple_provider_runs(provider="vertexai", model="gemini-1.5-flash", num_runs=1, api_key=os.environ["GOOGLE_API_KEY"])
+
+run_chat_all_providers()
-
-# Bedrock
-multiple_provider_runs(provider="bedrock", model="us.amazon.nova-lite-v1:0", num_runs=1, api_key=None, region=os.environ["BEDROCK_REGION"], secret_key=os.environ["BEDROCK_SECRET_KEY"], access_key=os.environ["BEDROCK_ACCESS_KEY"])
-#multiple_provider_runs(provider="bedrock", model="anthropic.claude-3-5-sonnet-20241022-v2:0", num_runs=1, api_key=None, region=os.environ["BEDROCK_REGION"], secret_key=os.environ["BEDROCK_SECRET_KEY"], access_key=os.environ["BEDROCK_ACCESS_KEY"])
+
+import base64
+
+def messages(img_path):
+    """
+    Builds a message payload containing text plus two images: a local file
+    (inlined as a Base64 data URL) and a remote image URL.
+    """
+    with open(img_path, "rb") as f:
+        image_bytes = f.read()
+
+    base64_image = base64.b64encode(image_bytes).decode("utf-8")
+    return [
+        {
+            "role": "user",
+            "content": [
+                {"type": "text", "text": "What's in this image?"},
+                {
+                    "type": "image_url",
+                    "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
+                },
+                {
+                    "type": "image_url",
+                    "image_url": {"url": "https://awsmp-logos.s3.amazonaws.com/seller-zx4pk43qpmxoa/53d235806f343cec94aac3c577d81c13.png"},
+                },
+            ],
+        }
+    ]
+
+def run_send_imgs():
+    provider = "bedrock"
+    model = "us.amazon.nova-lite-v1:0"
+    chat_input = messages(img_path="./libs/llmstudio/tests/integration_tests/test_data/llmstudio-logo.jpeg")
+    chat_request = build_chat_request(model=model, chat_input=chat_input, is_stream=False)
+    llm = LLMCore(provider=provider, api_key=None, region=os.environ["BEDROCK_REGION"], secret_key=os.environ["BEDROCK_SECRET_KEY"], access_key=os.environ["BEDROCK_ACCESS_KEY"])
+    response_sync = llm.chat(**chat_request)
+    #print(response_sync)
+    response_sync.clean_print()
+
+    #for p in response_sync:
+    #    if p.metrics:
+    #        p.clean_print()
+
+run_send_imgs()
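# --- Editor's example (not part of the patch) ---
# A minimal sketch of the sync, non-stream path that run_provider() exercises
# above, assuming OPENAI_API_KEY is set. build_chat_request() is the helper
# defined in examples/core.py; the LLMCore import path is an assumption, since
# the file's import block is outside these hunks.
import os

from llmstudio_core import LLMCore  # assumed import path

llm = LLMCore(provider="openai", api_key=os.environ["OPENAI_API_KEY"])
chat_request = build_chat_request("gpt-4o-mini", chat_input="Hello!", is_stream=False)
response = llm.chat(**chat_request)
print(response.metrics["latency_s"])  # the latency metric run_provider() collects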
#multiple_provider_runs(provider="azure", model="gpt-4o", num_runs=1, api_key=os.environ["AZURE_API_KEY"], api_version=os.environ["AZURE_API_VERSION"], api_endpoint=os.environ["AZURE_API_ENDPOINT"]) + #multiple_provider_runs(provider="azure", model="o1-mini", num_runs=1, api_key=os.environ["AZURE_API_KEY"], api_version=os.environ["AZURE_API_VERSION"], api_endpoint=os.environ["AZURE_API_ENDPOINT"]) + #multiple_provider_runs(provider="azure", model="o1-preview", num_runs=1, api_key=os.environ["AZURE_API_KEY"], api_version=os.environ["AZURE_API_VERSION"], api_endpoint=os.environ["AZURE_API_ENDPOINT"]) + + # Azure + multiple_provider_runs(provider="azure", model="gpt-4o-mini", num_runs=1, api_key=os.environ["AZURE_API_KEY"], api_version=os.environ["AZURE_API_VERSION"], api_endpoint=os.environ["AZURE_API_ENDPOINT"]) + #multiple_provider_runs(provider="azure", model="gpt-4o", num_runs=1, api_key=os.environ["AZURE_API_KEY"], api_version=os.environ["AZURE_API_VERSION"], api_endpoint=os.environ["AZURE_API_ENDPOINT"]) + #multiple_provider_runs(provider="azure", model="o1-mini", num_runs=1, api_key=os.environ["AZURE_API_KEY"], api_version=os.environ["AZURE_API_VERSION"], api_endpoint=os.environ["AZURE_API_ENDPOINT"]) + #multiple_provider_runs(provider="azure", model="o1-preview", num_runs=1, api_key=os.environ["AZURE_API_KEY"], api_version=os.environ["AZURE_API_VERSION"], api_endpoint=os.environ["AZURE_API_ENDPOINT"]) + -# Azure -multiple_provider_runs(provider="azure", model="gpt-4o-mini", num_runs=1, api_key=os.environ["AZURE_API_KEY"], api_version=os.environ["AZURE_API_VERSION"], api_endpoint=os.environ["AZURE_API_ENDPOINT"]) -#multiple_provider_runs(provider="azure", model="gpt-4o", num_runs=1, api_key=os.environ["AZURE_API_KEY"], api_version=os.environ["AZURE_API_VERSION"], api_endpoint=os.environ["AZURE_API_ENDPOINT"]) -#multiple_provider_runs(provider="azure", model="o1-mini", num_runs=1, api_key=os.environ["AZURE_API_KEY"], api_version=os.environ["AZURE_API_VERSION"], api_endpoint=os.environ["AZURE_API_ENDPOINT"]) -#multiple_provider_runs(provider="azure", model="o1-preview", num_runs=1, api_key=os.environ["AZURE_API_KEY"], api_version=os.environ["AZURE_API_VERSION"], api_endpoint=os.environ["AZURE_API_ENDPOINT"]) + #multiple_provider_runs(provider="anthropic", model="claude-3-opus-20240229", num_runs=1, api_key=os.environ["ANTHROPIC_API_KEY"]) + #multiple_provider_runs(provider="azure", model="o1-preview", num_runs=1, api_key=os.environ["AZURE_API_KEY"], api_version=os.environ["AZURE_API_VERSION"], api_endpoint=os.environ["AZURE_API_ENDPOINT"]) + #multiple_provider_runs(provider="azure", model="o1-mini", num_runs=1, api_key=os.environ["AZURE_API_KEY"], api_version=os.environ["AZURE_API_VERSION"], api_endpoint=os.environ["AZURE_API_ENDPOINT"]) -#multiple_provider_runs(provider="anthropic", model="claude-3-opus-20240229", num_runs=1, api_key=os.environ["ANTHROPIC_API_KEY"]) -#multiple_provider_runs(provider="azure", model="o1-preview", num_runs=1, api_key=os.environ["AZURE_API_KEY"], api_version=os.environ["AZURE_API_VERSION"], api_endpoint=os.environ["AZURE_API_ENDPOINT"]) -#multiple_provider_runs(provider="azure", model="o1-mini", num_runs=1, api_key=os.environ["AZURE_API_KEY"], api_version=os.environ["AZURE_API_VERSION"], api_endpoint=os.environ["AZURE_API_ENDPOINT"]) + multiple_provider_runs(provider="vertexai", model="gemini-1.5-flash", num_runs=1, api_key=os.environ["GOOGLE_API_KEY"]) + # Bedrock + multiple_provider_runs(provider="bedrock", model="us.amazon.nova-lite-v1:0", num_runs=1, 
api_key=None, region=os.environ["BEDROCK_REGION"], secret_key=os.environ["BEDROCK_SECRET_KEY"], access_key=os.environ["BEDROCK_ACCESS_KEY"]) + #multiple_provider_runs(provider="bedrock", model="anthropic.claude-3-5-sonnet-20241022-v2:0", num_runs=1, api_key=None, region=os.environ["BEDROCK_REGION"], secret_key=os.environ["BEDROCK_SECRET_KEY"], access_key=os.environ["BEDROCK_ACCESS_KEY"]) -multiple_provider_runs(provider="vertexai", model="gemini-1.5-flash", num_runs=1, api_key=os.environ["GOOGLE_API_KEY"]) +run_chat_all_providers() -# Bedrock -multiple_provider_runs(provider="bedrock", model="us.amazon.nova-lite-v1:0", num_runs=1, api_key=None, region=os.environ["BEDROCK_REGION"], secret_key=os.environ["BEDROCK_SECRET_KEY"], access_key=os.environ["BEDROCK_ACCESS_KEY"]) -#multiple_provider_runs(provider="bedrock", model="anthropic.claude-3-5-sonnet-20241022-v2:0", num_runs=1, api_key=None, region=os.environ["BEDROCK_REGION"], secret_key=os.environ["BEDROCK_SECRET_KEY"], access_key=os.environ["BEDROCK_ACCESS_KEY"]) + +import base64 + +def messages(img_path): + """ + Creates a message payload with both text and image. + Adapts format based on the provider. + """ + with open(img_path, "rb") as f: + image_bytes = f.read() + + base64_image = base64.b64encode(image_bytes).decode("utf-8") + return [ + { + "role": "user", + "content": [ + {"type": "text", "text": "What's in this image?"}, + { + "type": "image_url", + "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}, + }, + { + "type": "image_url", + "image_url": {"url": "https://awsmp-logos.s3.amazonaws.com/seller-zx4pk43qpmxoa/53d235806f343cec94aac3c577d81c13.png"}, + }, + ], + } + ] + +def run_send_imgs(): + provider="bedrock" + model="us.amazon.nova-lite-v1:0" + chat_input=messages(img_path="./libs/llmstudio/tests/integration_tests/test_data/llmstudio-logo.jpeg") + chat_request = build_chat_request(model=model, chat_input=chat_input, is_stream=False) + llm = LLMCore(provider=provider, api_key=os.environ["OPENAI_API_KEY"], region=os.environ["BEDROCK_REGION"], secret_key=os.environ["BEDROCK_SECRET_KEY"], access_key=os.environ["BEDROCK_ACCESS_KEY"]) + response_sync = llm.chat(**chat_request) + #print(response_sync) + response_sync.clean_print() + + #for p in response_sync: + # if p.metrics: + # p.clean_print() + +run_send_imgs() diff --git a/libs/core/llmstudio_core/providers/bedrock_converse.py b/libs/core/llmstudio_core/providers/bedrock_converse.py index dc756e0a..abc990f7 100644 --- a/libs/core/llmstudio_core/providers/bedrock_converse.py +++ b/libs/core/llmstudio_core/providers/bedrock_converse.py @@ -1,5 +1,7 @@ +import base64 import json import os +import re import time import uuid from typing import ( @@ -14,6 +16,7 @@ ) import boto3 +import requests from llmstudio_core.exceptions import ProviderError from llmstudio_core.providers.provider import ChatRequest, ProviderCore, provider from llmstudio_core.utils import OpenAIToolFunction @@ -276,6 +279,34 @@ def _process_messages( } ) messages.append(tool_use) + elif isinstance(message.get("content"), list): + converse_content_list = [] + for content in message.get("content"): + converse_content = {} + if content.get("type") == "text": + converse_content["text"] = content.get("text") + elif content.get("type") == "image_url": + image_url = content.get("image_url")["url"] + bytes_image = BedrockConverseProvider._get_image_bytes( + image_url + ) + format = ( + BedrockConverseProvider._get_img_format_from_bytes( + bytes_image + ) + ) + converse_content["image"] = { + "format": format, 
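# --- Editor's example (not part of the patch) ---
# A quick sanity check of the magic-number sniffing added above:
# _get_img_format_from_bytes() inspects only the leading file signature, so a
# few handcrafted byte strings are enough to exercise each branch. The import
# path is inferred from the file's location in the repo.
from llmstudio_core.providers.bedrock_converse import BedrockConverseProvider

png_bytes = b"\x89PNG\r\n\x1a\n" + b"\x00" * 16  # PNG signature
jpeg_bytes = b"\xff\xd8\xff\xe0" + b"\x00" * 16  # JPEG SOI marker
webp_bytes = b"RIFF" + b"\x00" * 4 + b"WEBP"     # RIFF container, "WEBP" tag at offset 8

assert BedrockConverseProvider._get_img_format_from_bytes(png_bytes) == "png"
assert BedrockConverseProvider._get_img_format_from_bytes(jpeg_bytes) == "jpeg"
assert BedrockConverseProvider._get_img_format_from_bytes(webp_bytes) == "webp"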
+ "source": {"bytes": bytes_image}, + } + converse_content_list.append(converse_content) + + messages.append( + { + "role": message.get("role"), + "content": converse_content_list, + } + ) else: messages.append( { @@ -303,6 +334,62 @@ def _process_messages( return messages, system_prompt + @staticmethod + def _base64_to_bytes(image_url: str) -> bytes: + """ + Extracts and decodes Base64 image data from a 'data:image/...;base64,...' URL. + Returns the raw image bytes. + """ + if not image_url.startswith("data:image/"): + raise ValueError("Invalid Base64 image URL") + + base64_data = re.sub(r"^data:image/[^;]+;base64,", "", image_url) + + return base64.b64decode(base64_data) + + @staticmethod + def _get_img_format_from_bytes(image_bytes: bytes) -> str: + """ + Determines the image format from raw image bytes using file signatures (magic numbers). + """ + if image_bytes.startswith(b"\xFF\xD8\xFF"): + return "jpeg" + elif image_bytes.startswith(b"\x89PNG\r\n\x1A\n"): + return "png" + elif image_bytes.startswith(b"GIF87a") or image_bytes.startswith(b"GIF89a"): + return "gif" + elif ( + image_bytes.startswith(b"\x52\x49\x46\x46") and image_bytes[8:12] == b"WEBP" + ): + return "webp" + elif image_bytes.startswith(b"\x49\x49\x2A\x00") or image_bytes.startswith( + b"\x4D\x4D\x00\x2A" + ): + return "tiff" + else: + raise ValueError("Unknown image format") + + @staticmethod + def _get_image_bytes(image_url: str) -> bytes: + """ + Converts an image URL to a Base64-encoded string. + - If already in 'data:image/...;base64,...' format, it returns as-is. + - If it's a normal URL, downloads and encodes the image in Base64. + """ + if image_url.startswith("data:image/"): + return BedrockConverseProvider._base64_to_bytes(image_url) + + elif image_url.startswith(("http://", "https://")): + response = requests.get(image_url) + if response.status_code != 200: + raise ValueError(f"Failed to download image: {response.status_code}") + + image_bytes = response.content + return image_bytes + + else: + raise ValueError("Invalid image URL format") + @staticmethod def _process_tools(parameters: dict) -> Optional[Dict]: if parameters.get("tools") is None and parameters.get("functions") is None: diff --git a/libs/core/llmstudio_core/providers/data_structures.py b/libs/core/llmstudio_core/providers/data_structures.py index 374ad6eb..85c9482e 100644 --- a/libs/core/llmstudio_core/providers/data_structures.py +++ b/libs/core/llmstudio_core/providers/data_structures.py @@ -1,4 +1,5 @@ -from typing import Any, List, Optional +import copy +from typing import Any, List, Optional, Union from openai.types.chat import ChatCompletion, ChatCompletionChunk from pydantic import BaseModel @@ -90,8 +91,68 @@ def items(self): return self.model_dump().items() -class ChatCompletionLLMstudio(ChatCompletion): - chat_input: str +class ChatCompletionLLMstudioBase: + """ + Base class to share the methods between different ChatCompletionLLMstudio classes. + """ + + def clean_print(self): + """ + Custom representation of the class to prevent large fields from bloating the output. + Ensures missing fields are handled gracefully without errors. + """ + data = copy.deepcopy(self.model_dump()) + + def clean_large_fields(d): + """ + Recursively traverses the dictionary to replace large image Base64 data + with a placeholder while ensuring missing fields do not cause errors. 
+ """ + for key, value in d.items(): + if isinstance(value, list): + for item in value: + if isinstance(item, dict): + # Handle image_url directly under chat_input or context + if "image_url" in item and isinstance( + item["image_url"], dict + ): + if "url" in item["image_url"] and isinstance( + item["image_url"]["url"], str + ): + if item["image_url"]["url"].startswith( + "data:image/" + ): + item["image_url"][ + "url" + ] = "