From 605f1f0a243bd4e9fc1e77a9de69e2cf7a3219c8 Mon Sep 17 00:00:00 2001 From: fm1320 Date: Thu, 16 Jan 2025 22:29:52 +0000 Subject: [PATCH 1/5] [WIP] Azure Client integration --- .../components/model_client/azure_client.py | 273 ++++++++++++++++++ tutorials/azure_example.py | 187 ++++++++++++ 2 files changed, 460 insertions(+) create mode 100644 adalflow/adalflow/components/model_client/azure_client.py create mode 100644 tutorials/azure_example.py diff --git a/adalflow/adalflow/components/model_client/azure_client.py b/adalflow/adalflow/components/model_client/azure_client.py new file mode 100644 index 000000000..7bbfe65dc --- /dev/null +++ b/adalflow/adalflow/components/model_client/azure_client.py @@ -0,0 +1,273 @@ +"""Azure OpenAI ModelClient integration.""" + +import os +from typing import Dict, Optional, Any, Callable, Literal +import backoff +import logging + +from adalflow.core.model_client import ModelClient +from adalflow.core.types import ModelType, CompletionUsage, GeneratorOutput + +# optional import +from adalflow.utils.lazy_import import safe_import, OptionalPackages + +openai = safe_import(OptionalPackages.OPENAI.value[0], OptionalPackages.OPENAI.value[1]) + +from openai import AzureOpenAI, AsyncAzureOpenAI, Stream +from openai import ( + APITimeoutError, + InternalServerError, + RateLimitError, + UnprocessableEntityError, + BadRequestError, +) +from openai.types import ( + Completion, + CreateEmbeddingResponse, +) +from openai.types.chat import ChatCompletionChunk, ChatCompletion +from adalflow.components.model_client.utils import parse_embedding_response + +log = logging.getLogger(__name__) + +def get_first_message_content(completion: ChatCompletion) -> str: + """When we only need the content of the first message. + It is the default parser for chat completion.""" + return completion.choices[0].message.content + +def parse_stream_response(completion: ChatCompletionChunk) -> str: + """Parse the response of the stream API.""" + return completion.choices[0].delta.content + +def handle_streaming_response(generator: Stream[ChatCompletionChunk]): + """Handle the streaming response.""" + for completion in generator: + log.debug(f"Raw chunk completion: {completion}") + parsed_content = parse_stream_response(completion) + yield parsed_content + +class AzureClient(ModelClient): + """A component wrapper for the Azure OpenAI API client. + + This client supports both chat completion and embedding APIs through Azure OpenAI. + It can be used with both sync and async operations. + + Args: + api_key (Optional[str]): Azure OpenAI API key + api_version (Optional[str]): API version to use + azure_endpoint (Optional[str]): Azure OpenAI endpoint URL (e.g., https://.openai.azure.com/) + base_url (Optional[str]): Alternative base URL format (e.g., https://..models.ai.azure.com) + chat_completion_parser (Optional[Callable]): Function to parse chat completions + input_type (Literal["text", "messages"]): Format for input + + Environment Variables: + AZURE_OPENAI_API_KEY: API key + AZURE_OPENAI_ENDPOINT: Endpoint URL (new format) + AZURE_BASE_URL: Base URL (alternative format) + AZURE_OPENAI_VERSION: API version + + Example: + >>> from adalflow.components.model_client import AzureClient + >>> client = AzureClient() + >>> generator = Generator( + ... model_client=client, + ... model_kwargs={ + ... "model": "gpt-4", + ... "temperature": 0.7 + ... } + ... ) + >>> response = generator({"input_str": "What is the capital of France?"}) + """ + + def __init__( + self, + api_key: Optional[str] = None, + api_version: Optional[str] = None, + azure_endpoint: Optional[str] = None, + base_url: Optional[str] = None, + chat_completion_parser: Callable[[Completion], Any] = None, + input_type: Literal["text", "messages"] = "text", + ): + super().__init__() + self._api_key = api_key + self._api_version = api_version + self._azure_endpoint = azure_endpoint + self._base_url = base_url + self.sync_client = self.init_sync_client() + self.async_client = None + self.chat_completion_parser = chat_completion_parser or get_first_message_content + self._input_type = input_type + + def _get_endpoint(self) -> str: + """Get the appropriate endpoint URL based on available configuration.""" + # First try the new format endpoint + endpoint = self._azure_endpoint or os.getenv("AZURE_OPENAI_ENDPOINT") + if endpoint: + return endpoint + + # Then try the alternative base URL format + base_url = self._base_url or os.getenv("AZURE_BASE_URL") + if base_url: + # If base_url is provided in the format https://..models.ai.azure.com + # we need to extract the model and region + if "models.ai.azure.com" in base_url: + return base_url.rstrip("/") + # If it's just the model name, construct the full URL + return f"https://{base_url}.openai.azure.com" + + raise ValueError( + "Either AZURE_OPENAI_ENDPOINT or AZURE_BASE_URL must be set. " + "Check your deployment page for a URL like: " + "https://.openai.azure.com/ or " + "https://..models.ai.azure.com" + ) + + def init_sync_client(self): + api_key = self._api_key or os.getenv("AZURE_OPENAI_API_KEY") + api_version = self._api_version or os.getenv("AZURE_OPENAI_VERSION") + + if not api_key: + raise ValueError("Environment variable AZURE_OPENAI_API_KEY must be set") + if not api_version: + raise ValueError("Environment variable AZURE_OPENAI_VERSION must be set") + + endpoint = self._get_endpoint() + + return AzureOpenAI( + api_key=api_key, + api_version=api_version, + azure_endpoint=endpoint + ) + + def init_async_client(self): + api_key = self._api_key or os.getenv("AZURE_OPENAI_API_KEY") + api_version = self._api_version or os.getenv("AZURE_OPENAI_VERSION") + + if not api_key: + raise ValueError("Environment variable AZURE_OPENAI_API_KEY must be set") + if not api_version: + raise ValueError("Environment variable AZURE_OPENAI_VERSION must be set") + + endpoint = self._get_endpoint() + + return AsyncAzureOpenAI( + api_key=api_key, + api_version=api_version, + azure_endpoint=endpoint + ) + + def convert_inputs_to_api_kwargs( + self, + input: Optional[Any] = None, + model_kwargs: Dict = {}, + model_type: ModelType = ModelType.UNDEFINED, + ) -> Dict: + """Convert inputs to Azure OpenAI API kwargs format.""" + final_model_kwargs = model_kwargs.copy() + + if model_type == ModelType.EMBEDDER: + if isinstance(input, str): + input = [input] + assert isinstance(input, (list, tuple)), "input must be a sequence of text" + final_model_kwargs["input"] = input + elif model_type == ModelType.LLM: + messages = [] + if input is not None and input != "": + if self._input_type == "text": + messages.append({"role": "system", "content": input}) + else: + messages.extend(input) + final_model_kwargs["messages"] = messages + else: + raise ValueError(f"model_type {model_type} is not supported") + + # Ensure model is specified + if "model" not in final_model_kwargs: + raise ValueError("model must be specified") + + return final_model_kwargs + + def parse_chat_completion(self, completion: ChatCompletion) -> GeneratorOutput: + """Parse chat completion response.""" + log.debug(f"completion: {completion}") + try: + data = self.chat_completion_parser(completion) + usage = self.track_completion_usage(completion) + return GeneratorOutput(data=None, usage=usage, raw_response=data) + except Exception as e: + log.error(f"Error parsing completion: {e}") + return GeneratorOutput( + data=None, error=str(e), raw_response=str(completion) + ) + + def track_completion_usage(self, completion: ChatCompletion) -> CompletionUsage: + """Track completion token usage.""" + usage = completion.usage + return CompletionUsage( + completion_tokens=usage.completion_tokens, + prompt_tokens=usage.prompt_tokens, + total_tokens=usage.total_tokens, + ) + + @backoff.on_exception( + backoff.expo, + ( + APITimeoutError, + InternalServerError, + RateLimitError, + UnprocessableEntityError, + BadRequestError, + ), + max_time=5, + ) + def call(self, api_kwargs: Dict = {}, model_type: ModelType = ModelType.UNDEFINED): + """Make a synchronous call to Azure OpenAI API.""" + log.info(f"api_kwargs: {api_kwargs}") + if model_type == ModelType.EMBEDDER: + return self.sync_client.embeddings.create(**api_kwargs) + elif model_type == ModelType.LLM: + if "stream" in api_kwargs and api_kwargs.get("stream", False): + log.debug("streaming call") + self.chat_completion_parser = handle_streaming_response + return self.sync_client.chat.completions.create(**api_kwargs) + return self.sync_client.chat.completions.create(**api_kwargs) + else: + raise ValueError(f"model_type {model_type} is not supported") + + @backoff.on_exception( + backoff.expo, + ( + APITimeoutError, + InternalServerError, + RateLimitError, + UnprocessableEntityError, + BadRequestError, + ), + max_time=5, + ) + async def acall( + self, api_kwargs: Dict = {}, model_type: ModelType = ModelType.UNDEFINED + ): + """Make an asynchronous call to Azure OpenAI API.""" + if self.async_client is None: + self.async_client = self.init_async_client() + if model_type == ModelType.EMBEDDER: + return await self.async_client.embeddings.create(**api_kwargs) + elif model_type == ModelType.LLM: + return await self.async_client.chat.completions.create(**api_kwargs) + else: + raise ValueError(f"model_type {model_type} is not supported") + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> 'AzureClient': + """Create an instance from a dictionary.""" + obj = super().from_dict(data) + obj.sync_client = obj.init_sync_client() + obj.async_client = obj.init_async_client() + return obj + + def to_dict(self) -> Dict[str, Any]: + """Convert the instance to a dictionary.""" + exclude = ["sync_client", "async_client"] + output = super().to_dict(exclude=exclude) + return output \ No newline at end of file diff --git a/tutorials/azure_example.py b/tutorials/azure_example.py new file mode 100644 index 000000000..030cf634f --- /dev/null +++ b/tutorials/azure_example.py @@ -0,0 +1,187 @@ +"""Example script demonstrating Azure OpenAI client usage in AdalFlow.""" + +import os +import asyncio +from adalflow.components.model_client import AzureClient +from adalflow.core.generator import Generator +from adalflow.core.types import ModelType + +# Demo configuration - Replace these with your actual values +DEMO_CONFIG = { + "api_key": "your-api-key", # From Azure Portal > Keys and Endpoint + "azure_endpoint": "https://your-resource.openai.azure.com/", # Your Azure OpenAI endpoint + "api_version": "2024-02-15-preview", # Current API version + "deployment_name": "gpt-35-turbo" # Your model deployment name +} + +def setup_environment(): + """Setup environment variables if not already set.""" + if not os.getenv("AZURE_OPENAI_API_KEY"): + print("Setting up demo environment variables...") + os.environ["AZURE_OPENAI_API_KEY"] = DEMO_CONFIG["api_key"] + os.environ["AZURE_OPENAI_ENDPOINT"] = DEMO_CONFIG["azure_endpoint"] + os.environ["AZURE_OPENAI_VERSION"] = DEMO_CONFIG["api_version"] + else: + print("Using existing environment variables...") + +def test_chat_completion(): + """Test chat completion with Azure OpenAI.""" + print("\nTesting chat completion...") + client = AzureClient() + generator = Generator( + model_client=client, + model_kwargs={ + "model": DEMO_CONFIG["deployment_name"], + "temperature": 0.7, + }, + model_type=ModelType.LLM + ) + + # Single turn conversation + response = generator("What is the capital of France?") + print("\nChat Completion Response:") + print(f"Content: {response.raw_response}") + print(f"Usage: {response.usage}") + print(f"Error: {response.error}") + + # Multi-turn conversation + messages = [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "What is the capital of France?"}, + {"role": "assistant", "content": "The capital of France is Paris."}, + {"role": "user", "content": "What is its population?"} + ] + + client = AzureClient(input_type="messages") + generator = Generator( + model_client=client, + model_kwargs={ + "model": DEMO_CONFIG["deployment_name"], + "temperature": 0.7, + }, + model_type=ModelType.LLM + ) + + response = generator(messages) + print("\nMulti-turn Conversation Response:") + print(f"Content: {response.raw_response}") + print(f"Usage: {response.usage}") + print(f"Error: {response.error}") + +def test_embeddings(): + """Test embeddings with Azure OpenAI.""" + print("\nTesting embeddings...") + client = AzureClient() + generator = Generator( + model_client=client, + model_kwargs={ + "model": "text-embedding-ada-002", # Standard embedding model name + }, + model_type=ModelType.EMBEDDER + ) + + # Single text embedding + response = generator("Hello, world!") + print("\nSingle Text Embedding Response:") + print(f"Embedding shape: {len(response.raw_response)}") + print(f"Usage: {response.usage}") + print(f"Error: {response.error}") + + # Multiple text embeddings + texts = ["Hello, world!", "How are you?", "Nice to meet you!"] + response = generator(texts) + print("\nMultiple Text Embeddings Response:") + print(f"Number of embeddings: {len(response.raw_response)}") + print(f"Usage: {response.usage}") + print(f"Error: {response.error}") + +async def test_async_chat(): + """Test async chat completion with Azure OpenAI.""" + print("\nTesting async chat completion...") + client = AzureClient() + generator = Generator( + model_client=client, + model_kwargs={ + "model": DEMO_CONFIG["deployment_name"], + "temperature": 0.7, + }, + model_type=ModelType.LLM + ) + + response = await generator.acall("What is the capital of France?") + print("\nAsync Chat Completion Response:") + print(f"Content: {response.raw_response}") + print(f"Usage: {response.usage}") + print(f"Error: {response.error}") + +def test_streaming(): + """Test streaming chat completion with Azure OpenAI.""" + print("\nTesting streaming chat completion...") + client = AzureClient() + generator = Generator( + model_client=client, + model_kwargs={ + "model": DEMO_CONFIG["deployment_name"], + "temperature": 0.7, + "stream": True + }, + model_type=ModelType.LLM + ) + + print("\nStreaming Chat Completion Response:") + for chunk in generator("Tell me a short story about a cat."): + if chunk.raw_response: + print(chunk.raw_response, end="", flush=True) + print("\n") + +def check_requirements(): + """Check if all required packages are installed.""" + try: + import openai + import azure.identity + import azure.mgmt.cognitiveservices + print("All required packages are installed.") + return True + except ImportError as e: + print(f"Missing required package: {e}") + print("Please install required packages:") + print("poetry add openai azure-identity azure-mgmt-cognitiveservices") + return False + +if __name__ == "__main__": + print("Azure OpenAI Client Test Script") + print("==============================") + + if not check_requirements(): + exit(1) + + setup_environment() + + # Check for required environment variables + required_vars = ["AZURE_OPENAI_API_KEY", "AZURE_OPENAI_ENDPOINT", "AZURE_OPENAI_VERSION"] + missing_vars = [var for var in required_vars if not os.getenv(var)] + if missing_vars: + print(f"Missing required environment variables: {', '.join(missing_vars)}") + print("Please set them before running this script.") + print("\nYou can set them in your environment:") + print("export AZURE_OPENAI_API_KEY='your-key'") + print("export AZURE_OPENAI_ENDPOINT='your-endpoint'") + print("export AZURE_OPENAI_VERSION='api-version'") + print("\nOr update the DEMO_CONFIG in this script.") + exit(1) + + print("\nStarting Azure OpenAI tests...") + + try: + # Test synchronous operations + test_chat_completion() + test_embeddings() + test_streaming() + + # Test asynchronous operations + asyncio.run(test_async_chat()) + + print("\nAll tests completed!") + except Exception as e: + print(f"\nError during testing: {e}") + print("\nPlease check your Azure OpenAI setup and credentials.") \ No newline at end of file From fd8097496ab13c9b25c2f66d275b473ba74ea2f1 Mon Sep 17 00:00:00 2001 From: fm1320 Date: Thu, 16 Jan 2025 23:54:00 +0000 Subject: [PATCH 2/5] no subclass for image generation proposal --- .../components/model_client/openai_client.py | 117 +++++++++++++----- adalflow/adalflow/core/generator.py | 15 +++ .../multimodal_client_testing_examples.py | 14 +-- 3 files changed, 104 insertions(+), 42 deletions(-) diff --git a/adalflow/adalflow/components/model_client/openai_client.py b/adalflow/adalflow/components/model_client/openai_client.py index c3750667d..e3b7d01a7 100644 --- a/adalflow/adalflow/components/model_client/openai_client.py +++ b/adalflow/adalflow/components/model_client/openai_client.py @@ -243,7 +243,18 @@ def convert_inputs_to_api_kwargs( - images: Optional image source(s) as path, URL, or list of them - detail: Image detail level ('auto', 'low', or 'high'), defaults to 'auto' - model: The model to use (must support multimodal inputs if images are provided) - model_type: The type of model (EMBEDDER or LLM) + For image generation: + - model: "dall-e-3" or "dall-e-2" + - size: "1024x1024", "1024x1792", or "1792x1024" for DALL-E 3; "256x256", "512x512", or "1024x1024" for DALL-E 2 + - quality: "standard" or "hd" (DALL-E 3 only) + - n: Number of images (1 for DALL-E 3, 1-10 for DALL-E 2) + - response_format: "url" or "b64_json" + For image edits (DALL-E 2 only): + - image: Path to the input image + - mask: Path to the mask image + For variations (DALL-E 2 only): + - image: Path to the input image + model_type: The type of model (EMBEDDER, LLM, or IMAGE_GENERATION) Returns: Dict: API-specific kwargs for the model call @@ -308,20 +319,44 @@ def convert_inputs_to_api_kwargs( # Ensure model is specified if "model" not in final_model_kwargs: raise ValueError("model must be specified for image generation") - # Set defaults for DALL-E 3 if not specified - final_model_kwargs["size"] = final_model_kwargs.get("size", "1024x1024") - final_model_kwargs["quality"] = final_model_kwargs.get("quality", "standard") - final_model_kwargs["n"] = final_model_kwargs.get("n", 1) - final_model_kwargs["response_format"] = final_model_kwargs.get("response_format", "url") - - # Handle image edits and variations - image = final_model_kwargs.get("image") - if isinstance(image, str) and os.path.isfile(image): - final_model_kwargs["image"] = self._encode_image(image) - mask = final_model_kwargs.get("mask") - if isinstance(mask, str) and os.path.isfile(mask): - final_model_kwargs["mask"] = self._encode_image(mask) + # Set defaults for image generation + if "operation" not in final_model_kwargs: + final_model_kwargs["operation"] = "generate" # Default operation + + operation = final_model_kwargs.pop("operation") + + if operation == "generate": + # Set defaults for DALL-E 3 if not specified + final_model_kwargs["size"] = final_model_kwargs.get("size", "1024x1024") + final_model_kwargs["quality"] = final_model_kwargs.get("quality", "standard") + final_model_kwargs["n"] = final_model_kwargs.get("n", 1) + final_model_kwargs["response_format"] = final_model_kwargs.get("response_format", "url") + + elif operation in ["edit", "variation"]: + if "model" not in final_model_kwargs or final_model_kwargs["model"] != "dall-e-2": + raise ValueError(f"{operation} operation is only available with DALL-E 2") + + # Handle image input + image_path = final_model_kwargs.get("image") + if not image_path or not os.path.isfile(image_path): + raise ValueError(f"Valid image path must be provided for {operation}") + final_model_kwargs["image"] = open(image_path, "rb") + + # Handle mask for edit operation + if operation == "edit": + mask_path = final_model_kwargs.get("mask") + if not mask_path or not os.path.isfile(mask_path): + raise ValueError("Valid mask path must be provided for edit operation") + final_model_kwargs["mask"] = open(mask_path, "rb") + + # Set defaults + final_model_kwargs["size"] = final_model_kwargs.get("size", "1024x1024") + final_model_kwargs["n"] = final_model_kwargs.get("n", 1) + final_model_kwargs["response_format"] = final_model_kwargs.get("response_format", "url") + + else: + raise ValueError(f"Invalid operation: {operation}") else: raise ValueError(f"model_type {model_type} is not supported") return final_model_kwargs @@ -371,18 +406,25 @@ def call(self, api_kwargs: Dict = {}, model_type: ModelType = ModelType.UNDEFINE return self.sync_client.chat.completions.create(**api_kwargs) return self.sync_client.chat.completions.create(**api_kwargs) elif model_type == ModelType.IMAGE_GENERATION: - # Determine which image API to call based on the presence of image/mask - if "image" in api_kwargs: - if "mask" in api_kwargs: - # Image edit + operation = api_kwargs.pop("operation", "generate") + + try: + if operation == "generate": + response = self.sync_client.images.generate(**api_kwargs) + elif operation == "edit": response = self.sync_client.images.edit(**api_kwargs) - else: - # Image variation + elif operation == "variation": response = self.sync_client.images.create_variation(**api_kwargs) - else: - # Image generation - response = self.sync_client.images.generate(**api_kwargs) - return response.data + else: + raise ValueError(f"Invalid operation: {operation}") + + return response.data + finally: + # Clean up file handles if they exist + if "image" in api_kwargs and hasattr(api_kwargs["image"], "close"): + api_kwargs["image"].close() + if "mask" in api_kwargs and hasattr(api_kwargs["mask"], "close"): + api_kwargs["mask"].close() else: raise ValueError(f"model_type {model_type} is not supported") @@ -410,18 +452,25 @@ async def acall( elif model_type == ModelType.LLM: return await self.async_client.chat.completions.create(**api_kwargs) elif model_type == ModelType.IMAGE_GENERATION: - # Determine which image API to call based on the presence of image/mask - if "image" in api_kwargs: - if "mask" in api_kwargs: - # Image edit + operation = api_kwargs.pop("operation", "generate") + + try: + if operation == "generate": + response = await self.async_client.images.generate(**api_kwargs) + elif operation == "edit": response = await self.async_client.images.edit(**api_kwargs) - else: - # Image variation + elif operation == "variation": response = await self.async_client.images.create_variation(**api_kwargs) - else: - # Image generation - response = await self.async_client.images.generate(**api_kwargs) - return response.data + else: + raise ValueError(f"Invalid operation: {operation}") + + return response.data + finally: + # Clean up file handles if they exist + if "image" in api_kwargs and hasattr(api_kwargs["image"], "close"): + api_kwargs["image"].close() + if "mask" in api_kwargs and hasattr(api_kwargs["mask"], "close"): + api_kwargs["mask"].close() else: raise ValueError(f"model_type {model_type} is not supported") diff --git a/adalflow/adalflow/core/generator.py b/adalflow/adalflow/core/generator.py index baedd8fb7..a1714bbd1 100644 --- a/adalflow/adalflow/core/generator.py +++ b/adalflow/adalflow/core/generator.py @@ -100,6 +100,8 @@ def __init__( # args for the cache cache_path: Optional[str] = None, use_cache: bool = False, + # args for model type + model_type: ModelType = ModelType.LLM, ) -> None: r"""The default prompt is set to the DEFAULT_ADALFLOW_SYSTEM_PROMPT. It has the following variables: - task_desc_str @@ -110,6 +112,17 @@ def __init__( - steps_str You can preset the prompt kwargs to fill in the variables in the prompt using prompt_kwargs. But you can replace the prompt and set any variables you want and use the prompt_kwargs to fill in the variables. + + Args: + model_client (ModelClient): The model client to use for the generator. + model_kwargs (Dict[str, Any], optional): The model kwargs to pass to the model client. Defaults to {}. Please refer to :ref:`ModelClient` for the details on how to set the model_kwargs for your specific model if it is from our library. + template (Optional[str], optional): The template for the prompt. Defaults to :ref:`DEFAULT_ADALFLOW_SYSTEM_PROMPT`. + prompt_kwargs (Optional[Dict], optional): The preset prompt kwargs to fill in the variables in the prompt. Defaults to None. + output_processors (Optional[Component], optional): The output processors after model call. It can be a single component or a chained component via ``Sequential``. Defaults to None. + name (Optional[str], optional): The name of the generator. Defaults to None. + cache_path (Optional[str], optional): The path to save the cache. Defaults to None. + use_cache (bool, optional): Whether to use cache. Defaults to False. + model_type (ModelType, optional): The type of model (EMBEDDER, LLM, or IMAGE_GENERATION). Defaults to ModelType.LLM. """ if not isinstance(model_client, ModelClient): @@ -133,6 +146,7 @@ def __init__( CallbackManager.__init__(self) self.name = name or self.__class__.__name__ + self.model_type = model_type self._init_prompt(template, prompt_kwargs) @@ -163,6 +177,7 @@ def __init__( "name": name, "cache_path": cache_path, "use_cache": use_cache, + "model_type": model_type, } self._teacher: Optional["Generator"] = None self._trace_api_kwargs: Dict[str, Any] = ( diff --git a/tutorials/multimodal_client_testing_examples.py b/tutorials/multimodal_client_testing_examples.py index 44bdbc69e..2c1180de1 100644 --- a/tutorials/multimodal_client_testing_examples.py +++ b/tutorials/multimodal_client_testing_examples.py @@ -23,10 +23,6 @@ from typing import List from numpy.linalg import norm -class ImageGenerator(Generator): - """Generator subclass for image generation.""" - model_type = ModelType.IMAGE_GENERATION - def test_basic_generation(): """Test basic text generation""" client = OpenAIClient() @@ -61,14 +57,15 @@ def test_invalid_image_url(): def test_invalid_image_generation(): """Test DALL-E generation with invalid parameters""" client = OpenAIClient() - gen = ImageGenerator( + gen = Generator( model_client=client, model_kwargs={ "model": "dall-e-3", "size": "invalid_size", # Invalid size parameter "quality": "standard", "n": 1 - } + }, + model_type=ModelType.IMAGE_GENERATION ) print("\n=== Testing Invalid DALL-E Parameters ===") @@ -94,14 +91,15 @@ def test_vision_and_generation(): print(f"Description: {vision_response.raw_response}") # 2. Test DALL-E Image Generation - dalle_gen = ImageGenerator( + dalle_gen = Generator( model_client=client, model_kwargs={ "model": "dall-e-3", "size": "1024x1024", "quality": "standard", "n": 1 - } + }, + model_type=ModelType.IMAGE_GENERATION ) # For image generation, input_str becomes the prompt From 41455959e0905dae44cc6b91741b2ae66f2561b7 Mon Sep 17 00:00:00 2001 From: fm1320 Date: Fri, 17 Jan 2025 03:39:36 +0000 Subject: [PATCH 3/5] undo accidental commit --- .../components/model_client/azure_client.py | 273 ------------------ tutorials/azure_example.py | 187 ------------ 2 files changed, 460 deletions(-) delete mode 100644 adalflow/adalflow/components/model_client/azure_client.py delete mode 100644 tutorials/azure_example.py diff --git a/adalflow/adalflow/components/model_client/azure_client.py b/adalflow/adalflow/components/model_client/azure_client.py deleted file mode 100644 index 7bbfe65dc..000000000 --- a/adalflow/adalflow/components/model_client/azure_client.py +++ /dev/null @@ -1,273 +0,0 @@ -"""Azure OpenAI ModelClient integration.""" - -import os -from typing import Dict, Optional, Any, Callable, Literal -import backoff -import logging - -from adalflow.core.model_client import ModelClient -from adalflow.core.types import ModelType, CompletionUsage, GeneratorOutput - -# optional import -from adalflow.utils.lazy_import import safe_import, OptionalPackages - -openai = safe_import(OptionalPackages.OPENAI.value[0], OptionalPackages.OPENAI.value[1]) - -from openai import AzureOpenAI, AsyncAzureOpenAI, Stream -from openai import ( - APITimeoutError, - InternalServerError, - RateLimitError, - UnprocessableEntityError, - BadRequestError, -) -from openai.types import ( - Completion, - CreateEmbeddingResponse, -) -from openai.types.chat import ChatCompletionChunk, ChatCompletion -from adalflow.components.model_client.utils import parse_embedding_response - -log = logging.getLogger(__name__) - -def get_first_message_content(completion: ChatCompletion) -> str: - """When we only need the content of the first message. - It is the default parser for chat completion.""" - return completion.choices[0].message.content - -def parse_stream_response(completion: ChatCompletionChunk) -> str: - """Parse the response of the stream API.""" - return completion.choices[0].delta.content - -def handle_streaming_response(generator: Stream[ChatCompletionChunk]): - """Handle the streaming response.""" - for completion in generator: - log.debug(f"Raw chunk completion: {completion}") - parsed_content = parse_stream_response(completion) - yield parsed_content - -class AzureClient(ModelClient): - """A component wrapper for the Azure OpenAI API client. - - This client supports both chat completion and embedding APIs through Azure OpenAI. - It can be used with both sync and async operations. - - Args: - api_key (Optional[str]): Azure OpenAI API key - api_version (Optional[str]): API version to use - azure_endpoint (Optional[str]): Azure OpenAI endpoint URL (e.g., https://.openai.azure.com/) - base_url (Optional[str]): Alternative base URL format (e.g., https://..models.ai.azure.com) - chat_completion_parser (Optional[Callable]): Function to parse chat completions - input_type (Literal["text", "messages"]): Format for input - - Environment Variables: - AZURE_OPENAI_API_KEY: API key - AZURE_OPENAI_ENDPOINT: Endpoint URL (new format) - AZURE_BASE_URL: Base URL (alternative format) - AZURE_OPENAI_VERSION: API version - - Example: - >>> from adalflow.components.model_client import AzureClient - >>> client = AzureClient() - >>> generator = Generator( - ... model_client=client, - ... model_kwargs={ - ... "model": "gpt-4", - ... "temperature": 0.7 - ... } - ... ) - >>> response = generator({"input_str": "What is the capital of France?"}) - """ - - def __init__( - self, - api_key: Optional[str] = None, - api_version: Optional[str] = None, - azure_endpoint: Optional[str] = None, - base_url: Optional[str] = None, - chat_completion_parser: Callable[[Completion], Any] = None, - input_type: Literal["text", "messages"] = "text", - ): - super().__init__() - self._api_key = api_key - self._api_version = api_version - self._azure_endpoint = azure_endpoint - self._base_url = base_url - self.sync_client = self.init_sync_client() - self.async_client = None - self.chat_completion_parser = chat_completion_parser or get_first_message_content - self._input_type = input_type - - def _get_endpoint(self) -> str: - """Get the appropriate endpoint URL based on available configuration.""" - # First try the new format endpoint - endpoint = self._azure_endpoint or os.getenv("AZURE_OPENAI_ENDPOINT") - if endpoint: - return endpoint - - # Then try the alternative base URL format - base_url = self._base_url or os.getenv("AZURE_BASE_URL") - if base_url: - # If base_url is provided in the format https://..models.ai.azure.com - # we need to extract the model and region - if "models.ai.azure.com" in base_url: - return base_url.rstrip("/") - # If it's just the model name, construct the full URL - return f"https://{base_url}.openai.azure.com" - - raise ValueError( - "Either AZURE_OPENAI_ENDPOINT or AZURE_BASE_URL must be set. " - "Check your deployment page for a URL like: " - "https://.openai.azure.com/ or " - "https://..models.ai.azure.com" - ) - - def init_sync_client(self): - api_key = self._api_key or os.getenv("AZURE_OPENAI_API_KEY") - api_version = self._api_version or os.getenv("AZURE_OPENAI_VERSION") - - if not api_key: - raise ValueError("Environment variable AZURE_OPENAI_API_KEY must be set") - if not api_version: - raise ValueError("Environment variable AZURE_OPENAI_VERSION must be set") - - endpoint = self._get_endpoint() - - return AzureOpenAI( - api_key=api_key, - api_version=api_version, - azure_endpoint=endpoint - ) - - def init_async_client(self): - api_key = self._api_key or os.getenv("AZURE_OPENAI_API_KEY") - api_version = self._api_version or os.getenv("AZURE_OPENAI_VERSION") - - if not api_key: - raise ValueError("Environment variable AZURE_OPENAI_API_KEY must be set") - if not api_version: - raise ValueError("Environment variable AZURE_OPENAI_VERSION must be set") - - endpoint = self._get_endpoint() - - return AsyncAzureOpenAI( - api_key=api_key, - api_version=api_version, - azure_endpoint=endpoint - ) - - def convert_inputs_to_api_kwargs( - self, - input: Optional[Any] = None, - model_kwargs: Dict = {}, - model_type: ModelType = ModelType.UNDEFINED, - ) -> Dict: - """Convert inputs to Azure OpenAI API kwargs format.""" - final_model_kwargs = model_kwargs.copy() - - if model_type == ModelType.EMBEDDER: - if isinstance(input, str): - input = [input] - assert isinstance(input, (list, tuple)), "input must be a sequence of text" - final_model_kwargs["input"] = input - elif model_type == ModelType.LLM: - messages = [] - if input is not None and input != "": - if self._input_type == "text": - messages.append({"role": "system", "content": input}) - else: - messages.extend(input) - final_model_kwargs["messages"] = messages - else: - raise ValueError(f"model_type {model_type} is not supported") - - # Ensure model is specified - if "model" not in final_model_kwargs: - raise ValueError("model must be specified") - - return final_model_kwargs - - def parse_chat_completion(self, completion: ChatCompletion) -> GeneratorOutput: - """Parse chat completion response.""" - log.debug(f"completion: {completion}") - try: - data = self.chat_completion_parser(completion) - usage = self.track_completion_usage(completion) - return GeneratorOutput(data=None, usage=usage, raw_response=data) - except Exception as e: - log.error(f"Error parsing completion: {e}") - return GeneratorOutput( - data=None, error=str(e), raw_response=str(completion) - ) - - def track_completion_usage(self, completion: ChatCompletion) -> CompletionUsage: - """Track completion token usage.""" - usage = completion.usage - return CompletionUsage( - completion_tokens=usage.completion_tokens, - prompt_tokens=usage.prompt_tokens, - total_tokens=usage.total_tokens, - ) - - @backoff.on_exception( - backoff.expo, - ( - APITimeoutError, - InternalServerError, - RateLimitError, - UnprocessableEntityError, - BadRequestError, - ), - max_time=5, - ) - def call(self, api_kwargs: Dict = {}, model_type: ModelType = ModelType.UNDEFINED): - """Make a synchronous call to Azure OpenAI API.""" - log.info(f"api_kwargs: {api_kwargs}") - if model_type == ModelType.EMBEDDER: - return self.sync_client.embeddings.create(**api_kwargs) - elif model_type == ModelType.LLM: - if "stream" in api_kwargs and api_kwargs.get("stream", False): - log.debug("streaming call") - self.chat_completion_parser = handle_streaming_response - return self.sync_client.chat.completions.create(**api_kwargs) - return self.sync_client.chat.completions.create(**api_kwargs) - else: - raise ValueError(f"model_type {model_type} is not supported") - - @backoff.on_exception( - backoff.expo, - ( - APITimeoutError, - InternalServerError, - RateLimitError, - UnprocessableEntityError, - BadRequestError, - ), - max_time=5, - ) - async def acall( - self, api_kwargs: Dict = {}, model_type: ModelType = ModelType.UNDEFINED - ): - """Make an asynchronous call to Azure OpenAI API.""" - if self.async_client is None: - self.async_client = self.init_async_client() - if model_type == ModelType.EMBEDDER: - return await self.async_client.embeddings.create(**api_kwargs) - elif model_type == ModelType.LLM: - return await self.async_client.chat.completions.create(**api_kwargs) - else: - raise ValueError(f"model_type {model_type} is not supported") - - @classmethod - def from_dict(cls, data: Dict[str, Any]) -> 'AzureClient': - """Create an instance from a dictionary.""" - obj = super().from_dict(data) - obj.sync_client = obj.init_sync_client() - obj.async_client = obj.init_async_client() - return obj - - def to_dict(self) -> Dict[str, Any]: - """Convert the instance to a dictionary.""" - exclude = ["sync_client", "async_client"] - output = super().to_dict(exclude=exclude) - return output \ No newline at end of file diff --git a/tutorials/azure_example.py b/tutorials/azure_example.py deleted file mode 100644 index 030cf634f..000000000 --- a/tutorials/azure_example.py +++ /dev/null @@ -1,187 +0,0 @@ -"""Example script demonstrating Azure OpenAI client usage in AdalFlow.""" - -import os -import asyncio -from adalflow.components.model_client import AzureClient -from adalflow.core.generator import Generator -from adalflow.core.types import ModelType - -# Demo configuration - Replace these with your actual values -DEMO_CONFIG = { - "api_key": "your-api-key", # From Azure Portal > Keys and Endpoint - "azure_endpoint": "https://your-resource.openai.azure.com/", # Your Azure OpenAI endpoint - "api_version": "2024-02-15-preview", # Current API version - "deployment_name": "gpt-35-turbo" # Your model deployment name -} - -def setup_environment(): - """Setup environment variables if not already set.""" - if not os.getenv("AZURE_OPENAI_API_KEY"): - print("Setting up demo environment variables...") - os.environ["AZURE_OPENAI_API_KEY"] = DEMO_CONFIG["api_key"] - os.environ["AZURE_OPENAI_ENDPOINT"] = DEMO_CONFIG["azure_endpoint"] - os.environ["AZURE_OPENAI_VERSION"] = DEMO_CONFIG["api_version"] - else: - print("Using existing environment variables...") - -def test_chat_completion(): - """Test chat completion with Azure OpenAI.""" - print("\nTesting chat completion...") - client = AzureClient() - generator = Generator( - model_client=client, - model_kwargs={ - "model": DEMO_CONFIG["deployment_name"], - "temperature": 0.7, - }, - model_type=ModelType.LLM - ) - - # Single turn conversation - response = generator("What is the capital of France?") - print("\nChat Completion Response:") - print(f"Content: {response.raw_response}") - print(f"Usage: {response.usage}") - print(f"Error: {response.error}") - - # Multi-turn conversation - messages = [ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": "What is the capital of France?"}, - {"role": "assistant", "content": "The capital of France is Paris."}, - {"role": "user", "content": "What is its population?"} - ] - - client = AzureClient(input_type="messages") - generator = Generator( - model_client=client, - model_kwargs={ - "model": DEMO_CONFIG["deployment_name"], - "temperature": 0.7, - }, - model_type=ModelType.LLM - ) - - response = generator(messages) - print("\nMulti-turn Conversation Response:") - print(f"Content: {response.raw_response}") - print(f"Usage: {response.usage}") - print(f"Error: {response.error}") - -def test_embeddings(): - """Test embeddings with Azure OpenAI.""" - print("\nTesting embeddings...") - client = AzureClient() - generator = Generator( - model_client=client, - model_kwargs={ - "model": "text-embedding-ada-002", # Standard embedding model name - }, - model_type=ModelType.EMBEDDER - ) - - # Single text embedding - response = generator("Hello, world!") - print("\nSingle Text Embedding Response:") - print(f"Embedding shape: {len(response.raw_response)}") - print(f"Usage: {response.usage}") - print(f"Error: {response.error}") - - # Multiple text embeddings - texts = ["Hello, world!", "How are you?", "Nice to meet you!"] - response = generator(texts) - print("\nMultiple Text Embeddings Response:") - print(f"Number of embeddings: {len(response.raw_response)}") - print(f"Usage: {response.usage}") - print(f"Error: {response.error}") - -async def test_async_chat(): - """Test async chat completion with Azure OpenAI.""" - print("\nTesting async chat completion...") - client = AzureClient() - generator = Generator( - model_client=client, - model_kwargs={ - "model": DEMO_CONFIG["deployment_name"], - "temperature": 0.7, - }, - model_type=ModelType.LLM - ) - - response = await generator.acall("What is the capital of France?") - print("\nAsync Chat Completion Response:") - print(f"Content: {response.raw_response}") - print(f"Usage: {response.usage}") - print(f"Error: {response.error}") - -def test_streaming(): - """Test streaming chat completion with Azure OpenAI.""" - print("\nTesting streaming chat completion...") - client = AzureClient() - generator = Generator( - model_client=client, - model_kwargs={ - "model": DEMO_CONFIG["deployment_name"], - "temperature": 0.7, - "stream": True - }, - model_type=ModelType.LLM - ) - - print("\nStreaming Chat Completion Response:") - for chunk in generator("Tell me a short story about a cat."): - if chunk.raw_response: - print(chunk.raw_response, end="", flush=True) - print("\n") - -def check_requirements(): - """Check if all required packages are installed.""" - try: - import openai - import azure.identity - import azure.mgmt.cognitiveservices - print("All required packages are installed.") - return True - except ImportError as e: - print(f"Missing required package: {e}") - print("Please install required packages:") - print("poetry add openai azure-identity azure-mgmt-cognitiveservices") - return False - -if __name__ == "__main__": - print("Azure OpenAI Client Test Script") - print("==============================") - - if not check_requirements(): - exit(1) - - setup_environment() - - # Check for required environment variables - required_vars = ["AZURE_OPENAI_API_KEY", "AZURE_OPENAI_ENDPOINT", "AZURE_OPENAI_VERSION"] - missing_vars = [var for var in required_vars if not os.getenv(var)] - if missing_vars: - print(f"Missing required environment variables: {', '.join(missing_vars)}") - print("Please set them before running this script.") - print("\nYou can set them in your environment:") - print("export AZURE_OPENAI_API_KEY='your-key'") - print("export AZURE_OPENAI_ENDPOINT='your-endpoint'") - print("export AZURE_OPENAI_VERSION='api-version'") - print("\nOr update the DEMO_CONFIG in this script.") - exit(1) - - print("\nStarting Azure OpenAI tests...") - - try: - # Test synchronous operations - test_chat_completion() - test_embeddings() - test_streaming() - - # Test asynchronous operations - asyncio.run(test_async_chat()) - - print("\nAll tests completed!") - except Exception as e: - print(f"\nError during testing: {e}") - print("\nPlease check your Azure OpenAI setup and credentials.") \ No newline at end of file From d121c18bcfb911e6600b9ffb3ddc96fd2c9135af Mon Sep 17 00:00:00 2001 From: fm1320 Date: Mon, 20 Jan 2025 10:54:34 +0100 Subject: [PATCH 4/5] fix kwargs implementation --- .../components/model_client/openai_client.py | 28 ++++++++------- adalflow/adalflow/core/generator.py | 34 +++---------------- .../multimodal_client_testing_examples.py | 24 ++++++------- 3 files changed, 31 insertions(+), 55 deletions(-) diff --git a/adalflow/adalflow/components/model_client/openai_client.py b/adalflow/adalflow/components/model_client/openai_client.py index e3b7d01a7..4a13fe8d7 100644 --- a/adalflow/adalflow/components/model_client/openai_client.py +++ b/adalflow/adalflow/components/model_client/openai_client.py @@ -106,6 +106,12 @@ class OpenAIClient(ModelClient): Users (1) simplify use ``Embedder`` and ``Generator`` components by passing OpenAIClient() as the model_client. (2) can use this as an example to create their own API client or extend this class(copying and modifing the code) in their own project. + Args: + api_key (Optional[str], optional): OpenAI API key. Defaults to None. + chat_completion_parser (Callable[[Completion], Any], optional): A function to parse the chat completion to a str. Defaults to None. + input_type (Literal["text", "messages"], optional): The type of input to use. Defaults to "text". + model_type (ModelType, optional): The type of model to use (EMBEDDER, LLM, or IMAGE_GENERATION). Defaults to ModelType.LLM. + Note: We suggest users not to use `response_format` to enforce output data type or `tools` and `tool_choice` in your model_kwargs when calling the API. We do not know how OpenAI is doing the formating or what prompt they have added. @@ -120,14 +126,9 @@ class OpenAIClient(ModelClient): - prompt: Text description of the image to generate - size: "1024x1024", "1024x1792", or "1792x1024" for DALL-E 3; "256x256", "512x512", or "1024x1024" for DALL-E 2 - quality: "standard" or "hd" (DALL-E 3 only) - - n: Number of images to generate (1 for DALL-E 3, 1-10 for DALL-E 2) + - n: Number of images (1 for DALL-E 3, 1-10 for DALL-E 2) - response_format: "url" or "b64_json" - Args: - api_key (Optional[str], optional): OpenAI API key. Defaults to None. - chat_completion_parser (Callable[[Completion], Any], optional): A function to parse the chat completion to a str. Defaults to None. - Default is `get_first_message_content`. - References: - Embeddings models: https://platform.openai.com/docs/guides/embeddings - Chat models: https://platform.openai.com/docs/guides/text-generation @@ -141,11 +142,15 @@ def __init__( api_key: Optional[str] = None, chat_completion_parser: Callable[[Completion], Any] = None, input_type: Literal["text", "messages"] = "text", + model_type: ModelType = ModelType.LLM, ): r"""It is recommended to set the OPENAI_API_KEY environment variable instead of passing it as an argument. Args: api_key (Optional[str], optional): OpenAI API key. Defaults to None. + chat_completion_parser (Callable[[Completion], Any], optional): A function to parse the chat completion to a str. Defaults to None. + input_type (Literal["text", "messages"], optional): The type of input to use. Defaults to "text". + model_type (ModelType, optional): The type of model to use (EMBEDDER, LLM, or IMAGE_GENERATION). Defaults to ModelType.LLM. """ super().__init__() self._api_key = api_key @@ -155,6 +160,7 @@ def __init__( chat_completion_parser or get_first_message_content ) self._input_type = input_type + self.model_type = model_type def init_sync_client(self): api_key = self._api_key or os.getenv("OPENAI_API_KEY") @@ -229,7 +235,6 @@ def convert_inputs_to_api_kwargs( self, input: Optional[Any] = None, model_kwargs: Dict = {}, - model_type: ModelType = ModelType.UNDEFINED, ) -> Dict: r""" Specify the API input type and output api_kwargs that will be used in _call and _acall methods. @@ -254,21 +259,20 @@ def convert_inputs_to_api_kwargs( - mask: Path to the mask image For variations (DALL-E 2 only): - image: Path to the input image - model_type: The type of model (EMBEDDER, LLM, or IMAGE_GENERATION) Returns: Dict: API-specific kwargs for the model call """ final_model_kwargs = model_kwargs.copy() - if model_type == ModelType.EMBEDDER: + if self.model_type == ModelType.EMBEDDER: if isinstance(input, str): input = [input] # convert input to input if not isinstance(input, Sequence): raise TypeError("input must be a sequence of text") final_model_kwargs["input"] = input - elif model_type == ModelType.LLM: + elif self.model_type == ModelType.LLM: # convert input to messages messages: List[Dict[str, str]] = [] images = final_model_kwargs.pop("images", None) @@ -313,7 +317,7 @@ def convert_inputs_to_api_kwargs( else: messages.append({"role": "system", "content": input}) final_model_kwargs["messages"] = messages - elif model_type == ModelType.IMAGE_GENERATION: + elif self.model_type == ModelType.IMAGE_GENERATION: # For image generation, input is the prompt final_model_kwargs["prompt"] = input # Ensure model is specified @@ -358,7 +362,7 @@ def convert_inputs_to_api_kwargs( else: raise ValueError(f"Invalid operation: {operation}") else: - raise ValueError(f"model_type {model_type} is not supported") + raise ValueError(f"model_type {self.model_type} is not supported") return final_model_kwargs def parse_image_generation_response(self, response: List[Image]) -> GeneratorOutput: diff --git a/adalflow/adalflow/core/generator.py b/adalflow/adalflow/core/generator.py index a1714bbd1..1070da8ef 100644 --- a/adalflow/adalflow/core/generator.py +++ b/adalflow/adalflow/core/generator.py @@ -70,21 +70,11 @@ class Generator(GradComponent, CachedEngine, CallbackManager): template (Optional[str], optional): The template for the prompt. Defaults to :ref:`DEFAULT_ADALFLOW_SYSTEM_PROMPT`. prompt_kwargs (Optional[Dict], optional): The preset prompt kwargs to fill in the variables in the prompt. Defaults to None. output_processors (Optional[Component], optional): The output processors after model call. It can be a single component or a chained component via ``Sequential``. Defaults to None. - trainable_params (Optional[List[str]], optional): The list of trainable parameters. Defaults to []. - - Note: - The output_processors will be applied to the string output of the model completion. And the result will be stored in the data field of the output. - And we encourage you to only use it to parse the response to data format you will use later. + name (Optional[str], optional): The name of the generator. Defaults to None. + cache_path (Optional[str], optional): The path to save the cache. Defaults to None. + use_cache (bool, optional): Whether to use cache. Defaults to False. """ - model_type: ModelType = ModelType.LLM - model_client: ModelClient # for better type checking - - _use_cache: bool = False - _kwargs: Dict[str, Any] = ( - {} - ) # to create teacher generator from student TODO: might reaccess this - def __init__( self, *, @@ -100,8 +90,6 @@ def __init__( # args for the cache cache_path: Optional[str] = None, use_cache: bool = False, - # args for model type - model_type: ModelType = ModelType.LLM, ) -> None: r"""The default prompt is set to the DEFAULT_ADALFLOW_SYSTEM_PROMPT. It has the following variables: - task_desc_str @@ -112,17 +100,6 @@ def __init__( - steps_str You can preset the prompt kwargs to fill in the variables in the prompt using prompt_kwargs. But you can replace the prompt and set any variables you want and use the prompt_kwargs to fill in the variables. - - Args: - model_client (ModelClient): The model client to use for the generator. - model_kwargs (Dict[str, Any], optional): The model kwargs to pass to the model client. Defaults to {}. Please refer to :ref:`ModelClient` for the details on how to set the model_kwargs for your specific model if it is from our library. - template (Optional[str], optional): The template for the prompt. Defaults to :ref:`DEFAULT_ADALFLOW_SYSTEM_PROMPT`. - prompt_kwargs (Optional[Dict], optional): The preset prompt kwargs to fill in the variables in the prompt. Defaults to None. - output_processors (Optional[Component], optional): The output processors after model call. It can be a single component or a chained component via ``Sequential``. Defaults to None. - name (Optional[str], optional): The name of the generator. Defaults to None. - cache_path (Optional[str], optional): The path to save the cache. Defaults to None. - use_cache (bool, optional): Whether to use cache. Defaults to False. - model_type (ModelType, optional): The type of model (EMBEDDER, LLM, or IMAGE_GENERATION). Defaults to ModelType.LLM. """ if not isinstance(model_client, ModelClient): @@ -134,7 +111,6 @@ def __init__( template = template or DEFAULT_ADALFLOW_SYSTEM_PROMPT # create the cache path and initialize the cache engine - self.set_cache_path( cache_path, model_client, model_kwargs.get("model", "default") ) @@ -146,7 +122,7 @@ def __init__( CallbackManager.__init__(self) self.name = name or self.__class__.__name__ - self.model_type = model_type + self.model_type = model_client.model_type # Get model type from client self._init_prompt(template, prompt_kwargs) @@ -177,7 +153,6 @@ def __init__( "name": name, "cache_path": cache_path, "use_cache": use_cache, - "model_type": model_type, } self._teacher: Optional["Generator"] = None self._trace_api_kwargs: Dict[str, Any] = ( @@ -351,7 +326,6 @@ def _pre_call(self, prompt_kwargs: Dict, model_kwargs: Dict) -> Dict[str, Any]: api_kwargs = self.model_client.convert_inputs_to_api_kwargs( input=prompt_str, model_kwargs=composed_model_kwargs, - model_type=self.model_type, ) return api_kwargs diff --git a/tutorials/multimodal_client_testing_examples.py b/tutorials/multimodal_client_testing_examples.py index 2c1180de1..d94f8e4e1 100644 --- a/tutorials/multimodal_client_testing_examples.py +++ b/tutorials/multimodal_client_testing_examples.py @@ -25,7 +25,7 @@ def test_basic_generation(): """Test basic text generation""" - client = OpenAIClient() + client = OpenAIClient() # Default model_type is LLM gen = Generator( model_client=client, model_kwargs={ @@ -40,7 +40,7 @@ def test_basic_generation(): def test_invalid_image_url(): """Test Generator output with invalid image URL""" - client = OpenAIClient() + client = OpenAIClient() # Default model_type is LLM gen = Generator( model_client=client, model_kwargs={ @@ -56,7 +56,7 @@ def test_invalid_image_url(): def test_invalid_image_generation(): """Test DALL-E generation with invalid parameters""" - client = OpenAIClient() + client = OpenAIClient(model_type=ModelType.IMAGE_GENERATION) gen = Generator( model_client=client, model_kwargs={ @@ -64,8 +64,7 @@ def test_invalid_image_generation(): "size": "invalid_size", # Invalid size parameter "quality": "standard", "n": 1 - }, - model_type=ModelType.IMAGE_GENERATION + } ) print("\n=== Testing Invalid DALL-E Parameters ===") @@ -74,11 +73,10 @@ def test_invalid_image_generation(): def test_vision_and_generation(): """Test both vision analysis and image generation""" - client = OpenAIClient() - - # 1. Test Vision Analysis + # 1. Test Vision Analysis with LLM client + vision_client = OpenAIClient() # Default model_type is LLM vision_gen = Generator( - model_client=client, + model_client=vision_client, model_kwargs={ "model": "gpt-4o-mini", "images": "https://upload.wikimedia.org/wikipedia/en/7/7d/Lenna_%28test_image%29.png", @@ -90,16 +88,16 @@ def test_vision_and_generation(): print("\n=== Vision Analysis ===") print(f"Description: {vision_response.raw_response}") - # 2. Test DALL-E Image Generation + # 2. Test DALL-E Image Generation with IMAGE_GENERATION client + dalle_client = OpenAIClient(model_type=ModelType.IMAGE_GENERATION) dalle_gen = Generator( - model_client=client, + model_client=dalle_client, model_kwargs={ "model": "dall-e-3", "size": "1024x1024", "quality": "standard", "n": 1 - }, - model_type=ModelType.IMAGE_GENERATION + } ) # For image generation, input_str becomes the prompt From 922681a4ac84313e43541f3ab08c9dc994198f32 Mon Sep 17 00:00:00 2001 From: fm1320 Date: Sun, 26 Jan 2025 20:11:49 +0100 Subject: [PATCH 5/5] Fixed tests and made adjustments --- .../components/model_client/openai_client.py | 28 ++--- adalflow/adalflow/core/generator.py | 5 +- adalflow/tests/test_generator.py | 5 +- docs/source/tutorials/multimodal_client.rst | 107 ++++++++++++++++++ tests/test_generator.py | 1 + .../multimodal_client_testing_examples.py | 64 ++++++----- 6 files changed, 167 insertions(+), 43 deletions(-) create mode 100644 docs/source/tutorials/multimodal_client.rst create mode 100644 tests/test_generator.py diff --git a/adalflow/adalflow/components/model_client/openai_client.py b/adalflow/adalflow/components/model_client/openai_client.py index 4a13fe8d7..69b419229 100644 --- a/adalflow/adalflow/components/model_client/openai_client.py +++ b/adalflow/adalflow/components/model_client/openai_client.py @@ -110,7 +110,6 @@ class OpenAIClient(ModelClient): api_key (Optional[str], optional): OpenAI API key. Defaults to None. chat_completion_parser (Callable[[Completion], Any], optional): A function to parse the chat completion to a str. Defaults to None. input_type (Literal["text", "messages"], optional): The type of input to use. Defaults to "text". - model_type (ModelType, optional): The type of model to use (EMBEDDER, LLM, or IMAGE_GENERATION). Defaults to ModelType.LLM. Note: We suggest users not to use `response_format` to enforce output data type or `tools` and `tool_choice` in your model_kwargs when calling the API. @@ -142,7 +141,6 @@ def __init__( api_key: Optional[str] = None, chat_completion_parser: Callable[[Completion], Any] = None, input_type: Literal["text", "messages"] = "text", - model_type: ModelType = ModelType.LLM, ): r"""It is recommended to set the OPENAI_API_KEY environment variable instead of passing it as an argument. @@ -150,7 +148,6 @@ def __init__( api_key (Optional[str], optional): OpenAI API key. Defaults to None. chat_completion_parser (Callable[[Completion], Any], optional): A function to parse the chat completion to a str. Defaults to None. input_type (Literal["text", "messages"], optional): The type of input to use. Defaults to "text". - model_type (ModelType, optional): The type of model to use (EMBEDDER, LLM, or IMAGE_GENERATION). Defaults to ModelType.LLM. """ super().__init__() self._api_key = api_key @@ -160,7 +157,6 @@ def __init__( chat_completion_parser or get_first_message_content ) self._input_type = input_type - self.model_type = model_type def init_sync_client(self): api_key = self._api_key or os.getenv("OPENAI_API_KEY") @@ -235,6 +231,7 @@ def convert_inputs_to_api_kwargs( self, input: Optional[Any] = None, model_kwargs: Dict = {}, + model_type: ModelType = ModelType.UNDEFINED, # Now required in practice ) -> Dict: r""" Specify the API input type and output api_kwargs that will be used in _call and _acall methods. @@ -259,20 +256,23 @@ def convert_inputs_to_api_kwargs( - mask: Path to the mask image For variations (DALL-E 2 only): - image: Path to the input image + model_type: The type of model to use (EMBEDDER, LLM, or IMAGE_GENERATION). Required. Returns: Dict: API-specific kwargs for the model call """ + if model_type == ModelType.UNDEFINED: + raise ValueError("model_type must be specified") final_model_kwargs = model_kwargs.copy() - if self.model_type == ModelType.EMBEDDER: + if model_type == ModelType.EMBEDDER: if isinstance(input, str): input = [input] # convert input to input if not isinstance(input, Sequence): raise TypeError("input must be a sequence of text") final_model_kwargs["input"] = input - elif self.model_type == ModelType.LLM: + elif model_type == ModelType.LLM: # convert input to messages messages: List[Dict[str, str]] = [] images = final_model_kwargs.pop("images", None) @@ -317,7 +317,7 @@ def convert_inputs_to_api_kwargs( else: messages.append({"role": "system", "content": input}) final_model_kwargs["messages"] = messages - elif self.model_type == ModelType.IMAGE_GENERATION: + elif model_type == ModelType.IMAGE_GENERATION: # For image generation, input is the prompt final_model_kwargs["prompt"] = input # Ensure model is specified @@ -362,7 +362,7 @@ def convert_inputs_to_api_kwargs( else: raise ValueError(f"Invalid operation: {operation}") else: - raise ValueError(f"model_type {self.model_type} is not supported") + raise ValueError(f"model_type {model_type} is not supported") return final_model_kwargs def parse_image_generation_response(self, response: List[Image]) -> GeneratorOutput: @@ -379,11 +379,7 @@ def parse_image_generation_response(self, response: List[Image]) -> GeneratorOut ) except Exception as e: log.error(f"Error parsing image generation response: {e}") - return GeneratorOutput( - data=None, - error=str(e), - raw_response=str(response) - ) + return GeneratorOutput(data=None, error=str(e), raw_response=str(response)) @backoff.on_exception( backoff.expo, @@ -400,6 +396,9 @@ def call(self, api_kwargs: Dict = {}, model_type: ModelType = ModelType.UNDEFINE """ kwargs is the combined input and model_kwargs. Support streaming call. """ + if model_type == ModelType.UNDEFINED: + raise ValueError("model_type must be specified") + log.info(f"api_kwargs: {api_kwargs}") if model_type == ModelType.EMBEDDER: return self.sync_client.embeddings.create(**api_kwargs) @@ -449,6 +448,9 @@ async def acall( """ kwargs is the combined input and model_kwargs """ + if model_type == ModelType.UNDEFINED: + raise ValueError("model_type must be specified") + if self.async_client is None: self.async_client = self.init_async_client() if model_type == ModelType.EMBEDDER: diff --git a/adalflow/adalflow/core/generator.py b/adalflow/adalflow/core/generator.py index 1070da8ef..b2bb072b7 100644 --- a/adalflow/adalflow/core/generator.py +++ b/adalflow/adalflow/core/generator.py @@ -73,6 +73,7 @@ class Generator(GradComponent, CachedEngine, CallbackManager): name (Optional[str], optional): The name of the generator. Defaults to None. cache_path (Optional[str], optional): The path to save the cache. Defaults to None. use_cache (bool, optional): Whether to use cache. Defaults to False. + model_type (ModelType, optional): The type of the model. Defaults to ModelType.LLM. """ def __init__( @@ -90,6 +91,7 @@ def __init__( # args for the cache cache_path: Optional[str] = None, use_cache: bool = False, + model_type: ModelType = ModelType.LLM, # Add model_type parameter with default ) -> None: r"""The default prompt is set to the DEFAULT_ADALFLOW_SYSTEM_PROMPT. It has the following variables: - task_desc_str @@ -122,7 +124,7 @@ def __init__( CallbackManager.__init__(self) self.name = name or self.__class__.__name__ - self.model_type = model_client.model_type # Get model type from client + self.model_type = model_type # Use the passed model_type instead of getting from client self._init_prompt(template, prompt_kwargs) @@ -326,6 +328,7 @@ def _pre_call(self, prompt_kwargs: Dict, model_kwargs: Dict) -> Dict[str, Any]: api_kwargs = self.model_client.convert_inputs_to_api_kwargs( input=prompt_str, model_kwargs=composed_model_kwargs, + model_type=self.model_type, ) return api_kwargs diff --git a/adalflow/tests/test_generator.py b/adalflow/tests/test_generator.py index a15c302a5..e6631f10f 100644 --- a/adalflow/tests/test_generator.py +++ b/adalflow/tests/test_generator.py @@ -15,6 +15,7 @@ from adalflow.core.model_client import ModelClient from adalflow.components.model_client.groq_client import GroqAPIClient from adalflow.tracing import GeneratorStateLogger +from adalflow.core.types import ModelType class TestGenerator(IsolatedAsyncioTestCase): @@ -32,7 +33,7 @@ def setUp(self): ) self.mock_api_client = mock_api_client - self.generator = Generator(model_client=mock_api_client) + self.generator = Generator(model_client=mock_api_client, model_type=ModelType.LLM) self.save_dir = "./tests/log" self.project_name = "TestGenerator" self.filename = "prompt_logger_test.json" @@ -182,7 +183,7 @@ def test_groq_client_call(self, mock_call): template = "Hello, {{ input_str }}!" # Initialize the Generator with the mocked client - generator = Generator(model_client=self.client, template=template) + generator = Generator(model_client=self.client, template=template, model_type=ModelType.LLM) # Call the generator and get the output output = generator.call(prompt_kwargs=prompt_kwargs, model_kwargs=model_kwargs) diff --git a/docs/source/tutorials/multimodal_client.rst b/docs/source/tutorials/multimodal_client.rst new file mode 100644 index 000000000..a27547406 --- /dev/null +++ b/docs/source/tutorials/multimodal_client.rst @@ -0,0 +1,107 @@ +Multimodal Client Tutorial +======================= + +This tutorial demonstrates how to use the OpenAI client for different types of tasks: text generation, vision analysis, and image generation. + +Model Types +---------- + +The OpenAI client supports three types of operations: + +1. Text/Chat Completion (``ModelType.LLM``) + - Standard text generation + - Vision analysis (with GPT-4V) +2. Image Generation (``ModelType.IMAGE_GENERATION``) + - DALL-E image generation +3. Embeddings (``ModelType.EMBEDDER``) + - Text embeddings + +Basic Usage +---------- + +The model type is specified when creating a ``Generator`` instance: + +.. code-block:: python + + from adalflow.core import Generator + from adalflow.components.model_client.openai_client import OpenAIClient + from adalflow.core.types import ModelType + + # Create the client + client = OpenAIClient() + + # For text generation + gen = Generator( + model_client=client, + model_kwargs={"model": "gpt-4", "max_tokens": 100}, + model_type=ModelType.LLM # Specify LLM type + ) + response = gen({"input_str": "Hello, world!"}) + +Vision Tasks +----------- + +Vision tasks use ``ModelType.LLM`` since they are handled by GPT-4V: + +.. code-block:: python + + # Vision analysis + vision_gen = Generator( + model_client=client, + model_kwargs={ + "model": "gpt-4o-mini", + "images": "path/to/image.jpg", + "max_tokens": 300, + }, + model_type=ModelType.LLM # Vision uses LLM type + ) + response = vision_gen({"input_str": "What do you see in this image?"}) + +Image Generation +-------------- + +For DALL-E image generation, use ``ModelType.IMAGE_GENERATION``: + +.. code-block:: python + + # Image generation with DALL-E + dalle_gen = Generator( + model_client=client, + model_kwargs={ + "model": "dall-e-3", + "size": "1024x1024", + "quality": "standard", + "n": 1, + }, + model_type=ModelType.IMAGE_GENERATION # Specify image generation type + ) + response = dalle_gen({"input_str": "A cat playing with yarn"}) + +Backward Compatibility +-------------------- + +For backward compatibility with existing code: + +1. ``model_type`` defaults to ``ModelType.LLM`` if not specified +2. Older models that only support text continue to work with ``ModelType.LLM`` +3. The OpenAI client handles the appropriate API endpoints based on the model type + +Error Handling +------------- + +The client includes error handling for: + +1. Invalid model types for operations +2. Invalid image URLs or file paths +3. Unsupported model capabilities +4. API errors and rate limits + +Complete Example +-------------- + +See the complete example in ``tutorials/multimodal_client_testing_examples.py``, which demonstrates: + +1. Basic text generation +2. Vision analysis with image input +3. DALL-E image generation +4. Error handling for invalid inputs \ No newline at end of file diff --git a/tests/test_generator.py b/tests/test_generator.py new file mode 100644 index 000000000..0519ecba6 --- /dev/null +++ b/tests/test_generator.py @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/tutorials/multimodal_client_testing_examples.py b/tutorials/multimodal_client_testing_examples.py index d94f8e4e1..dd7edcf07 100644 --- a/tutorials/multimodal_client_testing_examples.py +++ b/tutorials/multimodal_client_testing_examples.py @@ -25,91 +25,101 @@ def test_basic_generation(): """Test basic text generation""" - client = OpenAIClient() # Default model_type is LLM + client = OpenAIClient() # For text/chat completion gen = Generator( model_client=client, - model_kwargs={ - "model": "gpt-4o-mini", - "max_tokens": 100 - } + model_kwargs={"model": "gpt-4o-mini", "max_tokens": 100}, + model_type=ModelType.LLM # Explicitly specify model type ) - + print("\n=== Testing Basic Generation ===") response = gen({"input_str": "Hello, world!"}) print(f"Response: {response}") + def test_invalid_image_url(): """Test Generator output with invalid image URL""" - client = OpenAIClient() # Default model_type is LLM + client = OpenAIClient() # For vision tasks gen = Generator( model_client=client, model_kwargs={ "model": "gpt-4o-mini", "images": "https://invalid.url/nonexistent.jpg", - "max_tokens": 300 - } + "max_tokens": 300, + }, + model_type=ModelType.LLM # Vision tasks use LLM type ) - + print("\n=== Testing Invalid Image URL ===") response = gen({"input_str": "What do you see in this image?"}) print(f"Response with invalid image URL: {response}") + def test_invalid_image_generation(): """Test DALL-E generation with invalid parameters""" - client = OpenAIClient(model_type=ModelType.IMAGE_GENERATION) + client = OpenAIClient() # For image generation gen = Generator( model_client=client, model_kwargs={ "model": "dall-e-3", "size": "invalid_size", # Invalid size parameter "quality": "standard", - "n": 1 - } + "n": 1, + }, + model_type=ModelType.IMAGE_GENERATION # Specify image generation type ) - + print("\n=== Testing Invalid DALL-E Parameters ===") response = gen({"input_str": "A cat"}) print(f"Response with invalid DALL-E parameters: {response}") + def test_vision_and_generation(): """Test both vision analysis and image generation""" # 1. Test Vision Analysis with LLM client - vision_client = OpenAIClient() # Default model_type is LLM + vision_client = OpenAIClient() # For vision tasks vision_gen = Generator( model_client=vision_client, model_kwargs={ "model": "gpt-4o-mini", "images": "https://upload.wikimedia.org/wikipedia/en/7/7d/Lenna_%28test_image%29.png", - "max_tokens": 300 - } + "max_tokens": 300, + }, + model_type=ModelType.LLM # Vision tasks use LLM type + ) + + vision_response = vision_gen( + {"input_str": "What do you see in this image? Be detailed but concise."} ) - - vision_response = vision_gen({"input_str": "What do you see in this image? Be detailed but concise."}) print("\n=== Vision Analysis ===") print(f"Description: {vision_response.raw_response}") - # 2. Test DALL-E Image Generation with IMAGE_GENERATION client - dalle_client = OpenAIClient(model_type=ModelType.IMAGE_GENERATION) + # 2. Test DALL-E Image Generation + dalle_client = OpenAIClient() # For image generation dalle_gen = Generator( model_client=dalle_client, model_kwargs={ "model": "dall-e-3", "size": "1024x1024", "quality": "standard", - "n": 1 - } + "n": 1, + }, + model_type=ModelType.IMAGE_GENERATION # Specify image generation type ) - + # For image generation, input_str becomes the prompt - response = dalle_gen({"input_str": "A happy siamese cat playing with a red ball of yarn"}) + response = dalle_gen( + {"input_str": "A happy siamese cat playing with a red ball of yarn"} + ) print("\n=== DALL-E Generation ===") print(f"Generated Image URL: {response.data}") + if __name__ == "__main__": print("Starting OpenAI Vision and DALL-E test...\n") - + # Run all tests - they will show errors if API key is invalid/empty test_basic_generation() test_invalid_image_url() test_invalid_image_generation() - test_vision_and_generation() \ No newline at end of file + test_vision_and_generation()