From 605f1f0a243bd4e9fc1e77a9de69e2cf7a3219c8 Mon Sep 17 00:00:00 2001
From: fm1320 <filipmakraduli.10@gmail.com>
Date: Thu, 16 Jan 2025 22:29:52 +0000
Subject: [PATCH 1/5] [WIP] Azure Client integration

---
 .../components/model_client/azure_client.py   | 273 ++++++++++++++++++
 tutorials/azure_example.py                    | 187 ++++++++++++
 2 files changed, 460 insertions(+)
 create mode 100644 adalflow/adalflow/components/model_client/azure_client.py
 create mode 100644 tutorials/azure_example.py
diff --git a/adalflow/adalflow/components/model_client/azure_client.py b/adalflow/adalflow/components/model_client/azure_client.py
new file mode 100644
index 000000000..7bbfe65dc
--- /dev/null
+++ b/adalflow/adalflow/components/model_client/azure_client.py
@@ -0,0 +1,273 @@
+"""Azure OpenAI ModelClient integration."""
+
+import os
+from typing import Dict, Optional, Any, Callable, Literal
+import backoff
+import logging
+
+from adalflow.core.model_client import ModelClient
+from adalflow.core.types import ModelType, CompletionUsage, GeneratorOutput
+
+# optional import
+from adalflow.utils.lazy_import import safe_import, OptionalPackages
+
+openai = safe_import(OptionalPackages.OPENAI.value[0], OptionalPackages.OPENAI.value[1])
+
+from openai import AzureOpenAI, AsyncAzureOpenAI, Stream
+from openai import (
+    APITimeoutError,
+    InternalServerError,
+    RateLimitError,
+    UnprocessableEntityError,
+    BadRequestError,
+)
+from openai.types import (
+    Completion,
+    CreateEmbeddingResponse,
+)
+from openai.types.chat import ChatCompletionChunk, ChatCompletion
+from adalflow.components.model_client.utils import parse_embedding_response
+
+log = logging.getLogger(__name__)
+
+def get_first_message_content(completion: ChatCompletion) -> str:
+    """When we only need the content of the first message.
+    It is the default parser for chat completion."""
+    return completion.choices[0].message.content
+
+def parse_stream_response(completion: ChatCompletionChunk) -> str:
+    """Parse the response of the stream API."""
+    return completion.choices[0].delta.content
+
+def handle_streaming_response(generator: Stream[ChatCompletionChunk]):
+    """Handle the streaming response."""
+    for completion in generator:
+        log.debug(f"Raw chunk completion: {completion}")
+        parsed_content = parse_stream_response(completion)
+        yield parsed_content
+
+class AzureClient(ModelClient):
+    """A component wrapper for the Azure OpenAI API client.
+
+    This client supports both chat completion and embedding APIs through Azure OpenAI.
+    It can be used with both sync and async operations.
+
+    Args:
+        api_key (Optional[str]): Azure OpenAI API key
+        api_version (Optional[str]): API version to use
+        azure_endpoint (Optional[str]): Azure OpenAI endpoint URL (e.g., https://<resource-name>.openai.azure.com/)
+        base_url (Optional[str]): Alternative base URL format (e.g., https://<model-deployment-name>.<region>.models.ai.azure.com)
+        chat_completion_parser (Optional[Callable]): Function to parse chat completions
+        input_type (Literal["text", "messages"]): Format for input
+
+    Environment Variables:
+        AZURE_OPENAI_API_KEY: API key
+        AZURE_OPENAI_ENDPOINT: Endpoint URL (new format)
+        AZURE_BASE_URL: Base URL (alternative format)
+        AZURE_OPENAI_VERSION: API version
+
+    Example:
+        >>> from adalflow.components.model_client import AzureClient
+        >>> client = AzureClient()
+        >>> generator = Generator(
+        ...     model_client=client,
+        ...     model_kwargs={
+        ...         "model": "gpt-4",
+        ...         "temperature": 0.7
+        ...     }
+        ... )
+        >>> response = generator({"input_str": "What is the capital of France?"})
+    """
+
+    def __init__(
+        self,
+        api_key: Optional[str] = None,
+        api_version: Optional[str] = None,
+        azure_endpoint: Optional[str] = None,
+        base_url: Optional[str] = None,
+        chat_completion_parser: Callable[[Completion], Any] = None,
+        input_type: Literal["text", "messages"] = "text",
+    ):
+        super().__init__()
+        self._api_key = api_key
+        self._api_version = api_version
+        self._azure_endpoint = azure_endpoint
+        self._base_url = base_url
+        self.sync_client = self.init_sync_client()
+        self.async_client = None
+        self.chat_completion_parser = chat_completion_parser or get_first_message_content
+        self._input_type = input_type
+
+    def _get_endpoint(self) -> str:
+        """Get the appropriate endpoint URL based on available configuration."""
+        # First try the new format endpoint
+        endpoint = self._azure_endpoint or os.getenv("AZURE_OPENAI_ENDPOINT")
+        if endpoint:
+            return endpoint
+
+        # Then try the alternative base URL format
+        base_url = self._base_url or os.getenv("AZURE_BASE_URL")
+        if base_url:
+            # If base_url is provided in the format https://<model>.<region>.models.ai.azure.com
+            # we need to extract the model and region
+            if "models.ai.azure.com" in base_url:
+                return base_url.rstrip("/")
+            # If it's just the model name, construct the full URL
+            return f"https://{base_url}.openai.azure.com"
+
+        raise ValueError(
+            "Either AZURE_OPENAI_ENDPOINT or AZURE_BASE_URL must be set. "
+            "Check your deployment page for a URL like: "
+            "https://<resource-name>.openai.azure.com/ or "
+            "https://<model-deployment-name>.<region>.models.ai.azure.com"
+        )
+
+    def init_sync_client(self):
+        api_key = self._api_key or os.getenv("AZURE_OPENAI_API_KEY")
+        api_version = self._api_version or os.getenv("AZURE_OPENAI_VERSION")
+
+        if not api_key:
+            raise ValueError("Environment variable AZURE_OPENAI_API_KEY must be set")
+        if not api_version:
+            raise ValueError("Environment variable AZURE_OPENAI_VERSION must be set")
+
+        endpoint = self._get_endpoint()
+        
+        return AzureOpenAI(
+            api_key=api_key,
+            api_version=api_version,
+            azure_endpoint=endpoint
+        )
+
+    def init_async_client(self):
+        api_key = self._api_key or os.getenv("AZURE_OPENAI_API_KEY")
+        api_version = self._api_version or os.getenv("AZURE_OPENAI_VERSION")
+
+        if not api_key:
+            raise ValueError("Environment variable AZURE_OPENAI_API_KEY must be set")
+        if not api_version:
+            raise ValueError("Environment variable AZURE_OPENAI_VERSION must be set")
+
+        endpoint = self._get_endpoint()
+
+        return AsyncAzureOpenAI(
+            api_key=api_key,
+            api_version=api_version,
+            azure_endpoint=endpoint
+        )
+
+    def convert_inputs_to_api_kwargs(
+        self,
+        input: Optional[Any] = None,
+        model_kwargs: Dict = {},
+        model_type: ModelType = ModelType.UNDEFINED,
+    ) -> Dict:
+        """Convert inputs to Azure OpenAI API kwargs format."""
+        final_model_kwargs = model_kwargs.copy()
+
+        if model_type == ModelType.EMBEDDER:
+            if isinstance(input, str):
+                input = [input]
+            assert isinstance(input, (list, tuple)), "input must be a sequence of text"
+            final_model_kwargs["input"] = input
+        elif model_type == ModelType.LLM:
+            messages = []
+            if input is not None and input != "":
+                if self._input_type == "text":
+                    messages.append({"role": "system", "content": input})
+                else:
+                    messages.extend(input)
+            final_model_kwargs["messages"] = messages
+        else:
+            raise ValueError(f"model_type {model_type} is not supported")
+
+        # Ensure model is specified
+        if "model" not in final_model_kwargs:
+            raise ValueError("model must be specified")
+
+        return final_model_kwargs
+
+    def parse_chat_completion(self, completion: ChatCompletion) -> GeneratorOutput:
+        """Parse chat completion response."""
+        log.debug(f"completion: {completion}")
+        try:
+            data = self.chat_completion_parser(completion)
+            usage = self.track_completion_usage(completion)
+            return GeneratorOutput(data=None, usage=usage, raw_response=data)
+        except Exception as e:
+            log.error(f"Error parsing completion: {e}")
+            return GeneratorOutput(
+                data=None, error=str(e), raw_response=str(completion)
+            )
+
+    def track_completion_usage(self, completion: ChatCompletion) -> CompletionUsage:
+        """Track completion token usage."""
+        usage = completion.usage
+        return CompletionUsage(
+            completion_tokens=usage.completion_tokens,
+            prompt_tokens=usage.prompt_tokens,
+            total_tokens=usage.total_tokens,
+        )
+
+    @backoff.on_exception(
+        backoff.expo,
+        (
+            APITimeoutError,
+            InternalServerError,
+            RateLimitError,
+            UnprocessableEntityError,
+            BadRequestError,
+        ),
+        max_time=5,
+    )
+    def call(self, api_kwargs: Dict = {}, model_type: ModelType = ModelType.UNDEFINED):
+        """Make a synchronous call to Azure OpenAI API."""
+        log.info(f"api_kwargs: {api_kwargs}")
+        if model_type == ModelType.EMBEDDER:
+            return self.sync_client.embeddings.create(**api_kwargs)
+        elif model_type == ModelType.LLM:
+            if "stream" in api_kwargs and api_kwargs.get("stream", False):
+                log.debug("streaming call")
+                self.chat_completion_parser = handle_streaming_response
+                return self.sync_client.chat.completions.create(**api_kwargs)
+            return self.sync_client.chat.completions.create(**api_kwargs)
+        else:
+            raise ValueError(f"model_type {model_type} is not supported")
+
+    @backoff.on_exception(
+        backoff.expo,
+        (
+            APITimeoutError,
+            InternalServerError,
+            RateLimitError,
+            UnprocessableEntityError,
+            BadRequestError,
+        ),
+        max_time=5,
+    )
+    async def acall(
+        self, api_kwargs: Dict = {}, model_type: ModelType = ModelType.UNDEFINED
+    ):
+        """Make an asynchronous call to Azure OpenAI API."""
+        if self.async_client is None:
+            self.async_client = self.init_async_client()
+        if model_type == ModelType.EMBEDDER:
+            return await self.async_client.embeddings.create(**api_kwargs)
+        elif model_type == ModelType.LLM:
+            return await self.async_client.chat.completions.create(**api_kwargs)
+        else:
+            raise ValueError(f"model_type {model_type} is not supported")
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> 'AzureClient':
+        """Create an instance from a dictionary."""
+        obj = super().from_dict(data)
+        obj.sync_client = obj.init_sync_client()
+        obj.async_client = obj.init_async_client()
+        return obj
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert the instance to a dictionary."""
+        exclude = ["sync_client", "async_client"]
+        output = super().to_dict(exclude=exclude)
+        return output 
\ No newline at end of file
diff --git a/tutorials/azure_example.py b/tutorials/azure_example.py
new file mode 100644
index 000000000..030cf634f
--- /dev/null
+++ b/tutorials/azure_example.py
@@ -0,0 +1,187 @@
+"""Example script demonstrating Azure OpenAI client usage in AdalFlow."""
+
+import os
+import asyncio
+from adalflow.components.model_client import AzureClient
+from adalflow.core.generator import Generator
+from adalflow.core.types import ModelType
+
+# Demo configuration - Replace these with your actual values
+DEMO_CONFIG = {
+    "api_key": "your-api-key",  # From Azure Portal > Keys and Endpoint
+    "azure_endpoint": "https://your-resource.openai.azure.com/",  # Your Azure OpenAI endpoint
+    "api_version": "2024-02-15-preview",  # Current API version
+    "deployment_name": "gpt-35-turbo"  # Your model deployment name
+}
+
+def setup_environment():
+    """Setup environment variables if not already set."""
+    if not os.getenv("AZURE_OPENAI_API_KEY"):
+        print("Setting up demo environment variables...")
+        os.environ["AZURE_OPENAI_API_KEY"] = DEMO_CONFIG["api_key"]
+        os.environ["AZURE_OPENAI_ENDPOINT"] = DEMO_CONFIG["azure_endpoint"]
+        os.environ["AZURE_OPENAI_VERSION"] = DEMO_CONFIG["api_version"]
+    else:
+        print("Using existing environment variables...")
+
+def test_chat_completion():
+    """Test chat completion with Azure OpenAI."""
+    print("\nTesting chat completion...")
+    client = AzureClient()
+    generator = Generator(
+        model_client=client,
+        model_kwargs={
+            "model": DEMO_CONFIG["deployment_name"],
+            "temperature": 0.7,
+        },
+        model_type=ModelType.LLM
+    )
+
+    # Single turn conversation
+    response = generator("What is the capital of France?")
+    print("\nChat Completion Response:")
+    print(f"Content: {response.raw_response}")
+    print(f"Usage: {response.usage}")
+    print(f"Error: {response.error}")
+
+    # Multi-turn conversation
+    messages = [
+        {"role": "system", "content": "You are a helpful assistant."},
+        {"role": "user", "content": "What is the capital of France?"},
+        {"role": "assistant", "content": "The capital of France is Paris."},
+        {"role": "user", "content": "What is its population?"}
+    ]
+    
+    client = AzureClient(input_type="messages")
+    generator = Generator(
+        model_client=client,
+        model_kwargs={
+            "model": DEMO_CONFIG["deployment_name"],
+            "temperature": 0.7,
+        },
+        model_type=ModelType.LLM
+    )
+    
+    response = generator(messages)
+    print("\nMulti-turn Conversation Response:")
+    print(f"Content: {response.raw_response}")
+    print(f"Usage: {response.usage}")
+    print(f"Error: {response.error}")
+
+def test_embeddings():
+    """Test embeddings with Azure OpenAI."""
+    print("\nTesting embeddings...")
+    client = AzureClient()
+    generator = Generator(
+        model_client=client,
+        model_kwargs={
+            "model": "text-embedding-ada-002",  # Standard embedding model name
+        },
+        model_type=ModelType.EMBEDDER
+    )
+
+    # Single text embedding
+    response = generator("Hello, world!")
+    print("\nSingle Text Embedding Response:")
+    print(f"Embedding shape: {len(response.raw_response)}")
+    print(f"Usage: {response.usage}")
+    print(f"Error: {response.error}")
+
+    # Multiple text embeddings
+    texts = ["Hello, world!", "How are you?", "Nice to meet you!"]
+    response = generator(texts)
+    print("\nMultiple Text Embeddings Response:")
+    print(f"Number of embeddings: {len(response.raw_response)}")
+    print(f"Usage: {response.usage}")
+    print(f"Error: {response.error}")
+
+async def test_async_chat():
+    """Test async chat completion with Azure OpenAI."""
+    print("\nTesting async chat completion...")
+    client = AzureClient()
+    generator = Generator(
+        model_client=client,
+        model_kwargs={
+            "model": DEMO_CONFIG["deployment_name"],
+            "temperature": 0.7,
+        },
+        model_type=ModelType.LLM
+    )
+
+    response = await generator.acall("What is the capital of France?")
+    print("\nAsync Chat Completion Response:")
+    print(f"Content: {response.raw_response}")
+    print(f"Usage: {response.usage}")
+    print(f"Error: {response.error}")
+
+def test_streaming():
+    """Test streaming chat completion with Azure OpenAI."""
+    print("\nTesting streaming chat completion...")
+    client = AzureClient()
+    generator = Generator(
+        model_client=client,
+        model_kwargs={
+            "model": DEMO_CONFIG["deployment_name"],
+            "temperature": 0.7,
+            "stream": True
+        },
+        model_type=ModelType.LLM
+    )
+
+    print("\nStreaming Chat Completion Response:")
+    for chunk in generator("Tell me a short story about a cat."):
+        if chunk.raw_response:
+            print(chunk.raw_response, end="", flush=True)
+    print("\n")
+
+def check_requirements():
+    """Check if all required packages are installed."""
+    try:
+        import openai
+        import azure.identity
+        import azure.mgmt.cognitiveservices
+        print("All required packages are installed.")
+        return True
+    except ImportError as e:
+        print(f"Missing required package: {e}")
+        print("Please install required packages:")
+        print("poetry add openai azure-identity azure-mgmt-cognitiveservices")
+        return False
+
+if __name__ == "__main__":
+    print("Azure OpenAI Client Test Script")
+    print("==============================")
+    
+    if not check_requirements():
+        exit(1)
+
+    setup_environment()
+
+    # Check for required environment variables
+    required_vars = ["AZURE_OPENAI_API_KEY", "AZURE_OPENAI_ENDPOINT", "AZURE_OPENAI_VERSION"]
+    missing_vars = [var for var in required_vars if not os.getenv(var)]
+    if missing_vars:
+        print(f"Missing required environment variables: {', '.join(missing_vars)}")
+        print("Please set them before running this script.")
+        print("\nYou can set them in your environment:")
+        print("export AZURE_OPENAI_API_KEY='your-key'")
+        print("export AZURE_OPENAI_ENDPOINT='your-endpoint'")
+        print("export AZURE_OPENAI_VERSION='api-version'")
+        print("\nOr update the DEMO_CONFIG in this script.")
+        exit(1)
+
+    print("\nStarting Azure OpenAI tests...")
+    
+    try:
+        # Test synchronous operations
+        test_chat_completion()
+        test_embeddings()
+        test_streaming()
+        
+        # Test asynchronous operations
+        asyncio.run(test_async_chat())
+        
+        print("\nAll tests completed!")
+    except Exception as e:
+        print(f"\nError during testing: {e}")
+        print("\nPlease check your Azure OpenAI setup and credentials.") 
\ No newline at end of file

From fd8097496ab13c9b25c2f66d275b473ba74ea2f1 Mon Sep 17 00:00:00 2001
From: fm1320 <filipmakraduli.10@gmail.com>
Date: Thu, 16 Jan 2025 23:54:00 +0000
Subject: [PATCH 2/5] no subclass for image generation proposal

---
 .../components/model_client/openai_client.py  | 117 +++++++++++++-----
 adalflow/adalflow/core/generator.py           |  15 +++
 .../multimodal_client_testing_examples.py     |  14 +--
 3 files changed, 104 insertions(+), 42 deletions(-)

diff --git a/adalflow/adalflow/components/model_client/openai_client.py b/adalflow/adalflow/components/model_client/openai_client.py
index c3750667d..e3b7d01a7 100644
--- a/adalflow/adalflow/components/model_client/openai_client.py
+++ b/adalflow/adalflow/components/model_client/openai_client.py
@@ -243,7 +243,18 @@ def convert_inputs_to_api_kwargs(
                 - images: Optional image source(s) as path, URL, or list of them
                 - detail: Image detail level ('auto', 'low', or 'high'), defaults to 'auto'
                 - model: The model to use (must support multimodal inputs if images are provided)
-            model_type: The type of model (EMBEDDER or LLM)
+                For image generation:
+                - model: "dall-e-3" or "dall-e-2"
+                - size: "1024x1024", "1024x1792", or "1792x1024" for DALL-E 3; "256x256", "512x512", or "1024x1024" for DALL-E 2
+                - quality: "standard" or "hd" (DALL-E 3 only)
+                - n: Number of images (1 for DALL-E 3, 1-10 for DALL-E 2)
+                - response_format: "url" or "b64_json"
+                For image edits (DALL-E 2 only):
+                - image: Path to the input image
+                - mask: Path to the mask image
+                For variations (DALL-E 2 only):
+                - image: Path to the input image
+            model_type: The type of model (EMBEDDER, LLM, or IMAGE_GENERATION)
 
         Returns:
             Dict: API-specific kwargs for the model call
@@ -308,20 +319,44 @@ def convert_inputs_to_api_kwargs(
             # Ensure model is specified
             if "model" not in final_model_kwargs:
                 raise ValueError("model must be specified for image generation")
-            # Set defaults for DALL-E 3 if not specified
-            final_model_kwargs["size"] = final_model_kwargs.get("size", "1024x1024")
-            final_model_kwargs["quality"] = final_model_kwargs.get("quality", "standard")
-            final_model_kwargs["n"] = final_model_kwargs.get("n", 1)
-            final_model_kwargs["response_format"] = final_model_kwargs.get("response_format", "url")
-
-            # Handle image edits and variations
-            image = final_model_kwargs.get("image")
-            if isinstance(image, str) and os.path.isfile(image):
-                final_model_kwargs["image"] = self._encode_image(image)
             
-            mask = final_model_kwargs.get("mask")
-            if isinstance(mask, str) and os.path.isfile(mask):
-                final_model_kwargs["mask"] = self._encode_image(mask)
+            # Set defaults for image generation
+            if "operation" not in final_model_kwargs:
+                final_model_kwargs["operation"] = "generate"  # Default operation
+            
+            operation = final_model_kwargs.pop("operation")
+            
+            if operation == "generate":
+                # Set defaults for DALL-E 3 if not specified
+                final_model_kwargs["size"] = final_model_kwargs.get("size", "1024x1024")
+                final_model_kwargs["quality"] = final_model_kwargs.get("quality", "standard")
+                final_model_kwargs["n"] = final_model_kwargs.get("n", 1)
+                final_model_kwargs["response_format"] = final_model_kwargs.get("response_format", "url")
+            
+            elif operation in ["edit", "variation"]:
+                if "model" not in final_model_kwargs or final_model_kwargs["model"] != "dall-e-2":
+                    raise ValueError(f"{operation} operation is only available with DALL-E 2")
+                
+                # Handle image input
+                image_path = final_model_kwargs.get("image")
+                if not image_path or not os.path.isfile(image_path):
+                    raise ValueError(f"Valid image path must be provided for {operation}")
+                final_model_kwargs["image"] = open(image_path, "rb")
+                
+                # Handle mask for edit operation
+                if operation == "edit":
+                    mask_path = final_model_kwargs.get("mask")
+                    if not mask_path or not os.path.isfile(mask_path):
+                        raise ValueError("Valid mask path must be provided for edit operation")
+                    final_model_kwargs["mask"] = open(mask_path, "rb")
+                
+                # Set defaults
+                final_model_kwargs["size"] = final_model_kwargs.get("size", "1024x1024")
+                final_model_kwargs["n"] = final_model_kwargs.get("n", 1)
+                final_model_kwargs["response_format"] = final_model_kwargs.get("response_format", "url")
+            
+            else:
+                raise ValueError(f"Invalid operation: {operation}")
         else:
             raise ValueError(f"model_type {model_type} is not supported")
         return final_model_kwargs
@@ -371,18 +406,25 @@ def call(self, api_kwargs: Dict = {}, model_type: ModelType = ModelType.UNDEFINE
                 return self.sync_client.chat.completions.create(**api_kwargs)
             return self.sync_client.chat.completions.create(**api_kwargs)
         elif model_type == ModelType.IMAGE_GENERATION:
-            # Determine which image API to call based on the presence of image/mask
-            if "image" in api_kwargs:
-                if "mask" in api_kwargs:
-                    # Image edit
+            operation = api_kwargs.pop("operation", "generate")
+            
+            try:
+                if operation == "generate":
+                    response = self.sync_client.images.generate(**api_kwargs)
+                elif operation == "edit":
                     response = self.sync_client.images.edit(**api_kwargs)
-                else:
-                    # Image variation
+                elif operation == "variation":
                     response = self.sync_client.images.create_variation(**api_kwargs)
-            else:
-                # Image generation
-                response = self.sync_client.images.generate(**api_kwargs)
-            return response.data
+                else:
+                    raise ValueError(f"Invalid operation: {operation}")
+                
+                return response.data
+            finally:
+                # Clean up file handles if they exist
+                if "image" in api_kwargs and hasattr(api_kwargs["image"], "close"):
+                    api_kwargs["image"].close()
+                if "mask" in api_kwargs and hasattr(api_kwargs["mask"], "close"):
+                    api_kwargs["mask"].close()
         else:
             raise ValueError(f"model_type {model_type} is not supported")
 
@@ -410,18 +452,25 @@ async def acall(
         elif model_type == ModelType.LLM:
             return await self.async_client.chat.completions.create(**api_kwargs)
         elif model_type == ModelType.IMAGE_GENERATION:
-            # Determine which image API to call based on the presence of image/mask
-            if "image" in api_kwargs:
-                if "mask" in api_kwargs:
-                    # Image edit
+            operation = api_kwargs.pop("operation", "generate")
+            
+            try:
+                if operation == "generate":
+                    response = await self.async_client.images.generate(**api_kwargs)
+                elif operation == "edit":
                     response = await self.async_client.images.edit(**api_kwargs)
-                else:
-                    # Image variation
+                elif operation == "variation":
                     response = await self.async_client.images.create_variation(**api_kwargs)
-            else:
-                # Image generation
-                response = await self.async_client.images.generate(**api_kwargs)
-            return response.data
+                else:
+                    raise ValueError(f"Invalid operation: {operation}")
+                
+                return response.data
+            finally:
+                # Clean up file handles if they exist
+                if "image" in api_kwargs and hasattr(api_kwargs["image"], "close"):
+                    api_kwargs["image"].close()
+                if "mask" in api_kwargs and hasattr(api_kwargs["mask"], "close"):
+                    api_kwargs["mask"].close()
         else:
             raise ValueError(f"model_type {model_type} is not supported")
 
diff --git a/adalflow/adalflow/core/generator.py b/adalflow/adalflow/core/generator.py
index baedd8fb7..a1714bbd1 100644
--- a/adalflow/adalflow/core/generator.py
+++ b/adalflow/adalflow/core/generator.py
@@ -100,6 +100,8 @@ def __init__(
         # args for the cache
         cache_path: Optional[str] = None,
         use_cache: bool = False,
+        # args for model type
+        model_type: ModelType = ModelType.LLM,
     ) -> None:
         r"""The default prompt is set to the DEFAULT_ADALFLOW_SYSTEM_PROMPT. It has the following variables:
         - task_desc_str
@@ -110,6 +112,17 @@ def __init__(
         - steps_str
         You can preset the prompt kwargs to fill in the variables in the prompt using prompt_kwargs.
         But you can replace the prompt and set any variables you want and use the prompt_kwargs to fill in the variables.
+
+        Args:
+            model_client (ModelClient): The model client to use for the generator.
+            model_kwargs (Dict[str, Any], optional): The model kwargs to pass to the model client. Defaults to {}. Please refer to :ref:`ModelClient<components-model_client>` for the details on how to set the model_kwargs for your specific model if it is from our library.
+            template (Optional[str], optional): The template for the prompt.  Defaults to :ref:`DEFAULT_ADALFLOW_SYSTEM_PROMPT<core-default_prompt_template>`.
+            prompt_kwargs (Optional[Dict], optional): The preset prompt kwargs to fill in the variables in the prompt. Defaults to None.
+            output_processors (Optional[Component], optional):  The output processors after model call. It can be a single component or a chained component via ``Sequential``. Defaults to None.
+            name (Optional[str], optional): The name of the generator. Defaults to None.
+            cache_path (Optional[str], optional): The path to save the cache. Defaults to None.
+            use_cache (bool, optional): Whether to use cache. Defaults to False.
+            model_type (ModelType, optional): The type of model (EMBEDDER, LLM, or IMAGE_GENERATION). Defaults to ModelType.LLM.
         """
 
         if not isinstance(model_client, ModelClient):
@@ -133,6 +146,7 @@ def __init__(
         CallbackManager.__init__(self)
 
         self.name = name or self.__class__.__name__
+        self.model_type = model_type
 
         self._init_prompt(template, prompt_kwargs)
 
@@ -163,6 +177,7 @@ def __init__(
             "name": name,
             "cache_path": cache_path,
             "use_cache": use_cache,
+            "model_type": model_type,
         }
         self._teacher: Optional["Generator"] = None
         self._trace_api_kwargs: Dict[str, Any] = (
diff --git a/tutorials/multimodal_client_testing_examples.py b/tutorials/multimodal_client_testing_examples.py
index 44bdbc69e..2c1180de1 100644
--- a/tutorials/multimodal_client_testing_examples.py
+++ b/tutorials/multimodal_client_testing_examples.py
@@ -23,10 +23,6 @@
 from typing import List
 from numpy.linalg import norm
 
-class ImageGenerator(Generator):
-    """Generator subclass for image generation."""
-    model_type = ModelType.IMAGE_GENERATION
-
 def test_basic_generation():
     """Test basic text generation"""
     client = OpenAIClient()
@@ -61,14 +57,15 @@ def test_invalid_image_url():
 def test_invalid_image_generation():
     """Test DALL-E generation with invalid parameters"""
     client = OpenAIClient()
-    gen = ImageGenerator(
+    gen = Generator(
         model_client=client,
         model_kwargs={
             "model": "dall-e-3",
             "size": "invalid_size",  # Invalid size parameter
             "quality": "standard",
             "n": 1
-        }
+        },
+        model_type=ModelType.IMAGE_GENERATION
     )
     
     print("\n=== Testing Invalid DALL-E Parameters ===")
@@ -94,14 +91,15 @@ def test_vision_and_generation():
     print(f"Description: {vision_response.raw_response}")
 
     # 2. Test DALL-E Image Generation
-    dalle_gen = ImageGenerator(
+    dalle_gen = Generator(
         model_client=client,
         model_kwargs={
             "model": "dall-e-3",
             "size": "1024x1024",
             "quality": "standard",
             "n": 1
-        }
+        },
+        model_type=ModelType.IMAGE_GENERATION
     )
     
     # For image generation, input_str becomes the prompt

From 41455959e0905dae44cc6b91741b2ae66f2561b7 Mon Sep 17 00:00:00 2001
From: fm1320 <filipmakraduli.10@gmail.com>
Date: Fri, 17 Jan 2025 03:39:36 +0000
Subject: [PATCH 3/5] undo accidental commit

---
 .../components/model_client/azure_client.py   | 273 ------------------
 tutorials/azure_example.py                    | 187 ------------
 2 files changed, 460 deletions(-)
 delete mode 100644 adalflow/adalflow/components/model_client/azure_client.py
 delete mode 100644 tutorials/azure_example.py

diff --git a/adalflow/adalflow/components/model_client/azure_client.py b/adalflow/adalflow/components/model_client/azure_client.py
deleted file mode 100644
index 7bbfe65dc..000000000
--- a/adalflow/adalflow/components/model_client/azure_client.py
+++ /dev/null
@@ -1,273 +0,0 @@
-"""Azure OpenAI ModelClient integration."""
-
-import os
-from typing import Dict, Optional, Any, Callable, Literal
-import backoff
-import logging
-
-from adalflow.core.model_client import ModelClient
-from adalflow.core.types import ModelType, CompletionUsage, GeneratorOutput
-
-# optional import
-from adalflow.utils.lazy_import import safe_import, OptionalPackages
-
-openai = safe_import(OptionalPackages.OPENAI.value[0], OptionalPackages.OPENAI.value[1])
-
-from openai import AzureOpenAI, AsyncAzureOpenAI, Stream
-from openai import (
-    APITimeoutError,
-    InternalServerError,
-    RateLimitError,
-    UnprocessableEntityError,
-    BadRequestError,
-)
-from openai.types import (
-    Completion,
-    CreateEmbeddingResponse,
-)
-from openai.types.chat import ChatCompletionChunk, ChatCompletion
-from adalflow.components.model_client.utils import parse_embedding_response
-
-log = logging.getLogger(__name__)
-
-def get_first_message_content(completion: ChatCompletion) -> str:
-    """When we only need the content of the first message.
-    It is the default parser for chat completion."""
-    return completion.choices[0].message.content
-
-def parse_stream_response(completion: ChatCompletionChunk) -> str:
-    """Parse the response of the stream API."""
-    return completion.choices[0].delta.content
-
-def handle_streaming_response(generator: Stream[ChatCompletionChunk]):
-    """Handle the streaming response."""
-    for completion in generator:
-        log.debug(f"Raw chunk completion: {completion}")
-        parsed_content = parse_stream_response(completion)
-        yield parsed_content
-
-class AzureClient(ModelClient):
-    """A component wrapper for the Azure OpenAI API client.
-
-    This client supports both chat completion and embedding APIs through Azure OpenAI.
-    It can be used with both sync and async operations.
-
-    Args:
-        api_key (Optional[str]): Azure OpenAI API key
-        api_version (Optional[str]): API version to use
-        azure_endpoint (Optional[str]): Azure OpenAI endpoint URL (e.g., https://<resource-name>.openai.azure.com/)
-        base_url (Optional[str]): Alternative base URL format (e.g., https://<model-deployment-name>.<region>.models.ai.azure.com)
-        chat_completion_parser (Optional[Callable]): Function to parse chat completions
-        input_type (Literal["text", "messages"]): Format for input
-
-    Environment Variables:
-        AZURE_OPENAI_API_KEY: API key
-        AZURE_OPENAI_ENDPOINT: Endpoint URL (new format)
-        AZURE_BASE_URL: Base URL (alternative format)
-        AZURE_OPENAI_VERSION: API version
-
-    Example:
-        >>> from adalflow.components.model_client import AzureClient
-        >>> client = AzureClient()
-        >>> generator = Generator(
-        ...     model_client=client,
-        ...     model_kwargs={
-        ...         "model": "gpt-4",
-        ...         "temperature": 0.7
-        ...     }
-        ... )
-        >>> response = generator({"input_str": "What is the capital of France?"})
-    """
-
-    def __init__(
-        self,
-        api_key: Optional[str] = None,
-        api_version: Optional[str] = None,
-        azure_endpoint: Optional[str] = None,
-        base_url: Optional[str] = None,
-        chat_completion_parser: Callable[[Completion], Any] = None,
-        input_type: Literal["text", "messages"] = "text",
-    ):
-        super().__init__()
-        self._api_key = api_key
-        self._api_version = api_version
-        self._azure_endpoint = azure_endpoint
-        self._base_url = base_url
-        self.sync_client = self.init_sync_client()
-        self.async_client = None
-        self.chat_completion_parser = chat_completion_parser or get_first_message_content
-        self._input_type = input_type
-
-    def _get_endpoint(self) -> str:
-        """Get the appropriate endpoint URL based on available configuration."""
-        # First try the new format endpoint
-        endpoint = self._azure_endpoint or os.getenv("AZURE_OPENAI_ENDPOINT")
-        if endpoint:
-            return endpoint
-
-        # Then try the alternative base URL format
-        base_url = self._base_url or os.getenv("AZURE_BASE_URL")
-        if base_url:
-            # If base_url is provided in the format https://<model>.<region>.models.ai.azure.com
-            # we need to extract the model and region
-            if "models.ai.azure.com" in base_url:
-                return base_url.rstrip("/")
-            # If it's just the model name, construct the full URL
-            return f"https://{base_url}.openai.azure.com"
-
-        raise ValueError(
-            "Either AZURE_OPENAI_ENDPOINT or AZURE_BASE_URL must be set. "
-            "Check your deployment page for a URL like: "
-            "https://<resource-name>.openai.azure.com/ or "
-            "https://<model-deployment-name>.<region>.models.ai.azure.com"
-        )
-
-    def init_sync_client(self):
-        api_key = self._api_key or os.getenv("AZURE_OPENAI_API_KEY")
-        api_version = self._api_version or os.getenv("AZURE_OPENAI_VERSION")
-
-        if not api_key:
-            raise ValueError("Environment variable AZURE_OPENAI_API_KEY must be set")
-        if not api_version:
-            raise ValueError("Environment variable AZURE_OPENAI_VERSION must be set")
-
-        endpoint = self._get_endpoint()
-        
-        return AzureOpenAI(
-            api_key=api_key,
-            api_version=api_version,
-            azure_endpoint=endpoint
-        )
-
-    def init_async_client(self):
-        api_key = self._api_key or os.getenv("AZURE_OPENAI_API_KEY")
-        api_version = self._api_version or os.getenv("AZURE_OPENAI_VERSION")
-
-        if not api_key:
-            raise ValueError("Environment variable AZURE_OPENAI_API_KEY must be set")
-        if not api_version:
-            raise ValueError("Environment variable AZURE_OPENAI_VERSION must be set")
-
-        endpoint = self._get_endpoint()
-
-        return AsyncAzureOpenAI(
-            api_key=api_key,
-            api_version=api_version,
-            azure_endpoint=endpoint
-        )
-
-    def convert_inputs_to_api_kwargs(
-        self,
-        input: Optional[Any] = None,
-        model_kwargs: Dict = {},
-        model_type: ModelType = ModelType.UNDEFINED,
-    ) -> Dict:
-        """Convert inputs to Azure OpenAI API kwargs format."""
-        final_model_kwargs = model_kwargs.copy()
-
-        if model_type == ModelType.EMBEDDER:
-            if isinstance(input, str):
-                input = [input]
-            assert isinstance(input, (list, tuple)), "input must be a sequence of text"
-            final_model_kwargs["input"] = input
-        elif model_type == ModelType.LLM:
-            messages = []
-            if input is not None and input != "":
-                if self._input_type == "text":
-                    messages.append({"role": "system", "content": input})
-                else:
-                    messages.extend(input)
-            final_model_kwargs["messages"] = messages
-        else:
-            raise ValueError(f"model_type {model_type} is not supported")
-
-        # Ensure model is specified
-        if "model" not in final_model_kwargs:
-            raise ValueError("model must be specified")
-
-        return final_model_kwargs
-
-    def parse_chat_completion(self, completion: ChatCompletion) -> GeneratorOutput:
-        """Parse chat completion response."""
-        log.debug(f"completion: {completion}")
-        try:
-            data = self.chat_completion_parser(completion)
-            usage = self.track_completion_usage(completion)
-            return GeneratorOutput(data=None, usage=usage, raw_response=data)
-        except Exception as e:
-            log.error(f"Error parsing completion: {e}")
-            return GeneratorOutput(
-                data=None, error=str(e), raw_response=str(completion)
-            )
-
-    def track_completion_usage(self, completion: ChatCompletion) -> CompletionUsage:
-        """Track completion token usage."""
-        usage = completion.usage
-        return CompletionUsage(
-            completion_tokens=usage.completion_tokens,
-            prompt_tokens=usage.prompt_tokens,
-            total_tokens=usage.total_tokens,
-        )
-
-    @backoff.on_exception(
-        backoff.expo,
-        (
-            APITimeoutError,
-            InternalServerError,
-            RateLimitError,
-            UnprocessableEntityError,
-            BadRequestError,
-        ),
-        max_time=5,
-    )
-    def call(self, api_kwargs: Dict = {}, model_type: ModelType = ModelType.UNDEFINED):
-        """Make a synchronous call to Azure OpenAI API."""
-        log.info(f"api_kwargs: {api_kwargs}")
-        if model_type == ModelType.EMBEDDER:
-            return self.sync_client.embeddings.create(**api_kwargs)
-        elif model_type == ModelType.LLM:
-            if "stream" in api_kwargs and api_kwargs.get("stream", False):
-                log.debug("streaming call")
-                self.chat_completion_parser = handle_streaming_response
-                return self.sync_client.chat.completions.create(**api_kwargs)
-            return self.sync_client.chat.completions.create(**api_kwargs)
-        else:
-            raise ValueError(f"model_type {model_type} is not supported")
-
-    @backoff.on_exception(
-        backoff.expo,
-        (
-            APITimeoutError,
-            InternalServerError,
-            RateLimitError,
-            UnprocessableEntityError,
-            BadRequestError,
-        ),
-        max_time=5,
-    )
-    async def acall(
-        self, api_kwargs: Dict = {}, model_type: ModelType = ModelType.UNDEFINED
-    ):
-        """Make an asynchronous call to Azure OpenAI API."""
-        if self.async_client is None:
-            self.async_client = self.init_async_client()
-        if model_type == ModelType.EMBEDDER:
-            return await self.async_client.embeddings.create(**api_kwargs)
-        elif model_type == ModelType.LLM:
-            return await self.async_client.chat.completions.create(**api_kwargs)
-        else:
-            raise ValueError(f"model_type {model_type} is not supported")
-
-    @classmethod
-    def from_dict(cls, data: Dict[str, Any]) -> 'AzureClient':
-        """Create an instance from a dictionary."""
-        obj = super().from_dict(data)
-        obj.sync_client = obj.init_sync_client()
-        obj.async_client = obj.init_async_client()
-        return obj
-
-    def to_dict(self) -> Dict[str, Any]:
-        """Convert the instance to a dictionary."""
-        exclude = ["sync_client", "async_client"]
-        output = super().to_dict(exclude=exclude)
-        return output 
\ No newline at end of file
diff --git a/tutorials/azure_example.py b/tutorials/azure_example.py
deleted file mode 100644
index 030cf634f..000000000
--- a/tutorials/azure_example.py
+++ /dev/null
@@ -1,187 +0,0 @@
-"""Example script demonstrating Azure OpenAI client usage in AdalFlow."""
-
-import os
-import asyncio
-from adalflow.components.model_client import AzureClient
-from adalflow.core.generator import Generator
-from adalflow.core.types import ModelType
-
-# Demo configuration - Replace these with your actual values
-DEMO_CONFIG = {
-    "api_key": "your-api-key",  # From Azure Portal > Keys and Endpoint
-    "azure_endpoint": "https://your-resource.openai.azure.com/",  # Your Azure OpenAI endpoint
-    "api_version": "2024-02-15-preview",  # Current API version
-    "deployment_name": "gpt-35-turbo"  # Your model deployment name
-}
-
-def setup_environment():
-    """Setup environment variables if not already set."""
-    if not os.getenv("AZURE_OPENAI_API_KEY"):
-        print("Setting up demo environment variables...")
-        os.environ["AZURE_OPENAI_API_KEY"] = DEMO_CONFIG["api_key"]
-        os.environ["AZURE_OPENAI_ENDPOINT"] = DEMO_CONFIG["azure_endpoint"]
-        os.environ["AZURE_OPENAI_VERSION"] = DEMO_CONFIG["api_version"]
-    else:
-        print("Using existing environment variables...")
-
-def test_chat_completion():
-    """Test chat completion with Azure OpenAI."""
-    print("\nTesting chat completion...")
-    client = AzureClient()
-    generator = Generator(
-        model_client=client,
-        model_kwargs={
-            "model": DEMO_CONFIG["deployment_name"],
-            "temperature": 0.7,
-        },
-        model_type=ModelType.LLM
-    )
-
-    # Single turn conversation
-    response = generator("What is the capital of France?")
-    print("\nChat Completion Response:")
-    print(f"Content: {response.raw_response}")
-    print(f"Usage: {response.usage}")
-    print(f"Error: {response.error}")
-
-    # Multi-turn conversation
-    messages = [
-        {"role": "system", "content": "You are a helpful assistant."},
-        {"role": "user", "content": "What is the capital of France?"},
-        {"role": "assistant", "content": "The capital of France is Paris."},
-        {"role": "user", "content": "What is its population?"}
-    ]
-    
-    client = AzureClient(input_type="messages")
-    generator = Generator(
-        model_client=client,
-        model_kwargs={
-            "model": DEMO_CONFIG["deployment_name"],
-            "temperature": 0.7,
-        },
-        model_type=ModelType.LLM
-    )
-    
-    response = generator(messages)
-    print("\nMulti-turn Conversation Response:")
-    print(f"Content: {response.raw_response}")
-    print(f"Usage: {response.usage}")
-    print(f"Error: {response.error}")
-
-def test_embeddings():
-    """Test embeddings with Azure OpenAI."""
-    print("\nTesting embeddings...")
-    client = AzureClient()
-    generator = Generator(
-        model_client=client,
-        model_kwargs={
-            "model": "text-embedding-ada-002",  # Standard embedding model name
-        },
-        model_type=ModelType.EMBEDDER
-    )
-
-    # Single text embedding
-    response = generator("Hello, world!")
-    print("\nSingle Text Embedding Response:")
-    print(f"Embedding shape: {len(response.raw_response)}")
-    print(f"Usage: {response.usage}")
-    print(f"Error: {response.error}")
-
-    # Multiple text embeddings
-    texts = ["Hello, world!", "How are you?", "Nice to meet you!"]
-    response = generator(texts)
-    print("\nMultiple Text Embeddings Response:")
-    print(f"Number of embeddings: {len(response.raw_response)}")
-    print(f"Usage: {response.usage}")
-    print(f"Error: {response.error}")
-
-async def test_async_chat():
-    """Test async chat completion with Azure OpenAI."""
-    print("\nTesting async chat completion...")
-    client = AzureClient()
-    generator = Generator(
-        model_client=client,
-        model_kwargs={
-            "model": DEMO_CONFIG["deployment_name"],
-            "temperature": 0.7,
-        },
-        model_type=ModelType.LLM
-    )
-
-    response = await generator.acall("What is the capital of France?")
-    print("\nAsync Chat Completion Response:")
-    print(f"Content: {response.raw_response}")
-    print(f"Usage: {response.usage}")
-    print(f"Error: {response.error}")
-
-def test_streaming():
-    """Test streaming chat completion with Azure OpenAI."""
-    print("\nTesting streaming chat completion...")
-    client = AzureClient()
-    generator = Generator(
-        model_client=client,
-        model_kwargs={
-            "model": DEMO_CONFIG["deployment_name"],
-            "temperature": 0.7,
-            "stream": True
-        },
-        model_type=ModelType.LLM
-    )
-
-    print("\nStreaming Chat Completion Response:")
-    for chunk in generator("Tell me a short story about a cat."):
-        if chunk.raw_response:
-            print(chunk.raw_response, end="", flush=True)
-    print("\n")
-
-def check_requirements():
-    """Check if all required packages are installed."""
-    try:
-        import openai
-        import azure.identity
-        import azure.mgmt.cognitiveservices
-        print("All required packages are installed.")
-        return True
-    except ImportError as e:
-        print(f"Missing required package: {e}")
-        print("Please install required packages:")
-        print("poetry add openai azure-identity azure-mgmt-cognitiveservices")
-        return False
-
-if __name__ == "__main__":
-    print("Azure OpenAI Client Test Script")
-    print("==============================")
-    
-    if not check_requirements():
-        exit(1)
-
-    setup_environment()
-
-    # Check for required environment variables
-    required_vars = ["AZURE_OPENAI_API_KEY", "AZURE_OPENAI_ENDPOINT", "AZURE_OPENAI_VERSION"]
-    missing_vars = [var for var in required_vars if not os.getenv(var)]
-    if missing_vars:
-        print(f"Missing required environment variables: {', '.join(missing_vars)}")
-        print("Please set them before running this script.")
-        print("\nYou can set them in your environment:")
-        print("export AZURE_OPENAI_API_KEY='your-key'")
-        print("export AZURE_OPENAI_ENDPOINT='your-endpoint'")
-        print("export AZURE_OPENAI_VERSION='api-version'")
-        print("\nOr update the DEMO_CONFIG in this script.")
-        exit(1)
-
-    print("\nStarting Azure OpenAI tests...")
-    
-    try:
-        # Test synchronous operations
-        test_chat_completion()
-        test_embeddings()
-        test_streaming()
-        
-        # Test asynchronous operations
-        asyncio.run(test_async_chat())
-        
-        print("\nAll tests completed!")
-    except Exception as e:
-        print(f"\nError during testing: {e}")
-        print("\nPlease check your Azure OpenAI setup and credentials.") 
\ No newline at end of file

From d121c18bcfb911e6600b9ffb3ddc96fd2c9135af Mon Sep 17 00:00:00 2001
From: fm1320 <filipmakraduli.10@gmail.com>
Date: Mon, 20 Jan 2025 10:54:34 +0100
Subject: [PATCH 4/5] fix kwargs implementation

---
 .../components/model_client/openai_client.py  | 28 ++++++++-------
 adalflow/adalflow/core/generator.py           | 34 +++----------------
 .../multimodal_client_testing_examples.py     | 24 ++++++-------
 3 files changed, 31 insertions(+), 55 deletions(-)

diff --git a/adalflow/adalflow/components/model_client/openai_client.py b/adalflow/adalflow/components/model_client/openai_client.py
index e3b7d01a7..4a13fe8d7 100644
--- a/adalflow/adalflow/components/model_client/openai_client.py
+++ b/adalflow/adalflow/components/model_client/openai_client.py
@@ -106,6 +106,12 @@ class OpenAIClient(ModelClient):
     Users (1) simplify use ``Embedder`` and ``Generator`` components by passing OpenAIClient() as the model_client.
     (2) can use this as an example to create their own API client or extend this class(copying and modifing the code) in their own project.
 
+    Args:
+        api_key (Optional[str], optional): OpenAI API key. Defaults to None.
+        chat_completion_parser (Callable[[Completion], Any], optional): A function to parse the chat completion to a str. Defaults to None.
+        input_type (Literal["text", "messages"], optional): The type of input to use. Defaults to "text".
+        model_type (ModelType, optional): The type of model to use (EMBEDDER, LLM, or IMAGE_GENERATION). Defaults to ModelType.LLM.
+
     Note:
         We suggest users not to use `response_format` to enforce output data type or `tools` and `tool_choice`  in your model_kwargs when calling the API.
         We do not know how OpenAI is doing the formating or what prompt they have added.
@@ -120,14 +126,9 @@ class OpenAIClient(ModelClient):
         - prompt: Text description of the image to generate
         - size: "1024x1024", "1024x1792", or "1792x1024" for DALL-E 3; "256x256", "512x512", or "1024x1024" for DALL-E 2
         - quality: "standard" or "hd" (DALL-E 3 only)
-        - n: Number of images to generate (1 for DALL-E 3, 1-10 for DALL-E 2)
+        - n: Number of images (1 for DALL-E 3, 1-10 for DALL-E 2)
         - response_format: "url" or "b64_json"
 
-    Args:
-        api_key (Optional[str], optional): OpenAI API key. Defaults to None.
-        chat_completion_parser (Callable[[Completion], Any], optional): A function to parse the chat completion to a str. Defaults to None.
-            Default is `get_first_message_content`.
-
     References:
         - Embeddings models: https://platform.openai.com/docs/guides/embeddings
         - Chat models: https://platform.openai.com/docs/guides/text-generation
@@ -141,11 +142,15 @@ def __init__(
         api_key: Optional[str] = None,
         chat_completion_parser: Callable[[Completion], Any] = None,
         input_type: Literal["text", "messages"] = "text",
+        model_type: ModelType = ModelType.LLM,
     ):
         r"""It is recommended to set the OPENAI_API_KEY environment variable instead of passing it as an argument.
 
         Args:
             api_key (Optional[str], optional): OpenAI API key. Defaults to None.
+            chat_completion_parser (Callable[[Completion], Any], optional): A function to parse the chat completion to a str. Defaults to None.
+            input_type (Literal["text", "messages"], optional): The type of input to use. Defaults to "text".
+            model_type (ModelType, optional): The type of model to use (EMBEDDER, LLM, or IMAGE_GENERATION). Defaults to ModelType.LLM.
         """
         super().__init__()
         self._api_key = api_key
@@ -155,6 +160,7 @@ def __init__(
             chat_completion_parser or get_first_message_content
         )
         self._input_type = input_type
+        self.model_type = model_type
 
     def init_sync_client(self):
         api_key = self._api_key or os.getenv("OPENAI_API_KEY")
@@ -229,7 +235,6 @@ def convert_inputs_to_api_kwargs(
         self,
         input: Optional[Any] = None,
         model_kwargs: Dict = {},
-        model_type: ModelType = ModelType.UNDEFINED,
     ) -> Dict:
         r"""
         Specify the API input type and output api_kwargs that will be used in _call and _acall methods.
@@ -254,21 +259,20 @@ def convert_inputs_to_api_kwargs(
                 - mask: Path to the mask image
                 For variations (DALL-E 2 only):
                 - image: Path to the input image
-            model_type: The type of model (EMBEDDER, LLM, or IMAGE_GENERATION)
 
         Returns:
             Dict: API-specific kwargs for the model call
         """
 
         final_model_kwargs = model_kwargs.copy()
-        if model_type == ModelType.EMBEDDER:
+        if self.model_type == ModelType.EMBEDDER:
             if isinstance(input, str):
                 input = [input]
             # convert input to input
             if not isinstance(input, Sequence):
                 raise TypeError("input must be a sequence of text")
             final_model_kwargs["input"] = input
-        elif model_type == ModelType.LLM:
+        elif self.model_type == ModelType.LLM:
             # convert input to messages
             messages: List[Dict[str, str]] = []
             images = final_model_kwargs.pop("images", None)
@@ -313,7 +317,7 @@ def convert_inputs_to_api_kwargs(
                 else:
                     messages.append({"role": "system", "content": input})
             final_model_kwargs["messages"] = messages
-        elif model_type == ModelType.IMAGE_GENERATION:
+        elif self.model_type == ModelType.IMAGE_GENERATION:
             # For image generation, input is the prompt
             final_model_kwargs["prompt"] = input
             # Ensure model is specified
@@ -358,7 +362,7 @@ def convert_inputs_to_api_kwargs(
             else:
                 raise ValueError(f"Invalid operation: {operation}")
         else:
-            raise ValueError(f"model_type {model_type} is not supported")
+            raise ValueError(f"model_type {self.model_type} is not supported")
         return final_model_kwargs
 
     def parse_image_generation_response(self, response: List[Image]) -> GeneratorOutput:
diff --git a/adalflow/adalflow/core/generator.py b/adalflow/adalflow/core/generator.py
index a1714bbd1..1070da8ef 100644
--- a/adalflow/adalflow/core/generator.py
+++ b/adalflow/adalflow/core/generator.py
@@ -70,21 +70,11 @@ class Generator(GradComponent, CachedEngine, CallbackManager):
         template (Optional[str], optional): The template for the prompt.  Defaults to :ref:`DEFAULT_ADALFLOW_SYSTEM_PROMPT<core-default_prompt_template>`.
         prompt_kwargs (Optional[Dict], optional): The preset prompt kwargs to fill in the variables in the prompt. Defaults to None.
         output_processors (Optional[Component], optional):  The output processors after model call. It can be a single component or a chained component via ``Sequential``. Defaults to None.
-        trainable_params (Optional[List[str]], optional): The list of trainable parameters. Defaults to [].
-
-    Note:
-        The output_processors will be applied to the string output of the model completion. And the result will be stored in the data field of the output.
-        And we encourage you to only use it to parse the response to data format you will use later.
+        name (Optional[str], optional): The name of the generator. Defaults to None.
+        cache_path (Optional[str], optional): The path to save the cache. Defaults to None.
+        use_cache (bool, optional): Whether to use cache. Defaults to False.
     """
 
-    model_type: ModelType = ModelType.LLM
-    model_client: ModelClient  # for better type checking
-
-    _use_cache: bool = False
-    _kwargs: Dict[str, Any] = (
-        {}
-    )  # to create teacher generator from student TODO: might reaccess this
-
     def __init__(
         self,
         *,
@@ -100,8 +90,6 @@ def __init__(
         # args for the cache
         cache_path: Optional[str] = None,
         use_cache: bool = False,
-        # args for model type
-        model_type: ModelType = ModelType.LLM,
     ) -> None:
         r"""The default prompt is set to the DEFAULT_ADALFLOW_SYSTEM_PROMPT. It has the following variables:
         - task_desc_str
@@ -112,17 +100,6 @@ def __init__(
         - steps_str
         You can preset the prompt kwargs to fill in the variables in the prompt using prompt_kwargs.
         But you can replace the prompt and set any variables you want and use the prompt_kwargs to fill in the variables.
-
-        Args:
-            model_client (ModelClient): The model client to use for the generator.
-            model_kwargs (Dict[str, Any], optional): The model kwargs to pass to the model client. Defaults to {}. Please refer to :ref:`ModelClient<components-model_client>` for the details on how to set the model_kwargs for your specific model if it is from our library.
-            template (Optional[str], optional): The template for the prompt.  Defaults to :ref:`DEFAULT_ADALFLOW_SYSTEM_PROMPT<core-default_prompt_template>`.
-            prompt_kwargs (Optional[Dict], optional): The preset prompt kwargs to fill in the variables in the prompt. Defaults to None.
-            output_processors (Optional[Component], optional):  The output processors after model call. It can be a single component or a chained component via ``Sequential``. Defaults to None.
-            name (Optional[str], optional): The name of the generator. Defaults to None.
-            cache_path (Optional[str], optional): The path to save the cache. Defaults to None.
-            use_cache (bool, optional): Whether to use cache. Defaults to False.
-            model_type (ModelType, optional): The type of model (EMBEDDER, LLM, or IMAGE_GENERATION). Defaults to ModelType.LLM.
         """
 
         if not isinstance(model_client, ModelClient):
@@ -134,7 +111,6 @@ def __init__(
         template = template or DEFAULT_ADALFLOW_SYSTEM_PROMPT
 
         # create the cache path and initialize the cache engine
-
         self.set_cache_path(
             cache_path, model_client, model_kwargs.get("model", "default")
         )
@@ -146,7 +122,7 @@ def __init__(
         CallbackManager.__init__(self)
 
         self.name = name or self.__class__.__name__
-        self.model_type = model_type
+        self.model_type = model_client.model_type  # Get model type from client
 
         self._init_prompt(template, prompt_kwargs)
 
@@ -177,7 +153,6 @@ def __init__(
             "name": name,
             "cache_path": cache_path,
             "use_cache": use_cache,
-            "model_type": model_type,
         }
         self._teacher: Optional["Generator"] = None
         self._trace_api_kwargs: Dict[str, Any] = (
@@ -351,7 +326,6 @@ def _pre_call(self, prompt_kwargs: Dict, model_kwargs: Dict) -> Dict[str, Any]:
         api_kwargs = self.model_client.convert_inputs_to_api_kwargs(
             input=prompt_str,
             model_kwargs=composed_model_kwargs,
-            model_type=self.model_type,
         )
         return api_kwargs
 
diff --git a/tutorials/multimodal_client_testing_examples.py b/tutorials/multimodal_client_testing_examples.py
index 2c1180de1..d94f8e4e1 100644
--- a/tutorials/multimodal_client_testing_examples.py
+++ b/tutorials/multimodal_client_testing_examples.py
@@ -25,7 +25,7 @@
 
 def test_basic_generation():
     """Test basic text generation"""
-    client = OpenAIClient()
+    client = OpenAIClient()  # Default model_type is LLM
     gen = Generator(
         model_client=client,
         model_kwargs={
@@ -40,7 +40,7 @@ def test_basic_generation():
 
 def test_invalid_image_url():
     """Test Generator output with invalid image URL"""
-    client = OpenAIClient()
+    client = OpenAIClient()  # Default model_type is LLM
     gen = Generator(
         model_client=client,
         model_kwargs={
@@ -56,7 +56,7 @@ def test_invalid_image_url():
 
 def test_invalid_image_generation():
     """Test DALL-E generation with invalid parameters"""
-    client = OpenAIClient()
+    client = OpenAIClient(model_type=ModelType.IMAGE_GENERATION)
     gen = Generator(
         model_client=client,
         model_kwargs={
@@ -64,8 +64,7 @@ def test_invalid_image_generation():
             "size": "invalid_size",  # Invalid size parameter
             "quality": "standard",
             "n": 1
-        },
-        model_type=ModelType.IMAGE_GENERATION
+        }
     )
     
     print("\n=== Testing Invalid DALL-E Parameters ===")
@@ -74,11 +73,10 @@ def test_invalid_image_generation():
 
 def test_vision_and_generation():
     """Test both vision analysis and image generation"""
-    client = OpenAIClient()
-    
-    # 1. Test Vision Analysis
+    # 1. Test Vision Analysis with LLM client
+    vision_client = OpenAIClient()  # Default model_type is LLM
     vision_gen = Generator(
-        model_client=client,
+        model_client=vision_client,
         model_kwargs={
             "model": "gpt-4o-mini",
             "images": "https://upload.wikimedia.org/wikipedia/en/7/7d/Lenna_%28test_image%29.png",
@@ -90,16 +88,16 @@ def test_vision_and_generation():
     print("\n=== Vision Analysis ===")
     print(f"Description: {vision_response.raw_response}")
 
-    # 2. Test DALL-E Image Generation
+    # 2. Test DALL-E Image Generation with IMAGE_GENERATION client
+    dalle_client = OpenAIClient(model_type=ModelType.IMAGE_GENERATION)
     dalle_gen = Generator(
-        model_client=client,
+        model_client=dalle_client,
         model_kwargs={
             "model": "dall-e-3",
             "size": "1024x1024",
             "quality": "standard",
             "n": 1
-        },
-        model_type=ModelType.IMAGE_GENERATION
+        }
     )
     
     # For image generation, input_str becomes the prompt

From 922681a4ac84313e43541f3ab08c9dc994198f32 Mon Sep 17 00:00:00 2001
From: fm1320 <filipmakraduli.10@gmail.com>
Date: Sun, 26 Jan 2025 20:11:49 +0100
Subject: [PATCH 5/5] Fixed tests and made adjustments

---
 .../components/model_client/openai_client.py  |  28 ++---
 adalflow/adalflow/core/generator.py           |   5 +-
 adalflow/tests/test_generator.py              |   5 +-
 docs/source/tutorials/multimodal_client.rst   | 107 ++++++++++++++++++
 tests/test_generator.py                       |   1 +
 .../multimodal_client_testing_examples.py     |  64 ++++++-----
 6 files changed, 167 insertions(+), 43 deletions(-)
 create mode 100644 docs/source/tutorials/multimodal_client.rst
 create mode 100644 tests/test_generator.py

diff --git a/adalflow/adalflow/components/model_client/openai_client.py b/adalflow/adalflow/components/model_client/openai_client.py
index 4a13fe8d7..69b419229 100644
--- a/adalflow/adalflow/components/model_client/openai_client.py
+++ b/adalflow/adalflow/components/model_client/openai_client.py
@@ -110,7 +110,6 @@ class OpenAIClient(ModelClient):
         api_key (Optional[str], optional): OpenAI API key. Defaults to None.
         chat_completion_parser (Callable[[Completion], Any], optional): A function to parse the chat completion to a str. Defaults to None.
         input_type (Literal["text", "messages"], optional): The type of input to use. Defaults to "text".
-        model_type (ModelType, optional): The type of model to use (EMBEDDER, LLM, or IMAGE_GENERATION). Defaults to ModelType.LLM.
 
     Note:
         We suggest users not to use `response_format` to enforce output data type or `tools` and `tool_choice`  in your model_kwargs when calling the API.
@@ -142,7 +141,6 @@ def __init__(
         api_key: Optional[str] = None,
         chat_completion_parser: Callable[[Completion], Any] = None,
         input_type: Literal["text", "messages"] = "text",
-        model_type: ModelType = ModelType.LLM,
     ):
         r"""It is recommended to set the OPENAI_API_KEY environment variable instead of passing it as an argument.
 
@@ -150,7 +148,6 @@ def __init__(
             api_key (Optional[str], optional): OpenAI API key. Defaults to None.
             chat_completion_parser (Callable[[Completion], Any], optional): A function to parse the chat completion to a str. Defaults to None.
             input_type (Literal["text", "messages"], optional): The type of input to use. Defaults to "text".
-            model_type (ModelType, optional): The type of model to use (EMBEDDER, LLM, or IMAGE_GENERATION). Defaults to ModelType.LLM.
         """
         super().__init__()
         self._api_key = api_key
@@ -160,7 +157,6 @@ def __init__(
             chat_completion_parser or get_first_message_content
         )
         self._input_type = input_type
-        self.model_type = model_type
 
     def init_sync_client(self):
         api_key = self._api_key or os.getenv("OPENAI_API_KEY")
@@ -235,6 +231,7 @@ def convert_inputs_to_api_kwargs(
         self,
         input: Optional[Any] = None,
         model_kwargs: Dict = {},
+        model_type: ModelType = ModelType.UNDEFINED,  # Now required in practice
     ) -> Dict:
         r"""
         Specify the API input type and output api_kwargs that will be used in _call and _acall methods.
@@ -259,20 +256,23 @@ def convert_inputs_to_api_kwargs(
                 - mask: Path to the mask image
                 For variations (DALL-E 2 only):
                 - image: Path to the input image
+            model_type: The type of model to use (EMBEDDER, LLM, or IMAGE_GENERATION). Required.
 
         Returns:
             Dict: API-specific kwargs for the model call
         """
+        if model_type == ModelType.UNDEFINED:
+            raise ValueError("model_type must be specified")
 
         final_model_kwargs = model_kwargs.copy()
-        if self.model_type == ModelType.EMBEDDER:
+        if model_type == ModelType.EMBEDDER:
             if isinstance(input, str):
                 input = [input]
             # convert input to input
             if not isinstance(input, Sequence):
                 raise TypeError("input must be a sequence of text")
             final_model_kwargs["input"] = input
-        elif self.model_type == ModelType.LLM:
+        elif model_type == ModelType.LLM:
             # convert input to messages
             messages: List[Dict[str, str]] = []
             images = final_model_kwargs.pop("images", None)
@@ -317,7 +317,7 @@ def convert_inputs_to_api_kwargs(
                 else:
                     messages.append({"role": "system", "content": input})
             final_model_kwargs["messages"] = messages
-        elif self.model_type == ModelType.IMAGE_GENERATION:
+        elif model_type == ModelType.IMAGE_GENERATION:
             # For image generation, input is the prompt
             final_model_kwargs["prompt"] = input
             # Ensure model is specified
@@ -362,7 +362,7 @@ def convert_inputs_to_api_kwargs(
             else:
                 raise ValueError(f"Invalid operation: {operation}")
         else:
-            raise ValueError(f"model_type {self.model_type} is not supported")
+            raise ValueError(f"model_type {model_type} is not supported")
         return final_model_kwargs
 
     def parse_image_generation_response(self, response: List[Image]) -> GeneratorOutput:
@@ -379,11 +379,7 @@ def parse_image_generation_response(self, response: List[Image]) -> GeneratorOut
             )
         except Exception as e:
             log.error(f"Error parsing image generation response: {e}")
-            return GeneratorOutput(
-                data=None,
-                error=str(e),
-                raw_response=str(response)
-            )
+            return GeneratorOutput(data=None, error=str(e), raw_response=str(response))
 
     @backoff.on_exception(
         backoff.expo,
@@ -400,6 +396,9 @@ def call(self, api_kwargs: Dict = {}, model_type: ModelType = ModelType.UNDEFINE
         """
         kwargs is the combined input and model_kwargs.  Support streaming call.
         """
+        if model_type == ModelType.UNDEFINED:
+            raise ValueError("model_type must be specified")
+
         log.info(f"api_kwargs: {api_kwargs}")
         if model_type == ModelType.EMBEDDER:
             return self.sync_client.embeddings.create(**api_kwargs)
@@ -449,6 +448,9 @@ async def acall(
         """
         kwargs is the combined input and model_kwargs
         """
+        if model_type == ModelType.UNDEFINED:
+            raise ValueError("model_type must be specified")
+
         if self.async_client is None:
             self.async_client = self.init_async_client()
         if model_type == ModelType.EMBEDDER:
diff --git a/adalflow/adalflow/core/generator.py b/adalflow/adalflow/core/generator.py
index 1070da8ef..b2bb072b7 100644
--- a/adalflow/adalflow/core/generator.py
+++ b/adalflow/adalflow/core/generator.py
@@ -73,6 +73,7 @@ class Generator(GradComponent, CachedEngine, CallbackManager):
         name (Optional[str], optional): The name of the generator. Defaults to None.
         cache_path (Optional[str], optional): The path to save the cache. Defaults to None.
         use_cache (bool, optional): Whether to use cache. Defaults to False.
+        model_type (ModelType, optional): The type of the model. Defaults to ModelType.LLM.
     """
 
     def __init__(
@@ -90,6 +91,7 @@ def __init__(
         # args for the cache
         cache_path: Optional[str] = None,
         use_cache: bool = False,
+        model_type: ModelType = ModelType.LLM,  # Add model_type parameter with default
     ) -> None:
         r"""The default prompt is set to the DEFAULT_ADALFLOW_SYSTEM_PROMPT. It has the following variables:
         - task_desc_str
@@ -122,7 +124,7 @@ def __init__(
         CallbackManager.__init__(self)
 
         self.name = name or self.__class__.__name__
-        self.model_type = model_client.model_type  # Get model type from client
+        self.model_type = model_type  # Use the passed model_type instead of getting from client
 
         self._init_prompt(template, prompt_kwargs)
 
@@ -326,6 +328,7 @@ def _pre_call(self, prompt_kwargs: Dict, model_kwargs: Dict) -> Dict[str, Any]:
         api_kwargs = self.model_client.convert_inputs_to_api_kwargs(
             input=prompt_str,
             model_kwargs=composed_model_kwargs,
+            model_type=self.model_type,
         )
         return api_kwargs
 
diff --git a/adalflow/tests/test_generator.py b/adalflow/tests/test_generator.py
index a15c302a5..e6631f10f 100644
--- a/adalflow/tests/test_generator.py
+++ b/adalflow/tests/test_generator.py
@@ -15,6 +15,7 @@
 from adalflow.core.model_client import ModelClient
 from adalflow.components.model_client.groq_client import GroqAPIClient
 from adalflow.tracing import GeneratorStateLogger
+from adalflow.core.types import ModelType
 
 
 class TestGenerator(IsolatedAsyncioTestCase):
@@ -32,7 +33,7 @@ def setUp(self):
             )
             self.mock_api_client = mock_api_client
 
-            self.generator = Generator(model_client=mock_api_client)
+            self.generator = Generator(model_client=mock_api_client, model_type=ModelType.LLM)
             self.save_dir = "./tests/log"
             self.project_name = "TestGenerator"
             self.filename = "prompt_logger_test.json"
@@ -182,7 +183,7 @@ def test_groq_client_call(self, mock_call):
         template = "Hello, {{ input_str }}!"
 
         # Initialize the Generator with the mocked client
-        generator = Generator(model_client=self.client, template=template)
+        generator = Generator(model_client=self.client, template=template, model_type=ModelType.LLM)
 
         # Call the generator and get the output
         output = generator.call(prompt_kwargs=prompt_kwargs, model_kwargs=model_kwargs)
diff --git a/docs/source/tutorials/multimodal_client.rst b/docs/source/tutorials/multimodal_client.rst
new file mode 100644
index 000000000..a27547406
--- /dev/null
+++ b/docs/source/tutorials/multimodal_client.rst
@@ -0,0 +1,107 @@
+Multimodal Client Tutorial
+==========================
+
+This tutorial demonstrates how to use the OpenAI client for different types of tasks: text generation, vision analysis, and image generation.
+
+Model Types
+-----------
+
+The OpenAI client supports three types of operations:
+
+1. Text/Chat Completion (``ModelType.LLM``)
+   - Standard text generation
+   - Vision analysis (with a vision-capable chat model, e.g. ``gpt-4o-mini``)
+2. Image Generation (``ModelType.IMAGE_GENERATION``)
+   - DALL-E image generation
+3. Embeddings (``ModelType.EMBEDDER``)
+   - Text embeddings
+
+Basic Usage
+-----------
+
+The model type is specified when creating a ``Generator`` instance:
+
+.. code-block:: python
+
+    from adalflow.core import Generator
+    from adalflow.components.model_client.openai_client import OpenAIClient
+    from adalflow.core.types import ModelType
+
+    # Create the client
+    client = OpenAIClient()
+
+    # For text generation
+    gen = Generator(
+        model_client=client,
+        model_kwargs={"model": "gpt-4", "max_tokens": 100},
+        model_type=ModelType.LLM  # Specify LLM type
+    )
+    response = gen({"input_str": "Hello, world!"})
+
+Vision Tasks
+------------
+
+Vision tasks use ``ModelType.LLM`` because images are sent to a vision-capable chat model as part of the chat messages:
+
+.. code-block:: python
+
+    # Vision analysis
+    vision_gen = Generator(
+        model_client=client,
+        model_kwargs={
+            "model": "gpt-4o-mini",
+            "images": "path/to/image.jpg",
+            "max_tokens": 300,
+        },
+        model_type=ModelType.LLM  # Vision uses LLM type
+    )
+    response = vision_gen({"input_str": "What do you see in this image?"})
+
+Image Generation
+----------------
+
+For DALL-E image generation, use ``ModelType.IMAGE_GENERATION``:
+
+.. code-block:: python
+
+    # Image generation with DALL-E
+    dalle_gen = Generator(
+        model_client=client,
+        model_kwargs={
+            "model": "dall-e-3",
+            "size": "1024x1024",
+            "quality": "standard",
+            "n": 1,
+        },
+        model_type=ModelType.IMAGE_GENERATION  # Specify image generation type
+    )
+    response = dalle_gen({"input_str": "A cat playing with yarn"})
+
+Backward Compatibility
+----------------------
+
+For backward compatibility with existing code:
+
+1. The ``model_type`` argument of ``Generator`` defaults to ``ModelType.LLM`` if not specified
+2. Older models that only support text continue to work with ``ModelType.LLM``
+3. The OpenAI client handles the appropriate API endpoints based on the model type
+
+Error Handling
+--------------
+
+The client includes error handling for:
+
+1. Invalid model types for operations
+2. Invalid image URLs or file paths
+3. Unsupported model capabilities
+4. API errors and rate limits
+
+Complete Example
+----------------
+
+See the complete example in ``tutorials/multimodal_client_testing_examples.py``, which demonstrates:
+
+1. Basic text generation
+2. Vision analysis with image input
+3. DALL-E image generation
+4. Error handling for invalid inputs 
\ No newline at end of file
diff --git a/tests/test_generator.py b/tests/test_generator.py
new file mode 100644
index 000000000..0519ecba6
--- /dev/null
+++ b/tests/test_generator.py
@@ -0,0 +1 @@
+ 
\ No newline at end of file
diff --git a/tutorials/multimodal_client_testing_examples.py b/tutorials/multimodal_client_testing_examples.py
index d94f8e4e1..dd7edcf07 100644
--- a/tutorials/multimodal_client_testing_examples.py
+++ b/tutorials/multimodal_client_testing_examples.py
@@ -25,91 +25,101 @@
 
 def test_basic_generation():
     """Test basic text generation"""
-    client = OpenAIClient()  # Default model_type is LLM
+    client = OpenAIClient()  # For text/chat completion
     gen = Generator(
         model_client=client,
-        model_kwargs={
-            "model": "gpt-4o-mini",
-            "max_tokens": 100
-        }
+        model_kwargs={"model": "gpt-4o-mini", "max_tokens": 100},
+        model_type=ModelType.LLM  # Explicitly specify model type
     )
-    
+
     print("\n=== Testing Basic Generation ===")
     response = gen({"input_str": "Hello, world!"})
     print(f"Response: {response}")
 
+
 def test_invalid_image_url():
     """Test Generator output with invalid image URL"""
-    client = OpenAIClient()  # Default model_type is LLM
+    client = OpenAIClient()  # For vision tasks
     gen = Generator(
         model_client=client,
         model_kwargs={
             "model": "gpt-4o-mini",
             "images": "https://invalid.url/nonexistent.jpg",
-            "max_tokens": 300
-        }
+            "max_tokens": 300,
+        },
+        model_type=ModelType.LLM  # Vision tasks use LLM type
     )
-    
+
     print("\n=== Testing Invalid Image URL ===")
     response = gen({"input_str": "What do you see in this image?"})
     print(f"Response with invalid image URL: {response}")
 
+
 def test_invalid_image_generation():
     """Test DALL-E generation with invalid parameters"""
-    client = OpenAIClient(model_type=ModelType.IMAGE_GENERATION)
+    client = OpenAIClient()  # For image generation
     gen = Generator(
         model_client=client,
         model_kwargs={
             "model": "dall-e-3",
             "size": "invalid_size",  # Invalid size parameter
             "quality": "standard",
-            "n": 1
-        }
+            "n": 1,
+        },
+        model_type=ModelType.IMAGE_GENERATION  # Specify image generation type
     )
-    
+
     print("\n=== Testing Invalid DALL-E Parameters ===")
     response = gen({"input_str": "A cat"})
     print(f"Response with invalid DALL-E parameters: {response}")
 
+
 def test_vision_and_generation():
     """Test both vision analysis and image generation"""
     # 1. Test Vision Analysis with LLM client
-    vision_client = OpenAIClient()  # Default model_type is LLM
+    vision_client = OpenAIClient()  # For vision tasks
     vision_gen = Generator(
         model_client=vision_client,
         model_kwargs={
             "model": "gpt-4o-mini",
             "images": "https://upload.wikimedia.org/wikipedia/en/7/7d/Lenna_%28test_image%29.png",
-            "max_tokens": 300
-        }
+            "max_tokens": 300,
+        },
+        model_type=ModelType.LLM  # Vision tasks use LLM type
+    )
+
+    vision_response = vision_gen(
+        {"input_str": "What do you see in this image? Be detailed but concise."}
     )
-    
-    vision_response = vision_gen({"input_str": "What do you see in this image? Be detailed but concise."})
     print("\n=== Vision Analysis ===")
     print(f"Description: {vision_response.raw_response}")
 
-    # 2. Test DALL-E Image Generation with IMAGE_GENERATION client
-    dalle_client = OpenAIClient(model_type=ModelType.IMAGE_GENERATION)
+    # 2. Test DALL-E Image Generation
+    dalle_client = OpenAIClient()  # For image generation
     dalle_gen = Generator(
         model_client=dalle_client,
         model_kwargs={
             "model": "dall-e-3",
             "size": "1024x1024",
             "quality": "standard",
-            "n": 1
-        }
+            "n": 1,
+        },
+        model_type=ModelType.IMAGE_GENERATION  # Specify image generation type
     )
-    
+
     # For image generation, input_str becomes the prompt
-    response = dalle_gen({"input_str": "A happy siamese cat playing with a red ball of yarn"})
+    response = dalle_gen(
+        {"input_str": "A happy siamese cat playing with a red ball of yarn"}
+    )
     print("\n=== DALL-E Generation ===")
     print(f"Generated Image URL: {response.data}")
 
+
 if __name__ == "__main__":
     print("Starting OpenAI Vision and DALL-E test...\n")
-    
+
     # Run all tests - they will show errors if API key is invalid/empty
     test_basic_generation()
     test_invalid_image_url()
     test_invalid_image_generation()
-    test_vision_and_generation() 
\ No newline at end of file
+    test_vision_and_generation()