2043 | 2043 | "build_custom_model_client()" |
2044 | 2044 | ] |
2045 | 2045 | }, |
| 2046 | + { |
| 2047 | + "cell_type": "markdown", |
| 2048 | + "metadata": {}, |
| 2049 | + "source": [ |
| 2050 | + "# AdalFlow multimodal model client" |
| 2051 | + ] |
| 2052 | + }, |
| 2053 | + { |
| 2054 | + "cell_type": "code", |
| 2055 | + "execution_count": null, |
| 2056 | + "metadata": {}, |
| 2057 | + "outputs": [], |
| 2058 | + "source": [ |
| 2059 | + "def analyze_single_image():\n", |
| 2060 | + "    \"\"\"Example of analyzing a single image with a vision-capable chat model (gpt-4o-mini)\"\"\"\n", |
| 2061 | + " client = OpenAIClient()\n", |
| 2062 | + " \n", |
| 2063 | + " gen = Generator(\n", |
| 2064 | + " model_client=client,\n", |
| 2065 | + " model_kwargs={\n", |
| 2066 | + " \"model\": \"gpt-4o-mini\",\n", |
| 2067 | + " \"images\": \"https://raw.githubusercontent.com/openai/openai-cookbook/main/examples/images/happy_cat.jpg\",\n", |
| 2068 | + " \"max_tokens\": 300\n", |
| 2069 | + " }\n", |
| 2070 | + " )\n", |
| 2071 | + " \n", |
| 2072 | + " response = gen({\"input_str\": \"What do you see in this image? Be detailed but concise.\"})\n", |
| 2073 | + " print(\"\\n=== Single Image Analysis ===\")\n", |
| 2074 | + " print(f\"Description: {response.raw_response}\")\n", |
| 2075 | + "\n", |
| 2076 | + "def analyze_multiple_images():\n", |
| 2077 | + " \"\"\"Example of analyzing multiple images in one prompt\"\"\"\n", |
| 2078 | + " client = OpenAIClient()\n", |
| 2079 | + " \n", |
| 2080 | + " # List of images to analyze together\n", |
| 2081 | + " images = [\n", |
| 2082 | + " \"https://raw.githubusercontent.com/openai/openai-cookbook/main/examples/images/happy_cat.jpg\",\n", |
| 2083 | + " \"https://raw.githubusercontent.com/openai/openai-cookbook/main/examples/images/sad_cat.jpg\"\n", |
| 2084 | + " ]\n", |
| 2085 | + " \n", |
| 2086 | + " gen = Generator(\n", |
| 2087 | + " model_client=client,\n", |
| 2088 | + " model_kwargs={\n", |
| 2089 | + " \"model\": \"gpt-4o-mini\",\n", |
| 2090 | + " \"images\": images,\n", |
| 2091 | + " \"max_tokens\": 300\n", |
| 2092 | + " }\n", |
| 2093 | + " )\n", |
| 2094 | + " \n", |
| 2095 | + " response = gen({\"input_str\": \"Compare and contrast these two images. What are the main differences?\"})\n", |
| 2096 | + " print(\"\\n=== Multiple Images Analysis ===\")\n", |
| 2097 | + " print(f\"Comparison: {response.raw_response}\")\n", |
| 2098 | + "\n", |
| 2099 | + "def generate_art_with_dalle():\n", |
| 2100 | + " \"\"\"Example of generating art using DALL-E 3\"\"\"\n", |
| 2101 | + " client = OpenAIClient()\n", |
| 2102 | + " \n", |
| 2103 | + " gen = Generator(\n", |
| 2104 | + " model_client=client,\n", |
| 2105 | + " model_kwargs={\n", |
| 2106 | + " \"model\": \"dall-e-3\",\n", |
| 2107 | + " \"size\": \"1024x1024\",\n", |
| 2108 | + " \"quality\": \"standard\",\n", |
| 2109 | + " \"n\": 1\n", |
| 2110 | + " }\n", |
| 2111 | + " )\n", |
| 2112 | + " \n", |
| 2113 | + " response = gen({\n", |
| 2114 | + " \"input_str\": \"A serene Japanese garden with a small bridge over a koi pond, cherry blossoms falling gently in the breeze\"\n", |
| 2115 | + " })\n", |
| 2116 | + " print(\"\\n=== Art Generation with DALL-E 3 ===\")\n", |
| 2117 | + " print(f\"Generated Image URL: {response.data}\")\n", |
| 2118 | + "\n", |
| 2119 | + "def create_image_variations(image_path=\"path/to/your/image.jpg\"):\n", |
| 2120 | + " \"\"\"Example of creating variations of an existing image\"\"\"\n", |
| 2121 | + " client = OpenAIClient()\n", |
| 2122 | + " \n", |
| 2123 | + " gen = Generator(\n", |
| 2124 | + " model_client=client,\n", |
| 2125 | + " model_kwargs={\n", |
| 2126 | + " \"model\": \"dall-e-2\",\n", |
| 2127 | + " \"image\": image_path,\n", |
| 2128 | + " \"n\": 2, # Generate 2 variations\n", |
| 2129 | + " \"size\": \"1024x1024\"\n", |
| 2130 | + " }\n", |
| 2131 | + " )\n", |
| 2132 | + " \n", |
| 2133 | + " response = gen({\"input_str\": \"\"})\n", |
| 2134 | + " print(\"\\n=== Image Variations ===\")\n", |
| 2135 | + " print(f\"Variation URLs: {response.data}\")\n", |
| 2136 | + "\n", |
| 2137 | + "def edit_image_with_mask(image_path=\"path/to/image.jpg\", mask_path=\"path/to/mask.jpg\"):\n", |
| 2138 | + " \"\"\"Example of editing specific parts of an image using a mask\"\"\"\n", |
| 2139 | + " client = OpenAIClient()\n", |
| 2140 | + " \n", |
| 2141 | + " gen = Generator(\n", |
| 2142 | + " model_client=client,\n", |
| 2143 | + " model_kwargs={\n", |
| 2144 | + " \"model\": \"dall-e-2\",\n", |
| 2145 | + " \"image\": image_path,\n", |
| 2146 | + " \"mask\": mask_path,\n", |
| 2147 | + " \"n\": 1,\n", |
| 2148 | + " \"size\": \"1024x1024\"\n", |
| 2149 | + " }\n", |
| 2150 | + " )\n", |
| 2151 | + " \n", |
| 2152 | + " response = gen({\n", |
| 2153 | + " \"input_str\": \"Replace the masked area with a beautiful sunset\"\n", |
| 2154 | + " })\n", |
| 2155 | + " print(\"\\n=== Image Editing ===\")\n", |
| 2156 | + " print(f\"Edited Image URL: {response.data}\")\n", |
| 2157 | + "\n", |
| 2158 | + "def mixed_image_text_conversation():\n", |
| 2159 | + " \"\"\"Example of having a conversation that includes both images and text\"\"\"\n", |
| 2160 | + " client = OpenAIClient()\n", |
| 2161 | + " \n", |
| 2162 | + " gen = Generator(\n", |
| 2163 | + " model_client=client,\n", |
| 2164 | + " model_kwargs={\n", |
| 2165 | + " \"model\": \"gpt-4o-mini\",\n", |
| 2166 | + " \"images\": [\n", |
| 2167 | + " \"https://raw.githubusercontent.com/openai/openai-cookbook/main/examples/images/happy_cat.jpg\",\n", |
| 2168 | + "                \"path/to/local/image.jpg\"  # Replace with your local image path\n", |
| 2169 | + " ],\n", |
| 2170 | + " \"max_tokens\": 300\n", |
| 2171 | + " }\n", |
| 2172 | + " )\n", |
| 2173 | + " \n", |
| 2174 | + "    conversation = \"\"\"<START_OF_SYSTEM_PROMPT>You are a helpful assistant skilled in analyzing images and providing detailed descriptions.<END_OF_SYSTEM_PROMPT>\n", |
| 2175 | + "    <START_OF_USER_PROMPT>I'm showing you two images. Please analyze them and tell me what emotions they convey.<END_OF_USER_PROMPT>\"\"\"\n", |
| 2176 | + " \n", |
| 2177 | + " response = gen({\"input_str\": conversation})\n", |
| 2178 | + " print(\"\\n=== Mixed Image-Text Conversation ===\")\n", |
| 2179 | + " print(f\"Assistant's Analysis: {response.raw_response}\")\n", |
| 2180 | + "\n", |
| 2181 | + "\n" |
| 2182 | + ] |
| 2183 | + }, |
| 2184 | + { |
| 2185 | + "cell_type": "code", |
| 2186 | + "execution_count": null, |
| 2187 | + "metadata": {}, |
| 2188 | + "outputs": [], |
| 2189 | + "source": [ |
| 2190 | + "if __name__ == \"__main__\":\n", |
| 2191 | + " print(\"OpenAI Image Processing Examples\\n\")\n", |
| 2192 | + " \n", |
| 2193 | + " # Basic image analysis\n", |
| 2194 | + " analyze_single_image()\n", |
| 2195 | + " \n", |
| 2196 | + " # Multiple image analysis\n", |
| 2197 | + " analyze_multiple_images()\n", |
| 2198 | + " \n", |
| 2199 | + " # Image generation\n", |
| 2200 | + " generate_art_with_dalle()\n", |
| 2201 | + " \n", |
| 2202 | + " # create_image_variations(<path_to_image>)\n", |
| 2203 | + " # edit_image_with_mask(<path_to_image>, <path_to_mask>)\n", |
| 2204 | + "    # mixed_image_text_conversation()" |
| 2205 | + ] |
| 2206 | + }, |
| 2207 | + { |
| 2208 | + "cell_type": "markdown", |
| 2209 | + "metadata": {}, |
| 2210 | + "source": [ |
| 2211 | + "# Image generation with DALL-E and image understanding" |
| 2212 | + ] |
| 2213 | + }, |
| 2214 | + { |
| 2215 | + "cell_type": "code", |
| 2216 | + "execution_count": null, |
| 2217 | + "metadata": {}, |
| 2218 | + "outputs": [], |
| 2219 | + "source": [ |
| 2220 | + "from adalflow.core import Generator\n", |
| 2221 | + "from adalflow.components.model_client.openai_client import OpenAIClient\n", |
| 2222 | + "from adalflow.core.types import ModelType" |
| 2223 | + ] |
| 2224 | + }, |
| 2225 | + { |
| 2226 | + "cell_type": "code", |
| 2227 | + "execution_count": null, |
| 2228 | + "metadata": {}, |
| 2229 | + "outputs": [], |
| 2230 | + "source": [ |
| 2231 | + "class ImageGenerator(Generator):\n", |
| 2232 | + "    \"\"\"Generator subclass that sets model_type to IMAGE_GENERATION so the model client routes the call to the image-generation API.\"\"\"\n", |
| 2233 | + " model_type = ModelType.IMAGE_GENERATION\n", |
| 2234 | + "\n", |
| 2235 | + "def test_vision_and_generation():\n", |
| 2236 | + " \"\"\"Test both vision analysis and image generation\"\"\"\n", |
| 2237 | + " client = OpenAIClient()\n", |
| 2238 | + " \n", |
| 2239 | + " # 1. Test Vision Analysis\n", |
| 2240 | + " vision_gen = Generator(\n", |
| 2241 | + " model_client=client,\n", |
| 2242 | + " model_kwargs={\n", |
| 2243 | + " \"model\": \"gpt-4o-mini\",\n", |
| 2244 | + " \"images\": \"https://upload.wikimedia.org/wikipedia/en/7/7d/Lenna_%28test_image%29.png\",\n", |
| 2245 | + " \"max_tokens\": 300\n", |
| 2246 | + " }\n", |
| 2247 | + " )\n", |
| 2248 | + " \n", |
| 2249 | + " vision_response = vision_gen({\"input_str\": \"What do you see in this image? Be detailed but concise.\"})\n", |
| 2250 | + " print(\"\\n=== Vision Analysis ===\")\n", |
| 2251 | + " print(f\"Description: {vision_response.raw_response}\")\n", |
| 2252 | + "\n", |
| 2253 | + " # 2. Test DALL-E Image Generation\n", |
| 2254 | + " dalle_gen = ImageGenerator(\n", |
| 2255 | + " model_client=client,\n", |
| 2256 | + " model_kwargs={\n", |
| 2257 | + " \"model\": \"dall-e-3\",\n", |
| 2258 | + " \"size\": \"1024x1024\",\n", |
| 2259 | + " \"quality\": \"standard\",\n", |
| 2260 | + " \"n\": 1\n", |
| 2261 | + " }\n", |
| 2262 | + " )\n", |
| 2263 | + " \n", |
| 2264 | + " # For image generation, input_str becomes the prompt\n", |
| 2265 | + " response = dalle_gen({\"input_str\": \"A happy siamese cat playing with a red ball of yarn\"})\n", |
| 2266 | + " print(\"\\n=== DALL-E Generation ===\")\n", |
| 2267 | + " print(f\"Generated Image URL: {response.data}\")" |
| 2268 | + ] |
| 2269 | + }, |
| 2270 | + { |
| 2271 | + "cell_type": "markdown", |
| 2272 | + "metadata": {}, |
| 2273 | + "source": [ |
| 2274 | + "# Invalid image URL - Generator still returns an output instead of raising" |
| 2275 | + ] |
| 2276 | + }, |
| 2277 | + { |
| 2278 | + "cell_type": "code", |
| 2279 | + "execution_count": null, |
| 2280 | + "metadata": {}, |
| 2281 | + "outputs": [], |
| 2282 | + "source": [ |
| 2283 | + "def test_invalid_image_url():\n", |
| 2284 | + " \"\"\"Test Generator output with invalid image URL\"\"\"\n", |
| 2285 | + " client = OpenAIClient()\n", |
| 2286 | + " gen = Generator(\n", |
| 2287 | + " model_client=client,\n", |
| 2288 | + " model_kwargs={\n", |
| 2289 | + " \"model\": \"gpt-4o-mini\",\n", |
| 2290 | + " \"images\": \"https://invalid.url/nonexistent.jpg\",\n", |
| 2291 | + " \"max_tokens\": 300\n", |
| 2292 | + " }\n", |
| 2293 | + " )\n", |
| 2294 | + " \n", |
| 2295 | + " print(\"\\n=== Testing Invalid Image URL ===\")\n", |
| 2296 | + " response = gen({\"input_str\": \"What do you see in this image?\"})\n", |
| 2297 | + " print(f\"Response with invalid image URL: {response}\")" |
| 2298 | + ] |
| 2299 | + }, |
| 2300 | + { |
| 2301 | + "cell_type": "code", |
| 2302 | + "execution_count": null, |
| 2303 | + "metadata": {}, |
| 2304 | + "outputs": [], |
| 2305 | + "source": [ |
| 2306 | + "if __name__ == \"__main__\":\n", |
| 2307 | + " print(\"Starting OpenAI Vision and DALL-E test...\\n\")\n", |
| 2308 | + " test_invalid_image_url()\n", |
| 2309 | + " test_vision_and_generation() " |
| 2310 | + ] |
| 2311 | + }, |
2046 | 2312 | { |
2047 | 2313 | "cell_type": "markdown", |
2048 | 2314 | "metadata": { |
|