Merge pull request #104 from ks6088ts-labs/feature/issue-103_image-inference-flow

ks6088ts · web-flow · commit ae906b06a634 · 2024-08-30T17:20:12.000+09:00
add image qa flow with custom llm tool
diff --git a/apps/11_promptflow/README.md b/apps/11_promptflow/README.md
@@ -177,6 +177,32 @@ $ pf run create \
 $ pf run show-details --name $RUN_NAME
 ```
 
+### image_qa
+
+To run the image QA flow with GPT-4o, we customize an LLM tool.
+The following documents provide more details:
+
+- docs: [Customizing an LLM Tool](https://microsoft.github.io/promptflow/how-to-guides/develop-a-tool/customize_an_llm_tool.html)
+- example code: [promptflow/examples/flows/chat/chat-with-image](https://github.com/microsoft/promptflow/tree/main/examples/flows/chat/chat-with-image)
+
+With the image QA flow sample, you can ask questions about an image and get answers from the model.
+
+```shell
+cd apps/11_promptflow/image_qa
+
+# Create run with multiple lines data
+$ RUN_NAME=image_qa-$(date +%s)
+$ pf run create \
+    --name $RUN_NAME \
+    --flow . \
+    --data ./data.jsonl \
+    --column-mapping image='${data.image}' \
+    --stream
+
+# Show run details
+$ pf run show-details --name $RUN_NAME
+```
+
 ## References
 
 - [Prompt flow > repos](https://github.com/microsoft/promptflow)
diff --git a/apps/11_promptflow/image_qa/.gitignore b/apps/11_promptflow/image_qa/.gitignore
@@ -0,0 +1,5 @@
+.env
+__pycache__/
+.promptflow/*
+!.promptflow/flow.tools.json
+.runs/
diff --git a/apps/11_promptflow/image_qa/.promptflow/flow.tools.json b/apps/11_promptflow/image_qa/.promptflow/flow.tools.json
@@ -0,0 +1,37 @@
+{
+  "package": {},
+  "code": {
+    "hello.py": {
+      "type": "python",
+      "inputs": {
+        "connection": {
+          "type": [
+            "AzureOpenAIConnection"
+          ]
+        },
+        "image": {
+          "type": [
+            "image"
+          ]
+        },
+        "model": {
+          "type": [
+            "string"
+          ]
+        },
+        "system_prompt": {
+          "type": [
+            "string"
+          ]
+        },
+        "user_prompt": {
+          "type": [
+            "string"
+          ]
+        }
+      },
+      "source": "hello.py",
+      "function": "my_python_tool"
+    }
+  }
+}
diff --git a/apps/11_promptflow/image_qa/data.jsonl b/apps/11_promptflow/image_qa/data.jsonl
@@ -0,0 +1 @@
+{"image": "../../../datasets/contoso-receipt.png"}
diff --git a/apps/11_promptflow/image_qa/flow.dag.yaml b/apps/11_promptflow/image_qa/flow.dag.yaml
@@ -0,0 +1,32 @@
+$schema: https://azuremlschemas.azureedge.net/promptflow/latest/Flow.schema.json
+environment:
+  python_requirements_txt: requirements.txt
+inputs:
+  user_prompt:
+    type: string
+    default: Please extract texts from the image
+  system_prompt:
+    type: string
+    default: You are an excellent OCR tool
+  image:
+    type: image
+    default: ../../../datasets/contoso-receipt.png
+  model:
+    type: string
+    default: gpt-4o
+outputs:
+  output_prompt:
+    type: string
+    reference: ${image_qa.output}
+nodes:
+- name: image_qa
+  type: python
+  source:
+    type: code
+    path: hello.py
+  inputs:
+    connection: open_ai_connection
+    image: ${inputs.image}
+    system_prompt: ${inputs.system_prompt}
+    user_prompt: ${inputs.user_prompt}
+    model: ${inputs.model}
diff --git a/apps/11_promptflow/image_qa/hello.py b/apps/11_promptflow/image_qa/hello.py
@@ -0,0 +1,48 @@
+import base64
+import io
+
+from openai import AzureOpenAI
+from promptflow.connections import AzureOpenAIConnection
+from promptflow.contracts.multimedia import Image
+from promptflow.core import tool
+
+
+@tool
+def my_python_tool(
+    connection: AzureOpenAIConnection,
+    image: Image,
+    model: str,
+    system_prompt: str,
+    user_prompt: str,
+) -> str:
+    image_stream = io.BytesIO(image)
+    encoded_image = base64.b64encode(image_stream.read()).decode("utf-8")
+
+    client = AzureOpenAI(
+        api_key=connection.api_key,
+        api_version=connection.api_version,
+        azure_endpoint=connection.api_base,
+    )
+    response = client.chat.completions.create(
+        model=model,
+        messages=[
+            {
+                "role": "system",
+                "content": system_prompt,
+            },
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "image_url",
+                        "image_url": {"url": f"data:image/jpeg;base64,{encoded_image}"},
+                    },
+                    {
+                        "type": "text",
+                        "text": user_prompt,
+                    },
+                ],
+            },
+        ],
+    )
+    return response.choices[0].message.content
diff --git a/apps/11_promptflow/image_qa/requirements.txt b/apps/11_promptflow/image_qa/requirements.txt
@@ -0,0 +1 @@
+promptflow
diff --git a/datasets/contoso-receipt.png b/datasets/contoso-receipt.png

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+{"image": "../../../datasets/contoso-receipt.png"}`