diff --git a/apps/11_promptflow/README.md b/apps/11_promptflow/README.md
index 9fc7ccb..4a30f58 100644
--- a/apps/11_promptflow/README.md
+++ b/apps/11_promptflow/README.md
@@ -177,6 +177,32 @@ $ pf run create \
 $ pf run show-details --name $RUN_NAME
 ```
 
+### image_qa
+
+To run the image QA flow with GPT-4o, we customize an LLM tool.
+The following documents provide more details:
+
+- docs: [Customizing an LLM Tool](https://microsoft.github.io/promptflow/how-to-guides/develop-a-tool/customize_an_llm_tool.html)
+- example code: [promptflow/examples/flows/chat/chat-with-image](https://github.com/microsoft/promptflow/tree/main/examples/flows/chat/chat-with-image)
+
+With the image QA flow sample, you can ask questions about an image and get answers from the model.
+
+```shell
+cd apps/11_promptflow/image_qa
+
+# Create run with multiple lines data
+$ RUN_NAME=image_qa-$(date +%s)
+$ pf run create \
+  --name $RUN_NAME \
+  --flow . \
+  --data ./data.jsonl \
+  --column-mapping image='${data.image}' \
+  --stream
+
+# Show run details
+$ pf run show-details --name $RUN_NAME
+```
+
 ## References
 
 - [Prompt flow > repos](https://github.com/microsoft/promptflow)
diff --git a/apps/11_promptflow/image_qa/.gitignore b/apps/11_promptflow/image_qa/.gitignore
new file mode 100644
index 0000000..61bd725
--- /dev/null
+++ b/apps/11_promptflow/image_qa/.gitignore
@@ -0,0 +1,5 @@
+.env
+__pycache__/
+.promptflow/*
+!.promptflow/flow.tools.json
+.runs/
diff --git a/apps/11_promptflow/image_qa/.promptflow/flow.tools.json b/apps/11_promptflow/image_qa/.promptflow/flow.tools.json
new file mode 100644
index 0000000..7cf2b94
--- /dev/null
+++ b/apps/11_promptflow/image_qa/.promptflow/flow.tools.json
@@ -0,0 +1,37 @@
+{
+  "package": {},
+  "code": {
+    "hello.py": {
+      "type": "python",
+      "inputs": {
+        "connection": {
+          "type": [
+            "AzureOpenAIConnection"
+          ]
+        },
+        "image": {
+          "type": [
+            "image"
+          ]
+        },
+        "model": {
+          "type": [
+            "string"
+          ]
+        },
+        "system_prompt": {
+          "type": [
+            "string"
+          ]
+        },
+        "user_prompt": {
+          "type": [
+            "string"
+          ]
+        }
+      },
+      "source": "hello.py",
+      "function": "my_python_tool"
+    }
+  }
+}
diff --git a/apps/11_promptflow/image_qa/data.jsonl b/apps/11_promptflow/image_qa/data.jsonl
new file mode 100644
index 0000000..2500c35
--- /dev/null
+++ b/apps/11_promptflow/image_qa/data.jsonl
@@ -0,0 +1 @@
+{"image": "../../../datasets/contoso-receipt.png"}
\ No newline at end of file
diff --git a/apps/11_promptflow/image_qa/flow.dag.yaml b/apps/11_promptflow/image_qa/flow.dag.yaml
new file mode 100644
index 0000000..b2c9758
--- /dev/null
+++ b/apps/11_promptflow/image_qa/flow.dag.yaml
@@ -0,0 +1,32 @@
+$schema: https://azuremlschemas.azureedge.net/promptflow/latest/Flow.schema.json
+environment:
+  python_requirements_txt: requirements.txt
+inputs:
+  user_prompt:
+    type: string
+    default: Please extract texts from the image
+  system_prompt:
+    type: string
+    default: You are an excellent OCR tool
+  image:
+    type: image
+    default: ../../../datasets/contoso-receipt.png
+  model:
+    type: string
+    default: gpt-4o
+outputs:
+  output_prompt:
+    type: string
+    reference: ${image_qa.output}
+nodes:
+- name: image_qa
+  type: python
+  source:
+    type: code
+    path: hello.py
+  inputs:
+    connection: open_ai_connection
+    image: ${inputs.image}
+    system_prompt: ${inputs.system_prompt}
+    user_prompt: ${inputs.user_prompt}
+    model: ${inputs.model}
diff --git a/apps/11_promptflow/image_qa/hello.py b/apps/11_promptflow/image_qa/hello.py
new file mode 100644
index 0000000..a59a278
--- /dev/null
+++ b/apps/11_promptflow/image_qa/hello.py
@@ -0,0 +1,50 @@
+import base64
+import io
+
+from openai import AzureOpenAI
+from promptflow.connections import AzureOpenAIConnection
+from promptflow.contracts.multimedia import Image
+from promptflow.core import tool
+
+
+@tool
+def my_python_tool(
+    connection: AzureOpenAIConnection,
+    image: Image,
+    model: str,
+    system_prompt: str,
+    user_prompt: str,
+) -> str:
+    """Answer *user_prompt* about *image* using an Azure OpenAI vision model."""
+    image_stream = io.BytesIO(image)
+    encoded_image = base64.b64encode(image_stream.read()).decode("utf-8")
+
+    client = AzureOpenAI(
+        api_key=connection.api_key,
+        api_version=connection.api_version,
+        azure_endpoint=connection.api_base,
+    )
+    response = client.chat.completions.create(
+        model=model,
+        messages=[
+            {
+                "role": "system",
+                "content": system_prompt,
+            },
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "image_url",
+                        # the sample input is a PNG, so declare the matching MIME type
+                        "image_url": {"url": f"data:image/png;base64,{encoded_image}"},
+                    },
+                    {
+                        "type": "text",
+                        "text": user_prompt,
+                    },
+                ],
+            },
+        ],
+    )
+    return response.choices[0].message.content
diff --git a/apps/11_promptflow/image_qa/requirements.txt b/apps/11_promptflow/image_qa/requirements.txt
new file mode 100644
index 0000000..7a54870
--- /dev/null
+++ b/apps/11_promptflow/image_qa/requirements.txt
@@ -0,0 +1 @@
+promptflow
diff --git a/datasets/contoso-receipt.png b/datasets/contoso-receipt.png
new file mode 100644
index 0000000..33e85d6
Binary files /dev/null and b/datasets/contoso-receipt.png differ