|
| 1 | +# Copyright (c) Microsoft. All rights reserved. |
| 2 | +import asyncio |
| 3 | +import os |
| 4 | +import tempfile |
| 5 | + |
| 6 | +from semantic_kernel.agents import OpenAIResponsesAgent |
| 7 | +from semantic_kernel.connectors.ai.open_ai import OpenAISettings |
| 8 | +from semantic_kernel.contents.binary_content import BinaryContent |
| 9 | +from semantic_kernel.contents.chat_message_content import ChatMessageContent |
| 10 | +from semantic_kernel.contents.text_content import TextContent |
| 11 | +from semantic_kernel.contents.utils.author_role import AuthorRole |
| 12 | + |
| 13 | +""" |
| 14 | +The following sample demonstrates how to upload PDF and text files using BinaryContent |
| 15 | +with an OpenAI Responses Agent. This shows how to create BinaryContent objects from files |
| 16 | +and compose multi-modal messages that combine text and binary content. |
| 17 | +
|
| 18 | +The sample demonstrates: |
| 19 | +1. Creating BinaryContent from a PDF file |
| 20 | +2. Creating BinaryContent from a text file |
| 21 | +3. Composing multi-modal messages with mixed content types (text + binary) |
| 22 | +4. Sending complex messages directly to the agent via the messages parameter |
| 23 | +5. Having the agent process and respond to questions about the uploaded files |
| 24 | +
|
| 25 | +This approach differs from simple string-based interactions by showing how to combine |
| 26 | +multiple content types within a single message, which is useful for rich media interactions. |
| 27 | +
|
| 28 | +Note: This sample uses the existing employees.pdf file from the resources directory. |
| 29 | +""" |
| 30 | + |
# Sample follow-up questions to demonstrate continued conversation:
# sent one at a time, on the same thread, after the initial file-upload message.
USER_INPUTS: list[str] = [
    "What specific types of files did I just upload?",
    "Can you tell me about the content in the PDF file?",
    "What does the text file contain?",
    "Can you provide a summary of both documents?",
]
| 38 | + |
| 39 | + |
def create_sample_text_content() -> str:
    """Create sample text content for demonstration purposes.

    The content is a small, self-contained company policy document that the
    sample writes to a temporary ``.txt`` file and uploads to the agent.

    Returns:
        str: A sample company policy document in plain-text format, ending
        with a trailing newline.
    """
    # Fixed "Managers approval" -> "Manager's approval" (typo in the
    # user-facing sample document).
    return """Company Policy Document - Remote Work Guidelines

This document outlines our company's remote work policies and procedures.

Remote Work Eligibility:
- Full-time employees with at least 6 months tenure
- Manager's approval required
- Home office setup must meet security requirements

Work Schedule:
- Core hours: 10 AM - 3 PM local time
- Flexible start/end times outside core hours
- Maximum 3 remote days per week for hybrid roles

Communication Requirements:
- Daily check-ins with team lead
- Weekly video conference participation
- Response time: within 4 hours during business hours

Equipment and Security:
- Company-provided laptop and VPN access
- Secure Wi-Fi connection required
- No public Wi-Fi for work activities

For questions about remote work policies, contact HR at remote-work@company.com
"""
| 72 | + |
| 73 | + |
async def _stream_and_print(agent, messages, thread):
    """Stream one agent response, echoing chunks to stdout as they arrive.

    Args:
        agent: The OpenAIResponsesAgent to invoke.
        messages: The message(s) to send — a ChatMessageContent or a plain string.
        thread: The conversation thread to continue, or None to start a new one.

    Returns:
        The thread carried on the streamed responses, so the caller can
        continue the same conversation.
    """
    first_chunk = True
    async for response in agent.invoke_stream(messages=messages, thread=thread):
        # Capture the thread from each chunk; on the first call this is the
        # newly created thread for the conversation.
        thread = response.thread
        if first_chunk:
            print(f"# {response.name}: ", end="", flush=True)
            first_chunk = False
        print(response.content, end="", flush=True)
    print()  # New line after response
    return thread


async def main():
    """Run the BinaryContent file-upload sample end to end.

    Builds BinaryContent from a PDF and a temporary text file, sends both in a
    single multi-modal message to an OpenAI Responses Agent, then asks the
    follow-up questions in USER_INPUTS on the same thread. Requires OpenAI
    credentials in the environment.
    """
    # 1. Initialize the OpenAI client
    client = OpenAIResponsesAgent.create_client()

    # 2. Prepare file paths and create sample content.
    # employees.pdf lives three directories above this script, under
    # resources/file_search.
    pdf_file_path = os.path.join(
        os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))),
        "resources",
        "file_search",
        "employees.pdf",
    )

    # Create a temporary text file for demonstration purposes.
    # delete=False keeps the file on disk after the `with` block (an open
    # NamedTemporaryFile cannot be reopened on Windows); it is removed in the
    # `finally` clause below.
    with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False) as text_file:
        text_content = create_sample_text_content()
        text_file.write(text_content)
        text_file_path = text_file.name

    try:
        # 3. Create BinaryContent objects from files using different methods
        print("Creating BinaryContent from files...")

        # Method 1: Create BinaryContent from an existing PDF file
        pdf_binary_content = BinaryContent.from_file(file_path=pdf_file_path, mime_type="application/pdf")
        print(f"Created PDF BinaryContent: {pdf_binary_content.mime_type}, can_read: {pdf_binary_content.can_read}")

        # Method 2: Create BinaryContent from the temporary text file
        text_binary_content = BinaryContent.from_file(file_path=text_file_path, mime_type="text/plain")
        print(f"Created text BinaryContent: {text_binary_content.mime_type}, can_read: {text_binary_content.can_read}")

        # Method 3: Create BinaryContent directly from in-memory data,
        # avoiding file I/O entirely.
        # NOTE(review): `data` here is raw UTF-8 bytes, not base64 text —
        # confirm data_format="base64" is the intended serialization hint.
        alternative_text_content = BinaryContent(
            data=text_content.encode("utf-8"), mime_type="text/plain", data_format="base64"
        )
        print(f"Alternative text BinaryContent: {alternative_text_content.mime_type}")

        # 4. Initialize the OpenAI Responses Agent with file analysis capabilities.
        # Fall back to gpt-4o when no responses model is configured.
        settings = OpenAISettings()
        responses_model = settings.responses_model_id or "gpt-4o"

        agent = OpenAIResponsesAgent(
            ai_model_id=responses_model,
            client=client,
            instructions=(
                "You are a helpful assistant that can analyze uploaded files. "
                "When users upload files, examine their content and provide helpful insights. "
                "You can identify file types, summarize content, and answer questions about the files."
            ),
            name="FileAnalyzer",
        )

        # 5. Compose a multi-modal message combining text instructions and both
        # file attachments in a single ChatMessageContent — the pattern for
        # rich, mixed-content interactions.
        initial_message = ChatMessageContent(
            role=AuthorRole.USER,
            items=[
                TextContent(text="I'm uploading a PDF document and a text file for you to analyze."),
                pdf_binary_content,
                text_binary_content,
            ],
        )

        # 6. Conduct a conversation with the agent about the uploaded files
        thread = None

        # Send the initial multi-modal message containing file uploads
        print("\n# User: 'I'm uploading a PDF document and a text file for you to analyze.'")
        thread = await _stream_and_print(agent, initial_message, thread)

        # Continue the conversation with text-based follow-up questions
        for user_input in USER_INPUTS:
            print(f"\n# User: '{user_input}'")
            thread = await _stream_and_print(agent, user_input, thread)

    finally:
        # 7. Clean up the temporary text file (the PDF is a shared resource
        # and is left in place).
        if os.path.exists(text_file_path):
            os.unlink(text_file_path)

    print("\n" + "=" * 60)
    print("Sample completed!")
    print("\nKey points about BinaryContent:")
    print("1. Use BinaryContent.from_file() to create from existing files")
    print("2. Use BinaryContent(data=...) to create from bytes/string data")
    print("3. Specify appropriate mime_type for proper handling")
    print("4. BinaryContent can be included in chat messages alongside text")
    print("5. The OpenAI Responses API will process supported file types")
    print("\nSupported file types include:")
    print("- PDF documents (application/pdf)")
    print("- Text files (text/plain)")
| 186 | + |
# Script entry point: run the async sample on a fresh event loop.
if __name__ == "__main__":
    asyncio.run(main())
0 commit comments