@@ -3,8 +3,14 @@

 import aiohttp
 import pytest
+from openai.types import CompletionUsage
+from openai.types.chat import ChatCompletion, ChatCompletionMessage
+from openai.types.chat.chat_completion import Choice

-from prepdocslib.mediadescriber import ContentUnderstandingDescriber
+from prepdocslib.mediadescriber import (
+    ContentUnderstandingDescriber,
+    MultimodalModelDescriber,
+)

 from .mocks import MockAzureCredential, MockResponse

@@ -133,3 +139,115 @@ def mock_put(self, *args, **kwargs):
     )
     with pytest.raises(Exception):
         await describer_bad_analyze.describe_image(b"imagebytes")
+
+
+class MockAsyncOpenAI:
+    def __init__(self, test_response):
+        self.chat = type("MockChat", (), {})()
+        self.chat.completions = MockChatCompletions(test_response)
+
+
+class MockChatCompletions:
+    def __init__(self, test_response):
+        self.test_response = test_response
+        self.create_calls = []
+
+    async def create(self, *args, **kwargs):
+        self.create_calls.append(kwargs)
+        return self.test_response
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "model, deployment, expected_model_param",
+    [
+        ("gpt-4o-mini", None, "gpt-4o-mini"),  # Test with model name only
+        ("gpt-4-vision-preview", "my-vision-deployment", "my-vision-deployment"),  # Test with deployment name
+    ],
+)
+async def test_multimodal_model_describer(monkeypatch, model, deployment, expected_model_param):
+    # Sample image bytes - a minimal valid PNG
+    image_bytes = b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01\x08\x04\x00\x00\x00\xb5\x1c\x0c\x02\x00\x00\x00\x0bIDATx\xdac\xfc\xff\xff?\x00\x05\xfe\x02\xfe\xa3\xb8\xfb\x26\x00\x00\x00\x00IEND\xaeB`\x82"
+
+    # Expected description from the model
+    expected_description = "This is a chart showing financial data trends over time."
+
+    # Create a mock OpenAI chat completion response
+    mock_response = ChatCompletion(
+        id="chatcmpl-123",
+        choices=[
+            Choice(
+                index=0,
+                message=ChatCompletionMessage(content=expected_description, role="assistant"),
+                finish_reason="stop",
+            )
+        ],
+        created=1677652288,
+        model=expected_model_param,
+        object="chat.completion",
+        usage=CompletionUsage(completion_tokens=25, prompt_tokens=50, total_tokens=75),
+    )
+
+    # Create mock OpenAI client
+    mock_openai_client = MockAsyncOpenAI(mock_response)
+
+    # Create the describer with the mock client
+    describer = MultimodalModelDescriber(openai_client=mock_openai_client, model=model, deployment=deployment)
+
+    # Call the method under test
+    result = await describer.describe_image(image_bytes)
+
+    # Verify the result matches our expected description
+    assert result == expected_description
+
+    # Verify the API was called with the correct parameters
+    assert len(mock_openai_client.chat.completions.create_calls) == 1
+    call_args = mock_openai_client.chat.completions.create_calls[0]
+
+    # Check model parameter - should be either the model or deployment based on our test case
+    assert call_args["model"] == expected_model_param
+
+    # Check that max_tokens was set
+    assert call_args["max_tokens"] == 500
+
+    # Check system message
+    messages = call_args["messages"]
+    assert len(messages) == 2
+    assert messages[0]["role"] == "system"
+    assert "helpful assistant" in messages[0]["content"]
+
+    # Check user message with image
+    assert messages[1]["role"] == "user"
+    assert len(messages[1]["content"]) == 2
+    assert messages[1]["content"][0]["type"] == "text"
+    assert "Describe image" in messages[1]["content"][0]["text"]
+    assert messages[1]["content"][1]["type"] == "image_url"
+    assert "data:image/png;base64," in messages[1]["content"][1]["image_url"]["url"]
+
+
+@pytest.mark.asyncio
+async def test_multimodal_model_describer_empty_response(monkeypatch):
+    # Sample image bytes
+    image_bytes = b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01\x08\x04\x00\x00\x00\xb5\x1c\x0c\x02\x00\x00\x00\x0bIDATx\xdac\xfc\xff\xff?\x00\x05\xfe\x02\xfe\xa3\xb8\xfb\x26\x00\x00\x00\x00IEND\xaeB`\x82"
+
+    # Create mock response with empty content
+    mock_response = ChatCompletion(
+        id="chatcmpl-789",
+        choices=[],  # Empty choices array
+        created=1677652288,
+        model="gpt-4o-mini",
+        object="chat.completion",
+        usage=CompletionUsage(completion_tokens=0, prompt_tokens=50, total_tokens=50),
+    )
+
+    # Create mock OpenAI client
+    mock_openai_client = MockAsyncOpenAI(mock_response)
+
+    # Create the describer
+    describer = MultimodalModelDescriber(openai_client=mock_openai_client, model="gpt-4o-mini", deployment=None)
+
+    # Call the method under test
+    result = await describer.describe_image(image_bytes)
+
+    # Verify that an empty string is returned when no choices in response
+    assert result == ""
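
For context, here is a minimal sketch of what these tests imply MultimodalModelDescriber.describe_image looks like. It is a reconstruction from the assertions above, not the actual prepdocslib implementation: the exact prompt wording is assumed (the tests only check for the substrings "helpful assistant" and "Describe image"), while the data-URI encoding, max_tokens=500, the deployment-name fallback, and the empty-choices behavior are taken directly from what the tests assert.

import base64


class MultimodalModelDescriber:
    # Hypothetical reconstruction; the real class lives in
    # prepdocslib.mediadescriber and may differ in detail.
    def __init__(self, openai_client, model: str, deployment: str | None = None):
        self.openai_client = openai_client
        self.model = model
        self.deployment = deployment

    async def describe_image(self, image_bytes: bytes) -> str:
        # The tests assert a data URI prefix, so the bytes are base64-encoded inline
        image_url = "data:image/png;base64," + base64.b64encode(image_bytes).decode("ascii")
        # Azure OpenAI routes requests by deployment name, so prefer it when set
        response = await self.openai_client.chat.completions.create(
            model=self.deployment if self.deployment else self.model,
            max_tokens=500,
            messages=[
                # Exact wording assumed; the tests only check these substrings
                {"role": "system", "content": "You are a helpful assistant that describes images."},
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": "Describe image in detail."},
                        {"type": "image_url", "image_url": {"url": image_url}},
                    ],
                },
            ],
        )
        # The empty-response test expects "" when the API returns no choices
        if not response.choices or response.choices[0].message.content is None:
            return ""
        return response.choices[0].message.content

The deployment-vs-model split is why the parametrized test checks expected_model_param: against Azure OpenAI the model argument must carry the deployment name, whereas against openai.com it carries the model name itself.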