tests for VLM calls

HendrikStrobelt · HendrikStrobelt · commit 542b9f15fc01 · 2025-09-10T11:59:42.000-04:00
diff --git a/test/stdlib_basics/test_vision.py b/test/stdlib_basics/test_vision.py
@@ -0,0 +1,101 @@
+import base64
+import os
+from io import BytesIO
+
+import numpy as np
+from PIL import Image
+import pytest
+
+from mellea import start_session, MelleaSession
+from mellea.backends import ModelOption
+from mellea.stdlib.base import ImageBlock, ModelOutputThunk
+from mellea.stdlib.chat import Message
+
+
+@pytest.fixture(scope="module")
+def m_session(gh_run):
+    if gh_run == 1:
+        m = start_session(
+            "openai",
+            model_id="llama3.2:1b",
+            base_url=f"http://{os.environ.get('OLLAMA_HOST', 'localhost:11434')}/v1",
+            api_key="ollama",
+            model_options={ModelOption.MAX_NEW_TOKENS: 5},
+        )
+    else:
+        m = start_session(
+            "ollama",
+            model_id="granite3.2-vision",
+            model_options={ModelOption.MAX_NEW_TOKENS: 5},
+        )
+    yield m
+    del m
+
+
+@pytest.fixture(scope="module")
+def pil_image():
+    width = 200
+    height = 150
+    random_pixel_data = np.random.randint(0, 256, size=(height, width, 3), dtype=np.uint8)
+    random_image = Image.fromarray(random_pixel_data, 'RGB')
+    yield random_image
+    del random_image
+
+
+def test_image_block_construction(pil_image: Image.Image):
+    # create base64 PNG string from image:
+    buffered = BytesIO()
+    pil_image.save(buffered, format="PNG")
+    img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
+
+    image_block = ImageBlock(img_str)
+    assert isinstance(image_block, ImageBlock)
+    assert isinstance(image_block._value, str)
+
+
+def test_image_block_construction_from_pil(pil_image: Image.Image):
+    image_block = ImageBlock.from_pil_image(pil_image)
+    assert isinstance(image_block, ImageBlock)
+    assert isinstance(image_block._value, str)
+    assert ImageBlock.is_valid_base64_png(str(image_block))
+
+
+def test_image_block_in_instruction(m_session: MelleaSession, pil_image: Image.Image, gh_run: int):
+    image_block = ImageBlock.from_pil_image(pil_image)
+    instr = m_session.instruct("Is this image mainly blue? Answer yes or no.", images=[image_block])
+    assert isinstance(instr, ModelOutputThunk)
+
+    # if not on GH
+    if not gh_run == 1:
+        assert "yes" in instr.value.lower() or "no" in instr.value.lower()
+
+    # make sure you get the last prompt
+    lp = m_session.last_prompt()
+    assert isinstance(lp, list)
+    assert "images" in lp[0]
+
+    # first image in image list should be the same as the image block
+    image0_str = lp[0]["images"][0]
+    assert image0_str == image_block._value
+
+
+def test_image_block_in_chat(m_session: MelleaSession, pil_image: Image.Image, gh_run: int):
+    ct = m_session.chat("Is this image mainly blue? Answer yes or no.", images=[pil_image])
+    assert isinstance(ct, Message)
+
+    # if not on GH
+    if not gh_run == 1:
+        assert "yes" in ct.content.lower() or "no" in ct.content.lower()
+
+    # make sure you get the last prompt
+    lp = m_session.last_prompt()
+    assert isinstance(lp, list)
+    assert "images" in lp[0]
+
+    # first image in image list should be the same as the image block
+    image0_str = lp[0]["images"][0]
+    assert image0_str == ImageBlock.from_pil_image(pil_image)._value
+
+
+if __name__ == "__main__":
+    pytest.main([__file__])