refactor: extract common logic for VLM task inputs and answers into base ImageReasoningTaskInput and ImageReasoningAnswer classes

Magdalena Kotynia · Magdalena Kotynia · commit 89c8bbfa2469 · 2025-08-12T13:20:05.000+02:00
diff --git a/src/rai_bench/rai_bench/vlm_benchmark/interfaces.py b/src/rai_bench/rai_bench/vlm_benchmark/interfaces.py
@@ -18,7 +18,7 @@
 
 from langchain_core.messages import BaseMessage
 from langchain_core.runnables.config import DEFAULT_RECURSION_LIMIT
-from pydantic import BaseModel, ConfigDict, ValidationError
+from pydantic import BaseModel, ConfigDict, Field, ValidationError
 
 loggers_type = logging.Logger
 
@@ -55,6 +55,29 @@ class TaskValidationError(Exception):
     pass
 
 
+AnswerT = TypeVar("AnswerT")
+
+
+class ImageReasoningTaskInput(BaseModel, Generic[AnswerT]):
+    """Base input for an image reasoning task."""
+
+    question: str = Field(..., description="The question to be answered.")
+    images_paths: List[str] = Field(
+        ...,
+        description="List of image file paths to be used for answering the question.",
+    )
+    expected_answer: AnswerT = Field(
+        ..., description="The expected answer to the question."
+    )
+
+
+class ImageReasoningAnswer(BaseModel, Generic[AnswerT]):
+    """Base answer for an image reasoning task."""
+
+    answer: AnswerT = Field(..., description="The answer to the question.")
+    justification: str = Field(..., description="Justification for the answer.")
+
+
 class ImageReasoningTask(ABC, Generic[BaseModelT]):
     complexity: Literal["easy", "medium", "hard"]
     recursion_limit: int = DEFAULT_RECURSION_LIMIT
diff --git a/src/rai_bench/rai_bench/vlm_benchmark/tasks/tasks.py b/src/rai_bench/rai_bench/vlm_benchmark/tasks/tasks.py
@@ -16,76 +16,45 @@
 import logging
 from typing import List, Type
 
-from pydantic import BaseModel, Field
+from pydantic import Field
 from rai.messages import preprocess_image
 
-from rai_bench.vlm_benchmark.interfaces import ImageReasoningTask
+from rai_bench.vlm_benchmark.interfaces import (
+    ImageReasoningAnswer,
+    ImageReasoningTask,
+    ImageReasoningTaskInput,
+)
 
 loggers_type = logging.Logger
 
 
-class BoolAnswerWithJustification(BaseModel):
+class BoolAnswerWithJustification(ImageReasoningAnswer[bool]):
     """A boolean answer to the user question along with justification for the answer."""
 
-    answer: bool
-    justification: str
 
+class QuantityAnswerWithJustification(ImageReasoningAnswer[int]):
+    """A quantity answer telling the number of objects to the user question along with justification for the answer."""
 
-class QuantityAnswerWithJustification(BaseModel):
-    """A quantity answer to the user question along with justification for the answer."""
 
-    answer: int
-    justification: str
-
-
-class MultipleChoiceAnswerWithJustification(BaseModel):
+class MultipleChoiceAnswerWithJustification(ImageReasoningAnswer[List[str]]):
     """A multiple choice answer to the user question along with justification for the answer."""
 
-    answer: List[str]
-    justification: str
 
-
-class BoolImageTaskInput(BaseModel):
-    question: str = Field(..., description="The question to be answered.")
-    images_paths: List[str] = Field(
-        ...,
-        description="List of image file paths to be used for answering the question.",
-    )
-    expected_answer: bool = Field(
-        ..., description="The expected answer to the question."
-    )
+class BoolImageTaskInput(ImageReasoningTaskInput[bool]):
+    """Input for a task that requires a boolean answer to a question about an image."""
 
 
-class QuantityImageTaskInput(BaseModel):
+class QuantityImageTaskInput(ImageReasoningTaskInput[int]):
     """Input for a task that requires counting objects in an image."""
 
-    question: str = Field(..., description="The question to be answered.")
-    images_paths: List[str] = Field(
-        ...,
-        description="List of image file paths to be used for answering the question.",
-    )
-    expected_answer: int = Field(
-        ...,
-        description="The expected number of objects to be counted in the image.",
-    )
 
-
-class MultipleChoiceImageTaskInput(BaseModel):
+class MultipleChoiceImageTaskInput(ImageReasoningTaskInput[List[str]]):
     """Input for a task that requires selecting one or more answers from a list of options."""
 
-    question: str = Field(..., description="The question to be answered.")
-    images_paths: List[str] = Field(
-        ...,
-        description="List of image file paths to be used for answering the question.",
-    )
     options: List[str] = Field(
         ...,
         description="List of possible answers to the question.",
     )
-    expected_answer: List[str] = Field(
-        ...,
-        description="The expected answer to the question being a list of strings chosen from the options.",
-    )
 
 
 class BoolImageTask(ImageReasoningTask[BoolAnswerWithJustification]):