@@ -60,14 +60,32 @@ def _process_images(images: torch.Tensor) -> List[Any]:
6060
6161# Pydantic models for structured generation
6262class VQAnswer (BaseModel ):
63- """Structured output for VQA."""
63+ """
64+ Structured output for VQA.
65+
66+ Parameters
67+ ----------
68+ answer : str
69+ The VQA answer text.
70+ confidence : float, optional
71+ Confidence score. Default is 1.0.
72+ """
6473
6574 answer : str
6675 confidence : float = 1.0
6776
6877
6978class ScoreOutput (BaseModel ):
70- """Structured output for scoring metrics."""
79+ """
80+ Structured output for scoring metrics.
81+
82+ Parameters
83+ ----------
84+ score : float
85+ The numeric score.
86+ reasoning : str | None, optional
87+ Optional reasoning for the score. Default is None.
88+ """
7189
7290 score : float
7391 reasoning : Optional [str ] = None
@@ -89,6 +107,8 @@ class VQAMetric(StatefulMetric):
89107
90108 Parameters
91109 ----------
110+ *args : Any
111+ Additional positional arguments.
92112 vlm_type : {"litellm", "transformers"}, optional
93113 VLM backend to use. Default is "litellm".
94114 model_name : str, optional
@@ -101,6 +121,8 @@ class VQAMetric(StatefulMetric):
101121 Device for transformers VLM.
102122 api_key : str | None, optional
103123 API key for litellm.
124+ call_type : str, optional
125+ Call type for the metric.
104126 **kwargs : Any
105127 Additional arguments.
106128 """
@@ -190,10 +212,22 @@ class AlignmentScoreMetric(StatefulMetric):
190212
191213 Parameters
192214 ----------
215+ *args : Any
216+ Additional positional arguments.
193217 vlm_type : {"litellm", "transformers"}, optional
194218 VLM backend. Default is "litellm".
219+ model_name : str, optional
220+ Model name. Default is "gpt-4o".
195221 structured_output : bool, optional
196222 Use structured generation. Default is True.
223+ use_outlines : bool, optional
224+ Use outlines for transformers. Default is False.
225+ device : str | torch.device | None, optional
226+ Device for transformers VLM.
227+ api_key : str | None, optional
228+ API key for litellm.
229+ call_type : str, optional
230+ Call type for the metric.
197231 **kwargs : Any
198232 Additional arguments.
199233 """
@@ -277,6 +311,27 @@ class ImageEditScoreMetric(StatefulMetric):
277311 References
278312 ----------
279313 VieScore: https://github.com/ByteDance/IEA-eval
314+
315+ Parameters
316+ ----------
317+ *args : Any
318+ Additional positional arguments.
319+ vlm_type : {"litellm", "transformers"}, optional
320+ VLM backend. Default is "litellm".
321+ model_name : str, optional
322+ Model name. Default is "gpt-4o".
323+ structured_output : bool, optional
324+ Use structured generation. Default is True.
325+ use_outlines : bool, optional
326+ Use outlines for transformers. Default is False.
327+ device : str | torch.device | None, optional
328+ Device for transformers VLM.
329+ api_key : str | None, optional
330+ API key for litellm.
331+ call_type : str, optional
332+ Call type for the metric.
333+ **kwargs : Any
334+ Additional arguments.
280335 """
281336
282337 scores : List [float ]
@@ -361,6 +416,27 @@ class QAAccuracyMetric(StatefulMetric):
361416
362417 Uses VLM to answer questions about images.
363418 Higher scores indicate better image understanding.
419+
420+ Parameters
421+ ----------
422+ *args : Any
423+ Additional positional arguments.
424+ vlm_type : {"litellm", "transformers"}, optional
425+ VLM backend. Default is "litellm".
426+ model_name : str, optional
427+ Model name. Default is "gpt-4o".
428+ structured_output : bool, optional
429+ Use structured generation. Default is True.
430+ use_outlines : bool, optional
431+ Use outlines for transformers. Default is False.
432+ device : str | torch.device | None, optional
433+ Device for transformers VLM.
434+ api_key : str | None, optional
435+ API key for litellm.
436+ call_type : str, optional
437+ Call type for the metric.
438+ **kwargs : Any
439+ Additional arguments.
364440 """
365441
366442 scores : List [float ]
@@ -437,6 +513,27 @@ class TextScoreMetric(StatefulMetric):
437513
438514 Uses VLM for OCR to extract text and compare with ground truth.
439515 Lower scores (edit distance) are better.
516+
517+ Parameters
518+ ----------
519+ *args : Any
520+ Additional positional arguments.
521+ vlm_type : {"litellm", "transformers"}, optional
522+ VLM backend. Default is "litellm".
523+ model_name : str, optional
524+ Model name. Default is "gpt-4o".
525+ structured_output : bool, optional
526+ Use structured generation. Default is True.
527+ use_outlines : bool, optional
528+ Use outlines for transformers. Default is False.
529+ device : str | torch.device | None, optional
530+ Device for transformers VLM.
531+ api_key : str | None, optional
532+ API key for litellm.
533+ call_type : str, optional
534+ Call type for the metric.
535+ **kwargs : Any
536+ Additional arguments.
440537 """
441538
442539 scores : List [float ]
@@ -522,6 +619,27 @@ class VieScoreMetric(StatefulMetric):
522619 - Semantic score: How well image follows prompt
523620 - Quality score: Naturalness and artifacts
524621 - Overall: Geometric mean of semantic and quality
622+
623+ Parameters
624+ ----------
625+ *args : Any
626+ Additional positional arguments.
627+ vlm_type : {"litellm", "transformers"}, optional
628+ VLM backend. Default is "litellm".
629+ model_name : str, optional
630+ Model name. Default is "gpt-4o".
631+ structured_output : bool, optional
632+ Use structured generation. Default is True.
633+ use_outlines : bool, optional
634+ Use outlines for transformers. Default is False.
635+ device : str | torch.device | None, optional
636+ Device for transformers VLM.
637+ api_key : str | None, optional
638+ API key for litellm.
639+ call_type : str, optional
640+ Call type for the metric.
641+ **kwargs : Any
642+ Additional arguments.
525643 """
526644
527645 scores : List [float ]
0 commit comments