forked from Vijayavallabh/ISRO-GeoNLI
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapi_models.py
More file actions
85 lines (67 loc) · 2.65 KB
/
api_models.py
File metadata and controls
85 lines (67 loc) · 2.65 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
"""
API Request/Response Models
Pydantic models for FastAPI endpoints supporting captioning, grounding, and VQA tasks.
"""
from typing import Optional, Dict, Any
from pydantic import BaseModel
# ============================================================================
# Request/Response Models
# ============================================================================
class ImageMetadata(BaseModel):
    """Optional metadata about the input image.

    Every field defaults to ``None``, so an empty object is accepted.
    """
    # Image width; presumably in pixels -- TODO confirm against callers.
    width: Optional[int] = None
    # Image height; presumably in pixels -- TODO confirm against callers.
    height: Optional[int] = None
    # The ``_m`` suffix suggests metres (likely ground distance per pixel);
    # the unit is not established in this file -- TODO confirm.
    spatial_resolution_m: Optional[float] = None
class InputImage(BaseModel):
    """
    Input image specification supporting three input methods:
    - image_url: HTTP/HTTPS URL to download image
    - image_base64: Base64-encoded image data (with or without data URL prefix)
    - image_path: Local filesystem path to image file

    All fields are optional at the model level and default to ``None``.
    """
    # NOTE(review): this class declares no validator requiring that one of
    # image_url / image_base64 / image_path be set, nor any precedence between
    # them. Presumably the consuming endpoint handles both -- confirm there.

    # Optional caller-supplied identifier for the image.
    image_id: Optional[str] = None
    image_url: Optional[str] = None
    image_base64: Optional[str] = None
    image_path: Optional[str] = None  # Local file system path
    # Optional nested ImageMetadata (width / height / spatial resolution).
    metadata: Optional[ImageMetadata] = None
class CaptionQuery(BaseModel):
    """Request for image caption generation.

    ``instruction`` may be omitted; it then takes the generic captioning
    prompt declared as the field default.
    """
    # Prompt text for the captioning task; optional because of the default.
    instruction: str = "Generate a detailed caption."
class GroundingQuery(BaseModel):
    """Request for object detection/grounding in image.

    ``instruction`` is required: no default is declared for it.
    """
    # Text describing what to locate; presumably a natural-language phrase
    # naming the target object(s) -- confirm against the grounding handler.
    instruction: str
class AttributeQuery(BaseModel):
    """
    Request for Visual Question Answering (VQA).

    Supports three question types, each an optional string-to-string mapping:
    - binary: Yes/No questions
    - numeric: Questions about counts/quantities
    - semantic: Questions about attributes/types/colors

    An optional ``spatial_resolution_m`` float may accompany the questions.
    All four fields default to ``None``.
    """
    # NOTE(review): the keys expected inside each mapping are not defined by
    # this model (any str -> str dict validates); presumably they follow the
    # query.json schema used by the structured endpoint -- confirm.
    binary: Optional[Dict[str, str]] = None
    numeric: Optional[Dict[str, str]] = None
    semantic: Optional[Dict[str, str]] = None
    # The ``_m`` suffix suggests metres; unit not established here -- confirm.
    spatial_resolution_m: Optional[float] = None
class Queries(BaseModel):
    """Collection of query types that can be processed together.

    Each entry is optional and defaults to ``None``, so the model itself
    accepts any subset of the three query types (including none of them).
    """
    # Captioning request, if any.
    caption_query: Optional[CaptionQuery] = None
    # Grounding / object-localisation request, if any.
    grounding_query: Optional[GroundingQuery] = None
    # VQA (binary / numeric / semantic) request, if any.
    attribute_query: Optional[AttributeQuery] = None
class StructuredRequest(BaseModel):
    """
    Structured input format matching query.json schema.
    Used by /process endpoint for explicit query type specification.

    Both fields are required (neither declares a default).
    """
    # Where the image comes from, plus optional id and metadata.
    input_image: InputImage
    # The caption / grounding / attribute queries to run on that image.
    queries: Queries
class SimpleRequest(BaseModel):
    """
    Simple text query format - will be automatically classified.
    Used by /query endpoint for natural language queries.
    The query will be classified using LLM into appropriate task type.

    ``query`` is required. The image may be given through ``image_url``,
    ``image_base64`` or ``image_path`` (the same field names as on
    ``InputImage``); these and ``spatial_resolution_m`` default to ``None``.
    """
    # NOTE(review): as with InputImage, nothing in this model requires that an
    # image source be set -- presumably the /query handler checks; confirm.
    query: str
    image_url: Optional[str] = None
    image_base64: Optional[str] = None
    image_path: Optional[str] = None  # Local file system path
    # The ``_m`` suffix suggests metres; unit not established here -- confirm.
    spatial_resolution_m: Optional[float] = None