Skip to content

Commit 6b0e432

Browse files
committed
feat: Add support for image-to-image and image-to-video generation; enhance upload functionality and create test scripts for validation
1 parent 5d9cea9 commit 6b0e432

File tree

3 files changed

+67
-11
lines changed

3 files changed

+67
-11
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,4 +110,5 @@ QUICK_FIX_GUIDE.md
110110
# Don't include cloudflare-worker files
111111
cloudflare-worker
112112
# network.json
113-
# network-*.json
113+
# network-*.json
114+
test_api.html

src/metaai_api/api_server.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,7 @@ class ImageRequest(BaseModel):
162162
media_ids: Optional[list] = None
163163
attachment_metadata: Optional[dict] = None # {'file_size': int, 'mime_type': str}
164164
orientation: Optional[str] = None # 'VERTICAL', 'LANDSCAPE' (not HORIZONTAL), or 'SQUARE'
165+
num_images: int = Field(1, ge=1, le=4) # Number of images to generate (1-4)
165166

166167

167168
class VideoRequest(BaseModel):
@@ -314,13 +315,16 @@ async def image(body: ImageRequest) -> Dict[str, Any]:
314315
)
315316
ai = _meta_ai_instance
316317
try:
318+
# Determine number of images: always 4 for image-to-image; otherwise the requested body.num_images (1-4, default 1)
319+
num_images = 4 if body.media_ids else body.num_images
320+
317321
# Use the new generation API with timeout protection
318322
result = await asyncio.wait_for(
319323
run_in_threadpool(
320324
ai.generate_image_new,
321325
prompt=body.prompt,
322326
orientation=body.orientation or "VERTICAL",
323-
num_images=1,
327+
num_images=num_images,
324328
media_ids=body.media_ids,
325329
attachment_metadata=body.attachment_metadata
326330
),

src/metaai_api/generation.py

Lines changed: 60 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,52 @@ def _build_base_variables(
138138
if media_ids:
139139
attachments_v2 = [str(mid) for mid in media_ids]
140140

141+
# Determine operation type based on media_ids presence
142+
is_image_to_image = operation == "TEXT_TO_IMAGE" and media_ids and len(media_ids) > 0
143+
is_image_to_video = operation == "TEXT_TO_VIDEO" and media_ids and len(media_ids) > 0
144+
145+
# Build imagineOperationRequest based on operation type
146+
if is_image_to_image:
147+
# Use imageToImageParams for image-to-image generation
148+
num_media = kwargs.get('num_images', 4)
149+
imagine_request = {
150+
"operation": "IMAGE_TO_IMAGE",
151+
"imageToImageParams": {
152+
"sourceMediaEntId": str(media_ids[0]),
153+
"instruction": prompt,
154+
"imageSource": "USER_UPLOADED",
155+
"imageUploadType": "GENAI_UPLOADED_FILE",
156+
"mediaType": "UPLOADED_IMAGE",
157+
"numMedia": num_media
158+
}
159+
}
160+
elif is_image_to_video:
161+
# Use imageToVideoParams for image-to-video generation
162+
imagine_request = {
163+
"operation": "IMAGE_TO_VIDEO",
164+
"imageToVideoParams": {
165+
"sourceMediaEntId": str(media_ids[0]),
166+
"prompt": prompt,
167+
"numMedia": 1
168+
}
169+
}
170+
else:
171+
# Use textToImageParams or textToVideoParams for text-based generation
172+
if operation == "TEXT_TO_VIDEO":
173+
imagine_request = {
174+
"operation": operation,
175+
"textToVideoParams": {
176+
"prompt": prompt
177+
}
178+
}
179+
else:
180+
imagine_request = {
181+
"operation": operation,
182+
"textToImageParams": {
183+
"prompt": prompt
184+
}
185+
}
186+
141187
variables = {
142188
"conversationId": conversation_id,
143189
"content": content,
@@ -153,12 +199,7 @@ def _build_base_variables(
153199
"mentions": None,
154200
"clippyIp": None,
155201
"isNewConversation": kwargs.get('is_new_conversation', True),
156-
"imagineOperationRequest": {
157-
"operation": operation,
158-
"textToImageParams": {
159-
"prompt": prompt
160-
}
161-
},
202+
"imagineOperationRequest": imagine_request,
162203
"qplJoinId": None,
163204
"clientTimezone": kwargs.get('timezone', "UTC"),
164205
"developerOverridesForMessage": None,
@@ -208,9 +249,19 @@ def generate_image(
208249
)
209250

210251
# Add image-specific parameters
211-
variables["imagineOperationRequest"]["textToImageParams"]["orientation"] = self._normalize_orientation(orientation)
212-
if num_images > 1:
213-
self.logger.warning("num_images > 1 is not supported by this endpoint; generating a single image")
252+
# Check if we're doing image-to-image generation
253+
media_ids = kwargs.get('media_ids')
254+
is_image_to_image = media_ids and len(media_ids) > 0
255+
256+
if is_image_to_image:
257+
# For image-to-image, orientation is not in imageToImageParams
258+
# numMedia is already set in _build_base_variables
259+
self.logger.info(f"Using IMAGE_TO_IMAGE operation with source media: {media_ids[0]}")
260+
else:
261+
# For text-to-image, add orientation to textToImageParams
262+
variables["imagineOperationRequest"]["textToImageParams"]["orientation"] = self._normalize_orientation(orientation)
263+
if num_images > 1:
264+
self.logger.warning("num_images > 1 is not supported by this endpoint; generating a single image")
214265

215266
payload = {
216267
"doc_id": self.IMAGE_DOC_ID,

0 commit comments

Comments
 (0)