Skip to content

Commit e4b943f

Browse files
mose-zm and mose-x.zm authored
Support wan2.6 video (#85)
* support wan2.6 video generation * support wan2.6 video generation * support wan2.6 video generation --------- Co-authored-by: mose-x.zm <[email protected]>
1 parent 03e5568 commit e4b943f

File tree

3 files changed

+50
-9
lines changed

3 files changed

+50
-9
lines changed

dashscope/aigc/video_synthesis.py

Lines changed: 41 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
# Copyright (c) Alibaba, Inc. and its affiliates.
22

3-
from typing import Any, Dict, Union
3+
from typing import Any, Dict, Union, List
44

55
from dashscope.api_entities.dashscope_response import (DashScopeAPIResponse,
66
VideoSynthesisResponse)
77
from dashscope.client.base_api import BaseAsyncApi, BaseAsyncAioApi
8-
from dashscope.common.constants import PROMPT
8+
from dashscope.common.constants import PROMPT, REFERENCE_VIDEO_URLS
99
from dashscope.common.utils import _get_task_group_and_task
1010
from dashscope.utils.oss_utils import check_and_upload_local
1111

@@ -39,6 +39,8 @@ def call(cls,
3939
template: str = None,
4040
img_url: str = None,
4141
audio_url: str = None,
42+
reference_video_urls: List[str] = None,
43+
reference_video_description: List[str] = None,
4244
api_key: str = None,
4345
extra_input: Dict = None,
4446
workspace: str = None,
@@ -58,6 +60,8 @@ def call(cls,
5860
template (str): LoRa input, such as gufeng, katong, etc.
5961
img_url (str): The input image url, Generate the URL of the image referenced by the video.
6062
audio_url (str): The input audio url
63+
reference_video_urls (List[str]): list of character reference video file urls uploaded by the user
64+
reference_video_description (List[str]): For the description information of the picture and sound of the reference video, corresponding to ref video, it needs to be in the order of the url. If the quantity is different, an error will be reported
6165
api_key (str, optional): The api api_key. Defaults to None.
6266
workspace (str): The dashscope workspace id.
6367
extra_input (Dict): The extra input parameters.
@@ -79,6 +83,8 @@ def call(cls,
7983
prompt,
8084
img_url=img_url,
8185
audio_url=audio_url,
86+
reference_video_urls=reference_video_urls,
87+
reference_video_description=reference_video_description,
8288
api_key=api_key,
8389
extend_prompt=extend_prompt,
8490
negative_prompt=negative_prompt,
@@ -98,6 +104,8 @@ def _get_input(cls,
98104
prompt: Any = None,
99105
img_url: str = None,
100106
audio_url: str = None,
107+
reference_video_urls: List[str] = None,
108+
reference_video_description: List[str] = None,
101109
# """@deprecated, use prompt_extend in parameters """
102110
extend_prompt: bool = True,
103111
negative_prompt: str = None,
@@ -119,6 +127,8 @@ def _get_input(cls,
119127
inputs['template'] = template
120128
if function:
121129
inputs['function'] = function
130+
if reference_video_description:
131+
inputs['reference_video_description'] = reference_video_description
122132

123133
has_upload = False
124134
upload_certificate = None
@@ -165,6 +175,17 @@ def _get_input(cls,
165175
has_upload = True
166176
inputs['last_frame_url'] = res_last_frame_url
167177

178+
if (reference_video_urls is not None
179+
and reference_video_urls and len(reference_video_urls) > 0):
180+
new_videos = []
181+
for video in reference_video_urls:
182+
is_upload, new_video, upload_certificate = check_and_upload_local(
183+
model, video, api_key, upload_certificate)
184+
if is_upload:
185+
has_upload = True
186+
new_videos.append(new_video)
187+
inputs[REFERENCE_VIDEO_URLS] = new_videos
188+
168189
if extra_input is not None and extra_input:
169190
inputs = {**inputs, **extra_input}
170191
if has_upload:
@@ -185,6 +206,8 @@ def async_call(cls,
185206
prompt: Any = None,
186207
img_url: str = None,
187208
audio_url: str = None,
209+
reference_video_urls: List[str] = None,
210+
reference_video_description: List[str] = None,
188211
# """@deprecated, use prompt_extend in parameters """
189212
extend_prompt: bool = True,
190213
negative_prompt: str = None,
@@ -208,6 +231,8 @@ def async_call(cls,
208231
template (str): LoRa input, such as gufeng, katong, etc.
209232
img_url (str): The input image url, Generate the URL of the image referenced by the video.
210233
audio_url (str): The input audio url.
234+
reference_video_urls (List[str]): list of character reference video file urls uploaded by the user
235+
reference_video_description (List[str]): For the description information of the picture and sound of the reference video, corresponding to ref video, it needs to be in the order of the url. If the quantity is different, an error will be reported
211236
api_key (str, optional): The api api_key. Defaults to None.
212237
workspace (str): The dashscope workspace id.
213238
extra_input (Dict): The extra input parameters.
@@ -229,7 +254,8 @@ def async_call(cls,
229254
task_group, function = _get_task_group_and_task(__name__)
230255

231256
inputs, kwargs, task = cls._get_input(
232-
model, prompt, img_url, audio_url, extend_prompt, negative_prompt, template, api_key,
257+
model, prompt, img_url, audio_url, reference_video_urls, reference_video_description,
258+
extend_prompt, negative_prompt, template, api_key,
233259
extra_input, task, function, head_frame, tail_frame,
234260
first_frame_url, last_frame_url, **kwargs)
235261

@@ -354,6 +380,8 @@ async def call(cls,
354380
prompt: Any = None,
355381
img_url: str = None,
356382
audio_url: str = None,
383+
reference_video_urls: List[str] = None,
384+
reference_video_description: List[str] = None,
357385
# """@deprecated, use prompt_extend in parameters """
358386
extend_prompt: bool = True,
359387
negative_prompt: str = None,
@@ -377,6 +405,8 @@ async def call(cls,
377405
template (str): LoRa input, such as gufeng, katong, etc.
378406
img_url (str): The input image url, Generate the URL of the image referenced by the video.
379407
audio_url (str): The input audio url.
408+
reference_video_urls (List[str]): list of character reference video file urls uploaded by the user
409+
reference_video_description (List[str]): For the description information of the picture and sound of the reference video, corresponding to ref video, it needs to be in the order of the url. If the quantity is different, an error will be reported
380410
api_key (str, optional): The api api_key. Defaults to None.
381411
workspace (str): The dashscope workspace id.
382412
extra_input (Dict): The extra input parameters.
@@ -396,7 +426,8 @@ async def call(cls,
396426
"""
397427
task_group, f = _get_task_group_and_task(__name__)
398428
inputs, kwargs, task = VideoSynthesis._get_input(
399-
model, prompt, img_url, audio_url, extend_prompt, negative_prompt, template, api_key,
429+
model, prompt, img_url, audio_url, reference_video_urls, reference_video_description,
430+
extend_prompt, negative_prompt, template, api_key,
400431
extra_input, task, f, head_frame, tail_frame,
401432
first_frame_url, last_frame_url, **kwargs)
402433
response = await super().call(model, inputs, task_group, task, f, api_key, workspace, **kwargs)
@@ -408,6 +439,8 @@ async def async_call(cls,
408439
prompt: Any = None,
409440
img_url: str = None,
410441
audio_url: str = None,
442+
reference_video_urls: List[str] = None,
443+
reference_video_description: List[str] = None,
411444
# """@deprecated, use prompt_extend in parameters """
412445
extend_prompt: bool = True,
413446
negative_prompt: str = None,
@@ -431,6 +464,8 @@ async def async_call(cls,
431464
template (str): LoRa input, such as gufeng, katong, etc.
432465
img_url (str): The input image url, Generate the URL of the image referenced by the video.
433466
audio_url (str): The input audio url.
467+
reference_video_urls (List[str]): list of character reference video file urls uploaded by the user
468+
reference_video_description (List[str]): For the description information of the picture and sound of the reference video, corresponding to ref video, it needs to be in the order of the url. If the quantity is different, an error will be reported
434469
api_key (str, optional): The api api_key. Defaults to None.
435470
workspace (str): The dashscope workspace id.
436471
extra_input (Dict): The extra input parameters.
@@ -452,7 +487,8 @@ async def async_call(cls,
452487
task_group, function = _get_task_group_and_task(__name__)
453488

454489
inputs, kwargs, task = VideoSynthesis._get_input(
455-
model, prompt, img_url, audio_url, extend_prompt, negative_prompt, template, api_key,
490+
model, prompt, img_url, audio_url, reference_video_urls, reference_video_description,
491+
extend_prompt, negative_prompt, template, api_key,
456492
extra_input, task, function, head_frame, tail_frame,
457493
first_frame_url, last_frame_url, **kwargs)
458494

dashscope/common/constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
HISTORY = 'history'
2626
CUSTOMIZED_MODEL_ID = 'customized_model_id'
2727
IMAGES = 'images'
28+
REFERENCE_VIDEO_URLS = 'reference_video_urls'
2829
TEXT_EMBEDDING_INPUT_KEY = 'texts'
2930
SERVICE_503_MESSAGE = 'Service temporarily unavailable, possibly overloaded or not ready.' # noqa E501
3031
WEBSOCKET_ERROR_CODE = 44

samples/test_video_synthesis.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,21 @@
22
from dashscope import VideoSynthesis
33
import os
44

5-
prompt = "一幅史诗级可爱的场景。一只小巧可爱的卡通小猫将军,身穿细节精致的金色盔甲,头戴一个稍大的头盔,勇敢地站在悬崖上。他骑着一匹虽小但英勇的战马。悬崖下方,一支由老鼠组成的、数量庞大、无穷无尽的军队正带着临时制作的武器向前冲锋。这是一个戏剧性的、大规模的战斗场景,灵感来自中国古代的战争史诗。远处的雪山上空,天空乌云密布。整体氛围是“可爱”与“霸气”的搞笑和史诗般的融合"
5+
prompt = "一只小猫在月光下奔跑"
66
audio_url = 'https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20250925/ozwpvi/rap.mp3'
7+
reference_video_urls = ["https://test-data-center.oss-accelerate.aliyuncs.com/wanx/video/resources/with_human_voice_11s.mov"]
78
api_key = os.getenv("DASHSCOPE_API_KEY")
89

910

1011
def simple_call():
1112
print('----sync call, please wait a moment----')
1213
rsp = VideoSynthesis.call(api_key=api_key,
13-
model="wan2.5-t2v-preview",
14-
prompt=prompt,
15-
audio_url=audio_url)
14+
model="wan2.6-r2v",
15+
reference_video_urls=reference_video_urls,
16+
shot_type="multi",
17+
audio=True,
18+
watermark=True,
19+
prompt=prompt)
1620
if rsp.status_code == HTTPStatus.OK:
1721

1822
print('response: %s' % rsp)

0 commit comments

Comments
 (0)