Support wan2.5 (#65)

mose-zm · mose-x.zm · web-flow · commit c4542fa412b1 · 2025-10-20T10:53:01.000+08:00
Co-authored-by: mose-x.zm <zm02074348@alibaba-inc.com>
diff --git a/dashscope/aigc/video_synthesis.py b/dashscope/aigc/video_synthesis.py
@@ -38,6 +38,7 @@ def call(cls,
              negative_prompt: str = None,
              template: str = None,
              img_url: str = None,
+             audio_url: str = None,
              api_key: str = None,
              extra_input: Dict = None,
              workspace: str = None,
@@ -56,6 +57,7 @@ def call(cls,
             negative_prompt (str): The negative prompt is the opposite of the prompt meaning.
             template (str): LoRa input, such as gufeng, katong, etc.
             img_url (str): The input image url, Generate the URL of the image referenced by the video.
+            audio_url (str): The input audio url.
             api_key (str, optional): The api api_key. Defaults to None.
             workspace (str): The dashscope workspace id.
             extra_input (Dict): The extra input parameters.
@@ -76,6 +78,7 @@ def call(cls,
         return super().call(model,
                             prompt,
                             img_url=img_url,
+                            audio_url=audio_url,
                             api_key=api_key,
                             extend_prompt=extend_prompt,
                             negative_prompt=negative_prompt,
@@ -94,6 +97,7 @@ def _get_input(cls,
                    model: str,
                    prompt: Any = None,
                    img_url: str = None,
+                   audio_url: str = None,
                    # """@deprecated, use prompt_extend in parameters """
                    extend_prompt: bool = True,
                    negative_prompt: str = None,
@@ -125,6 +129,13 @@ def _get_input(cls,
                 has_upload = True
             inputs['img_url'] = res_img_url
 
+        if audio_url is not None and audio_url:
+            is_upload, res_audio_url = check_and_upload_local(
+                model, audio_url, api_key)
+            if is_upload:
+                has_upload = True
+            inputs['audio_url'] = res_audio_url
+
         if head_frame is not None and head_frame:
             is_upload, res_head_frame = check_and_upload_local(
                 model, head_frame, api_key)
@@ -172,6 +183,7 @@ def async_call(cls,
                    model: str,
                    prompt: Any = None,
                    img_url: str = None,
+                   audio_url: str = None,
                    # """@deprecated, use prompt_extend in parameters """
                    extend_prompt: bool = True,
                    negative_prompt: str = None,
@@ -194,6 +206,7 @@ def async_call(cls,
             negative_prompt (str): The negative prompt is the opposite of the prompt meaning.
             template (str): LoRa input, such as gufeng, katong, etc.
             img_url (str): The input image url, Generate the URL of the image referenced by the video.
+            audio_url (str): The input audio url.
             api_key (str, optional): The api api_key. Defaults to None.
             workspace (str): The dashscope workspace id.
             extra_input (Dict): The extra input parameters.
@@ -215,7 +228,7 @@ def async_call(cls,
         task_group, function = _get_task_group_and_task(__name__)
 
         inputs, kwargs, task = cls._get_input(
-            model, prompt, img_url, extend_prompt, negative_prompt, template, api_key,
+            model, prompt, img_url, audio_url, extend_prompt, negative_prompt, template, api_key,
             extra_input, task, function, head_frame, tail_frame,
             first_frame_url, last_frame_url, **kwargs)
 
@@ -339,6 +352,7 @@ async def call(cls,
                    model: str,
                    prompt: Any = None,
                    img_url: str = None,
+                   audio_url: str = None,
                    # """@deprecated, use prompt_extend in parameters """
                    extend_prompt: bool = True,
                    negative_prompt: str = None,
@@ -361,6 +375,7 @@ async def call(cls,
             negative_prompt (str): The negative prompt is the opposite of the prompt meaning.
             template (str): LoRa input, such as gufeng, katong, etc.
             img_url (str): The input image url, Generate the URL of the image referenced by the video.
+            audio_url (str): The input audio url.
             api_key (str, optional): The api api_key. Defaults to None.
             workspace (str): The dashscope workspace id.
             extra_input (Dict): The extra input parameters.
@@ -380,7 +395,7 @@ async def call(cls,
         """
         task_group, f = _get_task_group_and_task(__name__)
         inputs, kwargs, task = VideoSynthesis._get_input(
-            model, prompt, img_url, extend_prompt, negative_prompt, template, api_key,
+            model, prompt, img_url, audio_url, extend_prompt, negative_prompt, template, api_key,
             extra_input, task, f, head_frame, tail_frame,
             first_frame_url, last_frame_url, **kwargs)
         response = await super().call(model, inputs, task_group, task, f, api_key, workspace, **kwargs)
@@ -391,6 +406,7 @@ async def async_call(cls,
                    model: str,
                    prompt: Any = None,
                    img_url: str = None,
+                   audio_url: str = None,
                    # """@deprecated, use prompt_extend in parameters """
                    extend_prompt: bool = True,
                    negative_prompt: str = None,
@@ -413,6 +429,7 @@ async def async_call(cls,
             negative_prompt (str): The negative prompt is the opposite of the prompt meaning.
             template (str): LoRa input, such as gufeng, katong, etc.
             img_url (str): The input image url, Generate the URL of the image referenced by the video.
+            audio_url (str): The input audio url.
             api_key (str, optional): The api api_key. Defaults to None.
             workspace (str): The dashscope workspace id.
             extra_input (Dict): The extra input parameters.
@@ -434,7 +451,7 @@ async def async_call(cls,
         task_group, function = _get_task_group_and_task(__name__)
 
         inputs, kwargs, task = VideoSynthesis._get_input(
-            model, prompt, img_url, extend_prompt, negative_prompt, template, api_key,
+            model, prompt, img_url, audio_url, extend_prompt, negative_prompt, template, api_key,
             extra_input, task, function, head_frame, tail_frame,
             first_frame_url, last_frame_url, **kwargs)
 
diff --git a/samples/test_video_synthesis.py b/samples/test_video_synthesis.py
@@ -0,0 +1,25 @@
+from http import HTTPStatus
+from dashscope import VideoSynthesis
+import os
+
+prompt = "一幅史诗级可爱的场景。一只小巧可爱的卡通小猫将军，身穿细节精致的金色盔甲，头戴一个稍大的头盔，勇敢地站在悬崖上。他骑着一匹虽小但英勇的战马。悬崖下方，一支由老鼠组成的、数量庞大、无穷无尽的军队正带着临时制作的武器向前冲锋。这是一个戏剧性的、大规模的战斗场景，灵感来自中国古代的战争史诗。远处的雪山上空，天空乌云密布。整体氛围是“可爱”与“霸气”的搞笑和史诗般的融合"
+audio_url = 'https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20250925/ozwpvi/rap.mp3'
+api_key = os.getenv("DASHSCOPE_API_KEY")
+
+
+def simple_call():
+    print('----sync call, please wait a moment----')
+    rsp = VideoSynthesis.call(api_key=api_key,
+                              model="wan2.5-t2v-preview",
+                              prompt=prompt,
+                              audio_url=audio_url)
+    if rsp.status_code == HTTPStatus.OK:
+
+        print('response: %s' % rsp)
+    else:
+        print('sync_call Failed, status_code: %s, code: %s, message: %s' %
+              (rsp.status_code, rsp.code, rsp.message))
+
+
+if __name__ == '__main__':
+    simple_call()