feat: kwai kolors (#262)

timerring · web-flow · commit b97f291f5398 · 2025-04-05T20:15:38.000+08:00
* feat: kwai kolors
* docs: update docs
* chore: format
diff --git a/README.md b/README.md
@@ -18,7 +18,8 @@
   <img src="assets/qwen-color.svg" alt="Qwen-2.5-72B-Instruct" width="60" height="60" />
   <img src="assets/minimax-color.svg" alt="Minimax" width="20" height="60" />
   <img src="assets/minimax-text.svg" alt="Minimax" width="60" height="60" />
-
+  <img src="assets/siliconcloud-color.svg" alt="SiliconFlow" width="15" height="60" />
+  <img src="assets/siliconcloud-text.svg" alt="SiliconFlow" width="100" height="60" />
 </div>
 
 ##  1. Introduction
@@ -45,6 +46,7 @@
 - **( :tada: NEW)自动多平台循环直播推流**：该工具已经开源 [looplive](https://github.com/timerring/looplive) 是一个 7 x 24 小时全自动**循环多平台同时推流**直播工具。
 - **( :tada: NEW)自动生成风格变换的视频封面**：采用图生图多模态模型，自动获取视频截图并上传风格变换后的视频封面。
   - `Minimax image-01`
+  - `Kwai Kolors`
 
 项目架构流程如下：
 
@@ -195,6 +197,12 @@ MLLM 模型主要用于自动切片后的切片标题生成，此功能默认关
 
 在项目的自动切片功能需要使用到 Minimax 模型，请自行[注册账号](https://www.minimax.chat/)并申请 API Key，填写到 `bilive.toml` 文件中对应的 `MINIMAX_API_KEY` 中。
 
+##### 3.2.5 Kwai Kolors 模型
+
+> 如需使用 Kwai Kolors 模型，请将 `bilive.toml` 文件中 `generate_cover` 参数设置为 `true`，并将 `IMAGE_GEN_MODEL` 参数设置为 `siliconflow`，采用 siliconflow 部署的 Kolors 模型。
+
+请自行[注册账号](https://cloud.siliconflow.cn/i/3Szr5BVg)并申请 API Key，填写到 `bilive.toml` 文件中对应的 `SILICONFLOW_API_KEY` 中。
+
 #### 4. bilitool 登录
 
 > 由于一般日志打印不出二维码效果（docker 的日志不确定是否能打印，等发布新image时再修改，docker 版本请先参考文档 [bilive](https://bilive.timerring.com)，本 README 只针对源码部署），所以这步需要提前在机器上安装 [bilitool](https://github.com/timerring/bilitool):
diff --git a/assets/siliconcloud-color.svg b/assets/siliconcloud-color.svg
@@ -0,0 +1 @@
+<svg height="1em" style="flex:none;line-height:1" viewBox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><title>SiliconCloud</title><path clip-rule="evenodd" d="M20.663 0h-1.741c-5.575 0-8.788 3.56-8.788 9.018v.937a7.161 7.161 0 105.043 5.451h5.486a2.623 2.623 0 100-5.246h-5.458V8.787c0-2.09 1.51-3.6 3.717-3.6h1.741a2.594 2.594 0 000-5.187zM10.29 16.839a2.13 2.13 0 10-4.258-.094 2.13 2.13 0 004.258.094z" fill="#7C3AED" fill-rule="evenodd"></path></svg>
diff --git a/assets/siliconcloud-text.svg b/assets/siliconcloud-text.svg
@@ -0,0 +1 @@
+<svg fill="currentColor" fill-rule="evenodd" height="1em" style="flex:none;line-height:1" viewBox="0 0 230 24" xmlns="http://www.w3.org/2000/svg"><title>SiliconCloud</title><path clip-rule="evenodd" d="M25.504 22V2h3.481v20h-3.481zM35 2v15.668H52.49V22H33.806c-.623 0-1.16-.274-1.611-.82-.45-.547-.675-1.215-.675-2.003V2h3.481zM55.91 22V2h3.48v20h-3.48zm69.727-4.332V2h3.325v20h-3.325L111.373 6.444V22h-3.326V3.556c0-.393.104-.732.312-1.017.207-.285.467-.427.78-.427h2.234l14.266 15.555h-.002zM159.319 2v15.668h17.487V22h-18.682c-.624 0-1.16-.274-1.61-.82-.45-.547-.675-1.215-.675-2.003V2h3.48zm67.061 17.637c1.247-1.575 1.87-3.457 1.87-5.645V2h-3.429v11.992c0 1.007-.286 1.86-.857 2.56s-1.255 1.05-2.052 1.05h-4.727V2h-3.325v15.602h-4.78c-.796 0-1.473-.35-2.026-1.05-.554-.7-.832-1.553-.832-2.56V2h-3.428v11.992c0 2.188.615 4.07 1.843 5.645 1.23 1.575 2.712 2.363 4.443 2.363h12.832c1.732 0 3.222-.788 4.468-2.363zm-156.556-1.97h13.124V22H69.824c-1.42 0-2.736-.449-3.949-1.345-1.213-.898-2.172-2.1-2.883-3.611-.71-1.509-1.064-3.162-1.064-4.955 0-1.838.355-3.513 1.065-5.022.71-1.51 1.67-2.712 2.882-3.61 1.213-.896 2.529-1.345 3.95-1.345h13.123v4.332H69.824c-1.213 0-2.25.547-3.117 1.641-.866 1.094-1.3 2.429-1.3 4.004 0 1.531.434 2.843 1.3 3.937.866 1.095 1.904 1.642 3.117 1.642zm33.843 2.003c-1.23 1.554-2.727 2.33-4.493 2.33H90.88c-1.731 0-3.211-.776-4.441-2.33s-1.845-3.425-1.845-5.613V10.12c0-2.187.615-4.07 1.845-5.645 1.23-1.575 2.71-2.362 4.441-2.362h8.293c1.766 0 3.263.787 4.493 2.362 1.23 1.575 1.845 3.456 1.845 5.645v3.938c0 2.187-.615 4.058-1.845 5.612zm48.748-17.558v4.332h-17.566v3.412h17.566v4.333h-17.486V22h-3.43V11.666h-.009V2.625h.009v-.513h20.916zm45.999 17.558c-1.229 1.554-2.728 2.33-4.494 2.33h-8.292c-1.732 0-3.212-.776-4.442-2.33s-1.844-3.425-1.844-5.613V10.12c0-2.187.614-4.07 1.844-5.645 1.23-1.575 2.71-2.362 4.442-2.362h8.292c1.766 0 3.265.787 4.494 2.362 1.23 1.575 1.845 3.456 1.845 5.645v3.938c0 2.187-.615 4.058-1.845 5.612zm-176.848.557C20.631 21.41 19.505 22 
18.19 22H2v-4.332H18.19c.381 0 .701-.163.962-.492.259-.328.388-.733.388-1.215 0-.481-.129-.886-.388-1.213-.261-.33-.581-.493-.962-.493H6.78c-1.316 0-2.441-.591-3.378-1.773C2.467 11.303 2 9.88 2 8.216c0-1.662.467-3.085 1.402-4.266.936-1.181 2.062-1.772 3.378-1.772h15.305V6.51H6.78c-.381 0-.701.164-.961.493-.26.327-.39.732-.39 1.213 0 .482.13.886.39 1.214.26.329.58.492.961.492h11.409c1.316 0 2.442.592 3.377 1.773.935 1.181 1.403 2.604 1.403 4.266 0 1.663-.468 3.085-1.403 4.266zm79.66-12.667c-.572-.7-1.256-1.05-2.052-1.05H90.88c-.796 0-1.471.35-2.025 1.05-.555.7-.832 1.554-.832 2.56v3.938c.001 1.006.277 1.86.832 2.56.554.7 1.229 1.05 2.025 1.05h8.293c.796 0 1.481-.35 2.052-1.05.571-.7.858-1.553.858-2.56V10.12c0-1.006-.287-1.86-.858-2.56zm94.747 0c-.572-.7-1.257-1.05-2.053-1.05h-8.292c-.797 0-1.472.35-2.025 1.05-.555.7-.832 1.554-.832 2.56v3.938c0 1.006.277 1.86.832 2.56.553.7 1.228 1.05 2.025 1.05h8.292c.796 0 1.481-.35 2.053-1.05.571-.7.857-1.553.857-2.56V10.12c0-1.006-.286-1.86-.857-2.56z"></path></svg>
diff --git a/bilive.toml b/bilive.toml
@@ -37,5 +37,6 @@ qwen_api_key = "" # Apply for your own Qwen API key at https://bailian.console.a
 
 [cover]
 generate_cover = false # whether to generate cover
-image_gen_model = "minimax" # the image generation model, can be "minimax"
+image_gen_model = "minimax" # the image generation model, can be "minimax" or "siliconflow"
 minimax_api_key = "" # Apply for your own Minimax API key at https://platform.minimaxi.com/user-center/basic-information/interface-key
+siliconflow_api_key = "" # Apply for your own SiliconFlow API key at https://cloud.siliconflow.cn/i/3Szr5BVg
diff --git a/src/config.py b/src/config.py
@@ -75,3 +75,4 @@ def get_interface_config():
 GENERATE_COVER = config.get('cover', {}).get('generate_cover')
 IMAGE_GEN_MODEL = config.get('cover', {}).get('image_gen_model')
 MINIMAX_API_KEY = config.get('cover', {}).get('minimax_api_key')
+SILICONFLOW_API_KEY = config.get('cover', {}).get('siliconflow_api_key')
diff --git a/src/cover/cover_generator.py b/src/cover/cover_generator.py
@@ -3,6 +3,7 @@
 from src.config import IMAGE_GEN_MODEL
 import subprocess
 
+
 def cut_cover_use_ffmpeg(video_path):
     """Cut cover use ffmpeg
     Args:
@@ -13,10 +14,20 @@ def cut_cover_use_ffmpeg(video_path):
     upload_log.info("begin to generate cover")
     cover_path = video_path[:-4] + ".jpg"
     ffmpeg_command = [
-        'ffmpeg', '-y', '-i', video_path, '-t', '1', '-r', '1', cover_path
+        "ffmpeg",
+        "-y",
+        "-i",
+        video_path,
+        "-t",
+        "1",
+        "-r",
+        "1",
+        cover_path,
     ]
     try:
-        result = subprocess.run(ffmpeg_command, check=True, capture_output=True, text=True)
+        result = subprocess.run(
+            ffmpeg_command, check=True, capture_output=True, text=True
+        )
         upload_log.debug(f"FFmpeg output: {result.stdout}")
         if result.stderr:
             upload_log.debug(f"FFmpeg debug: {result.stderr}")
@@ -33,6 +44,7 @@ def cover_generator(model_type):
     Returns:
         function: wrapped title generation function
     """
+
     def decorator(func):
         def wrapper(video_path):
             cover_path = cut_cover_use_ffmpeg(video_path)
@@ -41,13 +53,21 @@ def wrapper(video_path):
                 return None
             if model_type == "minimax":
                 from .image_model_sdk.minimax_sdk import minimax_generate_cover
+
                 return minimax_generate_cover(cover_path)
+            elif model_type == "siliconflow":
+                from .image_model_sdk.kolors_sdk import kolors_generate_cover
+
+                return kolors_generate_cover(cover_path)
             else:
                 upload_log.error(f"Unsupported model type: {model_type}")
                 return None
+
         return wrapper
+
     return decorator
 
+
 @cover_generator(IMAGE_GEN_MODEL)
 def generate_cover(video_path):
     """Generate cover for video
@@ -56,4 +76,4 @@ def generate_cover(video_path):
     Returns:
         str: generated cover
     """
-    pass  # The actual implementation is handled by the decorator
+    pass  # The actual implementation is handled by the decorator
diff --git a/src/cover/image_model_sdk/kolors_sdk.py b/src/cover/image_model_sdk/kolors_sdk.py
@@ -0,0 +1,49 @@
+import requests
+import base64
+import time
+import os
+from src.config import SILICONFLOW_API_KEY
+
+
+def kolors_generate_cover(your_file_path):
+    """Generate a cover image using the SiliconFlow API for the Kolors (Kwai) model
+    Args:
+        your_file_path: str, path to the image file
+    Returns:
+        str, local download path of the generated cover image file
+    """
+    with open(your_file_path, "rb") as image_file:
+        data = base64.b64encode(image_file.read()).decode("utf-8")
+
+    payload = {
+        "model": "Kwai-Kolors/Kolors",
+        "prompt": "这是一个视频截图，请尝试生成对应的日本动漫类型的封面",
+        "image_size": "1024x1024",
+        "batch_size": 1,
+        "num_inference_steps": 20,
+        "guidance_scale": 7.5,
+        "image": f"data:image/webp;base64,{data}",
+    }
+    headers = {
+        "Authorization": f"Bearer {SILICONFLOW_API_KEY}",
+        "Content-Type": "application/json",
+    }
+    url = "https://api.siliconflow.cn/v1/images/generations"
+    response = requests.request("POST", url, json=payload, headers=headers)
+    if response.status_code == 200:
+        image_url = response.json()["images"][0]["url"]
+        img_data = requests.get(image_url).content
+        cover_name = time.strftime("%Y%m%d%H%M%S") + ".png"
+        temp_cover_path = os.path.join(os.path.dirname(your_file_path), cover_name)
+        with open(temp_cover_path, "wb") as handler:
+            handler.write(img_data)
+        os.remove(your_file_path)
+        return temp_cover_path
+    else:
+        print(response.text, flush=True)
+        return None
+
+
+if __name__ == "__main__":
+    your_file_path = ""
+    print(kolors_generate_cover(your_file_path))
diff --git a/src/cover/image_model_sdk/minimax_sdk.py b/src/cover/image_model_sdk/minimax_sdk.py
@@ -13,41 +13,41 @@ def minimax_generate_cover(your_file_path):
     Returns:
         str, local download path of the generated cover image file
     """
-    cover_name = time.strftime("%Y%m%d%H%M%S") + ".png"
-    temp_cover_path = os.path.join(os.path.dirname(your_file_path), cover_name)
 
     with open(your_file_path, "rb") as image_file:
-        data = base64.b64encode(image_file.read()).decode('utf-8')
+        data = base64.b64encode(image_file.read()).decode("utf-8")
 
-    payload = json.dumps({
-        "model": "image-01",
-        "prompt": "这是一个视频截图，请生成其对应的吉普力风格的图片",
-        "subject_reference": [
-            {
-                "type": "character",
-                "image_file": f"data:image/jpeg;base64,{data}"
-            }
-        ],
-        "n": 2
-    })
+    payload = json.dumps(
+        {
+            "model": "image-01",
+            "prompt": "这是一个视频截图，请生成其对应的吉普力风格的图片",
+            "subject_reference": [
+                {"type": "character", "image_file": f"data:image/jpeg;base64,{data}"}
+            ],
+            "n": 2,
+        }
+    )
     headers = {
-        'Authorization': f'Bearer {MINIMAX_API_KEY}',
-        'Content-Type': 'application/json'
+        "Authorization": f"Bearer {MINIMAX_API_KEY}",
+        "Content-Type": "application/json",
     }
 
     url = "https://api.minimax.chat/v1/image_generation"
     response = requests.request("POST", url, headers=headers, data=payload).json()
-    if response['base_resp']['status_code'] == 0:
-        image_url = response['data']['image_urls'][0]
+    if response["base_resp"]["status_code"] == 0:
+        image_url = response["data"]["image_urls"][0]
         img_data = requests.get(image_url).content
-        with open(temp_cover_path, 'wb') as handler:
+        cover_name = time.strftime("%Y%m%d%H%M%S") + ".png"
+        temp_cover_path = os.path.join(os.path.dirname(your_file_path), cover_name)
+        with open(temp_cover_path, "wb") as handler:
             handler.write(img_data)
         os.remove(your_file_path)
         return temp_cover_path
     else:
-        print(response['base_resp']['error_msg'])
+        print(response["base_resp"]["error_msg"], flush=True)
         return None
 
+
 if __name__ == "__main__":
     your_file_path = ""
-    print(minimax_generate_cover(your_file_path))
+    print(minimax_generate_cover(your_file_path))