feat: stable diffusion (#265)

timerring · web-flow · commit 944c53cba2ac · 2025-04-06T16:05:13.000+08:00
* feat: stable diffusion
* docs: update docs
diff --git a/README.md b/README.md
@@ -22,6 +22,7 @@
   <img src="assets/siliconcloud-color.svg" alt="SiliconFlow" width="15" height="60" />
   <img src="assets/siliconcloud-text.svg" alt="SiliconFlow" width="100" height="60" />
   <img src="assets/wenxin-color.svg" alt="Baidu ERNIE" width="60" height="60" />
+  <img src="assets/stability-brand-color.svg" alt="Stability AI" width="80" height="60" />
 </div>
 
 ##  1. Introduction
@@ -50,7 +51,8 @@
   - `Minimax image-01`
   - `Kwai Kolors`
   - `Tencent Hunyuan`
-  - `Baidu ERNIE`
+  - `Baidu ERNIE irag-1.0`
+  - `Stable Diffusion 3.5 large turbo`
 
 项目架构流程如下：
 
@@ -223,6 +225,12 @@ MLLM 模型主要用于自动切片后的切片标题生成，此功能默认关
 
 请自行[注册账号](https://console.bce.baidu.com/iam/key/list)并申请 API Key，填写到 `bilive.toml` 文件中对应的 `BAIDU_API_KEY` 中。
 
+##### 3.3.5 Stability SD 3.5 large turbo 模型
+
+> 如需使用 Stability SD 3.5 large turbo 模型，请将 `IMAGE_GEN_MODEL` 参数设置为 `stability`。
+
+请自行[注册账号](https://platform.stability.ai/account/keys)并申请 API Key，填写到 `bilive.toml` 文件中对应的 `STABILITY_API_KEY` 中。
+
 #### 4. bilitool 登录
 
 > 由于一般日志打印不出二维码效果（docker 的日志不确定是否能打印，等发布新image时再修改，docker 版本请先参考文档 [bilive](https://bilive.timerring.com)，本 README 只针对源码部署），所以这步需要提前在机器上安装 [bilitool](https://github.com/timerring/bilitool):
diff --git a/assets/stability-brand-color.svg b/assets/stability-brand-color.svg
@@ -0,0 +1 @@
+<svg height="1em" style="flex:none;line-height:1" viewBox="0 0 128 24" xmlns="http://www.w3.org/2000/svg"><title>Stability</title><defs><linearGradient id="lobe-icons-stability-brand-fill" x1="50%" x2="50%" y1="0%" y2="100%"><stop offset="0%" stop-color="#9D39FF"></stop><stop offset="100%" stop-color="#A380FF"></stop></linearGradient></defs><g fill="none" fill-rule="nonzero"><path d="M6.96 19.315c3.195 0 5.247-1.607 5.247-4.148 0-1.9-1.183-3.244-3.294-3.803l-1.615-.396-.2-.046c-1.138-.272-1.696-.573-1.696-1.431 0-.752.598-1.167 1.61-1.167 3.22 0 4.388 1.167 4.388 1.167V6.964l-.042-.041c-.233-.216-1.47-1.217-4.371-1.217C3.922 5.706 2 7.261 2 9.698c0 1.935 1.253 3.27 3.512 3.832l1.423.341c1.194.286 1.792.597 1.792 1.504 0 .83-.65 1.296-1.792 1.296C3.643 16.67 2 14.915 2 14.915v3.031l.042.045c.238.235 1.52 1.324 4.919 1.324zm15.723-.155v-2.818l-.822.011c-.217 0-.475 0-.774-.005l-.345-.006c-1.41-.024-1.9-.688-1.9-2.244V8.804h3.534v-2.84h-3.534v-2.83h-3.316v2.83h-1.872v2.84h1.872v5.516c0 3.26 1.584 4.84 4.825 4.84h2.332zm62.67 0v-2.818l-.821.011c-.217 0-.476 0-.775-.005l-.344-.006c-1.41-.024-1.9-.688-1.9-2.244V8.804h3.534v-2.84h-3.535v-2.83h-3.315v2.83h-1.872v2.84h1.872v5.516c0 3.26 1.583 4.84 4.824 4.84h2.332zM34.445 5.988V7.91c-.873-1.41-2.494-2.204-4.342-2.204-3.672 0-6.214 2.768-6.214 6.74 0 3.972 2.516 6.714 6.137 6.714 1.874 0 3.52-.794 4.419-2.204v1.922h3.104V5.988h-3.104zm-3.793 10.435c-2.087 0-3.478-1.61-3.478-3.858 0-2.22 1.418-3.857 3.478-3.857 2.087 0 3.612 1.637 3.612 3.857 0 2.248-1.552 3.858-3.612 3.858zm17.67-10.717c-2.078 0-3.321 1.256-4.187 2.564V.074H40.82V18.89h3.216v-1.877c.841 1.358 2.48 2.073 4.286 2.073 3.464 0 5.988-2.717 5.988-6.52 0-3.778-2.202-6.859-5.988-6.859zM47.605 16.2c-2.122 0-3.599-1.609-3.599-3.78 0-2.144 1.612-3.672 3.68-3.672 2.095 0 3.438 1.5 3.438 3.672 0 2.171-1.478 3.78-3.519 3.78zM58.85 3.709c1.031 0 1.799-.745 1.799-1.752 0-1.03-.746-1.752-1.799-1.752-1.03 0-1.777.723-1.777 1.752s.746 1.751 1.777 1.751zM57.203 
19.09h3.316V6.093h-3.316v12.998zM72.41 3.708c1.03 0 1.798-.744 1.798-1.751 0-1.03-.745-1.752-1.798-1.752-1.031 0-1.777.723-1.777 1.752s.746 1.751 1.777 1.751zm-1.647 15.383h3.315V6.093h-3.315v12.998zm-6.818-.156h3.417V0h-3.417v18.935zM88.811 24h3.669l7.047-17.98H96.09l-3.08 8.765-3.09-8.765h-3.81L91.22 18.57 88.811 24zm27.214-18.012V7.91c-.873-1.41-2.493-2.204-4.342-2.204-3.672 0-6.214 2.768-6.214 6.74 0 3.972 2.517 6.714 6.137 6.714 1.874 0 3.52-.794 4.419-2.204v1.922h3.104V5.988h-3.104zm-3.793 10.435c-2.087 0-3.478-1.61-3.478-3.858 0-2.22 1.418-3.857 3.478-3.857 2.087 0 3.612 1.637 3.612 3.857 0 2.248-1.551 3.858-3.612 3.858zm11.97-12.715c1.03 0 1.798-.744 1.798-1.751 0-1.03-.746-1.752-1.799-1.752-1.03 0-1.776.723-1.776 1.752s.745 1.751 1.776 1.751zm-1.647 15.383h3.315V6.093h-3.315v12.998z" fill="url(#lobe-icons-stability-brand-fill)"></path><path d="M101.485 19.258c1.136 0 1.982-.82 1.982-1.93 0-1.134-.822-1.93-1.982-1.93-1.137 0-1.958.796-1.958 1.93s.821 1.93 1.958 1.93z" fill="#E80000"></path></g></svg>
diff --git a/bilive.toml b/bilive.toml
@@ -37,9 +37,10 @@ qwen_api_key = "" # Apply for your own Qwen API key at https://bailian.console.a
 
 [cover]
 generate_cover = false # whether to generate cover
-image_gen_model = "minimax" # the image generation model, can be "minimax" or "siliconflow" or "tencent" or "baidu"
+image_gen_model = "minimax" # the image generation model, can be "minimax" or "siliconflow" or "tencent" or "baidu" or "stability"
 minimax_api_key = "" # Apply for your own Minimax API key at https://platform.minimaxi.com/user-center/basic-information/interface-key
 siliconflow_api_key = "" # Apply for your own SiliconFlow API key at https://cloud.siliconflow.cn/i/3Szr5BVg
 tencent_secret_id = "" # Apply for your own Tencent Cloud API key at https://console.cloud.tencent.com/cam/capi
 tencent_secret_key = "" # Apply for your own Tencent Cloud secret key as above
 baidu_api_key = "" # Apply for your own Baidu API key at https://console.bce.baidu.com/iam/key/list
+stability_api_key = "" # Apply for your own Stability API key at https://platform.stability.ai/account/keys
diff --git a/src/config.py b/src/config.py
@@ -78,4 +78,5 @@ def get_interface_config():
 SILICONFLOW_API_KEY = config.get('cover', {}).get('siliconflow_api_key')
 TENCENT_SECRET_ID = config.get('cover', {}).get('tencent_secret_id')
 TENCENT_SECRET_KEY = config.get('cover', {}).get('tencent_secret_key')
-BAIDU_API_KEY = config.get('cover', {}).get('baidu_api_key')
+BAIDU_API_KEY = config.get('cover', {}).get('baidu_api_key')
+STABILITY_API_KEY = config.get('cover', {}).get('stability_api_key')
diff --git a/src/cover/cover_generator.py b/src/cover/cover_generator.py
@@ -67,6 +67,12 @@ def wrapper(video_path):
                 from .image_model_sdk.baidu_sdk import baidu_generate_cover
 
                 return baidu_generate_cover(cover_path)
+            elif model_type == "stability":
+                from .image_model_sdk.stability_sdk import (
+                    stable_diffusion_generate_cover,
+                )
+
+                return stable_diffusion_generate_cover(cover_path)
             else:
                 upload_log.error(f"Unsupported model type: {model_type}")
                 return None
diff --git a/src/cover/image_model_sdk/baidu_sdk.py b/src/cover/image_model_sdk/baidu_sdk.py
@@ -8,6 +8,7 @@
 from src.upload.bilitool.bilitool.model.model import Model
 from src.config import BAIDU_API_KEY
 
+
 def cover_up(img: str):
     """Upload the cover image
     Parameters
@@ -20,14 +21,15 @@ def cover_up(img: str):
     """
     from PIL import Image
     from io import BytesIO
+
     request = requests.Session()
     request.headers = {
-        'user-agent': "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 Chrome/63.0.3239.108",
-        'referer': "https://www.bilibili.com/",
-        'connection': 'keep-alive'
+        "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 Chrome/63.0.3239.108",
+        "referer": "https://www.bilibili.com/",
+        "connection": "keep-alive",
     }
     model = Model().get_config()
-    request.cookies.set('SESSDATA', model['cookies']['SESSDATA'])
+    request.cookies.set("SESSDATA", model["cookies"]["SESSDATA"])
     with Image.open(img) as im:
         # you should keep the image ratio 16:10
         xsize, ysize = im.size
@@ -44,7 +46,7 @@ def cover_up(img: str):
         data={
             "cover": b"data:image/jpeg;base64,"
             + (base64.b64encode(buffered.getvalue())),
-            "csrf": model['cookies']['bili_jct']
+            "csrf": model["cookies"]["bili_jct"],
         },
         timeout=30,
     )
@@ -55,24 +57,33 @@ def cover_up(img: str):
     print(res["data"]["url"], flush=True)
     return res["data"]["url"]
 
+
 def baidu_generate_cover(your_file_path):
+    """Generate a cover image using the Baidu API
+    Args:
+        your_file_path: str, path to the image file
+    Returns:
+        str, local download path of the generated cover image file
+    """
     try:
         cover_url = cover_up(your_file_path)
 
         url = "https://qianfan.baidubce.com/v2/images/generations"
-        payload = json.dumps({
-        "model": "irag-1.0",
-        "prompt": "这是视频截图，请根据该图生成对应的动漫类型的封面",
-        "refer_image": cover_url
-    })
+        payload = json.dumps(
+            {
+                "model": "irag-1.0",
+                "prompt": "这是视频截图，请根据该图生成对应的动漫类型的封面",
+                "refer_image": cover_url,
+            }
+        )
         headers = {
-            'Content-Type': 'application/json',
-            'Authorization': f'Bearer {BAIDU_API_KEY}'
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {BAIDU_API_KEY}",
         }
-        
+
         response = requests.request("POST", url, headers=headers, data=payload)
         if response.status_code == 200:
-            image_url=response.json()['data'][0]['url']
+            image_url = response.json()["data"][0]["url"]
             img_data = requests.get(image_url).content
             cover_name = time.strftime("%Y%m%d%H%M%S") + ".png"
             temp_cover_path = os.path.join(os.path.dirname(your_file_path), cover_name)
@@ -87,5 +98,6 @@ def baidu_generate_cover(your_file_path):
         print(e, flush=True)
         return None
 
-if __name__ == '__main__':
-    print(baidu_generate_cover(""))
+
+if __name__ == "__main__":
+    print(baidu_generate_cover(""))
diff --git a/src/cover/image_model_sdk/stability_sdk.py b/src/cover/image_model_sdk/stability_sdk.py
@@ -0,0 +1,46 @@
+import requests
+from src.config import STABILITY_API_KEY
+import time
+import os
+
+
+def stable_diffusion_generate_cover(your_file_path):
+    """Generate a cover image using the Stability API
+    Args:
+        your_file_path: str, path to the image file (removed on success)
+    Returns:
+        str, local download path of the generated cover image file
+    Raises:
+        Exception: when the API responds with a non-200 status code
+    """
+    cover_name = time.strftime("%Y%m%d%H%M%S") + ".jpeg"
+    temp_cover_path = os.path.join(os.path.dirname(your_file_path), cover_name)
+
+    with open(your_file_path, "rb") as img_file:
+        response = requests.post(
+            "https://api.stability.ai/v2beta/stable-image/generate/sd3",
+            headers={
+                "authorization": f"Bearer {STABILITY_API_KEY}",
+                "accept": "image/*",
+            },
+            files={"image": ("image.jpg", img_file, "image/jpeg")},
+            data={
+                "prompt": "This is a video screenshot, please generate a cover in the style of a manga",  # English only
+                "strength": 0.75,
+                "output_format": "jpeg",
+                "mode": "image-to-image",
+                "model": "sd3.5-large-turbo",
+            },
+            timeout=120,  # a stalled request must not block the pipeline forever
+        )
+
+    if response.status_code != 200:
+        raise Exception(str(response.json()))  # surface the API error payload
+    with open(temp_cover_path, "wb") as file:
+        file.write(response.content)
+    os.remove(your_file_path)  # the source screenshot is replaced by the cover
+    return temp_cover_path
+
+
+if __name__ == "__main__":
+    print(stable_diffusion_generate_cover(""))