feat: ideogram v2 (#267)

timerring · web-flow · commit e9876f915b4c · 2025-04-06T17:53:20.000+08:00
* feat: ideogram v2
* docs: update icon
diff --git a/README.md b/README.md
@@ -25,6 +25,7 @@
   <img src="assets/stability-brand-color.svg" alt="Stability AI" width="80" height="60" />
   <img src="assets/luma-color.svg" alt="Luma Photon" width="20" height="60" />
   <img src="assets/luma-text.svg" alt="Luma Photon" width="60" height="60" />
+  <img src="assets/ideogram.svg" alt="Ideogram V_2" width="50" height="60" />
 </div>
 
 ##  1. Introduction
@@ -56,6 +57,9 @@
   - `Baidu ERNIE irag-1.0`
   - `Stable Diffusion 3.5 large turbo`
   - `Luma Photon`
+  - `Ideogram V_2`
+
+
 项目架构流程如下：
 
 ```mermaid
@@ -239,6 +243,12 @@ MLLM 模型主要用于自动切片后的切片标题生成，此功能默认关
 
 请自行[注册账号](https://lumalabs.ai/api/keys)并申请 API Key，填写到 `bilive.toml` 文件中对应的 `LUMA_API_KEY` 中。
 
+##### 3.3.7 Ideogram V_2 模型
+
+> 如需使用 Ideogram V_2 模型，请将 `IMAGE_GEN_MODEL` 参数设置为 `ideogram`。
+
+请自行[注册账号](https://ideogram.ai/manage-api)并申请 API Key，填写到 `bilive.toml` 文件中对应的 `IDEOGRAM_API_KEY` 中。
+
 #### 4. bilitool 登录
 
 > 由于一般日志打印不出二维码效果（docker 的日志不确定是否能打印，等发布新image时再修改，docker 版本请先参考文档 [bilive](https://bilive.timerring.com)，本 README 只针对源码部署），所以这步需要提前在机器上安装 [bilitool](https://github.com/timerring/bilitool):
diff --git a/assets/ideogram.svg b/assets/ideogram.svg
@@ -0,0 +1 @@
+<svg fill="currentColor" fill-rule="evenodd" height="1em" style="flex:none;line-height:1" viewBox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><title>Ideogram</title><path d="M17.768 1.482a6.4 6.4 0 012.988 3.78c.19.068.377.154.555.257.026.015.05.03.073.047.226.137.436.301.627.49a3.377 3.377 0 01-.105 4.875 3.376 3.376 0 01-.216 5.158l-.037.029.021.032c.39.602.585 1.324.534 2.065l-.009.11a3.377 3.377 0 01-4.658 2.777 3.377 3.377 0 01-5.846.971 4.052 4.052 0 01-1.635.397c-.04.005-.082.008-.125.008h-.897l-.068-.003c-1.116-.067-1.093-1.775.068-1.775h.804l.046-.004.047-.001a2.3 2.3 0 00.079-4.6.735.735 0 01-.08.005H2.808a.889.889 0 010-1.778h7.034l.046-.004.048-.001a2.3 2.3 0 00.509-4.544l-.141-.031a2.188 2.188 0 00-.299-.046.759.759 0 01-.07.003H2.808a.889.889 0 110-1.777h7.035l.046-.004.047-.001a2.3 2.3 0 00.087-4.6.972.972 0 01-.087.004h-.897L8.97 3.32c-1.116-.067-1.093-1.775.068-1.775h.8a.9.9 0 01.097-.005c.402 0 .79.058 1.157.166l.031-.02a6.399 6.399 0 016.645-.203zm-3.737 11.484l-.068.036-.083.051a4.08 4.08 0 01-1.404 2.153 4.07 4.07 0 011.537 3.189 4.06 4.06 0 01-.92 2.579 1.599 1.599 0 002.824-1.322l-.03-.124-.013-.079-.005-.056-.001-.042.001-.058.005-.053.013-.078.018-.072.022-.064.035-.079.005-.01a.869.869 0 01.158-.217l.055-.052.041-.032.05-.035.042-.026.055-.03.073-.03.056-.02.04-.011.078-.016.073-.009.045-.002h.048l.041.002.058.006.056.01.062.015.031.01.049.017.054.023.044.021.053.031.035.023c.06.042.114.09.163.147l.038.046.034.048a1.6 1.6 0 102.4-2.07 3.256 3.256 0 01-.473.007l-.057-.005-.026-.005h-.016a3.376 3.376 0 01-2.985-2.568l-.026-.116a1.6 1.6 0 00-2.285-1.133zm-4.416 4.595a.889.889 0 110 1.777H5.939a.889.889 0 010-1.777h3.676zm9.208-10.503a1.6 1.6 0 00-.797 1.3l-.002.085a.889.889 0 01-.83.887l-.059.002a.889.889 0 01-.887-.83l-.002-.059a1.6 1.6 0 00-2.528-1.301 4.089 4.089 0 01-1.255 1.674 4.083 4.083 0 011.451 2.308 3.374 3.374 0 014.125 2.532l.02.093a1.6 1.6 0 001.395 1.26l.124.01.042.002c.06 0 
.12-.003.18-.01l.085-.011.062-.015a1.6 1.6 0 00-.245-3.163l-.079-.002a.887.887 0 01-.683-.32l-.034-.044a.884.884 0 01-.154-.702.889.889 0 01.807-.71l.064-.001a1.6 1.6 0 001.134-2.728 1.6 1.6 0 00-.786-.426l-.102-.019a.856.856 0 01-.076-.016 1.592 1.592 0 00-.97.204zm-9.199 4.08a.889.889 0 010 1.778H.89a.889.889 0 010-1.777h8.735zm-.01-6.432a.889.889 0 110 1.777H5.94a.889.889 0 110-1.777h3.676zm3.297-1.954l-.053.021a4.065 4.065 0 011.126 2.36 3.375 3.375 0 013.151 1.027 3.366 3.366 0 011.695-1 4.621 4.621 0 00-5.919-2.408z"></path></svg>
diff --git a/bilive.toml b/bilive.toml
@@ -37,11 +37,12 @@ qwen_api_key = "" # Apply for your own Qwen API key at https://bailian.console.a
 
 [cover]
 generate_cover = false # whether to generate cover
-image_gen_model = "minimax" # the image generation model, can be "minimax" or "siliconflow" or "tencent" or "baidu" or "stability" or "luma"
+image_gen_model = "minimax" # the image generation model, can be "minimax" or "siliconflow" or "tencent" or "baidu" or "stability" or "luma" or "ideogram"
 minimax_api_key = "" # Apply for your own Minimax API key at https://platform.minimaxi.com/user-center/basic-information/interface-key
 siliconflow_api_key = "" # Apply for your own SiliconFlow API key at https://cloud.siliconflow.cn/i/3Szr5BVg
 tencent_secret_id = "" # Apply for your own Tencent Cloud API key at https://console.cloud.tencent.com/cam/capi
 tencent_secret_key = "" # Apply for your own Tencent Cloud secret key as above
 baidu_api_key = "" # Apply for your own Baidu API key at https://console.bce.baidu.com/iam/key/list
 stability_api_key = "" # Apply for your own Stability API key at https://platform.stability.ai/account/keys
 luma_api_key = "" # Apply for your own Luma API key at https://lumalabs.ai/api/keys
+ideogram_api_key = "" # Apply for your own Ideogram API key at https://ideogram.ai/manage-api
diff --git a/src/config.py b/src/config.py
@@ -80,4 +80,5 @@ def get_interface_config():
 TENCENT_SECRET_KEY = config.get('cover', {}).get('tencent_secret_key')
 BAIDU_API_KEY = config.get('cover', {}).get('baidu_api_key')
 STABILITY_API_KEY = config.get('cover', {}).get('stability_api_key')
-LUMA_API_KEY = config.get('cover', {}).get('luma_api_key')
+LUMA_API_KEY = config.get('cover', {}).get('luma_api_key')
+IDEOGRAM_API_KEY = config.get('cover', {}).get('ideogram_api_key')
diff --git a/src/cover/cover_generator.py b/src/cover/cover_generator.py
@@ -77,6 +77,10 @@ def wrapper(video_path):
                 from .image_model_sdk.luma_sdk import luma_generate_cover
 
                 return luma_generate_cover(cover_path)
+            elif model_type == "ideogram":
+                from .image_model_sdk.ideogram_sdk import ideogram_generate_cover
+
+                return ideogram_generate_cover(cover_path)
             else:
                 upload_log.error(f"Unsupported model type: {model_type}")
                 return None
diff --git a/src/cover/image_model_sdk/ideogram_sdk.py b/src/cover/image_model_sdk/ideogram_sdk.py
@@ -0,0 +1,51 @@
+import requests
+import json
+import os
+import time
+from src.config import IDEOGRAM_API_KEY
+
+
+def ideogram_generate_cover(your_file_path):
+    """Generate a cover image from a screenshot using the Ideogram V_2 remix API.
+
+    Args:
+        your_file_path: str, path to the source image; deleted once the cover is saved
+    Returns:
+        str path of the downloaded cover (saved beside the source), or None
+        if any step fails (the error is printed, never raised).
+    """
+    url = "https://api.ideogram.ai/remix"
+    image_request = {
+        "prompt": "This is a video screenshot, please generate a cover in the style of a manga",
+        "aspect_ratio": "ASPECT_10_16",
+        "image_weight": 75,
+        "magic_prompt_option": "ON",
+        "model": "V_2",
+    }
+    payload = {"image_request": json.dumps(image_request)}
+    headers = {"Api-Key": f"{IDEOGRAM_API_KEY}"}
+    try:
+        # Close the handle before os.remove(): it leaked, and blocks deletion on Windows.
+        with open(your_file_path, "rb") as image_file:
+            response = requests.post(
+                url, data=payload, files={"image_file": image_file}, headers=headers, timeout=300
+            )
+        if response.status_code != 200:
+            raise Exception(response.text)
+        image_url = response.json()["data"][0]["url"]
+        # requests has no default timeout; bound the download and reject HTTP error bodies.
+        download = requests.get(image_url, timeout=60)
+        download.raise_for_status()
+        cover_name = time.strftime("%Y%m%d%H%M%S") + ".png"
+        temp_cover_path = os.path.join(os.path.dirname(your_file_path), cover_name)
+        with open(temp_cover_path, "wb") as handler:
+            handler.write(download.content)
+        os.remove(your_file_path)
+        return temp_cover_path
+    except Exception as e:
+        print(e, flush=True)
+        return None
+
+
+if __name__ == "__main__":  # ad-hoc manual run of the generator
+    print(ideogram_generate_cover(your_file_path=""))