Skip to content

Commit 8114713

Browse files
authored
refactor: adjust settings (#258)
1 parent 1cb1801 commit 8114713

File tree

5 files changed

+100
-69
lines changed

5 files changed

+100
-69
lines changed

requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,5 @@ zhconv==1.4.3
1313
bilitool==0.1.2
1414
google-generativeai>=0.7.2 # don't change this part
1515
zhipuai
16-
openai
16+
openai
17+
toml

settings.toml

Lines changed: 41 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,57 @@
11
version = "1.0"
22
webhooks = []
33

4+
# bilive Settings
5+
[model]
6+
model_type = "append" # Can be pipeline, append, merge
7+
8+
# WARNING!: If you choose "deploy" local inference:
9+
# 1. Please check the VRAM requirements twice!
10+
# 2. Please make sure you have installed the Nvidia GPU driver and can check the cuda via `nvcc -V`!
11+
# WARNING!: If you choose "api":
12+
# due to the limitations of the free tier, you should keep every video shorter than 30 minutes (approximately)
13+
# which means your MODEL_TYPE should not be "merge".
14+
[asr]
15+
asr_method = "none" # can be "deploy" or "api" or "none"
16+
whisper_api_key = "" # Apply for your own API key at https://console.groq.com/keys
17+
inference_model = "small" # If you choose "deploy", you should download the inference model from https://openaipublic.azureedge.net/main/whisper/models/9ecf779972d90ba49c06d968637d720dd632c55bbf19d441fb42bf17a411e794/small.pt
18+
19+
[video]
20+
# You can change the title as you like, eg.
21+
# f"{artist}直播回放-{date}-{title}" - Streamer直播回放-20250328-Live title
22+
# f"{date}-{artist}直播回放" - 20250328-Streamer直播回放
23+
title = "{artist}直播回放-{date}-{title}" # Key words: {artist}, {date}, {title}, {source_link}
24+
description = "{artist}直播回放,直播间地址:{source_link} 内容仅供娱乐,直播中主播的言论、观点和行为均由主播本人负责,不代表录播员的观点或立场。" # Key words: {artist}, {date}, {title}, {source_link}
25+
gift_price_filter = 1 # The gift whose price is less than this value will be filtered, unit: RMB
26+
reserve_for_fixing = false # If encounter MOOV crash error, delete the video or reserve for fixing
27+
upload_line = "auto" # The upload line to use; "auto" (recommended) auto-detects the best line. To specify one, use "bldsa", "ws", "tx", "qn", or "bda2".
28+
29+
[slice]
30+
auto_slice = false # General control
31+
slice_duration = 60 # better not exceed 300 seconds
32+
slice_num = 2
33+
slice_overlap = 30
34+
slice_step = 1
35+
min_video_size = 200 # The minimum video size to be sliced (MB)
36+
mllm_model = "gemini" # the multi-modal LLM; can be "gemini" or "zhipu" or "qwen"
37+
zhipu_api_key = "" # Apply for your own GLM-4v-Plus API key at https://www.bigmodel.cn/invite?icode=shBtZUfNE6FfdMH1R6NybGczbXFgPRGIalpycrEwJ28%3D
38+
gemini_api_key = "" # Apply for your own Gemini API key at https://aistudio.google.com/app/apikey
39+
qwen_api_key = "" # Apply for your own Qwen API key at https://bailian.console.aliyun.com/?apiKey=1
40+
41+
# blrec Settings
442
[[tasks]]
543
room_id = 173551
644
enable_monitor = true
745
enable_recorder = true
846

947
[output]
10-
path_template = "{roomid}/{roomid}_{year}{month}{day}-{hour}-{minute}-{second}"
48+
path_template = "{roomid}/{roomid}_{year}{month}{day}-{hour}-{minute}-{second}" # Don't change this
1149
filesize_limit = 0
1250
duration_limit = 1800
13-
out_dir = "./Videos"
51+
out_dir = "./Videos" # Don't change this
1452

1553
[logging]
16-
log_dir = "./logs/blrec"
54+
log_dir = "./logs/blrec" # Don't change this
1755
console_log_level = "INFO"
1856
backup_count = 30
1957

src/autoslice/mllm_sdk/gemini_new_sdk.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,14 @@ def gemini_generate_title(video_path, artist):
1414
model='models/gemini-2.0-flash',
1515
contents=types.Content(
1616
parts=[
17-
types.Part(text=f'视频是{artist}的直播的切片,请根据该视频中的内容及弹幕信息,为这段视频起一个调皮并且吸引眼球的标题,只需要返回一个标题即可,无需返回其他内容'),
17+
types.Part(text=f'视频是{artist}的直播的切片,请根据该视频中的内容及弹幕信息,为这段视频起一个调皮并且吸引眼球的标题,只需要返回一个标题即可,无需返回其他内容,标题中不要有表情符号。'),
1818
types.Part(
1919
inline_data=types.Blob(data=video_bytes, mime_type='video/mp4')
2020
)
2121
]
2222
)
2323
)
2424
scan_log.info("使用 Gemini-2.0-flash 生成切片标题")
25-
scan_log.info(f"Prompt: 视频是{artist}的直播的切片,请根据该视频中的内容及弹幕信息,为这段视频起一个调皮并且吸引眼球的标题,只需要返回一个标题即可,无需返回其他内容")
25+
scan_log.info(f"Prompt: 视频是{artist}的直播的切片,请根据该视频中的内容及弹幕信息,为这段视频起一个调皮并且吸引眼球的标题,只需要返回一个标题即可,无需返回其他内容,标题中不要有表情符号。")
2626
scan_log.info(f"生成的切片标题为: {response.text}")
2727
return response.text

src/autoslice/mllm_sdk/gemini_old_sdk.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def gemini_generate_title(video_path, artist):
2222
raise ValueError(video_file.state.name)
2323

2424
# Create the prompt.
25-
prompt = f"视频是{artist}的直播的切片,请根据该视频中的内容及弹幕信息,为这段视频起一个调皮并且吸引眼球的标题,只返回该标题即可,无需返回其他内容"
25+
prompt = f"视频是{artist}的直播的切片,请根据该视频中的内容及弹幕信息,为这段视频起一个调皮并且吸引眼球的标题,只返回该标题即可,无需返回其他内容,标题中不要有表情符号。"
2626

2727
# Set the model to Gemini Flash.
2828
model = genai.GenerativeModel(model_name="models/gemini-2.0-flash")
@@ -32,6 +32,6 @@ def gemini_generate_title(video_path, artist):
3232
# delete the video file
3333
genai.delete_file(video_file.name)
3434
scan_log.info("使用 Gemini-2.0-flash 生成切片标题")
35-
scan_log.info(f"Prompt: 视频是{artist}的直播的切片,请根据该视频中的内容及弹幕信息,为这段视频起一个调皮并且吸引眼球的标题,只需要返回一个标题即可,无需返回其他内容")
35+
scan_log.info(f"Prompt: 视频是{artist}的直播的切片,请根据该视频中的内容及弹幕信息,为这段视频起一个调皮并且吸引眼球的标题,只需要返回一个标题即可,无需返回其他内容,标题中不要有表情符号。")
3636
scan_log.info(f"生成的切片标题为: {response.text}")
3737
return response.text

src/config.py

Lines changed: 53 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -5,78 +5,70 @@
55
from datetime import datetime
66
import configparser
77
import torch
8+
import toml
9+
import src.log.logger as scan_log
810
from db.conn import create_table
911

10-
# ============================ Your configuration ============================
11-
# Can be pipeline, append, merge
12-
MODEL_TYPE = "append"
12+
def load_config_from_toml(file_path):
13+
"""
14+
load config from toml file and update global variables
15+
"""
16+
try:
17+
with open(file_path, 'r', encoding='utf-8') as file:
18+
config = toml.load(file)
19+
return config
20+
except FileNotFoundError:
21+
scan_log.error(f"cannot find {file_path}")
22+
except toml.TomlDecodeError as e:
23+
scan_log.error(f"cannot parse {file_path} as a valid toml file, error: {e}")
24+
except Exception as e:
25+
scan_log.error(f"unknown error when loading config file, error: {e}")
26+
return None
1327

14-
# =============== The auto speech recognition configuration ============================
15-
# WARNING!: If you choose "deploy" local inference:
16-
# 1. Please check the VRAM requirements twice!
17-
# 2. Please make sure you have installed the Nvidia GPU driver and can check the cuda via `nvcc -V`!
18-
# WARNING!: If you choose "api":
19-
# due to the limitation of free tier, you should keep every video less than 30 minutes(around)
20-
# which means your MODEL_TYPE should not be "merge".
21-
ASR_METHOD = "none" # can be "deploy" or "api" or "none"
22-
# Apply for your own API key at https://console.groq.com/keys
23-
WHISPER_API_KEY = ""
24-
# If you choose "deploy", you should download the model from https://huggingface.co/openai/whisper-large-v3-turbo
25-
Inference_Model = "small" # the model to be deployed
28+
def get_model_path():
29+
SRC_DIR = str(Path(os.path.abspath(__file__)).parent)
30+
model_dir = os.path.join(SRC_DIR, 'subtitle', 'models')
31+
model_path = os.path.join(model_dir, f'{INFERENCE_MODEL}.pt')
32+
return model_path
2633

27-
# =============== The video configuration ============================
28-
TITLE = "{artist}直播回放-{date}-{title}"
29-
# You can change the title as you like, eg.
30-
# f"{artist}直播回放-{date}-{title}" - Streamer直播回放-20250328-Live title
31-
# f"{date}-{artist}直播回放" - 20250328-Streamer直播回放
32-
DESC = "{artist}直播回放,直播间地址:{source_link} 内容仅供娱乐,直播中主播的言论、观点和行为均由主播本人负责,不代表录播员的观点或立场。"
33-
# You can change the description as you like.
34-
GIFT_PRICE_FILTER = 1 # The gift whose price is less than this value will be filtered, unit: RMB
35-
RESERVE_FOR_FIXING = False # If encounter MOOV crash error, delete the video or reserve for fixing
36-
UPLOAD_LINE = None # The upload line to be used, default None is auto detect(recommended), if you want to specify, it can be "bldsa", "ws", "tx", "qn", "bda2".
34+
def get_interface_config():
35+
interface_config = configparser.ConfigParser()
36+
interface_dir = os.path.join(SRC_DIR, 'subtitle')
37+
interface_file = os.path.join(interface_dir, "en.ini")
38+
interface_config.read(interface_file, encoding='utf-8')
39+
return interface_config
3740

38-
# ============================ The video slice configuration ==================
39-
AUTO_SLICE = False
40-
SLICE_DURATION = 60 # better not exceed 300 seconds
41-
SLICE_NUM = 2
42-
SLICE_OVERLAP = 30
43-
SLICE_STEP = 1
44-
# The minimum video size to be sliced (MB)
45-
MIN_VIDEO_SIZE = 200
46-
# the multi-model LLMs, can be "gemini" or "zhipu" or "qwen"
47-
MLLM_MODEL = "gemini" # Please make sure you have the right API key for the LLM you choose
48-
# Apply for your own GLM-4v-Plus API key at https://www.bigmodel.cn/invite?icode=shBtZUfNE6FfdMH1R6NybGczbXFgPRGIalpycrEwJ28%3D
49-
ZHIPU_API_KEY = ""
50-
# Apply for your own Gemini API key at https://aistudio.google.com/app/apikey
51-
GEMINI_API_KEY = ""
52-
# Apply for your own Qwen API key at https://bailian.console.aliyun.com/?apiKey=1
53-
QWEN_API_KEY = ""
54-
# ============================ Basic configuration ============================
55-
GPU_EXIST = torch.cuda.is_available()
5641
SRC_DIR = str(Path(os.path.abspath(__file__)).parent)
5742
BILIVE_DIR = str(Path(SRC_DIR).parent)
5843
LOG_DIR = os.path.join(BILIVE_DIR, 'logs')
5944
VIDEOS_DIR = os.path.join(BILIVE_DIR, 'Videos')
60-
61-
6245
if not os.path.exists(SRC_DIR + '/db/data.db'):
63-
print("Initialize the database")
46+
scan_log.info("Initialize the database")
6447
create_table()
6548

66-
if not os.path.exists(VIDEOS_DIR):
67-
os.makedirs(VIDEOS_DIR)
68-
if not os.path.exists(VIDEOS_DIR + '/upload_conf'):
69-
os.makedirs(VIDEOS_DIR + '/upload_conf')
49+
config = load_config_from_toml(os.path.join(BILIVE_DIR, 'settings.toml'))
50+
if config is None:
51+
scan_log.error("failed to load config file, please check twice")
52+
exit(1)
7053

71-
def get_model_path():
72-
SRC_DIR = str(Path(os.path.abspath(__file__)).parent)
73-
model_dir = os.path.join(SRC_DIR, 'subtitle', 'models')
74-
model_path = os.path.join(model_dir, f'{Inference_Model}.pt')
75-
return model_path
54+
GPU_EXIST = torch.cuda.is_available()
55+
MODEL_TYPE = config.get('model', {}).get('model_type')
56+
ASR_METHOD = config.get('asr', {}).get('asr_method')
57+
WHISPER_API_KEY = config.get('asr', {}).get('whisper_api_key')
58+
INFERENCE_MODEL = config.get('asr', {}).get('inference_model')
7659

77-
def get_interface_config():
78-
interface_config = configparser.ConfigParser()
79-
interface_dir = os.path.join(SRC_DIR, 'subtitle')
80-
interface_file = os.path.join(interface_dir, "en.ini")
81-
interface_config.read(interface_file, encoding='utf-8')
82-
return interface_config
60+
TITLE = config.get('video', {}).get('title')
61+
DESC = config.get('video', {}).get('description')
62+
GIFT_PRICE_FILTER = config.get('video', {}).get('gift_price_filter')
63+
RESERVE_FOR_FIXING = config.get('video', {}).get('reserve_for_fixing')
64+
UPLOAD_LINE = config.get('video', {}).get('upload_line')
65+
AUTO_SLICE = config.get('slice', {}).get('auto_slice')
66+
SLICE_DURATION = config.get('slice', {}).get('slice_duration')
67+
SLICE_NUM = config.get('slice', {}).get('slice_num')
68+
SLICE_OVERLAP = config.get('slice', {}).get('slice_overlap')
69+
SLICE_STEP = config.get('slice', {}).get('slice_step')
70+
MIN_VIDEO_SIZE = config.get('slice', {}).get('min_video_size')
71+
MLLM_MODEL = config.get('slice', {}).get('mllm_model')
72+
ZHIPU_API_KEY = config.get('slice', {}).get('zhipu_api_key')
73+
GEMINI_API_KEY = config.get('slice', {}).get('gemini_api_key')
74+
QWEN_API_KEY = config.get('slice', {}).get('qwen_api_key')

0 commit comments

Comments
 (0)