Skip to content

Commit d240309

Browse files
committed
docs: code review sth
- 修改默认分割器参数 100 => 30
1 parent 5751230 commit d240309

File tree

4 files changed

+23
-2
lines changed

4 files changed

+23
-2
lines changed

modules/core/handler/AudioHandler.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ async def enqueue_to_stream(self) -> AsyncGenerator[bytes, None]:
123123
yield chunk_data
124124
chunk_data = encoder.read()
125125

126+
# wait to finish
126127
encoder.close()
127128

128129
chunk_data = encoder.read()

modules/core/handler/datacls/tts_model.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,11 @@ class TTSConfig(BaseModel):
1919

2020

2121
class InferConfig(BaseModel):
22-
batch_size: int = 4
23-
spliter_threshold: int = 100
22+
# NOTE: batch_size * spliter_threshold = 预计最大vram面积 * 不同模型的系数
23+
# 大概 batch_size=2 spliter_threshold=30 可以保证在8gb显存正常推理
24+
batch_size: int = 2
25+
spliter_threshold: int = 30
26+
2427
# end_of_sentence
2528
eos: str = "。"
2629
seed: int = 42

modules/core/models/TTSModel.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,11 @@
55
from modules.core.models.tts.InferCache import InferCache
66
from modules.core.pipeline.dcls import TTSSegment
77
from modules.core.pipeline.processor import NP_AUDIO, TTSPipelineContext
8+
from modules.core.spk.TTSSpeaker import TTSSpeaker
89
from modules.devices import devices
910
from modules.utils import audio_utils
1011

12+
import numpy.typing as npt
1113

1214
class TTSModel(BaseZooModel):
1315

@@ -26,6 +28,16 @@ def get_sample_rate(self) -> int:
2628
def generate(self, segment: TTSSegment, context: TTSPipelineContext) -> NP_AUDIO:
2729
return self.generate_batch([segment], context=context)[0]
2830

31+
def compute_spk_features(self, spk: TTSSpeaker) -> npt.NDArray:
32+
"""
33+
计算说话人特征,只有部分模型支持
34+
35+
用于音色合并或者其他用途
36+
"""
37+
raise NotImplementedError(
38+
f"Model {self.model_id} is not support extracting speaker features"
39+
)
40+
2941
# NOTE: 这里会有假设,所有的 segments 除了文本以外所有配置相同,具体调用逻辑在 core.pipeline.generate 中
3042
def generate_batch(
3143
self, segments: list[TTSSegment], context: TTSPipelineContext

modules/core/pipeline/generate/SynthSteamer.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,19 @@ def __init__(
1616
self.output_wav = np.empty(0)
1717

1818
def flush(self):
19+
"""
20+
刷新合并音频
21+
"""
1922
output_wav = np.empty(0)
2023

2124
for seg in self.segments:
2225
data = seg.data
2326
if data.size == 0 and not seg.done:
27+
# 空检查
2428
break
2529
output_wav = np.concatenate((output_wav, data), axis=0)
2630
if not seg.done:
31+
# 未完成的块,退出,因为只需要合并已经完成的块
2732
break
2833

2934
self.output_wav = output_wav

0 commit comments

Comments
 (0)