Skip to content

Commit 67ec624

Browse files
committed
fix ci for waveflow, test=tts
1 parent f510976 commit 67ec624

File tree

2 files changed

+6
-6
lines changed

2 files changed

+6
-6
lines changed

paddlespeech/t2s/frontend/zh_normalization/num.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,7 @@ def verbalize_digit(value_string: str, alt_one=False) -> str:
208208
result_symbols = [DIGITS[digit] for digit in value_string]
209209
result = ''.join(result_symbols)
210210
if alt_one:
211-
result.replace("一", "幺")
211+
result = result.replace("一", "幺")
212212
return result
213213

214214

paddlespeech/t2s/models/waveflow.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,11 @@ def fold(x, n_group):
3333
"""Fold audio or spectrogram's temporal dimension in to groups.
3434
3535
Args:
36-
x(Tensor): The input tensor. shape=(\*, time_steps)
36+
x(Tensor): The input tensor. shape=(*, time_steps)
3737
n_group(int): The size of a group.
3838
3939
Returns:
40-
Tensor: Folded tensor. shape=(\*, time_steps // n_group, group)
40+
Tensor: Folded tensor. shape=(*, time_steps // n_group, group)
4141
"""
4242
spatial_shape = list(x.shape[:-1])
4343
time_steps = paddle.shape(x)[-1]
@@ -98,11 +98,11 @@ def forward(self, x, trim_conv_artifact=False):
9898
trim_conv_artifact(bool, optional, optional): Trim deconvolution artifact at each layer. Defaults to False.
9999
100100
Returns:
101-
Tensor: The upsampled spectrogram. shape=(batch_size, input_channels, time_steps \* upsample_factor)
101+
Tensor: The upsampled spectrogram. shape=(batch_size, input_channels, time_steps * upsample_factor)
102102
103103
Notes:
104104
If trim_conv_artifact is ``True``, the output time steps is less
105-
than ``time_steps \* upsample_factors``.
105+
than ``time_steps * upsample_factors``.
106106
"""
107107
x = paddle.unsqueeze(x, 1) # (B, C, T) -> (B, 1, C, T)
108108
for layer in self:
@@ -641,7 +641,7 @@ def infer(self, mel):
641641
mel(np.ndarray): Mel spectrogram of an utterance(in log-magnitude). shape=(C_mel, T_mel)
642642
643643
Returns:
644-
Tensor: The synthesized audio, where``T <= T_mel \* upsample_factors``. shape=(B, T)
644+
Tensor: The synthesized audio, where``T <= T_mel * upsample_factors``. shape=(B, T)
645645
"""
646646
start = time.time()
647647
condition = self.encoder(mel, trim_conv_artifact=True) # (B, C, T)

0 commit comments

Comments (0)