Skip to content

Commit 38517cc

Browse files
qryxip, X-20A, sabonerune, Hiroshiba
authored
feat: pause_length{,_scale}をデフォルト値限定で受け入れる (#872)
feat: `pause_length{,_scale}`をデフォルト値限定で受け入れる

`AudioQuery`に`pause_length{,_scale}`を追加する。ただしそれぞれ`null`と`1.`のみを許し、無音時間調整自体はまだ実装しない。

VOICEVOX/voicevox_engine#1308 と VOICEVOX/voicevox_engine#1425 の一部を参考にコードを書いた。 @Hiroshiba さんと以下の2名の許諾のもと、 #874 にのっとりMITライセンスとしてライセンスする。

* @X-20A (VOICEVOX/voicevox_engine#1308)
* @sabonerune (VOICEVOX/voicevox_engine#1425)

Co-authored-by: X-20A <[email protected]>
Co-authored-by: sabonerune <[email protected]>
Co-authored-by: Hiroshiba <[email protected]>
Refs: VOICEVOX/voicevox_engine#1308, VOICEVOX/voicevox_engine#1425
Refs: #874 (comment)
Refs: #874 (comment)
1 parent 52b3d0d commit 38517cc

File tree

4 files changed

+161
-1
lines changed

4 files changed

+161
-1
lines changed

crates/voicevox_core/src/engine/model.rs

Lines changed: 140 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1-
use serde::{Deserialize, Serialize};
1+
use std::fmt;
2+
3+
use duplicate::duplicate_item;
4+
use serde::{de, Deserialize, Deserializer, Serialize, Serializer};
25

36
/* 各フィールドのjsonフィールド名はsnake_caseとする*/
47

@@ -64,6 +67,21 @@ pub struct AudioQuery {
6467
pub output_sampling_rate: u32,
6568
/// 音声データをステレオ出力するか否か。
6669
pub output_stereo: bool,
70+
// TODO: VOICEVOX/voicevox_engine#1308 を実装する
71+
/// 句読点などの無音時間。`null`のときは無視される。デフォルト値は`null`。
72+
#[serde(
73+
default,
74+
deserialize_with = "deserialize_pause_length",
75+
serialize_with = "serialize_pause_length"
76+
)]
77+
pub pause_length: (),
78+
/// 読点などの無音時間(倍率)。デフォルト値は`1`。
79+
#[serde(
80+
default,
81+
deserialize_with = "deserialize_pause_length_scale",
82+
serialize_with = "serialize_pause_length_scale"
83+
)]
84+
pub pause_length_scale: (),
6785
/// \[読み取り専用\] AquesTalk風記法。
6886
///
6987
/// [`Synthesizer::audio_query`]が返すもののみ`Some`となる。入力としてのAudioQueryでは無視され
@@ -73,6 +91,87 @@ pub struct AudioQuery {
7391
pub kana: Option<String>,
7492
}
7593

94+
fn deserialize_pause_length<'de, D>(deserializer: D) -> Result<(), D::Error>
95+
where
96+
D: Deserializer<'de>,
97+
{
98+
return deserializer.deserialize_any(Visitor);
99+
100+
struct Visitor;
101+
102+
impl<'de> de::Visitor<'de> for Visitor {
103+
type Value = ();
104+
105+
fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
106+
formatter.write_str("`null`")
107+
}
108+
109+
#[duplicate_item(
110+
method T;
111+
[ visit_i64 ] [ i64 ];
112+
[ visit_u64 ] [ u64 ];
113+
[ visit_f64 ] [ f64 ];
114+
)]
115+
fn method<E>(self, _: T) -> Result<Self::Value, E>
116+
where
117+
E: de::Error,
118+
{
119+
Err(E::custom("currently `pause_length` must be `null`"))
120+
}
121+
122+
fn visit_unit<E>(self) -> Result<Self::Value, E> {
123+
Ok(())
124+
}
125+
}
126+
}
127+
128+
fn serialize_pause_length<S>(_: &(), serializer: S) -> Result<S::Ok, S::Error>
129+
where
130+
S: Serializer,
131+
{
132+
serializer.serialize_unit()
133+
}
134+
135+
fn deserialize_pause_length_scale<'de, D>(deserializer: D) -> Result<(), D::Error>
136+
where
137+
D: Deserializer<'de>,
138+
{
139+
return deserializer.deserialize_any(Visitor);
140+
141+
struct Visitor;
142+
143+
impl<'de> de::Visitor<'de> for Visitor {
144+
type Value = ();
145+
146+
fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
147+
formatter.write_str("`1.`")
148+
}
149+
150+
#[duplicate_item(
151+
method T ONE;
152+
[ visit_i64 ] [ i64 ] [ 1 ];
153+
[ visit_u64 ] [ u64 ] [ 1 ];
154+
[ visit_f64 ] [ f64 ] [ 1. ];
155+
)]
156+
fn method<E>(self, v: T) -> Result<Self::Value, E>
157+
where
158+
E: de::Error,
159+
{
160+
if v != ONE {
161+
return Err(E::custom("currently `pause_length_scale` must be `1.`"));
162+
}
163+
Ok(())
164+
}
165+
}
166+
}
167+
168+
fn serialize_pause_length_scale<S>(_: &(), serializer: S) -> Result<S::Ok, S::Error>
169+
where
170+
S: Serializer,
171+
{
172+
(1.).serialize(serializer)
173+
}
174+
76175
impl AudioQuery {
77176
pub(crate) fn with_kana(self, kana: Option<String>) -> Self {
78177
Self { kana, ..self }
@@ -99,6 +198,8 @@ mod tests {
99198
post_phoneme_length: 0.0,
100199
output_sampling_rate: 0,
101200
output_stereo: false,
201+
pause_length: (),
202+
pause_length_scale: (),
102203
kana: None,
103204
};
104205
let val = serde_json::to_value(audio_query_model).unwrap();
@@ -152,4 +253,42 @@ mod tests {
152253
}))?;
153254
Ok(())
154255
}
256+
257+
// TODO: 型的に自明になったらこのテストは削除する
258+
#[rstest]
259+
fn it_denies_non_null_for_pause_length() {
260+
serde_json::from_value::<AudioQuery>(json!({
261+
"accent_phrases": [],
262+
"speed_scale": 1.0,
263+
"pitch_scale": 0.0,
264+
"intonation_scale": 1.0,
265+
"volume_scale": 1.0,
266+
"pre_phoneme_length": 0.1,
267+
"post_phoneme_length": 0.1,
268+
"output_sampling_rate": 24000,
269+
"output_stereo": false,
270+
"pause_length": "aaaaa"
271+
}))
272+
.map(|_| ())
273+
.unwrap_err();
274+
}
275+
276+
// TODO: 型的に自明になったらこのテストは削除する
277+
#[rstest]
278+
fn it_denies_non_float_for_pause_length_scale() {
279+
serde_json::from_value::<AudioQuery>(json!({
280+
"accent_phrases": [],
281+
"speed_scale": 1.0,
282+
"pitch_scale": 0.0,
283+
"intonation_scale": 1.0,
284+
"volume_scale": 1.0,
285+
"pre_phoneme_length": 0.1,
286+
"post_phoneme_length": 0.1,
287+
"output_sampling_rate": 24000,
288+
"output_stereo": false,
289+
"pause_length_scale": "aaaaa",
290+
}))
291+
.map(|_| ())
292+
.unwrap_err();
293+
}
155294
}

crates/voicevox_core/src/synthesizer.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1187,6 +1187,8 @@ mod inner {
11871187
post_phoneme_length: 0.1,
11881188
output_sampling_rate: DEFAULT_SAMPLING_RATE,
11891189
output_stereo: false,
1190+
pause_length: (),
1191+
pause_length_scale: (),
11901192
kana: Some(kana),
11911193
}
11921194
}

crates/voicevox_core_java_api/lib/src/main/java/jp/hiroshiba/voicevoxcore/AudioQuery.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,17 @@ public class AudioQuery {
5555
@Expose
5656
public boolean outputStereo;
5757

58+
/** 句読点などの無音時間。{@code null}のときは無視される。デフォルト値は{@code null}。 */
59+
@SerializedName("pause_length")
60+
@Expose
61+
@Nullable
62+
public Double pauseLength;
63+
64+
/** 読点などの無音時間(倍率)。デフォルト値は{@code 1.}。 */
65+
@SerializedName("pause_length_scale")
66+
@Expose
67+
public double pauseLengthScale;
68+
5869
/**
5970
* [読み取り専用] AquesTalk風記法。
6071
*
@@ -75,6 +86,8 @@ public AudioQuery() {
7586
this.prePhonemeLength = 0.1;
7687
this.postPhonemeLength = 0.1;
7788
this.outputSamplingRate = 24000;
89+
this.pauseLength = null;
90+
this.pauseLengthScale = 1.0;
7891
this.kana = null;
7992
}
8093
}

crates/voicevox_core_python_api/python/voicevox_core/_models.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,12 @@ class AudioQuery:
208208
output_stereo: bool
209209
"""音声データをステレオ出力するか否か。"""
210210

211+
pause_length: None = None
212+
"""句読点などの無音時間。 ``None`` のときは無視される。デフォルト値は ``None`` 。"""
213+
214+
pause_length_scale: float = 1.0
215+
"""読点などの無音時間(倍率)。デフォルト値は ``1.0`` 。"""
216+
211217
kana: Optional[str] = None
212218
"""
213219
[読み取り専用] AquesTalk風記法。

0 commit comments

Comments
 (0)