|
1 | 1 | // エンジンを起動してyukarin_s・yukarin_sa・decodeの推論を行う
|
2 | 2 |
|
3 |
| -use std::ffi::CStr; |
4 | 3 | use std::sync::LazyLock;
|
| 4 | +use std::{cmp::min, ffi::CStr}; |
5 | 5 |
|
6 | 6 | use assert_cmd::assert::AssertResult;
|
7 | 7 | use libloading::Library;
|
@@ -83,12 +83,86 @@ impl assert_cdylib::TestCase for TestCase {
|
83 | 83 | wave
|
84 | 84 | };
|
85 | 85 |
|
| 86 | + // 中間生成物を経由した場合の生成音声 |
| 87 | + let wave2 = { |
| 88 | + let length_with_margin = |
| 89 | + EXAMPLE_DATA.intermediate.f0_length + 2 * EXAMPLE_DATA.intermediate.margin_width; |
| 90 | + let mut audio_feature = |
| 91 | + vec![0.; (length_with_margin * EXAMPLE_DATA.intermediate.feature_dim) as usize]; |
| 92 | + let mut wave = vec![0.; 256 * length_with_margin as usize]; |
| 93 | + assert!(lib.generate_full_intermediate( |
| 94 | + EXAMPLE_DATA.intermediate.f0_length, |
| 95 | + EXAMPLE_DATA.intermediate.phoneme_size, |
| 96 | + EXAMPLE_DATA.intermediate.f0_vector.as_ptr() as *mut f32, |
| 97 | + EXAMPLE_DATA.intermediate.phoneme_vector.as_ptr() as *mut f32, |
| 98 | + &mut { EXAMPLE_DATA.speaker_id } as *mut i64, |
| 99 | + audio_feature.as_mut_ptr(), |
| 100 | + )); |
| 101 | + assert!(lib.render_audio_segment( |
| 102 | + length_with_margin, |
| 103 | + EXAMPLE_DATA.intermediate.margin_width, |
| 104 | + EXAMPLE_DATA.intermediate.feature_dim, |
| 105 | + audio_feature.as_ptr() as *mut f32, |
| 106 | + &mut { EXAMPLE_DATA.speaker_id } as *mut i64, |
| 107 | + wave.as_mut_ptr(), |
| 108 | + )); |
| 109 | + wave[256 * EXAMPLE_DATA.intermediate.margin_width as usize |
| 110 | + ..wave.len() - 256 * EXAMPLE_DATA.intermediate.margin_width as usize] |
| 111 | + .to_vec() |
| 112 | + }; |
| 113 | + |
| 114 | + // 中間生成物を経由し、さらにチャンクごとに変換した場合の生成音声 |
| 115 | + let wave3 = { |
| 116 | + let length_with_margin = |
| 117 | + EXAMPLE_DATA.intermediate.f0_length + 2 * EXAMPLE_DATA.intermediate.margin_width; |
| 118 | + let mut audio_feature = |
| 119 | + vec![0.; (length_with_margin * EXAMPLE_DATA.intermediate.feature_dim) as usize]; |
| 120 | + let mut wave = vec![0.; 256 * EXAMPLE_DATA.intermediate.f0_length as usize]; |
| 121 | + assert!(lib.generate_full_intermediate( |
| 122 | + EXAMPLE_DATA.intermediate.f0_length, |
| 123 | + EXAMPLE_DATA.intermediate.phoneme_size, |
| 124 | + EXAMPLE_DATA.intermediate.f0_vector.as_ptr() as *mut f32, |
| 125 | + EXAMPLE_DATA.intermediate.phoneme_vector.as_ptr() as *mut f32, |
| 126 | + &mut { EXAMPLE_DATA.speaker_id } as *mut i64, |
| 127 | + audio_feature.as_mut_ptr(), |
| 128 | + )); |
| 129 | + let full_length = EXAMPLE_DATA.intermediate.f0_length as usize; |
| 130 | + let pitch = EXAMPLE_DATA.intermediate.feature_dim as usize; |
| 131 | + for render_start in (0..full_length).step_by(10) { |
| 132 | + // render_start .. render_end の音声を取得する |
| 133 | + let render_end = min(render_start + 10, full_length); |
| 134 | + let slice_start = render_start; |
| 135 | + let slice_end = render_end + 2 * EXAMPLE_DATA.intermediate.margin_width as usize; |
| 136 | + let feature_segment = &audio_feature[slice_start * pitch..slice_end * pitch]; |
| 137 | + let slice_length = slice_end - slice_start; |
| 138 | + let mut wave_segment_with_margin = vec![0.; 256 * slice_length]; |
| 139 | + assert!(lib.render_audio_segment( |
| 140 | + slice_length as i64, |
| 141 | + EXAMPLE_DATA.intermediate.margin_width, |
| 142 | + pitch as i64, |
| 143 | + feature_segment.as_ptr() as *mut f32, |
| 144 | + &mut { EXAMPLE_DATA.speaker_id } as *mut i64, |
| 145 | + wave_segment_with_margin.as_mut_ptr(), |
| 146 | + )); |
| 147 | + let wave_segment = &wave_segment_with_margin[256 |
| 148 | + * EXAMPLE_DATA.intermediate.margin_width as usize |
| 149 | + ..wave_segment_with_margin.len() |
| 150 | + - 256 * EXAMPLE_DATA.intermediate.margin_width as usize]; |
| 151 | + wave[render_start * 256..render_end * 256].clone_from_slice(wave_segment); |
| 152 | + } |
| 153 | + wave |
| 154 | + }; |
| 155 | + |
86 | 156 | std::assert_eq!(SNAPSHOTS.metas, metas_json);
|
87 | 157 |
|
88 | 158 | float_assert::close_l1(&phoneme_length, &EXAMPLE_DATA.duration.result, 0.01);
|
89 | 159 | float_assert::close_l1(&intonation_list, &EXAMPLE_DATA.intonation.result, 0.01);
|
90 | 160 |
|
91 | 161 | assert!(wave.iter().copied().all(f32::is_normal));
|
| 162 | + assert!(wave2.iter().copied().all(f32::is_normal)); |
| 163 | + assert!(wave3.iter().copied().all(f32::is_normal)); |
| 164 | + float_assert::close_l1(&wave2, &wave, 0.001); |
| 165 | + float_assert::close_l1(&wave3, &wave, 0.001); |
92 | 166 |
|
93 | 167 | lib.finalize();
|
94 | 168 | Ok(())
|
|
0 commit comments