Skip to content

Commit 91d6699

Browse files
authored
Merge pull request #79 from sp-nitech/f0eval
Add f0eval
2 parents ef35a10 + 84f688c commit 91d6699

File tree

7 files changed

+436
-49
lines changed

7 files changed

+436
-49
lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,7 @@ set(MAIN_SOURCES
276276
${SOURCE_DIR}/main/entropy.cc
277277
${SOURCE_DIR}/main/excite.cc
278278
${SOURCE_DIR}/main/extract.cc
279+
${SOURCE_DIR}/main/f0eval.cc
279280
${SOURCE_DIR}/main/fbank.cc
280281
${SOURCE_DIR}/main/fd.cc
281282
${SOURCE_DIR}/main/fft.cc

doc/main/f0eval.rst

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
.. _f0eval:
2+
3+
f0eval
4+
======
5+
6+
.. doxygenfile:: f0eval.cc
7+
8+
.. seealso::
9+
10+
:ref:`rmse` :ref:`pitch`

doc/main/rmse.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,4 @@ rmse
77

88
.. seealso::
99

10-
:ref:`cdist`
10+
:ref:`cdist` :ref:`f0eval`

src/main/cdist.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -336,7 +336,7 @@ int main(int argc, char* argv[]) {
336336
int num_data;
337337
if (!statistics_accumulation.GetNumData(buffer, &num_data)) {
338338
std::ostringstream error_message;
339-
error_message << "Failed to accumulate statistics";
339+
error_message << "Failed to get statistics";
340340
sptk::PrintErrorMessage("cdist", error_message);
341341
return 1;
342342
}

src/main/f0eval.cc

Lines changed: 290 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,290 @@
1+
// ------------------------------------------------------------------------ //
2+
// Copyright 2021 SPTK Working Group //
3+
// //
4+
// Licensed under the Apache License, Version 2.0 (the "License"); //
5+
// you may not use this file except in compliance with the License. //
6+
// You may obtain a copy of the License at //
7+
// //
8+
// http://www.apache.org/licenses/LICENSE-2.0 //
9+
// //
10+
// Unless required by applicable law or agreed to in writing, software //
11+
// distributed under the License is distributed on an "AS IS" BASIS, //
12+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
13+
// See the License for the specific language governing permissions and //
14+
// limitations under the License. //
15+
// ------------------------------------------------------------------------ //
16+
17+
#include <cmath> // std::log2, std::sqrt
18+
#include <fstream> // std::ifstream
19+
#include <iomanip> // std::setw
20+
#include <iostream> // std::cerr, std::cin, std::cout, std::endl, etc.
21+
#include <sstream> // std::ostringstream
22+
23+
#include "GETOPT/ya_getopt.h"
24+
#include "SPTK/utils/sptk_utils.h"
25+
26+
namespace {
27+
28+
// Representation of the values stored in the two input files (-q option).
enum InputFormats {
  kPitch = 0,       // pitch period, i.e., Fs/F0
  kF0,              // F0
  kLogF0,           // log F0
  kNumInputFormats  // number of selectable input formats
};

// Selection of the metrics written to stdout (-o option).
enum OutputFormats {
  kF0ErrorAndVuvError = 0,  // F0 RMSE followed by V/UV error
  kF0Error,                 // F0 RMSE only
  kVuvError,                // V/UV error only
  kNumOutputFormats         // number of selectable output formats
};

// Values used when the corresponding command-line option is not given.
const InputFormats kDefaultInputFormat = kPitch;
const OutputFormats kDefaultOutputFormat = kF0ErrorAndVuvError;
const double kDefaultSamplingRate = 16.0;  // [kHz]
40+
41+
void PrintUsage(std::ostream* stream) {
42+
// clang-format off
43+
*stream << std::endl;
44+
*stream << " f0eval - calculation of F0 metrics" << std::endl;
45+
*stream << std::endl;
46+
*stream << " usage:" << std::endl;
47+
*stream << " f0eval [ options ] file1 [ infile ] > stdout" << std::endl;
48+
*stream << " options:" << std::endl;
49+
*stream << " -q q : input format ( int)[" << std::setw(5) << std::right << kDefaultInputFormat << "][ 0 <= q <= 2 ]" << std::endl; // NOLINT
50+
*stream << " 0 (Fs/F0)" << std::endl;
51+
*stream << " 1 (F0)" << std::endl;
52+
*stream << " 2 (log F0)" << std::endl;
53+
*stream << " -o o : output format ( int)[" << std::setw(5) << std::right << kDefaultOutputFormat << "][ 0 <= o <= 2 ]" << std::endl; // NOLINT
54+
*stream << " 0 (F0 RMSE and V/UV error)" << std::endl;
55+
*stream << " 1 (F0 RMSE)" << std::endl;
56+
*stream << " 2 (V/UV error)" << std::endl;
57+
*stream << " -s s : sampling rate [kHz] (double)[" << std::setw(5) << std::right << kDefaultSamplingRate << "][ 0 < s <= ]" << std::endl; // NOLINT
58+
*stream << " -h : print this message" << std::endl;
59+
*stream << " file1:" << std::endl;
60+
*stream << " pitch (double)" << std::endl;
61+
*stream << " infile:" << std::endl;
62+
*stream << " pitch (double)[stdin]" << std::endl;
63+
*stream << " stdout:" << std::endl;
64+
*stream << " metrics (double)" << std::endl;
65+
*stream << " notice:" << std::endl;
66+
*stream << " F0 RMSE is measured in cents" << std::endl;
67+
*stream << " V/UV error is measured in percent" << std::endl;
68+
*stream << " -q 0 and -q 1 options treat unvoiced frames as 0.0" << std::endl; // NOLINT
69+
*stream << " -q 2 option treats unvoiced frames as -1e+10" << std::endl; // NOLINT
70+
*stream << " -s option is used only when -q 0 option is specified" << std::endl; // NOLINT
71+
*stream << std::endl;
72+
*stream << " SPTK: version " << sptk::kVersion << std::endl;
73+
*stream << std::endl;
74+
// clang-format on
75+
}
76+
77+
} // namespace
78+
79+
/**
80+
* @a f0eval [ @e option ] [ @e infile ]
81+
*
82+
* - @b -q @e int
83+
* - input format
84+
* @arg @c 0 pitch @f$(F_s / F_0)@f$
85+
* @arg @c 1 F0
86+
* @arg @c 2 log F0
87+
* - @b -o @e int
88+
* - output format
89+
* @arg @c 0 F0 RMSE and V/UV error
90+
* @arg @c 1 F0 RMSE
91+
* @arg @c 2 V/UV error
92+
* - @b -s @e double
93+
* - sampling rate [kHz] @f$(0 < F_s)@f$
94+
* - @b file1 @e str
95+
* - double-type pitch
96+
* - @b infile @e str
97+
* - double-type pitch
98+
* - @b stdout
99+
* - double-type F0 RMSE [cent] and/or voiced/unvoiced error [%]
100+
*
101+
* In the example below, the voiced/unvoiced error [%] between two f0 files
102+
* @c data1.f0 and @c data2.f0 is evaluated and displayed:
103+
*
104+
* @code{.sh}
105+
* f0eval -q 1 -o 2 data1.f0 data2.f0 | dmp +d
106+
* @endcode
107+
*
108+
* @param[in] argc Number of arguments.
109+
* @param[in] argv Argument vector.
110+
* @return 0 on success, 1 on failure.
111+
*/
112+
int main(int argc, char* argv[]) {
113+
InputFormats input_format(kDefaultInputFormat);
114+
OutputFormats output_format(kDefaultOutputFormat);
115+
double sampling_rate(kDefaultSamplingRate);
116+
117+
for (;;) {
118+
const int option_char(getopt_long(argc, argv, "q:o:s:h", NULL, NULL));
119+
if (-1 == option_char) break;
120+
121+
switch (option_char) {
122+
case 'q': {
123+
const int min(0);
124+
const int max(static_cast<int>(kNumInputFormats) - 1);
125+
int tmp;
126+
if (!sptk::ConvertStringToInteger(optarg, &tmp) ||
127+
!sptk::IsInRange(tmp, min, max)) {
128+
std::ostringstream error_message;
129+
error_message << "The argument for the -o option must be an integer "
130+
<< "in the range of " << min << " to " << max;
131+
sptk::PrintErrorMessage("f0eval", error_message);
132+
return 1;
133+
}
134+
input_format = static_cast<InputFormats>(tmp);
135+
break;
136+
}
137+
case 'o': {
138+
const int min(0);
139+
const int max(static_cast<int>(kNumOutputFormats) - 1);
140+
int tmp;
141+
if (!sptk::ConvertStringToInteger(optarg, &tmp) ||
142+
!sptk::IsInRange(tmp, min, max)) {
143+
std::ostringstream error_message;
144+
error_message << "The argument for the -o option must be an integer "
145+
<< "in the range of " << min << " to " << max;
146+
sptk::PrintErrorMessage("f0eval", error_message);
147+
return 1;
148+
}
149+
output_format = static_cast<OutputFormats>(tmp);
150+
break;
151+
}
152+
case 's': {
153+
if (!sptk::ConvertStringToDouble(optarg, &sampling_rate) ||
154+
sampling_rate <= 0.0) {
155+
std::ostringstream error_message;
156+
error_message
157+
<< "The argument for the -s option must be a positive number";
158+
sptk::PrintErrorMessage("f0eval", error_message);
159+
return 1;
160+
}
161+
break;
162+
}
163+
case 'h': {
164+
PrintUsage(&std::cout);
165+
return 0;
166+
}
167+
default: {
168+
PrintUsage(&std::cerr);
169+
return 1;
170+
}
171+
}
172+
}
173+
174+
const char* input_file1(NULL);
175+
const char* input_file2(NULL);
176+
const int num_input_files(argc - optind);
177+
if (2 == num_input_files) {
178+
input_file1 = argv[argc - 2];
179+
input_file2 = argv[argc - 1];
180+
} else if (1 == num_input_files) {
181+
input_file1 = argv[argc - 1];
182+
input_file2 = NULL;
183+
} else {
184+
std::ostringstream error_message;
185+
error_message << "Just two input files, file1, and infile, are required";
186+
sptk::PrintErrorMessage("f0eval", error_message);
187+
return 1;
188+
}
189+
190+
if (!sptk::SetBinaryMode()) {
191+
std::ostringstream error_message;
192+
error_message << "Cannot set translation mode";
193+
sptk::PrintErrorMessage("f0eval", error_message);
194+
return 1;
195+
}
196+
197+
std::ifstream ifs1;
198+
ifs1.open(input_file1, std::ios::in | std::ios::binary);
199+
if (ifs1.fail()) {
200+
std::ostringstream error_message;
201+
error_message << "Cannot open file " << input_file1;
202+
sptk::PrintErrorMessage("f0eval", error_message);
203+
return 1;
204+
}
205+
std::istream& input_stream1(ifs1);
206+
207+
std::ifstream ifs2;
208+
if (NULL != input_file2) {
209+
ifs2.open(input_file2, std::ios::in | std::ios::binary);
210+
if (ifs2.fail()) {
211+
std::ostringstream error_message;
212+
error_message << "Cannot open file " << input_file2;
213+
sptk::PrintErrorMessage("f0eval", error_message);
214+
return 1;
215+
}
216+
}
217+
std::istream& input_stream2(ifs2.is_open() ? ifs2 : std::cin);
218+
219+
const double sampling_rate_in_hz(1000.0 * sampling_rate);
220+
const double unvoiced_value(kLogF0 == input_format ? sptk::kLogZero : 0.0);
221+
222+
double f1, f2;
223+
double f0_error_value(0.0);
224+
int num_voiced_frame(0);
225+
int vuv_error_count(0);
226+
int num_frame(0);
227+
228+
while (sptk::ReadStream(&f1, &input_stream1) &&
229+
sptk::ReadStream(&f2, &input_stream2)) {
230+
++num_frame;
231+
if (f1 == unvoiced_value && f2 == unvoiced_value) {
232+
// nothing to do
233+
} else if (f1 != unvoiced_value && f2 != unvoiced_value) {
234+
switch (input_format) {
235+
case kPitch: {
236+
f1 = std::log2(sampling_rate_in_hz / f1);
237+
f2 = std::log2(sampling_rate_in_hz / f2);
238+
break;
239+
}
240+
case kF0: {
241+
f1 = std::log2(f1);
242+
f2 = std::log2(f2);
243+
break;
244+
}
245+
case kLogF0: {
246+
f1 *= sptk::kOctave;
247+
f2 *= sptk::kOctave;
248+
break;
249+
}
250+
default: {
251+
return 1;
252+
}
253+
}
254+
const double error_in_cent(1200.0 * (f1 - f2));
255+
f0_error_value += error_in_cent * error_in_cent;
256+
++num_voiced_frame;
257+
} else {
258+
++vuv_error_count;
259+
}
260+
}
261+
262+
if (0 < num_frame) {
263+
if (kF0ErrorAndVuvError == output_format || kF0Error == output_format) {
264+
if (0 == num_voiced_frame) {
265+
std::ostringstream error_message;
266+
error_message << "There are no voiced frames";
267+
sptk::PrintErrorMessage("f0eval", error_message);
268+
return 1;
269+
}
270+
const double f0_error(std::sqrt(f0_error_value / num_voiced_frame));
271+
if (!sptk::WriteStream(f0_error, &std::cout)) {
272+
std::ostringstream error_message;
273+
error_message << "Failed to write F0 RMSE";
274+
sptk::PrintErrorMessage("f0eval", error_message);
275+
return 1;
276+
}
277+
}
278+
if (kF0ErrorAndVuvError == output_format || kVuvError == output_format) {
279+
const double vuv_error(100.0 * vuv_error_count / num_frame);
280+
if (!sptk::WriteStream(vuv_error, &std::cout)) {
281+
std::ostringstream error_message;
282+
error_message << "Failed to write V/UV error";
283+
sptk::PrintErrorMessage("f0eval", error_message);
284+
return 1;
285+
}
286+
}
287+
}
288+
289+
return 0;
290+
}

0 commit comments

Comments
 (0)