|
| 1 | +// ------------------------------------------------------------------------ // |
| 2 | +// Copyright 2021 SPTK Working Group // |
| 3 | +// // |
| 4 | +// Licensed under the Apache License, Version 2.0 (the "License"); // |
| 5 | +// you may not use this file except in compliance with the License. // |
| 6 | +// You may obtain a copy of the License at // |
| 7 | +// // |
| 8 | +// http://www.apache.org/licenses/LICENSE-2.0 // |
| 9 | +// // |
| 10 | +// Unless required by applicable law or agreed to in writing, software // |
| 11 | +// distributed under the License is distributed on an "AS IS" BASIS, // |
| 12 | +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // |
| 13 | +// See the License for the specific language governing permissions and // |
| 14 | +// limitations under the License. // |
| 15 | +// ------------------------------------------------------------------------ // |
| 16 | + |
| 17 | +#include <cmath> // std::log2, std::sqrt |
| 18 | +#include <fstream> // std::ifstream |
| 19 | +#include <iomanip> // std::setw |
| 20 | +#include <iostream> // std::cerr, std::cin, std::cout, std::endl, etc. |
| 21 | +#include <sstream> // std::ostringstream |
| 22 | + |
| 23 | +#include "GETOPT/ya_getopt.h" |
| 24 | +#include "SPTK/utils/sptk_utils.h" |
| 25 | + |
| 26 | +namespace { |
| 27 | + |
| 28 | +enum InputFormats { kPitch = 0, kF0, kLogF0, kNumInputFormats }; |
| 29 | + |
| 30 | +enum OutputFormats { |
| 31 | + kF0ErrorAndVuvError = 0, |
| 32 | + kF0Error, |
| 33 | + kVuvError, |
| 34 | + kNumOutputFormats |
| 35 | +}; |
| 36 | + |
| 37 | +const InputFormats kDefaultInputFormat(kPitch); |
| 38 | +const OutputFormats kDefaultOutputFormat(kF0ErrorAndVuvError); |
| 39 | +const double kDefaultSamplingRate(16.0); |
| 40 | + |
| 41 | +void PrintUsage(std::ostream* stream) { |
| 42 | + // clang-format off |
| 43 | + *stream << std::endl; |
| 44 | + *stream << " f0eval - calculation of F0 metrics" << std::endl; |
| 45 | + *stream << std::endl; |
| 46 | + *stream << " usage:" << std::endl; |
| 47 | + *stream << " f0eval [ options ] file1 [ infile ] > stdout" << std::endl; |
| 48 | + *stream << " options:" << std::endl; |
| 49 | + *stream << " -q q : input format ( int)[" << std::setw(5) << std::right << kDefaultInputFormat << "][ 0 <= q <= 2 ]" << std::endl; // NOLINT |
| 50 | + *stream << " 0 (Fs/F0)" << std::endl; |
| 51 | + *stream << " 1 (F0)" << std::endl; |
| 52 | + *stream << " 2 (log F0)" << std::endl; |
| 53 | + *stream << " -o o : output format ( int)[" << std::setw(5) << std::right << kDefaultOutputFormat << "][ 0 <= o <= 2 ]" << std::endl; // NOLINT |
| 54 | + *stream << " 0 (F0 RMSE and V/UV error)" << std::endl; |
| 55 | + *stream << " 1 (F0 RMSE)" << std::endl; |
| 56 | + *stream << " 2 (V/UV error)" << std::endl; |
| 57 | + *stream << " -s s : sampling rate [kHz] (double)[" << std::setw(5) << std::right << kDefaultSamplingRate << "][ 0 < s <= ]" << std::endl; // NOLINT |
| 58 | + *stream << " -h : print this message" << std::endl; |
| 59 | + *stream << " file1:" << std::endl; |
| 60 | + *stream << " pitch (double)" << std::endl; |
| 61 | + *stream << " infile:" << std::endl; |
| 62 | + *stream << " pitch (double)[stdin]" << std::endl; |
| 63 | + *stream << " stdout:" << std::endl; |
| 64 | + *stream << " metrics (double)" << std::endl; |
| 65 | + *stream << " notice:" << std::endl; |
| 66 | + *stream << " F0 RMSE is measured in cents" << std::endl; |
| 67 | + *stream << " V/UV error is measured in percent" << std::endl; |
| 68 | + *stream << " -q 0 and -q 1 options treat unvoiced frames as 0.0" << std::endl; // NOLINT |
| 69 | + *stream << " -q 2 option treats unvoiced frames as -1e+10" << std::endl; // NOLINT |
| 70 | + *stream << " -s option is used only when -q 0 option is specified" << std::endl; // NOLINT |
| 71 | + *stream << std::endl; |
| 72 | + *stream << " SPTK: version " << sptk::kVersion << std::endl; |
| 73 | + *stream << std::endl; |
| 74 | + // clang-format on |
| 75 | +} |
| 76 | + |
| 77 | +} // namespace |
| 78 | + |
| 79 | +/** |
| 80 | + * @a f0eval [ @e option ] [ @e infile ] |
| 81 | + * |
| 82 | + * - @b -q @e int |
| 83 | + * - input format |
| 84 | + * @arg @c 0 pitch @f$(F_s / F_0)@f$ |
| 85 | + * @arg @c 1 F0 |
| 86 | + * @arg @c 2 log F0 |
| 87 | + * - @b -o @e int |
| 88 | + * - output format |
| 89 | + * @arg @c 0 F0 RMSE and V/UV error |
| 90 | + * @arg @c 1 F0 RMSE |
| 91 | + * @arg @c 2 V/UV error |
| 92 | + * - @b -s @e double |
| 93 | + * - sampling rate [kHz] @f$(0 < F_s)@f$ |
| 94 | + * - @b file1 @e str |
| 95 | + * - double-type pitch |
| 96 | + * - @b infile @e str |
| 97 | + * - double-type pitch |
| 98 | + * - @b stdout |
| 99 | + * - double-type F0 RMSE [cent] and/or voiced/unvoiced error [%] |
| 100 | + * |
| 101 | + * In the example below, the voiced/unvoiced error [%] between two f0 files |
| 102 | + * @c data1.f0 and @c data2.f0 is evaluated and displayed: |
| 103 | + * |
| 104 | + * @code{.sh} |
| 105 | + * f0eval -q 1 -o 2 data1.f0 data2.f0 | dmp +d |
| 106 | + * @endcode |
| 107 | + * |
| 108 | + * @param[in] argc Number of arguments. |
| 109 | + * @param[in] argv Argument vector. |
| 110 | + * @return 0 on success, 1 on failure. |
| 111 | + */ |
| 112 | +int main(int argc, char* argv[]) { |
| 113 | + InputFormats input_format(kDefaultInputFormat); |
| 114 | + OutputFormats output_format(kDefaultOutputFormat); |
| 115 | + double sampling_rate(kDefaultSamplingRate); |
| 116 | + |
| 117 | + for (;;) { |
| 118 | + const int option_char(getopt_long(argc, argv, "q:o:s:h", NULL, NULL)); |
| 119 | + if (-1 == option_char) break; |
| 120 | + |
| 121 | + switch (option_char) { |
| 122 | + case 'q': { |
| 123 | + const int min(0); |
| 124 | + const int max(static_cast<int>(kNumInputFormats) - 1); |
| 125 | + int tmp; |
| 126 | + if (!sptk::ConvertStringToInteger(optarg, &tmp) || |
| 127 | + !sptk::IsInRange(tmp, min, max)) { |
| 128 | + std::ostringstream error_message; |
| 129 | + error_message << "The argument for the -o option must be an integer " |
| 130 | + << "in the range of " << min << " to " << max; |
| 131 | + sptk::PrintErrorMessage("f0eval", error_message); |
| 132 | + return 1; |
| 133 | + } |
| 134 | + input_format = static_cast<InputFormats>(tmp); |
| 135 | + break; |
| 136 | + } |
| 137 | + case 'o': { |
| 138 | + const int min(0); |
| 139 | + const int max(static_cast<int>(kNumOutputFormats) - 1); |
| 140 | + int tmp; |
| 141 | + if (!sptk::ConvertStringToInteger(optarg, &tmp) || |
| 142 | + !sptk::IsInRange(tmp, min, max)) { |
| 143 | + std::ostringstream error_message; |
| 144 | + error_message << "The argument for the -o option must be an integer " |
| 145 | + << "in the range of " << min << " to " << max; |
| 146 | + sptk::PrintErrorMessage("f0eval", error_message); |
| 147 | + return 1; |
| 148 | + } |
| 149 | + output_format = static_cast<OutputFormats>(tmp); |
| 150 | + break; |
| 151 | + } |
| 152 | + case 's': { |
| 153 | + if (!sptk::ConvertStringToDouble(optarg, &sampling_rate) || |
| 154 | + sampling_rate <= 0.0) { |
| 155 | + std::ostringstream error_message; |
| 156 | + error_message |
| 157 | + << "The argument for the -s option must be a positive number"; |
| 158 | + sptk::PrintErrorMessage("f0eval", error_message); |
| 159 | + return 1; |
| 160 | + } |
| 161 | + break; |
| 162 | + } |
| 163 | + case 'h': { |
| 164 | + PrintUsage(&std::cout); |
| 165 | + return 0; |
| 166 | + } |
| 167 | + default: { |
| 168 | + PrintUsage(&std::cerr); |
| 169 | + return 1; |
| 170 | + } |
| 171 | + } |
| 172 | + } |
| 173 | + |
| 174 | + const char* input_file1(NULL); |
| 175 | + const char* input_file2(NULL); |
| 176 | + const int num_input_files(argc - optind); |
| 177 | + if (2 == num_input_files) { |
| 178 | + input_file1 = argv[argc - 2]; |
| 179 | + input_file2 = argv[argc - 1]; |
| 180 | + } else if (1 == num_input_files) { |
| 181 | + input_file1 = argv[argc - 1]; |
| 182 | + input_file2 = NULL; |
| 183 | + } else { |
| 184 | + std::ostringstream error_message; |
| 185 | + error_message << "Just two input files, file1, and infile, are required"; |
| 186 | + sptk::PrintErrorMessage("f0eval", error_message); |
| 187 | + return 1; |
| 188 | + } |
| 189 | + |
| 190 | + if (!sptk::SetBinaryMode()) { |
| 191 | + std::ostringstream error_message; |
| 192 | + error_message << "Cannot set translation mode"; |
| 193 | + sptk::PrintErrorMessage("f0eval", error_message); |
| 194 | + return 1; |
| 195 | + } |
| 196 | + |
| 197 | + std::ifstream ifs1; |
| 198 | + ifs1.open(input_file1, std::ios::in | std::ios::binary); |
| 199 | + if (ifs1.fail()) { |
| 200 | + std::ostringstream error_message; |
| 201 | + error_message << "Cannot open file " << input_file1; |
| 202 | + sptk::PrintErrorMessage("f0eval", error_message); |
| 203 | + return 1; |
| 204 | + } |
| 205 | + std::istream& input_stream1(ifs1); |
| 206 | + |
| 207 | + std::ifstream ifs2; |
| 208 | + if (NULL != input_file2) { |
| 209 | + ifs2.open(input_file2, std::ios::in | std::ios::binary); |
| 210 | + if (ifs2.fail()) { |
| 211 | + std::ostringstream error_message; |
| 212 | + error_message << "Cannot open file " << input_file2; |
| 213 | + sptk::PrintErrorMessage("f0eval", error_message); |
| 214 | + return 1; |
| 215 | + } |
| 216 | + } |
| 217 | + std::istream& input_stream2(ifs2.is_open() ? ifs2 : std::cin); |
| 218 | + |
| 219 | + const double sampling_rate_in_hz(1000.0 * sampling_rate); |
| 220 | + const double unvoiced_value(kLogF0 == input_format ? sptk::kLogZero : 0.0); |
| 221 | + |
| 222 | + double f1, f2; |
| 223 | + double f0_error_value(0.0); |
| 224 | + int num_voiced_frame(0); |
| 225 | + int vuv_error_count(0); |
| 226 | + int num_frame(0); |
| 227 | + |
| 228 | + while (sptk::ReadStream(&f1, &input_stream1) && |
| 229 | + sptk::ReadStream(&f2, &input_stream2)) { |
| 230 | + ++num_frame; |
| 231 | + if (f1 == unvoiced_value && f2 == unvoiced_value) { |
| 232 | + // nothing to do |
| 233 | + } else if (f1 != unvoiced_value && f2 != unvoiced_value) { |
| 234 | + switch (input_format) { |
| 235 | + case kPitch: { |
| 236 | + f1 = std::log2(sampling_rate_in_hz / f1); |
| 237 | + f2 = std::log2(sampling_rate_in_hz / f2); |
| 238 | + break; |
| 239 | + } |
| 240 | + case kF0: { |
| 241 | + f1 = std::log2(f1); |
| 242 | + f2 = std::log2(f2); |
| 243 | + break; |
| 244 | + } |
| 245 | + case kLogF0: { |
| 246 | + f1 *= sptk::kOctave; |
| 247 | + f2 *= sptk::kOctave; |
| 248 | + break; |
| 249 | + } |
| 250 | + default: { |
| 251 | + return 1; |
| 252 | + } |
| 253 | + } |
| 254 | + const double error_in_cent(1200.0 * (f1 - f2)); |
| 255 | + f0_error_value += error_in_cent * error_in_cent; |
| 256 | + ++num_voiced_frame; |
| 257 | + } else { |
| 258 | + ++vuv_error_count; |
| 259 | + } |
| 260 | + } |
| 261 | + |
| 262 | + if (0 < num_frame) { |
| 263 | + if (kF0ErrorAndVuvError == output_format || kF0Error == output_format) { |
| 264 | + if (0 == num_voiced_frame) { |
| 265 | + std::ostringstream error_message; |
| 266 | + error_message << "There are no voiced frames"; |
| 267 | + sptk::PrintErrorMessage("f0eval", error_message); |
| 268 | + return 1; |
| 269 | + } |
| 270 | + const double f0_error(std::sqrt(f0_error_value / num_voiced_frame)); |
| 271 | + if (!sptk::WriteStream(f0_error, &std::cout)) { |
| 272 | + std::ostringstream error_message; |
| 273 | + error_message << "Failed to write F0 RMSE"; |
| 274 | + sptk::PrintErrorMessage("f0eval", error_message); |
| 275 | + return 1; |
| 276 | + } |
| 277 | + } |
| 278 | + if (kF0ErrorAndVuvError == output_format || kVuvError == output_format) { |
| 279 | + const double vuv_error(100.0 * vuv_error_count / num_frame); |
| 280 | + if (!sptk::WriteStream(vuv_error, &std::cout)) { |
| 281 | + std::ostringstream error_message; |
| 282 | + error_message << "Failed to write V/UV error"; |
| 283 | + sptk::PrintErrorMessage("f0eval", error_message); |
| 284 | + return 1; |
| 285 | + } |
| 286 | + } |
| 287 | + } |
| 288 | + |
| 289 | + return 0; |
| 290 | +} |
0 commit comments