Skip to content

Commit f819b2f

Browse files
authored
Update Bliss::Lexicon & some related classes to return Core::Status (#121)
1 parent 0e159e5 commit f819b2f

File tree

12 files changed

+262
-45
lines changed

12 files changed

+262
-45
lines changed

Modules.make

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,7 @@ endif
148148
# ****** Libraries ******
149149
LIBS_SEARCH = src/Search/libSprintSearch.$(a)
150150
LIBS_SEARCH += src/Search/LexiconfreeTimesyncBeamSearch/libSprintLexiconfreeTimesyncBeamSearch.$(a)
151+
LIBS_SEARCH += src/Search/TreeTimesyncBeamSearch/libSprintTreeTimesyncBeamSearch.$(a)
151152
ifdef MODULE_SEARCH_WFST
152153
LIBS_SEARCH += src/Search/Wfst/libSprintSearchWfst.$(a)
153154
LIBS_SEARCH += src/OpenFst/libSprintOpenFst.$(a)

src/Bliss/Lexicon.cc

Lines changed: 32 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,12 @@
1313
* limitations under the License.
1414
*/
1515
// $Id$
16-
16+
#include <chrono>
1717
#include <unordered_map>
1818

19+
#include <Core/IoUtilities.hh>
1920
#include <Core/MD5.hh>
21+
#include <Core/StopWatch.hh>
2022
#include <Core/Utility.hh>
2123
#include <Fsa/AlphabetUtility.hh>
2224
#include "Fsa.hh"
@@ -137,6 +139,9 @@ Lexicon::~Lexicon() {
137139
}
138140

139141
void Lexicon::load(const std::string& filename) {
142+
Core::StopWatch stopwatch;
143+
stopwatch.start();
144+
140145
Core::MD5 md5;
141146
std::string strippedFilename = Core::FormatSet::stripQualifier(filename);
142147
if (md5.updateFromFile(strippedFilename)) {
@@ -146,10 +151,20 @@ void Lexicon::load(const std::string& filename) {
146151
warning("Could not derive md5 sum from file '%s'", strippedFilename.c_str());
147152
}
148153

149-
log("Reading lexicon from file") << " \"" << strippedFilename << "\" ...";
154+
stopwatch.stop();
155+
log("md5 dependency computed in %.2f seconds", stopwatch.elapsedSeconds());
156+
157+
std::string absFilename = Core::realPath(strippedFilename);
158+
log("reading lexicon from file \"%s\" (%s) ...", strippedFilename.c_str(), absFilename.c_str());
159+
stopwatch.reset();
160+
stopwatch.start();
161+
150162
if (!formats().read(filename, *this)) {
151163
error("Error while reading lexicon file.");
152164
}
165+
166+
stopwatch.stop();
167+
log("parsed XML lexicon in %.2f seconds", stopwatch.elapsedSeconds());
153168
log("dependency value: ") << dependency_.value();
154169
}
155170

@@ -230,9 +245,7 @@ Pronunciation* Lexicon::getOrCreatePronunciation(const std::vector<Phoneme::Id>&
230245
tie(it, isNew) = pronunciationMap_.insert(pron);
231246
if (isNew) {
232247
phon_.start();
233-
for (std::vector<Phoneme::Id>::const_iterator i = phonemes.begin();
234-
i != phonemes.end(); ++i)
235-
phon_.grow(*i);
248+
phon_.grow(phonemes.data(), phonemes.data() + phonemes.size());
236249
pron->phonemes_ = phon_.currentBegin();
237250
verify(phon_.currentEnd()[-1] == Phoneme::term);
238251
phon_.finish();
@@ -245,7 +258,7 @@ Pronunciation* Lexicon::getOrCreatePronunciation(const std::vector<Phoneme::Id>&
245258
return pron;
246259
}
247260

248-
void Lexicon::parsePronunciation(const std::string& phonStr, std::vector<Phoneme::Id>& phonemes) const {
261+
Core::Status Lexicon::parsePronunciation(const std::string& phonStr, std::vector<Phoneme::Id>& phonemes) const {
249262
require(phonemeInventory());
250263
const Phoneme* phoneme;
251264
std::string::size_type i, j;
@@ -258,20 +271,27 @@ void Lexicon::parsePronunciation(const std::string& phonStr, std::vector<Phoneme
258271
phonemes.push_back(phoneme->id());
259272
}
260273
else {
261-
error("ignoring unknown phoneme \"%s\"",
262-
phonStr.substr(i, j - i).c_str());
274+
std::string errorMessage = std::string("Unknown phoneme: \"") + phonStr.substr(i, j - i) + "\"";
275+
return Core::Status(Core::StatusCode::InvalidArgument, errorMessage);
263276
}
264277
i = phonStr.find_first_not_of(utf8::whitespace, j);
265278
}
279+
return Core::Status();
266280
}
267281

268-
Pronunciation* Lexicon::getPronunciation(const std::string& phon) {
282+
Core::Status Lexicon::getPronunciation(const std::string& phon, Pronunciation*& out) {
269283
require(phonemeInventory());
284+
270285
std::vector<Phoneme::Id> phonemes;
271-
parsePronunciation(phon, phonemes);
286+
Core::Status status = parsePronunciation(phon, phonemes);
287+
if (!status.ok()) {
288+
out = nullptr;
289+
return status;
290+
}
291+
272292
phonemes.push_back(Phoneme::term);
273-
Pronunciation* pron = getOrCreatePronunciation(phonemes);
274-
return pron;
293+
out = getOrCreatePronunciation(phonemes);
294+
return status;
275295
}
276296

277297
void Lexicon::addPronunciation(Lemma* lemma, Pronunciation* pron, f32 weight) {

src/Bliss/Lexicon.hh

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,10 @@
2929
#include <Core/Obstack.hh>
3030
#include <Core/Parameter.hh>
3131
#include <Core/ReferenceCounting.hh>
32+
#include <Core/Status.hh>
3233
#include <Core/StringUtilities.hh>
3334
#include <Core/Types.hh>
35+
3436
#include "Phoneme.hh"
3537
#include "Symbol.hh"
3638

@@ -541,7 +543,7 @@ protected:
541543
EvaluationToken* getOrCreateEvaluationToken(Symbol);
542544

543545
/** Convert phonemic string to sequence of phoneme ids */
544-
void parsePronunciation(const std::string&, std::vector<Phoneme::Id>&) const;
546+
Core::Status parsePronunciation(const std::string&, std::vector<Phoneme::Id>&) const;
545547

546548
struct Internal;
547549
Internal* internal_;
@@ -574,7 +576,7 @@ public:
574576
* @param phon a string containing a white-space separate list
575577
* of phoneme symbols.
576578
*/
577-
Pronunciation* getPronunciation(const std::string& phon);
579+
Core::Status getPronunciation(const std::string& phon, Pronunciation*& out);
578580

579581
/**
580582
* Add a pronunciation to a lemma.

src/Bliss/LexiconParser.cc

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -261,8 +261,13 @@ void LexiconElement::addPhon(const WeightedPhonemeString& phon) {
261261
return;
262262
}
263263

264-
Pronunciation* pron = product_->getPronunciation(phon.phon);
265-
if (lemma_->hasPronunciation(pron)) {
264+
Pronunciation* pron = nullptr;
265+
Core::Status status = product_->getPronunciation(phon.phon, pron);
266+
if (pron == nullptr) {
267+
parser()->error("Could not create pronunciation for %s: %s", phon.phon.c_str(), status.message().c_str());
268+
return;
269+
}
270+
else if (lemma_->hasPronunciation(pron)) {
266271
parser()->error("duplicate pronunciation");
267272
return;
268273
}
@@ -458,8 +463,14 @@ void VocabTextLexiconParser::createLemmata() {
458463
Lemma* newLemma_ = lexicon_->newLemma();
459464
// set orth
460465
lexicon_->setOrthographicForms(newLemma_, {symbol});
466+
461467
// set phon
462-
Pronunciation* pron = lexicon_->getPronunciation(symbol);
468+
Pronunciation* pron = nullptr;
469+
Core::Status status = lexicon_->getPronunciation(symbol, pron);
470+
if (pron == nullptr) {
471+
Core::Application::us()->error("Could not create pronunciation for %s: %s", symbol.c_str(), status.message().c_str());
472+
return;
473+
}
463474
lexicon_->addPronunciation(newLemma_, pron);
464475
lexicon_->setDefaultLemmaName(newLemma_);
465476
}

src/Core/Component.cc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,10 @@ bool Component::hasFatalErrors() const {
171171
return false;
172172
}
173173

174+
bool Component::hasErrors() const {
175+
return errorCounts_[ErrorTypeError] > 0 or errorCounts_[ErrorTypeCriticalError] > 0;
176+
}
177+
174178
void Component::respondToDelayedErrors() const {
175179
if (hasFatalErrors())
176180
exit();

src/Core/Component.hh

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -309,6 +309,11 @@ public:
309309
*/
310310
bool hasFatalErrors() const;
311311

312+
/**
313+
* Test whether any errors have occurred (ignored or not).
314+
*/
315+
bool hasErrors() const;
316+
312317
/**
313318
* Terminate execution iff an error has occured previously.
314319
* More specifically, terminate iff hasFatalErrors() returns

src/Core/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ LIBSPRINTCORE_O = $(OBJDIR)/Application.o \
4343
$(OBJDIR)/ReferenceCounting.o \
4444
$(OBJDIR)/ResourceUsageInfo.o \
4545
$(OBJDIR)/Statistics.o \
46+
$(OBJDIR)/Status.o \
4647
$(OBJDIR)/StopWatch.o \
4748
$(OBJDIR)/StringExpression.o \
4849
$(OBJDIR)/StringUtilities.o \

src/Core/Status.cc

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
/** Copyright 2025 RWTH Aachen University. All rights reserved.
2+
*
3+
* Licensed under the RWTH ASR License (the "License");
4+
* you may not use this file except in compliance with the License.
5+
* You may obtain a copy of the License at
6+
*
7+
* http://www.hltpr.rwth-aachen.de/rwth-asr/rwth-asr-license.html
8+
*
9+
* Unless required by applicable law or agreed to in writing, software
10+
* distributed under the License is distributed on an "AS IS" BASIS,
11+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
* See the License for the specific language governing permissions and
13+
* limitations under the License.
14+
*/
15+
#include "Status.hh"
16+
17+
namespace Core {
18+
/**
 * Map a StatusCode to its upper-case textual name.
 *
 * Every enumerator declared in StatusCode (including the RASR-internal
 * InvalidFileFormat) has its own case. The trailing return is reached only for
 * values that do not correspond to a declared enumerator, e.g. an arbitrary
 * integer cast to StatusCode.
 *
 * @param code the status code to convert
 * @return the name of the code, or "UNKNOWN_ERROR_TYPE" for undeclared values
 */
std::string statusCodeToString(StatusCode code) {
    switch (code) {
        case StatusCode::Ok:
            return "OK";
        case StatusCode::Cancelled:
            return "CANCELLED";
        case StatusCode::Unknown:
            return "UNKNOWN";
        case StatusCode::InvalidArgument:
            return "INVALID_ARGUMENT";
        case StatusCode::DeadlineExceeded:
            return "DEADLINE_EXCEEDED";
        case StatusCode::NotFound:
            return "NOT_FOUND";
        case StatusCode::AlreadyExists:
            return "ALREADY_EXISTS";
        case StatusCode::PermissionDenied:
            return "PERMISSION_DENIED";
        case StatusCode::ResourceExhausted:
            return "RESOURCE_EXHAUSTED";
        case StatusCode::FailedPrecondition:
            return "FAILED_PRECONDITION";
        case StatusCode::Aborted:
            return "ABORTED";
        case StatusCode::OutOfRange:
            return "OUT_OF_RANGE";
        case StatusCode::Unimplemented:
            return "UNIMPLEMENTED";
        case StatusCode::Internal:
            return "INTERNAL";
        case StatusCode::Unavailable:
            return "UNAVAILABLE";
        case StatusCode::DataLoss:
            return "DATA_LOSS";
        case StatusCode::Unauthenticated:
            return "UNAUTHENTICATED";
        // RASR internal codes
        case StatusCode::InvalidFileFormat:
            return "INVALID_FILE_FORMAT";
    }
    // No default label on purpose: keeps the compiler's missing-enumerator
    // warning active when a new StatusCode is added.
    return "UNKNOWN_ERROR_TYPE";
}
57+
58+
/** Construct an OK status: the code is StatusCode::Ok and the message is empty. */
Status::Status()
        : code_(StatusCode::Ok),
          msg_() {
}
61+
62+
/**
 * Construct a status with the given code.
 *
 * The message is stored only when the code is not StatusCode::Ok; for an OK
 * status the supplied text is dropped and the stored message stays empty.
 *
 * @param code status code to store
 * @param msg  error description, kept only for non-OK codes
 */
Status::Status(StatusCode code, std::string msg)
        : code_(code),
          msg_(code == StatusCode::Ok ? std::string() : msg) {
}
69+
70+
std::string Status::message() const {
71+
std::stringstream msg(statusCodeToString(code_));
72+
if (!msg_.empty()) {
73+
msg << ": " << msg_;
74+
}
75+
return msg.str();
76+
}
77+
78+
/** Convenience overload: forwards to update(code, msg) with an empty message. */
void Status::update(StatusCode code) {
    update(code, std::string());
}
81+
82+
/**
 * Overwrite code and message, but only while this status is still OK.
 *
 * Once a non-OK code has been stored, further calls leave the object
 * untouched, so the first recorded error is the one that is kept.
 *
 * @param code new status code
 * @param msg  new message
 */
void Status::update(StatusCode code, std::string msg) {
    if (!ok()) {
        return;
    }
    code_ = code;
    msg_  = msg;
}
88+
89+
/**
 * Take over code and message of another status, subject to the same rule as
 * update(StatusCode, std::string): nothing happens unless this status is OK.
 *
 * @param status the status whose code and message are copied
 */
void Status::update(Status const& status) {
    // Copy the raw stored message rather than status.message(): message()
    // already prepends the code name, so storing its result would make a later
    // message() call on this object print the code name twice.
    update(status.code_, status.msg_);
}
92+
93+
} // namespace Core

src/Core/Status.hh

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
/** Copyright 2025 RWTH Aachen University. All rights reserved.
2+
*
3+
* Licensed under the RWTH ASR License (the "License");
4+
* you may not use this file except in compliance with the License.
5+
* You may obtain a copy of the License at
6+
*
7+
* http://www.hltpr.rwth-aachen.de/rwth-asr/rwth-asr-license.html
8+
*
9+
* Unless required by applicable law or agreed to in writing, software
10+
* distributed under the License is distributed on an "AS IS" BASIS,
11+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
* See the License for the specific language governing permissions and
13+
* limitations under the License.
14+
*/
15+
#ifndef _CORE_STATUS_HH
16+
#define _CORE_STATUS_HH
17+
18+
#include <sstream>
19+
20+
namespace Core {
21+
22+
/**
 * Result codes carried by Core::Status.
 *
 * The numeric values are spelled out explicitly and use int as underlying type.
 * NOTE(review): names and values 0-16 look like the canonical gRPC/Abseil
 * status codes - confirm before relying on interoperability.
 */
enum class StatusCode : int {
    Ok                 = 0,
    Cancelled          = 1,
    Unknown            = 2,
    InvalidArgument    = 3,
    DeadlineExceeded   = 4,
    NotFound           = 5,
    AlreadyExists      = 6,
    PermissionDenied   = 7,
    ResourceExhausted  = 8,
    FailedPrecondition = 9,
    Aborted            = 10,
    OutOfRange         = 11,
    Unimplemented      = 12,
    Internal           = 13,
    Unavailable        = 14,
    DataLoss           = 15,
    Unauthenticated    = 16,

    // RASR internal codes
    InvalidFileFormat = 100,
};
43+
44+
std::string statusCodeToString(StatusCode code);
45+
46+
class [[nodiscard]] Status final {
47+
public:
48+
// This default constructor creates an OK status
49+
Status();
50+
51+
// This constructor sets error message if not OK Status
52+
Status(StatusCode code, std::string msg);
53+
54+
bool ok() const;
55+
StatusCode code() const;
56+
std::string message() const;
57+
58+
void update(StatusCode code);
59+
void update(StatusCode code, std::string msg);
60+
void update(Status const& status);
61+
62+
private:
63+
StatusCode code_;
64+
std::string msg_;
65+
};
66+
67+
// inline implementations
68+
69+
inline bool Status::ok() const {
70+
return code_ == StatusCode::Ok;
71+
}
72+
73+
/** Accessor for the stored status code. */
inline StatusCode Status::code() const {
    return this->code_;
}
76+
77+
} // namespace Core
78+
79+
#endif // _CORE_STATUS_HH

0 commit comments

Comments
 (0)