1313 * limitations under the License.
1414 */
1515// $Id$
16-
16+ # include < chrono >
1717#include < unordered_map>
1818
19+ #include < Core/IoUtilities.hh>
1920#include < Core/MD5.hh>
21+ #include < Core/StopWatch.hh>
2022#include < Core/Utility.hh>
2123#include < Fsa/AlphabetUtility.hh>
2224#include " Fsa.hh"
@@ -137,6 +139,9 @@ Lexicon::~Lexicon() {
137139}
138140
// Lexicon::load(filename): reads a lexicon from `filename` into this object.
//
// This block is shown as a diff: lines marked `+` are additions, lines marked
// `-` are removals, and the `@@` header marks source lines that are not shown.
//
// Visible steps, in order:
//   1. Strip the format qualifier from `filename` and feed the resulting path
//      to an MD5 object (the handling of a successful update is in the elided
//      hunk; a warning is logged on the visible path when the sum cannot be
//      derived).
//   2. (added) Log how long step 1 took.
//   3. (added) Log both the stripped path and its Core::realPath() form.
//   4. Read the lexicon via formats().read(filename, *this); log an error if
//      that call reports failure.
//   5. (added) Log how long step 4 took, then log dependency_.value().
139141void Lexicon::load (const std::string& filename) {
// (added) One stopwatch is reused for both timed phases below.
142+ Core::StopWatch stopwatch;
143+ stopwatch.start ();
144+
140145 Core::MD5 md5;
// The MD5 update uses the qualifier-stripped path; the read further down is
// given the original, unstripped `filename`.
141146 std::string strippedFilename = Core::FormatSet::stripQualifier (filename);
142147 if (md5.updateFromFile (strippedFilename)) {
@@ -146,10 +151,20 @@ void Lexicon::load(const std::string& filename) {
146151 warning (" Could not derive md5 sum from file '%s'" , strippedFilename.c_str ());
147152 }
148153
149- log (" Reading lexicon from file" ) << " \" " << strippedFilename << " \" ..." ;
// (added) End of the first timed phase.
154+ stopwatch.stop ();
155+ log (" md5 dependency computed in %.2f seconds" , stopwatch.elapsedSeconds ());
156+
157+ std::string absFilename = Core::realPath (strippedFilename);
158+ log (" reading lexicon from file \" %s\" (%s) ..." , strippedFilename.c_str (), absFilename.c_str ());
// (added) Reset before restarting so the second measurement covers only the read.
159+ stopwatch.reset ();
160+ stopwatch.start ();
161+
150162 if (!formats ().read (filename, *this )) {
151163 error (" Error while reading lexicon file." );
152164 }
165+
// NOTE(review): the timing message below is logged whether or not the read
// above succeeded, and it says "XML" although the read goes through
// formats() — confirm that only XML lexica reach this point.
166+ stopwatch.stop ();
167+ log (" parsed XML lexicon in %.2f seconds" , stopwatch.elapsedSeconds ());
153168 log (" dependency value: " ) << dependency_.value ();
154169}
155170
@@ -230,9 +245,7 @@ Pronunciation* Lexicon::getOrCreatePronunciation(const std::vector<Phoneme::Id>&
230245 tie (it, isNew) = pronunciationMap_.insert (pron);
231246 if (isNew) {
232247 phon_.start ();
233- for (std::vector<Phoneme::Id>::const_iterator i = phonemes.begin ();
234- i != phonemes.end (); ++i)
235- phon_.grow (*i);
248+ phon_.grow (phonemes.data (), phonemes.data () + phonemes.size ());
236249 pron->phonemes_ = phon_.currentBegin ();
237250 verify (phon_.currentEnd ()[-1 ] == Phoneme::term);
238251 phon_.finish ();
@@ -245,7 +258,7 @@ Pronunciation* Lexicon::getOrCreatePronunciation(const std::vector<Phoneme::Id>&
245258 return pron;
246259}
247260
// Lexicon::parsePronunciation(phonStr, phonemes): appends the ids of the
// phonemes found in `phonStr` to `phonemes`.
//
// This block is shown as a diff: `-` lines are the removed version, `+` lines
// the added version, and the `@@` header marks source lines that are not shown
// (including the loop header and the phoneme lookup).
//
// Change visible here:
//   - removed: `void` return; an unknown phoneme was reported through error()
//     and execution continued to the next token.
//   - added:   `Core::Status` return; an unknown phoneme makes the function
//     return immediately with StatusCode::InvalidArgument and a message
//     containing the offending substring. A default-constructed Core::Status
//     is returned when the end of the function is reached.
//
// Requires phonemeInventory() to be non-null/true (enforced by require()).
248- void Lexicon::parsePronunciation (const std::string& phonStr, std::vector<Phoneme::Id>& phonemes) const {
261+ Core::Status Lexicon::parsePronunciation (const std::string& phonStr, std::vector<Phoneme::Id>& phonemes) const {
249262 require (phonemeInventory ());
250263 const Phoneme* phoneme;
251264 std::string::size_type i, j;
@@ -258,20 +271,27 @@ void Lexicon::parsePronunciation(const std::string& phonStr, std::vector<Phoneme
258271 phonemes.push_back (phoneme->id ());
259272 }
// The condition for this else-branch is in the elided hunk; presumably it is
// taken when the lookup of the token phonStr[i, j) yielded no phoneme — verify.
260273 else {
261- error ( " ignoring unknown phoneme \" %s \" " ,
262- phonStr. substr (i, j - i). c_str () );
// NOTE(review): on this early return, ids pushed for earlier tokens remain in
// `phonemes`; callers should not use the vector when the status is not ok.
274+ std::string errorMessage = std::string ( " Unknown phoneme: \" " ) + phonStr. substr (i, j - i) + " \" " ;
275+ return Core::Status (Core::StatusCode::InvalidArgument, errorMessage );
263276 }
// Advance to the first non-whitespace character at or after position j.
264277 i = phonStr.find_first_not_of (utf8::whitespace, j);
265278 }
279+ return Core::Status ();
266280}
267281
// Lexicon::getPronunciation: turns the phoneme string `phon` into a
// Pronunciation object obtained from getOrCreatePronunciation().
//
// This block is shown as a diff: `-` lines are the removed version, `+` lines
// the added version.
//
// Change visible here:
//   - removed: returned the Pronunciation* directly and ignored whether
//     parsePronunciation() encountered problems.
//   - added:   returns the Core::Status produced by parsePronunciation() and
//     delivers the pointer through the reference parameter `out`.
//
// Added-version contract:
//   phon  - phoneme string handed to parsePronunciation().
//   out   - set to nullptr when parsing fails; otherwise set to the result of
//           getOrCreatePronunciation() for the parsed sequence.
//   return - the parse status; when it is not ok, no pronunciation is
//           looked up or created.
//
// Requires phonemeInventory() to be non-null/true (enforced by require()).
268- Pronunciation* Lexicon::getPronunciation (const std::string& phon) {
282+ Core::Status Lexicon::getPronunciation (const std::string& phon, Pronunciation*& out ) {
269283 require (phonemeInventory ());
284+
270285 std::vector<Phoneme::Id> phonemes;
271- parsePronunciation (phon, phonemes);
286+ Core::Status status = parsePronunciation (phon, phonemes);
// Bail out before touching the pronunciation map when parsing failed.
287+ if (!status.ok ()) {
288+ out = nullptr ;
289+ return status;
290+ }
291+
// The sequence is terminated with Phoneme::term before the lookup.
272292 phonemes.push_back (Phoneme::term);
273- Pronunciation* pron = getOrCreatePronunciation (phonemes);
274- return pron ;
293+ out = getOrCreatePronunciation (phonemes);
294+ return status ;
275295}
276296
277297void Lexicon::addPronunciation (Lemma* lemma, Pronunciation* pron, f32 weight) {
0 commit comments