@@ -257,7 +257,7 @@ void LexiconElement::addPhon(const WeightedPhonemeString& phon) {
257257 return ;
258258 if (!product_->phonemeInventory ()) {
259259 parser ()->warning (
260- " No phoneme inventory defined. Ingnoring pronunciation" );
260+ " No phoneme inventory defined. Ignoring pronunciation" );
261261 return ;
262262 }
263263
@@ -358,7 +358,7 @@ const Core::ParameterString paramEncoding(
358358 " utf-8" );
359359} // namespace
360360
361- void LexiconParser ::loadWhitelist (const Core::Configuration& config, Core::StringHashSet& whitelist) {
361+ void XmlLexiconParser ::loadWhitelist (const Core::Configuration& config, Core::StringHashSet& whitelist) {
362362 std::string filename = paramFile (config);
363363 if (!filename.empty ()) {
364364 Core::CompressedInputStream* cis = new Core::CompressedInputStream (filename.c_str ());
@@ -379,12 +379,88 @@ void LexiconParser::loadWhitelist(const Core::Configuration& config, Core::Strin
379379 }
380380}
381381
382- LexiconParser::LexiconParser (const Core::Configuration& c, Lexicon* _lexicon)
383- : Precursor(c) {
382+ XmlLexiconParser::XmlLexiconParser (const Core::Configuration& c, Lexicon* _lexicon)
383+ : LexiconParser(),
384+ XmlSchemaParser(c) {
384385 lexicon_ = _lexicon;
385386
386387 // build schema
387388 LexiconElement* lexElement = new LexiconElement (this , LexiconElement::creationHandler (&Self::pseudoCreateLexicon), c);
388389 loadWhitelist (select (" vocab" ), lexElement->whitelist_ );
389390 setRoot (collect (lexElement));
390391}
392+
393+ // use base class parse function
394+ bool XmlLexiconParser::parseFile (const std::string& filename) {
395+ return parser ()->Core ::XmlSchemaParser::parseFile (filename.c_str ()) == 0 ;
396+ }
397+
398+ VocabTextLexiconParser::VocabTextLexiconParser (Lexicon* _lexicon)
399+ : LexiconParser(),
400+ lexicon_(_lexicon) {
401+ phonemeInventory_ = Core::Ref (new PhonemeInventory ());
402+ }
403+
404+ // parse txt file line by line to a Bliss::Lexicon
405+ // in the first step, the phonemes are created and the phoneme inventory is set
406+ // and afterwards the lemmata can be created from these phonemes
407+ bool VocabTextLexiconParser::parseFile (const std::string& filename) {
408+ // collect all labels from the file and add them as phonemes to the phoneme inventory
409+ std::ifstream file (filename);
410+ if (!file.is_open ()) {
411+ return false ;
412+ }
413+ std::string line;
414+ while (std::getline (file, line)) {
415+ if (line.empty ())
416+ continue ;
417+ createPhoneme (line);
418+ }
419+
420+ // set the phoneme inventory
421+ lexicon_->setPhonemeInventory (phonemeInventory_);
422+ // iterate over the phonemes in the inventory to create the lemmata in the lexicon
423+ createLemmata ();
424+ return true ;
425+ }
426+
427+ // helper function to handle one label and create a corresponding phoneme
428+ void VocabTextLexiconParser::createPhoneme (const std::string& line) {
429+ std::string symbol (line);
430+ stripWhitespace (symbol); // in case there are any unintentional whitespaces
431+ suppressTrailingBlank (symbol);
432+
433+ // check if phoneme was already added (if one label appears more than once)
434+ if (phonemeInventory_->phoneme (symbol)) {
435+ Core::Application::us ()->error (" Phoneme \" %s\" was already added to the inventory. It may be duplicated in the lexicon." , symbol.c_str ());
436+ }
437+
438+ // create a new phoneme
439+ Phoneme* newPhoneme_ = phonemeInventory_->newPhoneme ();
440+ // set symbol
441+ phonemeInventory_->assignSymbol (newPhoneme_, symbol);
442+ // set variation to none
443+ newPhoneme_->setContextDependent (false );
444+ }
445+
446+ // helper function to create the lemmata
447+ void VocabTextLexiconParser::createLemmata () {
448+ // iterate over the phonemes which were assigned to the inventory previously
449+ auto phonemes = phonemeInventory_->phonemes ();
450+ for (auto it = phonemes.first ; it != phonemes.second ; ++it) {
451+ const Phoneme* phoneme = *it;
452+ std::string symbol = phoneme->symbol ();
453+
454+ // make sure that lemma has not been added yet
455+ verify (!lexicon_->lemma (symbol));
456+
457+ // create a new lemma
458+ Lemma* newLemma_ = lexicon_->newLemma ();
459+ // set orth
460+ lexicon_->setOrthographicForms (newLemma_, {symbol});
461+ // set phon
462+ Pronunciation* pron = lexicon_->getPronunciation (symbol);
463+ lexicon_->addPronunciation (newLemma_, pron);
464+ lexicon_->setDefaultLemmaName (newLemma_);
465+ }
466+ }
0 commit comments