diff --git a/inflection/resources/org/unicode/inflection/dictionary/.gitattributes b/inflection/resources/org/unicode/inflection/dictionary/.gitattributes index bff5d4bc..141b7cd6 100644 --- a/inflection/resources/org/unicode/inflection/dictionary/.gitattributes +++ b/inflection/resources/org/unicode/inflection/dictionary/.gitattributes @@ -1,4 +1,6 @@ dictionary_da.lst filter=lfs diff=lfs merge=lfs -text dictionary_en.lst filter=lfs diff=lfs merge=lfs -text +dictionary_es.lst filter=lfs diff=lfs merge=lfs -text inflectional_da.xml filter=lfs diff=lfs merge=lfs -text inflectional_en.xml filter=lfs diff=lfs merge=lfs -text +inflectional_es.xml filter=lfs diff=lfs merge=lfs -text diff --git a/inflection/resources/org/unicode/inflection/dictionary/dictionary_es.lst b/inflection/resources/org/unicode/inflection/dictionary/dictionary_es.lst index f2cca817..a4f94f0f 100644 --- a/inflection/resources/org/unicode/inflection/dictionary/dictionary_es.lst +++ b/inflection/resources/org/unicode/inflection/dictionary/dictionary_es.lst @@ -1,152 +1,3 @@ -Avión: singular masculine feminine noun proper-noun inflection=79 -Aviones: plural masculine feminine noun proper-noun inflection=79 -Bienvenida: singular feminine noun proper-noun inflection=6 -Bienvenido: singular masculine noun proper-noun inflection=c -Bienvenidos: plural masculine noun proper-noun inflection=c -Luces: plural feminine noun proper-noun inflection=179 -Luz: singular feminine noun proper-noun inflection=179 -Mac: singular masculine feminine appleproduct abbreviation noun proper-noun inflection=5c -Madrid: singular masculine feminine noun proper-noun inflection=12 -Real: singular masculine noun proper-noun inflection=13 -Victoria: singular feminine noun proper-noun inflection=6 -a: adposition -abracadabra: singular masculine noun inflection=1 -acciones: singular plural feminine subjunctive second present noun verb inflection=5 -actrices: plural feminine noun inflection=2c2 -actriz: singular feminine noun 
inflection=2c2 -aérea: singular feminine adjective inflection=3 -agua: singular stressed feminine indicative imperative second third present noun verb inflection=2 -aguas: singular plural feminine indicative second present noun verb inflection=2 -águila: singular stressed masculine feminine noun inflection=956 -al: determiner adposition inflection=117 -alarma: singular feminine indicative imperative second third present noun verb inflection=2 -álgebra: singular stressed feminine noun inflection=2 -amiga: singular feminine indicative imperative second third present adjective noun verb inflection=a -análisis: singular plural masculine noun inflection=14 -añeja: singular feminine indicative imperative second third present adjective verb inflection=3 -anís: singular masculine noun inflection=de -app: singular stressed feminine noun inflection=2 -apps: plural feminine noun inflection=2 -área: singular stressed feminine noun inflection=2 -áreas: plural feminine noun inflection=2 -avión: singular masculine noun inflection=e -aviones: plural masculine noun inflection=e -barco: singular masculine noun inflection=1 -barcos: plural masculine noun inflection=1 -bienvenidas: plural feminine adjective noun inflection=3 -bienvenida: singular feminine adjective noun inflection=3 -bienvenido: singular masculine adjective inflection=3 -bienvenidos: plural masculine adjective inflection=3 -café: singular masculine feminine adjective noun inflection=1 -cambio: singular masculine indicative first present noun verb inflection=1 -cambios: plural masculine noun inflection=1 -camión: singular masculine noun inflection=e -capataz: singular masculine feminine noun inflection=1c0 -clima: singular masculine noun inflection=1 -clubes: plural masculine noun inflection=72 -compás: singular masculine noun inflection=90 -conversación: singular feminine noun inflection=5 -conversaciones: plural feminine noun inflection=5 -costumbre: singular feminine noun inflection=2 -crisis: singular plural 
feminine noun inflection=19 -cuarto: singular masculine indicative first present determiner noun pronoun verb inflection=1 -de: singular imperative subjunctive first second third present adposition verb -déficits: plural masculine noun inflection=1 -del: determiner adposition inflection=117 -delegada: singular feminine participle adjective noun verb inflection=3 -delegado: singular masculine participle adjective noun verb inflection=3 -el: singular definite masculine determiner pronoun inflection=33b -elección: singular feminine noun inflection=5 -elecciones: plural feminine noun inflection=5 -en: adposition -encendido: singular masculine past participle adjective verb inflection=3 -encendidos: plural masculine adjective inflection=3 -es: singular indicative second third present verb inflection=8da -esta: singular demonstrative feminine determiner noun pronoun inflection=81 -está: singular indicative imperative second third present verb inflection=6d0 -estaría: singular conditional first third past verb inflection=6d0 -están: plural indicative third present verb inflection=6d0 -estas: plural demonstrative feminine determiner noun pronoun inflection=81 -este: singular demonstrative masculine determiner noun pronoun inflection=1 -farola: singular feminine noun inflection=2 -fax: singular masculine noun inflection=7 -faxes: plural masculine noun inflection=7 -finanzas: plural feminine noun inflection=2 -frac: singular masculine noun inflection=1 -fracs: plural masculine noun inflection=1 -fraque: singular masculine noun inflection=1 -fraques: plural masculine noun inflection=1 -fría: singular feminine imperative subjunctive first second third present adjective verb inflection=271 -gata: singular feminine adjective noun inflection=3 -gatas: plural feminine adjective noun inflection=3 -gato: singular masculine adjective noun inflection=3 -gatos: plural masculine adjective noun inflection=3 -habitación: singular feminine noun inflection=5 -hábitat: singular masculine 
noun inflection=1 -harina: singular feminine noun inflection=2 -histórico: singular masculine adjective inflection=3 -históricos: plural masculine adjective inflection=3 -hotel: singular masculine noun inflection=7 -hoteles: plural masculine noun inflection=7 -humedad: singular feminine noun inflection=d -incendios: plural masculine noun inflection=1 -ítems: plural masculine noun inflection=1 -la: singular definite feminine determiner pronoun inflection=33b -lápices: plural masculine noun inflection=3f -lápiz: singular masculine noun inflection=3f -las: plural definite feminine determiner pronoun inflection=33b -lemming: singular masculine feminine noun inflection=1 -lemmings: plural masculine feminine noun inflection=1 -línea: singular feminine noun inflection=2 -llaves: plural feminine noun inflection=2 -los: plural definite masculine determiner pronoun inflection=117 -luces: singular plural feminine indicative second present noun verb inflection=23 -lunes: singular plural masculine noun inflection=14 -luz: singular feminine noun inflection=23 -móvil: singular masculine feminine adjective noun inflection=b -nariz: singular feminine noun inflection=23 -pan: singular masculine noun inflection=7 -panes: plural masculine noun inflection=7 -pantalla: singular feminine noun inflection=38 -pantallas: plural feminine noun inflection=38 -pantallita: singular feminine diminutive noun inflection=38 -pantallitas: plural feminine diminutive noun inflection=38 -papás: plural masculine noun inflection=1 -papel: singular masculine noun inflection=7 -para: singular indicative imperative subjunctive first second third present adposition verb inflection=7a3 -paracaídas: singular plural masculine noun inflection=14 -paraguas: singular plural masculine noun inflection=14 -planeta: singular masculine feminine noun inflection=1 -porcentaje: singular masculine noun inflection=1 -porcentual: singular masculine feminine adjective inflection=b -porcentuales: plural masculine feminine 
adjective inflection=b -precio: singular masculine indicative first present noun verb inflection=1 -precios: plural masculine noun inflection=1 -real: singular masculine feminine adjective noun inflection=7 -registrada: singular feminine participle adjective verb inflection=17 -registradísima: singular superlative feminine adjective inflection=17 -registradísimo: singular superlative masculine adjective inflection=17 -registrado: singular masculine participle adjective verb inflection=17 -sensor: singular masculine noun inflection=7 -sensores: plural masculine noun inflection=7 -superficie: singular feminine noun inflection=2 -televisión: singular feminine noun inflection=5 -temperatura: singular feminine noun inflection=2 -tiempo: singular masculine noun inflection=1 -toquera: singular feminine noun inflection=a -toqueras: plural feminine noun inflection=a -toquero: singular masculine noun inflection=a -toqueros: plural masculine noun inflection=a -un: singular plural indefinite short-form masculine feminine determiner number inflection=3c4 -una: singular indefinite feminine imperative subjunctive first second third present determiner noun number pronoun verb inflection=27 -unas: singular plural indefinite feminine subjunctive second present determiner pronoun verb inflection=3c4 -universidad: singular feminine noun inflection=d -uno: singular masculine indicative first present noun number pronoun verb inflection=1 -usuaria: singular feminine adjective noun inflection=2 inflection=3 inflection=a -usuarias: plural feminine adjective noun inflection=2 inflection=3 inflection=a -usuario: singular masculine adjective noun inflection=3 inflection=a -usuarios: plural masculine adjective noun inflection=3 inflection=a -victoria: singular feminine noun inflection=2 -============================================== -Manually curated for tests to pass -Copyright 2024-2024 Apple Inc. All rights reserved. 
+version https://git-lfs.github.com/spec/v1 +oid sha256:f453b219310e220251f795c0440227e2cd6a9f048068c215292b83b196b51d78 +size 37322867 diff --git a/inflection/resources/org/unicode/inflection/dictionary/inflectional_es.xml b/inflection/resources/org/unicode/inflection/dictionary/inflectional_es.xml index 1139e7f0..2339c5aa 100644 --- a/inflection/resources/org/unicode/inflection/dictionary/inflectional_es.xml +++ b/inflection/resources/org/unicode/inflection/dictionary/inflectional_es.xml @@ -1,516 +1,3 @@ - - - - - noun - - - - s - - - - noun - - - - s - - - - adjective - o - - o - a - os - as - - - - noun - ón - - ón - ones - - - - noun - proper-noun - - - - s - - - - noun - - - - es - - - - noun - o - - o - a - os - as - - - - adjective - - - - - es - es - - - - noun - proper-noun - - - - s - - - - noun - - - - es - - - - noun - ón - - ón - ones - - - - noun - proper-noun - - - - - - - noun - proper-noun - - - - - - - noun - - - - - - - - adjective - o - - ísimo - ísima - ísimos - ísimas - o - a - os - as - - - - noun - - - - - - - - noun - z - - z - ces - - - - noun - - - - - - - noun - a - - ita - itas - a - as - - - - noun - z - - z - ces - - - - noun - proper-noun - - - - s - - - - noun - - - - s - es - - - - noun - proper-noun - ón - - ón - ón - ones - ones - - - - noun - e - - e - a - es - as - - - - noun - ás - - ás - ases - - - - noun - ís - - ís - ises - - - - determiner - - - noun - proper-noun - z - - z - ces - - - - noun - z - - z - z - ces - ces - za - zas - - - - adjective - o - - simo - sima - simos - simas - o - a - os - as - - - - noun - or - - or - riz - ores - rices - - - - determiner - el - - el - la - lo - los - las - - - - determiner - - - - a - os - as - - - - verb - ar - - aría - arías - aría - aba - uve - oy - aré - abas - uviste - ás - arás - aba - uvo - á - ará - uviera - uviese - é - uviere - uvieras - uvieses - és - uvieres - uviera - uviese - é - uviere - aríamos - arían - uvimos - ábamos - amos - aremos - aban - uvieron - án - 
arán - uviéramos - uviésemos - emos - uviéremos - uvieran - uvieseis - én - uvieren - á - é - emos - én - ado - ar - ando - - - - verb - rar - - raría - rarías - raría - raba - - ro - raré - rabas - raste - ras - rarás - raba - - ra - rara - rase - re - rare - raras - rases - res - rares - rara - rase - re - rare - raríamos - rarían - ramos - rábamos - ramos - raremos - raban - raron - ran - ráramos - rásemos - remos - ráremos - raran - rasen - ren - raren - ra - re - remos - ren - rado - rar - rando - rará - rarán - - - - verb - ser - - sería - sería - serían - sería - era - fui - soy - seré - era - erais - eran - eras - fue - fueron - es - son - será - serán - era - es - será - fuera - fuese - sea - fuere - fuera - fueran - fuese - fuesen - sea - sean - fuere - fueren - fuera - fuese - sea - fuere - seríamos - serían - fuimos - éramos - somos - seremos - eran - fueron - son - serán - fuéramos - fuésemos - seamos - fuéremos - fueran - fuesen - sean - fueren - sea - - seamos - sean - seríais - serías - fuiste - fuisteis - eres - sois - serás - seréis - fue - fuerais - fueras - fueseis - fueses - seas - seáis - fuereis - fueres - sido - ser - siendo - - - - noun - águila - - aguilita - aguilitas - águila - águilas - - - +version https://git-lfs.github.com/spec/v1 +oid sha256:81b3cde1c7e11d50a8885286256e814f25c7c708d112aa92219701b13f3501ed +size 4324391 diff --git a/inflection/src/inflection/dialog/DictionaryLookupFunction.cpp b/inflection/src/inflection/dialog/DictionaryLookupFunction.cpp index e4683caa..0fd2b6d0 100644 --- a/inflection/src/inflection/dialog/DictionaryLookupFunction.cpp +++ b/inflection/src/inflection/dialog/DictionaryLookupFunction.cpp @@ -49,7 +49,7 @@ ::std::u16string DictionaryLookupFunction::determine(const ::std::u16string& wor { int64_t properties = 0; getDictionary().getCombinedBinaryType(&properties, word); - if (enableDisambiguation && countMaskedEnabledBits(properties) != 1) { + if (enableDisambiguation && 
std::popcount(static_cast(properties)) != 1) { // OK so now it's either ambiguous or not in the dictionary. return determineWithDisambiguation(word); } @@ -60,7 +60,7 @@ ::std::u16string DictionaryLookupFunction::determinePhrase(const ::std::u16strin { int64_t properties = 0; getDictionary().getCombinedBinaryType(&properties, word); - if (countMaskedEnabledBits(properties) != 1) { + if (std::popcount(static_cast(properties)) != 1) { // OK so now it's either ambiguous or not in the dictionary. ::std::u16string result; if (properties == 0) { @@ -85,7 +85,10 @@ ::std::u16string DictionaryLookupFunction::determinePhrase(const ::std::u16strin } // else it's an unknown single word or not a word. } - // else it's a known single word. We're not going to guess it. + else { + // It's a known single word. Try to disambiguate it. + result = determineWithDisambiguation(word); + } return result; } @@ -136,7 +139,7 @@ ::std::u16string DictionaryLookupFunction::determineWithDisambiguation(std::u16s if (propertySets.empty()) { return {}; } - ::std::vector disambiguatedPropertySets(disambiguationPartsOfSpeech.size()+1, 0); + ::std::vector disambiguatedPropertySets(disambiguationPartsOfSpeech.size() + 1, 0); for (const auto properties : propertySets) { int64_t i = 0; for (const auto partOfSpeech : disambiguationPartsOfSpeech) { @@ -146,7 +149,7 @@ ::std::u16string DictionaryLookupFunction::determineWithDisambiguation(std::u16s } i += 1; } - if (i == ((int64_t) disambiguationPartsOfSpeech.size())) { + if (i == static_cast(disambiguationPartsOfSpeech.size())) { disambiguatedPropertySets[i] |= properties; } } @@ -158,17 +161,6 @@ ::std::u16string DictionaryLookupFunction::determineWithDisambiguation(std::u16s return {}; } -int8_t DictionaryLookupFunction::countMaskedEnabledBits(int64_t bitField) const -{ - int8_t result = 0; - uint64_t uBitField = (uint64_t)(bitField & mask); - while (uBitField != 0) { - uBitField &= uBitField - 1; // e.g. 
1100 & 1011 --> 1000 - result++; - } - return result; -} - ::std::u16string DictionaryLookupFunction::getFirstWord(const ::std::u16string& word) const { std::unique_ptr tokenChain(npc(tokenizer->createTokenChain(word))); diff --git a/inflection/src/inflection/dialog/DictionaryLookupFunction.hpp b/inflection/src/inflection/dialog/DictionaryLookupFunction.hpp index 92bc9bb3..1613812a 100644 --- a/inflection/src/inflection/dialog/DictionaryLookupFunction.hpp +++ b/inflection/src/inflection/dialog/DictionaryLookupFunction.hpp @@ -37,7 +37,6 @@ class inflection::dialog::DictionaryLookupFunction private: ::std::u16string determineWithDisambiguation(std::u16string_view word) const; - int8_t countMaskedEnabledBits(int64_t bitField) const; public: ::std::u16string getFirstWord(const ::std::u16string& word) const; diff --git a/inflection/src/inflection/grammar/synthesis/EsGrammarSynthesizer_CountGenderLookupFunction.cpp b/inflection/src/inflection/grammar/synthesis/EsGrammarSynthesizer_CountGenderLookupFunction.cpp index c14c1976..0c44bcd1 100644 --- a/inflection/src/inflection/grammar/synthesis/EsGrammarSynthesizer_CountGenderLookupFunction.cpp +++ b/inflection/src/inflection/grammar/synthesis/EsGrammarSynthesizer_CountGenderLookupFunction.cpp @@ -4,10 +4,6 @@ #include #include -#include -#include -#include -#include #include #include #include @@ -19,13 +15,12 @@ EsGrammarSynthesizer_CountGenderLookupFunction::EsGrammarSynthesizer_CountGender , dictionary(getDictionary()) , grammarCategoryType(categoryType) { - ::inflection::util::Validate::notNull(dictionary.getBinaryProperties(&nounProperty, {u"noun"})); + ::inflection::util::Validate::notNull(dictionary.getBinaryProperties(&nounProperty, {GrammemeConstants::POS_NOUN()})); ::inflection::util::Validate::notNull(dictionary.getBinaryProperties(&propertiesMask, properties)); } EsGrammarSynthesizer_CountGenderLookupFunction::~EsGrammarSynthesizer_CountGenderLookupFunction() { - } ::std::u16string 
guessGender(::std::u16string_view word, bool knownWord) { diff --git a/inflection/tools/dictionary-parser/src/main/java/org/unicode/wikidata/ParserOptions.java b/inflection/tools/dictionary-parser/src/main/java/org/unicode/wikidata/ParserOptions.java index 3f557c23..99928dcb 100644 --- a/inflection/tools/dictionary-parser/src/main/java/org/unicode/wikidata/ParserOptions.java +++ b/inflection/tools/dictionary-parser/src/main/java/org/unicode/wikidata/ParserOptions.java @@ -140,7 +140,7 @@ private static void printUsage() { posToBeInflected.clear(); for (String pos : inflectionTypes.split(",")) { - posToBeInflected.add(Grammar.PartOfSpeech.valueOf(pos.toUpperCase())); + posToBeInflected.add(Grammar.PartOfSpeech.valueOf(pos.toUpperCase().replace('-', '_'))); } optionsUsedToInvoke.add(ParserOptions.INFLECTION_TYPES); diff --git a/inflection/tools/dictionary-parser/src/main/resources/org/unicode/wikidata/filter_es.properties b/inflection/tools/dictionary-parser/src/main/resources/org/unicode/wikidata/filter_es.properties new file mode 100644 index 00000000..96948ec3 --- /dev/null +++ b/inflection/tools/dictionary-parser/src/main/resources/org/unicode/wikidata/filter_es.properties @@ -0,0 +1,7 @@ +# Copyright 2025 Unicode Incorporated and others. All rights reserved. +# +# These are lexemes that should either be ignored due to irrelevance that can't be easily tagged as irrelevant, +# or words that are just not that common that should be sorted last in the inflection patterns. + +# The L940889 lexeme refers to la (a musical note), which should not be confused with the definite article. 
+L940889=omit \ No newline at end of file diff --git a/inflection/tools/dictionary-parser/src/test/java/org/unicode/wikidata/ParseWikidataTest.java b/inflection/tools/dictionary-parser/src/test/java/org/unicode/wikidata/ParseWikidataTest.java index e25ee1ca..3489a7ef 100644 --- a/inflection/tools/dictionary-parser/src/test/java/org/unicode/wikidata/ParseWikidataTest.java +++ b/inflection/tools/dictionary-parser/src/test/java/org/unicode/wikidata/ParseWikidataTest.java @@ -4,8 +4,6 @@ */ package org.unicode.wikidata; -import java.io.ByteArrayOutputStream; -import java.io.PrintStream; import java.lang.invoke.MethodHandles; import java.nio.charset.StandardCharsets; import java.nio.file.Files; @@ -16,36 +14,33 @@ import org.xml.sax.SAXException; class ParseWikidataTest { - private final static String lexiconSource = Objects + private static final String lexiconSource = Objects .requireNonNull(MethodHandles.lookup().lookupClass().getClassLoader().getResource("sourceLexicon.json")).getFile(); - private final static String baseDir = lexiconSource.substring(0, lexiconSource.lastIndexOf("/")); - private final static String inflectionalFile = baseDir+"/LexiconInflectOut.xml"; - private final static String expectedOutputFile = baseDir+ "/lexiconCorrectOut.txt"; - private final static String fatalErrorFile = baseDir+ "/lexiconFatalError.json"; - private final static String missingGrammemeErrorFile = baseDir+ "/lexiconMissingGrammemeError.json"; - private final static String caseInsensitiveGrammemeFile = baseDir+ "/lexiconcaseInsenstiveGrammeme.json"; - private final static String LINE_END = "=============================================="; + private static final String baseDir = lexiconSource.substring(0, lexiconSource.lastIndexOf("/")); + private static final String inflectionalFile = baseDir+"/LexiconInflectOut.xml"; + private static final String dictionaryFile = baseDir+"/LexiconInflectOut.lst"; + private static final String expectedOutputFile = baseDir+ 
"/lexiconCorrectOut.txt"; + private static final String fatalErrorFile = baseDir+ "/lexiconFatalError.json"; + private static final String missingGrammemeErrorFile = baseDir+ "/lexiconMissingGrammemeError.json"; + private static final String caseInsensitiveGrammemeFile = baseDir+ "/lexiconcaseInsenstiveGrammeme.json"; + private static final String LINE_END = "=============================================="; private void compareOutputs(String actual, String expected) { - String[] actual_lines = actual.split("\n"); - String[] expected_lines = expected.split("\n"); - for(int i=0 ; (i ParseWikidata.main(args)); } @Test public void missingGrammemeTest() { - String[] args = {"--inflections", inflectionalFile, missingGrammemeErrorFile}; + String[] args = {"--inflections", inflectionalFile, "--dictionary", dictionaryFile, missingGrammemeErrorFile}; Assertions.assertThrows(SAXException.class,() -> ParseWikidata.main(args)); } @Test public void caseInsensitiveGrammemeTest() { - String[] args = {"--inflections", inflectionalFile, caseInsensitiveGrammemeFile}; + String[] args = {"--inflections", inflectionalFile, "--dictionary", dictionaryFile, caseInsensitiveGrammemeFile}; Assertions.assertDoesNotThrow(() -> ParseWikidata.main(args)); } @Test public void lexiconParserTest() throws Exception { - String[] args = {"--ignore-entries-with-affixes", "--inflection-types", "noun,adjective", "--ignore-grammemes-for-types", "verb", "--inflections", - inflectionalFile, lexiconSource}; + String[] args = {"--inflection-types", "noun,adjective,proper-noun", "--inflections", inflectionalFile, + "--dictionary", dictionaryFile, lexiconSource}; String actual = getParserOutput(args); String expected = Files.readString(Paths.get(expectedOutputFile), StandardCharsets.UTF_8); compareOutputs(actual, expected); diff --git a/inflection/tools/dictionary-parser/src/test/resources/lexiconCorrectOut.txt b/inflection/tools/dictionary-parser/src/test/resources/lexiconCorrectOut.txt new file mode 100644 
index 00000000..4a7ed2f0 --- /dev/null +++ b/inflection/tools/dictionary-parser/src/test/resources/lexiconCorrectOut.txt @@ -0,0 +1,93 @@ +Ahmed: singular plural masculine vowel-start noun proper-noun inflection=5 +ciel: singular masculine noun inflection=1 +ciels: plural masculine noun inflection=1 +cieux: plural masculine noun inflection=1 +hey: interjection +house: singular noun verb inflection=7 +house's: singular genitive noun inflection=7 +housed: verb +houses: plural noun verb inflection=7 +houses': plural genitive noun inflection=7 +housing: verb +idéal: singular masculine noun inflection=1 inflection=6 +idéals: plural masculine noun inflection=1 inflection=6 +idéaux: plural masculine noun inflection=1 inflection=6 +work: verb +worked: verb +working: verb +works: verb +Élie: plural masculine noun proper-noun inflection=3 +Абердинская: singular nominative feminine adjective inflection=2 +Абердинские: plural inanimate accusative nominative adjective inflection=2 +Абердинский: singular inanimate accusative nominative masculine adjective inflection=2 +Абердинским: singular plural dative instrumental masculine neuter adjective inflection=2 +Абердинскими: plural instrumental adjective inflection=2 +Абердинских: plural animate accusative genitive prepositional adjective inflection=2 +Абердинского: singular animate accusative genitive masculine neuter adjective inflection=2 +Абердинское: singular animate inanimate accusative nominative neuter adjective inflection=2 +Абердинской: singular dative genitive instrumental prepositional feminine adjective inflection=2 +Абердинском: singular prepositional masculine neuter adjective inflection=2 +Абердинскому: singular dative masculine neuter adjective inflection=2 +Абердинскую: singular animate inanimate accusative feminine adjective inflection=2 +Австралийская: singular nominative feminine adjective inflection=2 +Австралийские: plural inanimate accusative nominative adjective inflection=2 +Австралийский: singular 
inanimate accusative nominative masculine adjective inflection=2 +Австралийским: singular plural dative instrumental masculine neuter adjective inflection=2 +Австралийскими: plural instrumental adjective inflection=2 +Австралийских: plural animate accusative genitive prepositional adjective inflection=2 +Австралийского: singular animate accusative genitive masculine neuter adjective inflection=2 +Австралийское: singular animate inanimate accusative nominative neuter adjective inflection=2 +Австралийской: singular dative genitive instrumental prepositional feminine adjective inflection=2 +Австралийском: singular prepositional masculine neuter adjective inflection=2 +Австралийскому: singular dative masculine neuter adjective inflection=2 +Австралийскую: singular animate inanimate accusative feminine adjective inflection=2 +Адыгее: singular dative prepositional feminine noun proper-noun inflection=4 +Адыгеей: singular instrumental feminine noun proper-noun inflection=4 +Адыгеи: singular genitive feminine noun proper-noun inflection=4 +Адыгею: singular accusative feminine noun proper-noun inflection=4 +Адыгея: singular nominative feminine noun proper-noun inflection=4 +============================================== + Source: sourceLexicon.json + Lemma terms: 13 + Unusable lemma terms: 0 + Incoming surface forms: 104 + Surface forms: 48 + Collapsed surface forms: 51 (49%) + Unusable surface forms: 5 + Usable terms: 48 (100%) + Unclassified terms: 0 (0%) +============================================== +Animacy: + inanimate: 8 (16.7%) + animate: 8 (16.7%) + +Case: + accusative: 13 (27.1%) + nominative: 9 (18.8%) + genitive: 9 (18.8%) + prepositional: 7 (14.6%) + instrumental: 7 (14.6%) + dative: 7 (14.6%) + +Count: + singular: 28 (58.3%) + plural: 16 (33.3%) + +Gender: + masculine: 18 (37.5%) + feminine: 11 (22.9%) + neuter: 10 (20.8%) + +PartOfSpeech: + adjective: 24 (50%) + noun: 17 (35.4%) + verb: 8 (16.7%) + proper-noun: 7 (14.6%) + interjection: 1 (2.1%) + +Sound: + 
vowel-start: 1 (2.1%) + +processed in 0.50 seconds +License: Apple's Lexical Resources (https://stashweb.sd.apple.com/projects/NLP/repos/lexicalresources/) +generated with options: --ignore-entries-with-affixes --inflection-types noun,adjective --ignore-grammemes-for-types verb \ No newline at end of file diff --git a/inflection/tools/dictionary-parser/src/test/resources/sourceLexicon.json b/inflection/tools/dictionary-parser/src/test/resources/sourceLexicon.json index e69de29b..d8c5b81b 100644 --- a/inflection/tools/dictionary-parser/src/test/resources/sourceLexicon.json +++ b/inflection/tools/dictionary-parser/src/test/resources/sourceLexicon.json @@ -0,0 +1,210 @@ +[ + { + "type": "lexeme", + "id": "L14900", + "lemmas": { + "en": { + "language": "en", + "value": "hey" + } + }, + "lexicalCategory": "Q83034", + "claims": {}, + "forms": [ + { + "id": "L14900-F1", + "representations": { + "en": { + "language": "en", + "value": "hey" + } + }, + "grammaticalFeatures": [], + "claims": {} + } + ], + "senses": [] + }, + { + "type": "lexeme", + "id": "L494142", + "lemmas": { + "en": { + "language": "en", + "value": "Ahmed" + } + }, + "lexicalCategory": "Q147276", + "claims": {}, + "forms": [ + { + "id": "L494142-F1", + "representations": { + "en": { + "language": "en", + "value": "Ahmed" + } + }, + "grammaticalFeatures": [ + "Q110786", + "Q499327" + ], + "claims": { + "P898": [ + { + "mainsnak": { + "snaktype": "value", + "property": "P898", + "datavalue": { + "value": "\u02c8\u0251\u02d0.m\u025bd", + "type": "string" + }, + "datatype": "string" + }, + "type": "statement", + "rank": "normal" + } + ] + } + }, + { + "id": "L494142-F2", + "representations": { + "en": { + "language": "en", + "value": "Ahmed" + } + }, + "grammaticalFeatures": [ + "Q146786", + "Q499327" + ], + "claims": {} + } + ], + "senses": [] + }, + { + "type": "lexeme", + "id": "L23571", + "lemmas": { + "en": { + "language": "en", + "value": "house" + } + }, + "lexicalCategory": "Q24905", + "claims": {}, + 
"forms": [ + { + "id": "L23571-F1", + "representations": { + "en": { + "language": "en", + "value": "house" + } + }, + "grammaticalFeatures": [ + "Q3910936" + ], + "claims": {} + }, + { + "id": "L23571-F2", + "representations": { + "en": { + "language": "en", + "value": "houses" + } + }, + "grammaticalFeatures": [ + "Q110786", + "Q3910936", + "Q51929074" + ], + "claims": {} + }, + { + "id": "L23571-F3", + "representations": { + "en": { + "language": "en", + "value": "housed" + } + }, + "grammaticalFeatures": [ + "Q1392475" + ], + "claims": {} + }, + { + "id": "L23571-F4", + "representations": { + "en": { + "language": "en", + "value": "housing" + } + }, + "grammaticalFeatures": [ + "Q10345583" + ], + "claims": {} + }, + { + "id": "L23571-F5", + "representations": { + "en": { + "language": "en", + "value": "housed" + } + }, + "grammaticalFeatures": [ + "Q1230649" + ], + "claims": {} + } + ], + "senses": [] + }, + { + "type": "lexeme", + "id": "L512", + "lemmas": { + "en": { + "language": "en", + "value": "house" + } + }, + "lexicalCategory": "Q1084", + "claims": {}, + "forms": [ + { + "id": "L512-F1", + "representations": { + "en": { + "language": "en", + "value": "house" + } + }, + "grammaticalFeatures": [ + "Q110786" + ], + "claims": {} + }, + { + "id": "L512-F2", + "representations": { + "en": { + "language": "en", + "value": "houses" + } + }, + "grammaticalFeatures": [ + "Q146786" + ], + "claims": {} + } + ], + "senses": [] + } +] \ No newline at end of file