Skip to content

Commit 46333c3

Browse files
committed
Inflection-63 Integrate ko Wikidata into Unicode Inflection
Inflection-62 Integrate ar Wikidata into Unicode Inflection
Inflection-61 Integrate he Wikidata into Unicode Inflection
Inflection-60 Integrate hi Wikidata into Unicode Inflection
Inflection-58 Integrate nb Wikidata into Unicode Inflection
Inflection-56 Integrate nl Wikidata into Unicode Inflection
Inflection-55 Integrate tr Wikidata into Unicode Inflection
Inflection-54 Integrate ru Wikidata into Unicode Inflection
Inflection-53 Integrate it Wikidata into Unicode Inflection
Inflection-52 Integrate pt Wikidata into Unicode Inflection
Inflection-51 Integrate fr Wikidata into Unicode Inflection
Inflection-50 Integrate de Wikidata into Unicode Inflection
1 parent bb2bf15 commit 46333c3

File tree

139 files changed

+1341
-1319
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

139 files changed

+1341
-1319
lines changed

.gitignore

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1-
build/*
1+
.DS_Store
2+
.idea
23
.vscode/launch.json
34
.vscode/settings.json
5+
build
46
fst/__pycache__/*
5-
inflection/tools/dictionary-parser/bin/*

inflection/cmake/versions.mk

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
#
22
# Copyright 2021-2024 Apple Inc. All rights reserved.
33
#
4-
CATCH2_VERSION := 3.8.0
5-
CF_VERSION := 5.10.1
4+
CATCH2_VERSION := 3.9.0
65
ICU_VERSION_MINIMUM := 77.1
7-
MARISA_VERSION := 0.2.6
6+
MARISA_VERSION := 0.3.1

inflection/ext/lib/Marisa/CMakeLists.txt

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,7 @@ else()
2727
set(ARTIFACT "v${MARISA_VERSION}.tar.gz")
2828
ExternalProject_Add(marisa-trie-download
2929
URL https://github.com/s-yata/marisa-trie/archive/refs/tags/v${MARISA_VERSION}.tar.gz
30-
# This patch is needed to address weak vtables, which is an issue when using full link time optimization.
31-
# The patch may need to be updated if another version is created or additional patches are needed.
32-
BUILD_COMMAND ${CMAKE_CURRENT_LIST_DIR}/patch.sh ${CMAKE_CURRENT_BINARY_DIR}/marisa-trie ${CMAKE_CURRENT_LIST_DIR}/Exception.patch
30+
BUILD_COMMAND ""
3331
CONFIGURE_COMMAND "" INSTALL_COMMAND "" LOG_DOWNLOAD ON EXCLUDE_FROM_ALL TRUE
3432
SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/marisa-trie
3533
)

inflection/ext/lib/Marisa/Exception.patch

Lines changed: 0 additions & 34 deletions
This file was deleted.

inflection/ext/lib/Marisa/patch.sh

Lines changed: 0 additions & 2 deletions
This file was deleted.

inflection/resources/org/unicode/inflection/dictionary/supplemental_fr.lst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ Moyen-Orient: singular masculine noun proper-noun
1212
Tour Eiffel: singular feminine noun
1313
Vendée Globe: singular masculine noun
1414
hôtel: singular masculine noun vowel-start
15+
hôpital: singular masculine vowel-start noun
1516
==============================================
1617
Manually curated for tests to pass
1718
Copyright 2024-2024 Apple Inc. All rights reserved.

inflection/resources/org/unicode/inflection/features/grammar.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -591,10 +591,10 @@
591591
<restrictions>
592592
<restriction name="pos" value="pronoun"/>
593593
</restrictions>
594-
<grammeme name="common"/> <!-- unspecified/unknown/epicene e.g. "them" or "one" -->
595594
<grammeme name="masculine"/> <!-- e.g. "him" -->
596595
<grammeme name="feminine"/> <!-- e.g. "her" -->
597596
<grammeme name="neuter"/> <!-- e.g. "it", and not to be confused with "them" -->
597+
<!-- null means unknown/epicene e.g. "them" or "one" -->
598598
</category>
599599
<category name="pronounType">
600600
<restrictions>

inflection/resources/org/unicode/inflection/tokenizer/nl/tokenizer.dictionary

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40083,7 +40083,7 @@ alegre 000000b9
4008340083
tigre 01000040
4008440084
ogre 0100002d
4008540085
ihre 00000031
40086-
aire 01000054
40086+
aire 0d000054
4008740087
hypothecaire 000000f6
4008840088
precaire 00000053
4008940089
bancaire 0000006a
@@ -120654,7 +120654,7 @@ partijcongres 00000142
120654120654
vrouwencongres 00000028
120655120655
vredescongres 0000002c
120656120656
volkscongres 0000004e
120657-
aires 010002f5
120657+
aires 0d000055
120658120658
affaires 0000009c
120659120659
claires 01000036
120660120660
baudelaires 0000002c

inflection/src/inflection/analysis/RussianExposableMorphology.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ RussianExposableMorphology::RussianExposableMorphology()
2323
{GrammemeConstants::GENDER_MASCULINE(), GrammemeConstants::GENDER_FEMININE(), GrammemeConstants::GENDER_NEUTER()},
2424
{GrammemeConstants::NUMBER_SINGULAR(), GrammemeConstants::NUMBER_PLURAL()},
2525
{GrammemeConstants::ANIMACY_INANIMATE(), GrammemeConstants::ANIMACY_ANIMATE()},
26+
{GrammemeConstants::REGISTER_INFORMAL()}
2627
}, {{GrammemeConstants::POS_PROPER_NOUN(), GrammemeConstants::NUMBER_PLURAL()}}, true)
2728
{
2829
const auto &dictionary = getDictionary();

inflection/src/inflection/dialog/ArticleDetectionFunction.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212

1313
namespace inflection::dialog {
1414

15-
static bool getFeatureValues(::std::set<::std::u16string>* articles, const std::vector<::inflection::lang::features::LanguageGrammarFeatures_Feature>& featureValues, const ::std::set<::std::u16string>& excludeValues)
15+
static bool getFeatureValues(::std::set<::std::u16string, std::less<>>* articles, const std::vector<::inflection::lang::features::LanguageGrammarFeatures_Feature>& featureValues, const ::std::set<::std::u16string, std::less<>>& excludeValues)
1616
{
1717
bool hasTrailingSpace = false;
1818
bool normalizeApostrophe = false;
@@ -28,7 +28,7 @@ static bool getFeatureValues(::std::set<::std::u16string>* articles, const std::
2828
}
2929
for (const auto& featureValue : featureValues) {
3030
std::u16string featureValueStr(featureValue.getValue());
31-
if (featureValueStr.empty() || excludeValues.find(featureValueStr) != excludeValues.end()) {
31+
if (featureValueStr.empty() || excludeValues.contains(featureValueStr)) {
3232
continue;
3333
}
3434
normalizeApostrophe = normalizeApostrophe || featureValueStr.find(u'’') != std::u16string::npos;
@@ -37,7 +37,7 @@ static bool getFeatureValues(::std::set<::std::u16string>* articles, const std::
3737
return normalizeApostrophe;
3838
}
3939

40-
ArticleDetectionFunction::ArticleDetectionFunction(const ::inflection::util::ULocale& locale, const ::std::set<::std::u16string>& definiteFeatures, const ::std::set<::std::u16string>& excludeDefiniteValues, const ::std::set<::std::u16string>& indefiniteFeatures, const ::std::set<::std::u16string>& excludeIndefiniteValues)
40+
ArticleDetectionFunction::ArticleDetectionFunction(const ::inflection::util::ULocale& locale, const ::std::set<::std::u16string, std::less<>>& definiteFeatures, const ::std::set<::std::u16string, std::less<>>& excludeDefiniteValues, const ::std::set<::std::u16string, std::less<>>& indefiniteFeatures, const ::std::set<::std::u16string, std::less<>>& excludeIndefiniteValues)
4141
: super()
4242
, locale(locale)
4343
{
@@ -47,10 +47,10 @@ ArticleDetectionFunction::ArticleDetectionFunction(const ::inflection::util::ULo
4747
if (values.empty()) {
4848
continue;
4949
}
50-
if (definiteFeatures.find(feature.getName()) != definiteFeatures.end()) {
50+
if (definiteFeatures.contains(feature.getName())) {
5151
normalizeApostrophe = getFeatureValues(&definiteArticles, values, excludeDefiniteValues) || normalizeApostrophe;
5252
}
53-
if (indefiniteFeatures.find(feature.getName()) != indefiniteFeatures.end()) {
53+
if (indefiniteFeatures.contains(feature.getName())) {
5454
normalizeApostrophe = getFeatureValues(&indefiniteArticles, values, excludeIndefiniteValues) || normalizeApostrophe;
5555
}
5656
}

0 commit comments

Comments
 (0)