Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions src/mongo/db/fts/fts_language.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ struct {
{"portuguese"_sd, "pt"_sd},
{"romanian"_sd, "ro"_sd},
{"russian"_sd, "ru"_sd},
{"serbian"_sd, "sr"_sd},
{"spanish"_sd, "es"_sd},
{"swedish"_sd, "sv"_sd},
{"turkish"_sd, "tr"_sd},
Expand All @@ -109,9 +110,9 @@ struct {
{"it"_sd}, {"ita"_sd}, {"italian"_sd}, {"nl"_sd}, {"nld"_sd},
{"no"_sd}, {"nor"_sd}, {"norwegian"_sd}, {"por"_sd}, {"porter"_sd},
{"portuguese"_sd}, {"pt"_sd}, {"ro"_sd}, {"romanian"_sd}, {"ron"_sd},
{"ru"_sd}, {"rum"_sd}, {"rus"_sd}, {"russian"_sd}, {"spa"_sd},
{"spanish"_sd}, {"sv"_sd}, {"swe"_sd}, {"swedish"_sd}, {"tr"_sd},
{"tur"_sd}, {"turkish"_sd},
{"ru"_sd}, {"rum"_sd}, {"rus"_sd}, {"russian"_sd}, {"serbian"_sd},
{"spa"_sd}, {"spanish"_sd}, {"sr"_sd}, {"srp"_sd}, {"sv"_sd},
{"swe"_sd}, {"swedish"_sd}, {"tr"_sd}, {"tur"_sd}, {"turkish"_sd},
};

template <TextIndexVersion ver>
Expand Down
148 changes: 148 additions & 0 deletions src/mongo/db/fts/stop_words_serbian.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
a
i
o
u
za
na
sa
od
do
biti
ne
jesam
sam
jesi
si
je
jesmo
smo
jeste
ste
jesu
su
nijesam
nisam
nijesi
nisi
nije
nijesmo
nismo
nijeste
niste
nijesu
nisu
budem
budeš
bude
budemo
budete
budu
budes
bih
bi
bismo
biste
biše
bise
bio
bili
budimo
budite
bila
bilo
bile
ću
ćeš
će
ćemo
ćete
neću
nećeš
neće
nećemo
nećete
cu
ces
ce
cemo
cete
necu
neces
nece
necemo
necete
mogu
možeš
može
možemo
možete
mozes
moze
mozemo
mozete
а
и
о
у
за
на
са
од
до
бити
не
јесам
сам
јеси
си
је
јесмо
смо
јесте
сте
јесу
су
нијесам
нисам
нијеси
ниси
није
нијесмо
нисмо
нијесте
нисте
нијесу
нису
будем
будеш
буде
будемо
будете
буду
бих
би
бисмо
бисте
бише
био
били
будимо
будите
била
било
биле
ћу
ћеш
ће
ћемо
ћете
нећу
нећеш
неће
нећемо
нећете
могу
можеш
може
можемо
можете
4 changes: 4 additions & 0 deletions src/mongo/db/fts/unicode/codepoints_diacritic_map.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5506,6 +5506,10 @@ char32_t codepointRemoveDiacritics(char32_t codepoint) {
return 0x9f3b;
case 0x2fa1d:
return 0x2a600;
case 0x110:
return 0x44;
case 0x111:
return 0x64;
default:
return codepoint;
}
Expand Down
2 changes: 2 additions & 0 deletions src/third_party/libstemmer_c/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ mongo_cc_library(
"dist/src_c/stem_UTF_8_romanian.h",
"dist/src_c/stem_UTF_8_russian.c",
"dist/src_c/stem_UTF_8_russian.h",
"dist/src_c/stem_UTF_8_serbian.c",
"dist/src_c/stem_UTF_8_serbian.h",
"dist/src_c/stem_UTF_8_spanish.c",
"dist/src_c/stem_UTF_8_spanish.h",
"dist/src_c/stem_UTF_8_swedish.c",
Expand Down
2 changes: 2 additions & 0 deletions src/third_party/libstemmer_c/dist/MANIFEST
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ src_c/stem_UTF_8_romanian.c
src_c/stem_UTF_8_romanian.h
src_c/stem_UTF_8_russian.c
src_c/stem_UTF_8_russian.h
src_c/stem_UTF_8_serbian.c
src_c/stem_UTF_8_serbian.h
src_c/stem_UTF_8_spanish.c
src_c/stem_UTF_8_spanish.h
src_c/stem_UTF_8_swedish.c
Expand Down
7 changes: 6 additions & 1 deletion src/third_party/libstemmer_c/dist/libstemmer/modules.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
*
* Modules included by this file are: danish, dutch, english, finnish, french,
* german, hungarian, italian, norwegian, porter, portuguese, romanian,
* russian, spanish, swedish, turkish
* russian, serbian, spanish, swedish, turkish
*/

#include "../src_c/stem_ISO_8859_1_danish.h"
Expand Down Expand Up @@ -34,6 +34,7 @@
#include "../src_c/stem_UTF_8_romanian.h"
#include "../src_c/stem_KOI8_R_russian.h"
#include "../src_c/stem_UTF_8_russian.h"
#include "../src_c/stem_UTF_8_serbian.h"
#include "../src_c/stem_ISO_8859_1_spanish.h"
#include "../src_c/stem_UTF_8_spanish.h"
#include "../src_c/stem_ISO_8859_1_swedish.h"
Expand Down Expand Up @@ -154,10 +155,13 @@ static struct stemmer_modules modules[] = {
{"rus", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem},
{"russian", ENC_KOI8_R, russian_KOI8_R_create_env, russian_KOI8_R_close_env, russian_KOI8_R_stem},
{"russian", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem},
{"serbian", ENC_UTF_8, serbian_UTF_8_create_env, serbian_UTF_8_close_env, serbian_UTF_8_stem},
{"spa", ENC_ISO_8859_1, spanish_ISO_8859_1_create_env, spanish_ISO_8859_1_close_env, spanish_ISO_8859_1_stem},
{"spa", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem},
{"spanish", ENC_ISO_8859_1, spanish_ISO_8859_1_create_env, spanish_ISO_8859_1_close_env, spanish_ISO_8859_1_stem},
{"spanish", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem},
{"sr", ENC_UTF_8, serbian_UTF_8_create_env, serbian_UTF_8_close_env, serbian_UTF_8_stem},
{"srp", ENC_UTF_8, serbian_UTF_8_create_env, serbian_UTF_8_close_env, serbian_UTF_8_stem},
{"sv", ENC_ISO_8859_1, swedish_ISO_8859_1_create_env, swedish_ISO_8859_1_close_env, swedish_ISO_8859_1_stem},
{"sv", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem},
{"swe", ENC_ISO_8859_1, swedish_ISO_8859_1_create_env, swedish_ISO_8859_1_close_env, swedish_ISO_8859_1_stem},
Expand All @@ -183,6 +187,7 @@ static const char * algorithm_names[] = {
"portuguese",
"romanian",
"russian",
"serbian",
"spanish",
"swedish",
"turkish",
Expand Down
1 change: 1 addition & 0 deletions src/third_party/libstemmer_c/dist/libstemmer/modules.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ norwegian UTF_8,ISO_8859_1 norwegian,no,nor
portuguese UTF_8,ISO_8859_1 portuguese,pt,por
romanian UTF_8,ISO_8859_2 romanian,ro,rum,ron
russian UTF_8,KOI8_R russian,ru,rus
serbian UTF_8 serbian,sr,srp
spanish UTF_8,ISO_8859_1 spanish,es,esl,spa
swedish UTF_8,ISO_8859_1 swedish,sv,swe
turkish UTF_8 turkish,tr,tur
Expand Down
7 changes: 6 additions & 1 deletion src/third_party/libstemmer_c/dist/libstemmer/modules_utf8.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
*
* Modules included by this file are: danish, dutch, english, finnish, french,
* german, hungarian, italian, norwegian, porter, portuguese, romanian,
* russian, spanish, swedish, turkish
* russian, serbian, spanish, swedish, turkish
*/

#include "../src_c/stem_UTF_8_danish.h"
Expand All @@ -21,6 +21,7 @@
#include "../src_c/stem_UTF_8_portuguese.h"
#include "../src_c/stem_UTF_8_romanian.h"
#include "../src_c/stem_UTF_8_russian.h"
#include "../src_c/stem_UTF_8_serbian.h"
#include "../src_c/stem_UTF_8_spanish.h"
#include "../src_c/stem_UTF_8_swedish.h"
#include "../src_c/stem_UTF_8_turkish.h"
Expand Down Expand Up @@ -90,8 +91,11 @@ static struct stemmer_modules modules[] = {
{"rum", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem},
{"rus", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem},
{"russian", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem},
{"serbian", ENC_UTF_8, serbian_UTF_8_create_env, serbian_UTF_8_close_env, serbian_UTF_8_stem},
{"spa", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem},
{"spanish", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem},
{"sr", ENC_UTF_8, serbian_UTF_8_create_env, serbian_UTF_8_close_env, serbian_UTF_8_stem},
{"srp", ENC_UTF_8, serbian_UTF_8_create_env, serbian_UTF_8_close_env, serbian_UTF_8_stem},
{"sv", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem},
{"swe", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem},
{"swedish", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem},
Expand All @@ -114,6 +118,7 @@ static const char * algorithm_names[] = {
"portuguese",
"romanian",
"russian",
"serbian",
"spanish",
"swedish",
"turkish",
Expand Down
1 change: 1 addition & 0 deletions src/third_party/libstemmer_c/dist/libstemmer/modules_utf8.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ norwegian UTF_8 norwegian,no,nor
portuguese UTF_8 portuguese,pt,por
romanian UTF_8 romanian,ro,rum,ron
russian UTF_8 russian,ru,rus
serbian UTF_8 serbian,sr,srp
spanish UTF_8 spanish,es,esl,spa
swedish UTF_8 swedish,sv,swe
turkish UTF_8 turkish,tr,tur
Expand Down
4 changes: 3 additions & 1 deletion src/third_party/libstemmer_c/dist/mkinc.mak
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#
# Modules included by this file are: danish, dutch, english, finnish, french,
# german, hungarian, italian, norwegian, porter, portuguese, romanian,
# russian, spanish, swedish, turkish
# russian, serbian, spanish, swedish, turkish

snowball_sources= \
src_c/stem_ISO_8859_1_danish.c \
Expand Down Expand Up @@ -34,6 +34,7 @@ snowball_sources= \
src_c/stem_UTF_8_romanian.c \
src_c/stem_KOI8_R_russian.c \
src_c/stem_UTF_8_russian.c \
src_c/stem_UTF_8_serbian.c \
src_c/stem_ISO_8859_1_spanish.c \
src_c/stem_UTF_8_spanish.c \
src_c/stem_ISO_8859_1_swedish.c \
Expand Down Expand Up @@ -70,6 +71,7 @@ snowball_headers= \
src_c/stem_UTF_8_romanian.h \
src_c/stem_KOI8_R_russian.h \
src_c/stem_UTF_8_russian.h \
src_c/stem_UTF_8_serbian.h \
src_c/stem_ISO_8859_1_spanish.h \
src_c/stem_UTF_8_spanish.h \
src_c/stem_ISO_8859_1_swedish.h \
Expand Down
4 changes: 3 additions & 1 deletion src/third_party/libstemmer_c/dist/mkinc_utf8.mak
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#
# Modules included by this file are: danish, dutch, english, finnish, french,
# german, hungarian, italian, norwegian, porter, portuguese, romanian,
# russian, spanish, swedish, turkish
# russian, serbian, spanish, swedish, turkish

snowball_sources= \
src_c/stem_UTF_8_danish.c \
Expand All @@ -21,6 +21,7 @@ snowball_sources= \
src_c/stem_UTF_8_portuguese.c \
src_c/stem_UTF_8_romanian.c \
src_c/stem_UTF_8_russian.c \
src_c/stem_UTF_8_serbian.c \
src_c/stem_UTF_8_spanish.c \
src_c/stem_UTF_8_swedish.c \
src_c/stem_UTF_8_turkish.c \
Expand All @@ -42,6 +43,7 @@ snowball_headers= \
src_c/stem_UTF_8_portuguese.h \
src_c/stem_UTF_8_romanian.h \
src_c/stem_UTF_8_russian.h \
src_c/stem_UTF_8_serbian.h \
src_c/stem_UTF_8_spanish.h \
src_c/stem_UTF_8_swedish.h \
src_c/stem_UTF_8_turkish.h \
Expand Down
Loading