Skip to content

Commit 0e293c9

Browse files
committed
Add hunspell ref_path feature with cache invalidation support
- Add INDEX_REF_PATH_SETTING for package-based hunspell dictionaries - Add RestHunspellCacheInvalidateAction for cache invalidation endpoint - Update HunspellService with cache management methods - Add ref_path validation in MetadataCreateIndexService
1 parent d56fa55 commit 0e293c9

File tree

10 files changed

+834
-28
lines changed

10 files changed

+834
-28
lines changed

mise.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
[tools]
2+
java = "corretto-21"

server/src/main/java/org/opensearch/cluster/metadata/MetadataCreateIndexService.java

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@
117117

118118
import java.io.IOException;
119119
import java.io.UnsupportedEncodingException;
120+
import java.nio.file.Files;
120121
import java.nio.file.Path;
121122
import java.time.Instant;
122123
import java.util.ArrayList;
@@ -1701,6 +1702,7 @@ private static void validateErrors(String indexName, List<String> validationErro
17011702

17021703
List<String> getIndexSettingsValidationErrors(final Settings settings, final boolean forbidPrivateIndexSettings, String indexName) {
17031704
List<String> validationErrors = getIndexSettingsValidationErrors(settings, forbidPrivateIndexSettings, Optional.of(indexName));
1705+
validationErrors.addAll(validateRefPath(settings, env.configDir()));
17041706
return validationErrors;
17051707
}
17061708

@@ -1732,6 +1734,38 @@ List<String> getIndexSettingsValidationErrors(
17321734
}
17331735
return validationErrors;
17341736
}
1737+
/**
1738+
* Validates the ref_path setting if present.
1739+
* Checks that the path format is valid and the directory exists.
1740+
*
1741+
* @param settings the index settings
1742+
* @param configDir the config directory path
1743+
* @return a list containing validation errors or an empty list if valid
1744+
*/
1745+
private List<String> validateRefPath(Settings settings, Path configDir) {
1746+
List<String> validationErrors = new ArrayList<>();
1747+
String refPath = settings.get(IndexSettings.INDEX_REF_PATH_SETTING.getKey());
1748+
1749+
if (refPath != null && !refPath.isEmpty()) {
1750+
try {
1751+
// Validate format: should be in packages/<package_id> format
1752+
if (!refPath.startsWith("packages/")) {
1753+
validationErrors.add("ref_path [" + refPath + "] must start with 'packages/'");
1754+
return validationErrors;
1755+
}
1756+
1757+
// Resolve and check if path exists
1758+
Path resolvedPath = configDir.resolve(refPath).normalize();
1759+
if (!Files.isDirectory(resolvedPath)) {
1760+
validationErrors.add("ref_path [" + refPath + "] does not exist or is not a directory");
1761+
}
1762+
} catch (Exception e) {
1763+
validationErrors.add("invalid ref_path [" + refPath + "]: " + e.getMessage());
1764+
}
1765+
}
1766+
1767+
return validationErrors;
1768+
}
17351769

17361770
private static List<String> validatePrivateSettingsNotExplicitlySet(Settings settings, IndexScopedSettings indexScopedSettings) {
17371771
List<String> validationErrors = new ArrayList<>();

server/src/main/java/org/opensearch/common/settings/IndexScopedSettings.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings {
146146
IndexSortConfig.INDEX_SORT_ORDER_SETTING,
147147
IndexSortConfig.INDEX_SORT_MISSING_SETTING,
148148
IndexSortConfig.INDEX_SORT_MODE_SETTING,
149+
IndexSettings.INDEX_REF_PATH_SETTING,
149150
IndexSettings.INDEX_TRANSLOG_DURABILITY_SETTING,
150151
IndexSettings.INDEX_TRANSLOG_READ_FORWARD_SETTING,
151152
IndexSettings.INDEX_WARMER_ENABLED_SETTING,

server/src/main/java/org/opensearch/index/IndexSettings.java

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -917,6 +917,13 @@ public static IndexMergePolicy fromString(String text) {
917917
Property.Dynamic
918918
);
919919

920+
public static final Setting<String> INDEX_REF_PATH_SETTING = Setting.simpleString(
921+
"index.ref_path",
922+
"",
923+
Property.IndexScope,
924+
Property.Dynamic
925+
);
926+
920927
private final Index index;
921928
private final Version version;
922929
private final Logger logger;
@@ -974,6 +981,7 @@ public static IndexMergePolicy fromString(String text) {
974981
private volatile boolean allowDerivedField;
975982
private final boolean derivedSourceEnabled;
976983
private volatile boolean derivedSourceEnabledForTranslog;
984+
private volatile String refPath;
977985

978986
/**
979987
* The maximum age of a retention lease before it is considered expired.
@@ -1168,6 +1176,7 @@ public IndexSettings(final IndexMetadata indexMetadata, final Settings nodeSetti
11681176
this.defaultAllowUnmappedFields = scopedSettings.get(ALLOW_UNMAPPED);
11691177
this.allowDerivedField = scopedSettings.get(ALLOW_DERIVED_FIELDS);
11701178
this.durability = scopedSettings.get(INDEX_TRANSLOG_DURABILITY_SETTING);
1179+
this.refPath = scopedSettings.get(INDEX_REF_PATH_SETTING);
11711180
this.translogReadForward = INDEX_TRANSLOG_READ_FORWARD_SETTING.get(settings);
11721181
defaultFields = scopedSettings.get(DEFAULT_FIELD_SETTING);
11731182
syncInterval = INDEX_TRANSLOG_SYNC_INTERVAL_SETTING.get(settings);
@@ -1381,6 +1390,7 @@ public IndexSettings(final IndexMetadata indexMetadata, final Settings nodeSetti
13811390
this::setRemoteStoreTranslogRepository
13821391
);
13831392
scopedSettings.addSettingsUpdateConsumer(StarTreeIndexSettings.STAR_TREE_SEARCH_ENABLED_SETTING, this::setStarTreeIndexEnabled);
1393+
scopedSettings.addSettingsUpdateConsumer(INDEX_REF_PATH_SETTING, this::setRefPath);
13841394
}
13851395

13861396
private void setSearchIdleAfter(TimeValue searchIdleAfter) {
@@ -2002,6 +2012,14 @@ public boolean getStarTreeIndexEnabled() {
20022012
return isStarTreeIndexEnabled;
20032013
}
20042014

2015+
private void setRefPath(String refPath){
2016+
this.refPath = refPath;
2017+
}
2018+
2019+
public String getRefPath(){
2020+
return refPath;
2021+
}
2022+
20052023
/**
20062024
* Returns the merge policy that should be used for this index.
20072025
*

server/src/main/java/org/opensearch/index/analysis/HunspellTokenFilterFactory.java

Lines changed: 83 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -35,13 +35,38 @@
3535
import org.apache.lucene.analysis.hunspell.Dictionary;
3636
import org.apache.lucene.analysis.hunspell.HunspellStemFilter;
3737
import org.opensearch.common.settings.Settings;
38+
import org.opensearch.env.Environment;
3839
import org.opensearch.index.IndexSettings;
3940
import org.opensearch.indices.analysis.HunspellService;
4041

4142
import java.util.Locale;
4243

4344
/**
4445
* The token filter factory for the hunspell analyzer
46+
* <p>
47+
* Supports hot-reload when used with {@code updateable: true} setting.
48+
* The dictionary is loaded from either:
49+
* <ul>
50+
* <li>A ref_path (package ID, e.g., "pkg-1234") combined with locale for package-based dictionaries</li>
51+
* <li>A locale (e.g., "en_US") for traditional hunspell dictionaries from config/hunspell/</li>
52+
* </ul>
53+
*
54+
* <h2>Usage Examples:</h2>
55+
* <pre>
56+
* // Traditional locale-based (loads from config/hunspell/en_US/)
57+
* {
58+
* "type": "hunspell",
59+
* "locale": "en_US"
60+
* }
61+
*
62+
* // Package-based (loads from config/packages/pkg-1234/hunspell/en_US/)
63+
* {
64+
* "type": "hunspell",
65+
* "ref_path": "pkg-1234",
66+
* "locale": "en_US"
67+
* }
68+
* </pre>
69+
*
4570
*
4671
* @opensearch.internal
4772
*/
@@ -50,18 +75,58 @@ public class HunspellTokenFilterFactory extends AbstractTokenFilterFactory {
5075
private final Dictionary dictionary;
5176
private final boolean dedup;
5277
private final boolean longestOnly;
78+
private final AnalysisMode analysisMode;
5379

54-
public HunspellTokenFilterFactory(IndexSettings indexSettings, String name, Settings settings, HunspellService hunspellService) {
80+
public HunspellTokenFilterFactory(IndexSettings indexSettings, String name, Settings settings, HunspellService hunspellService, Environment env) {
5581
super(indexSettings, name, settings);
82+
// Check for updateable flag - enables hot-reload support (same pattern as SynonymTokenFilterFactory)
83+
boolean updateable = settings.getAsBoolean("updateable", false);
84+
this.analysisMode = updateable ? AnalysisMode.SEARCH_TIME : AnalysisMode.ALL;
5685

86+
// Get both ref_path and locale parameters
87+
String refPath = settings.get("ref_path"); // Package ID only (optional)
5788
String locale = settings.get("locale", settings.get("language", settings.get("lang", null)));
58-
if (locale == null) {
59-
throw new IllegalArgumentException("missing [locale | language | lang] configuration for hunspell token filter");
60-
}
61-
62-
dictionary = hunspellService.getDictionary(locale);
63-
if (dictionary == null) {
64-
throw new IllegalArgumentException(String.format(Locale.ROOT, "Unknown hunspell dictionary for locale [%s]", locale));
89+
90+
if (refPath != null) {
91+
// Package-based loading: ref_path (package ID) + locale (required)
92+
if (locale == null) {
93+
throw new IllegalArgumentException(
94+
"When using ref_path, the 'locale' parameter is required for hunspell token filter"
95+
);
96+
}
97+
98+
// Validate ref_path is just package ID (no slashes allowed)
99+
if (refPath.contains("/")) {
100+
throw new IllegalArgumentException(
101+
String.format(Locale.ROOT,
102+
"ref_path should contain only the package ID, not a full path. Got: [%s]. " +
103+
"Use ref_path for package ID and locale for the dictionary locale.",
104+
refPath)
105+
);
106+
}
107+
108+
// Load from package directory: config/packages/{ref_path}/hunspell/{locale}/
109+
dictionary = hunspellService.getDictionaryFromPackage(refPath, locale, env);
110+
if (dictionary == null) {
111+
throw new IllegalArgumentException(
112+
String.format(Locale.ROOT,
113+
"Could not find hunspell dictionary for locale [%s] in package [%s]",
114+
locale, refPath)
115+
);
116+
}
117+
} else if (locale != null) {
118+
// Traditional locale-based loading (backward compatible)
119+
// Loads from config/hunspell/{locale}/
120+
dictionary = hunspellService.getDictionary(locale);
121+
if (dictionary == null) {
122+
throw new IllegalArgumentException(
123+
String.format(Locale.ROOT, "Unknown hunspell dictionary for locale [%s]", locale)
124+
);
125+
}
126+
} else {
127+
throw new IllegalArgumentException(
128+
"missing [locale | language | lang] configuration for hunspell token filter"
129+
);
65130
}
66131

67132
dedup = settings.getAsBoolean("dedup", true);
@@ -73,6 +138,16 @@ public TokenStream create(TokenStream tokenStream) {
73138
return new HunspellStemFilter(tokenStream, dictionary, dedup, longestOnly);
74139
}
75140

141+
/**
142+
* Returns the analysis mode for this filter.
143+
* When {@code updateable: true} is set, returns {@code SEARCH_TIME} which enables hot-reload
144+
* via the _reload_search_analyzers API.
145+
*/
146+
@Override
147+
public AnalysisMode getAnalysisMode() {
148+
return this.analysisMode;
149+
}
150+
76151
public boolean dedup() {
77152
return dedup;
78153
}

server/src/main/java/org/opensearch/indices/analysis/AnalysisModule.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ public AnalysisModule(Environment environment, List<AnalysisPlugin> plugins) thr
119119
);
120120
}
121121

122-
HunspellService getHunspellService() {
122+
public HunspellService getHunspellService() {
123123
return hunspellService;
124124
}
125125

@@ -161,7 +161,7 @@ public boolean requiresAnalysisSettings() {
161161
tokenFilters.register(
162162
"hunspell",
163163
requiresAnalysisSettings(
164-
(indexSettings, env, name, settings) -> new HunspellTokenFilterFactory(indexSettings, name, settings, hunspellService)
164+
(indexSettings, env, name, settings) -> new HunspellTokenFilterFactory(indexSettings, name, settings, hunspellService, env)
165165
)
166166
);
167167

0 commit comments

Comments
 (0)