Skip to content

Commit c77e078

Browse files
committed
feat(hunspell): Add ref_path support for package-based dictionary loading
- Add ref_path parameter to HunspellTokenFilterFactory for package-based dictionaries
- Load dictionaries from config/packages/{packageId}/hunspell/{locale}/
- Add cache key strategy: traditional (locale) vs package-based (packageId:locale)
- Add cache management methods in HunspellService (invalidate, invalidateAll, invalidateByPackage)
- Add security validation (path traversal, separator injection, null bytes)
- Add updateable flag for hot-reload via _reload_search_analyzers
- Use Strings.hasText() and Strings.isNullOrEmpty() for validation consistency
- Add comprehensive unit tests for HunspellService and HunspellTokenFilterFactory

Signed-off-by: Ayush Sharma <118544643+shayush622@users.noreply.github.com>
Signed-off-by: shayush622 <ayush5267@gmail.com>
1 parent db61a88 commit c77e078

File tree

9 files changed

+63161
-26
lines changed

9 files changed

+63161
-26
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
3939
- Implement FieldMappingIngestionMessageMapper for pull-based ingestion ([#20729](https://github.com/opensearch-project/OpenSearch/pull/20729))
4040
- Added support of WarmerRefreshListener in NRTReplicationEngine to trigger warmer after replication on replica shards ([#20650](https://github.com/opensearch-project/OpenSearch/pull/20650))
4141
- WLM group custom search settings - groundwork and timeout ([#20536](https://github.com/opensearch-project/OpenSearch/issues/20536))
42+
- Add ref_path support for package-based hunspell dictionary loading ([#20840](https://github.com/opensearch-project/OpenSearch/pull/20840))
4243

4344
### Changed
4445
- Make telemetry `Tags` immutable ([#20788](https://github.com/opensearch-project/OpenSearch/pull/20788))

server/src/main/java/org/opensearch/index/analysis/HunspellTokenFilterFactory.java

Lines changed: 128 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -43,25 +43,77 @@
4343
/**
4444
* The token filter factory for the hunspell analyzer
4545
*
46+
* Supports hot-reload when used with {@code updateable: true} setting.
47+
* The dictionary is loaded from either:
48+
* <ul>
49+
* <li>A ref_path (package ID, e.g., "pkg-1234") combined with locale for package-based dictionaries</li>
50+
* <li>A locale (e.g., "en_US") for traditional hunspell dictionaries from config/hunspell/</li>
51+
* </ul>
52+
*
53+
* <h2>Usage Examples:</h2>
54+
* <pre>
55+
* // Traditional locale-based (loads from config/hunspell/en_US/)
56+
* {
57+
* "type": "hunspell",
58+
* "locale": "en_US"
59+
* }
60+
*
61+
* // Package-based (loads from config/packages/pkg-1234/hunspell/en_US/)
62+
* {
63+
* "type": "hunspell",
64+
* "ref_path": "pkg-1234",
65+
* "locale": "en_US"
66+
* }
67+
* </pre>
68+
*
69+
*
4670
* @opensearch.internal
4771
*/
4872
public class HunspellTokenFilterFactory extends AbstractTokenFilterFactory {
4973

5074
private final Dictionary dictionary;
5175
private final boolean dedup;
5276
private final boolean longestOnly;
77+
private final AnalysisMode analysisMode;
5378

5479
public HunspellTokenFilterFactory(IndexSettings indexSettings, String name, Settings settings, HunspellService hunspellService) {
5580
super(indexSettings, name, settings);
81+
// Check for updateable flag - enables hot-reload support (same pattern as SynonymTokenFilterFactory)
82+
boolean updateable = settings.getAsBoolean("updateable", false);
83+
this.analysisMode = updateable ? AnalysisMode.SEARCH_TIME : AnalysisMode.ALL;
5684

85+
// Get both ref_path and locale parameters
86+
String refPath = settings.get("ref_path"); // Package ID only (optional)
5787
String locale = settings.get("locale", settings.get("language", settings.get("lang", null)));
58-
if (locale == null) {
59-
throw new IllegalArgumentException("missing [locale | language | lang] configuration for hunspell token filter");
60-
}
6188

62-
dictionary = hunspellService.getDictionary(locale);
63-
if (dictionary == null) {
64-
throw new IllegalArgumentException(String.format(Locale.ROOT, "Unknown hunspell dictionary for locale [%s]", locale));
89+
if (refPath != null) {
90+
// Package-based loading: ref_path (package ID) + locale (required)
91+
if (locale == null) {
92+
throw new IllegalArgumentException("When using ref_path, the 'locale' parameter is required for hunspell token filter");
93+
}
94+
95+
// Validate ref_path and locale are safe package/locale identifiers
96+
validatePackageIdentifier(refPath, "ref_path");
97+
validatePackageIdentifier(locale, "locale");
98+
99+
// Load from package directory: config/packages/{ref_path}/hunspell/{locale}/
100+
dictionary = hunspellService.getDictionaryFromPackage(refPath, locale);
101+
if (dictionary == null) {
102+
throw new IllegalArgumentException(
103+
String.format(Locale.ROOT, "Could not find hunspell dictionary for locale [%s] in package [%s]", locale, refPath)
104+
);
105+
}
106+
} else if (locale != null) {
107+
// Traditional locale-based loading (backward compatible)
108+
// Loads from config/hunspell/{locale}/
109+
// Validate locale to prevent path traversal and cache key ambiguity
110+
validatePackageIdentifier(locale, "locale");
111+
dictionary = hunspellService.getDictionary(locale);
112+
if (dictionary == null) {
113+
throw new IllegalArgumentException(String.format(Locale.ROOT, "Unknown hunspell dictionary for locale [%s]", locale));
114+
}
115+
} else {
116+
throw new IllegalArgumentException("missing [locale | language | lang] configuration for hunspell token filter");
65117
}
66118

67119
dedup = settings.getAsBoolean("dedup", true);
@@ -73,6 +125,16 @@ public TokenStream create(TokenStream tokenStream) {
73125
return new HunspellStemFilter(tokenStream, dictionary, dedup, longestOnly);
74126
}
75127

128+
/**
129+
* Returns the analysis mode for this filter.
130+
* When {@code updateable: true} is set, returns {@code SEARCH_TIME} which enables hot-reload
131+
* via the _reload_search_analyzers API.
132+
*/
133+
@Override
134+
public AnalysisMode getAnalysisMode() {
135+
return this.analysisMode;
136+
}
137+
76138
public boolean dedup() {
77139
return dedup;
78140
}
@@ -81,4 +143,64 @@ public boolean longestOnly() {
81143
return longestOnly;
82144
}
83145

146+
/**
147+
* Validates that a package identifier or locale is safe and doesn't contain
148+
* path traversal sequences, separators, or other dangerous characters.
149+
*
150+
* @param value The value to validate (package ID or locale)
151+
* @param paramName The parameter name for error messages
152+
* @throws IllegalArgumentException if validation fails
153+
*/
154+
private static void validatePackageIdentifier(String value, String paramName) {
155+
if (value == null || value.isEmpty()) {
156+
return; // Null/empty handled elsewhere
157+
}
158+
159+
// Reject path traversal attempts
160+
if (value.equals(".")
161+
|| value.equals("..")
162+
|| value.contains("./")
163+
|| value.contains("../")
164+
|| value.contains("\\.")
165+
|| value.contains("\\..")
166+
|| value.startsWith(".")
167+
|| value.endsWith(".")) {
168+
throw new IllegalArgumentException(
169+
String.format(Locale.ROOT, "Invalid %s: [%s]. Path traversal sequences (., ..) are not allowed.", paramName, value)
170+
);
171+
}
172+
173+
// Reject any path separators (Unix and Windows)
174+
if (value.contains("/") || value.contains("\\")) {
175+
throw new IllegalArgumentException(
176+
String.format(
177+
Locale.ROOT,
178+
"Invalid %s: [%s]. Path separators (/, \\) are not allowed. "
179+
+ "Use ref_path for package ID and locale for dictionary locale.",
180+
paramName,
181+
value
182+
)
183+
);
184+
}
185+
186+
// Reject cache key separator to prevent cache key injection
187+
if (value.contains(":")) {
188+
throw new IllegalArgumentException(
189+
String.format(
190+
Locale.ROOT,
191+
"Invalid %s: [%s]. Colon (:) is not allowed as it is used as cache key separator.",
192+
paramName,
193+
value
194+
)
195+
);
196+
}
197+
198+
// Reject null bytes (security)
199+
if (value.contains("\0")) {
200+
throw new IllegalArgumentException(
201+
String.format(Locale.ROOT, "Invalid %s: [%s]. Null bytes are not allowed.", paramName, value)
202+
);
203+
}
204+
}
205+
84206
}

server/src/main/java/org/opensearch/indices/analysis/AnalysisModule.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ public AnalysisModule(Environment environment, List<AnalysisPlugin> plugins) thr
119119
);
120120
}
121121

122-
HunspellService getHunspellService() {
122+
public HunspellService getHunspellService() {
123123
return hunspellService;
124124
}
125125

0 commit comments

Comments
 (0)