4343/**
4444 * The token filter factory for the hunspell analyzer
4545 *
46+ * Supports hot-reload when used with {@code updateable: true} setting.
47+ * The dictionary is loaded from either:
48+ * <ul>
49+ * <li>A ref_path (package ID, e.g., "pkg-1234") combined with locale for package-based dictionaries</li>
50+ * <li>A locale (e.g., "en_US") for traditional hunspell dictionaries from config/hunspell/</li>
51+ * </ul>
52+ *
53+ * <h2>Usage Examples:</h2>
54+ * <pre>
55+ * // Traditional locale-based (loads from config/hunspell/en_US/)
56+ * {
57+ * "type": "hunspell",
58+ * "locale": "en_US"
59+ * }
60+ *
61+ * // Package-based (loads from config/packages/pkg-1234/hunspell/en_US/)
62+ * {
63+ * "type": "hunspell",
64+ * "ref_path": "pkg-1234",
65+ * "locale": "en_US"
66+ * }
67+ * </pre>
68+ *
69+ *
4670 * @opensearch.internal
4771 */
4872public class HunspellTokenFilterFactory extends AbstractTokenFilterFactory {
4973
5074 private final Dictionary dictionary ;
5175 private final boolean dedup ;
5276 private final boolean longestOnly ;
77+ private final AnalysisMode analysisMode ;
5378
5479 public HunspellTokenFilterFactory (IndexSettings indexSettings , String name , Settings settings , HunspellService hunspellService ) {
5580 super (indexSettings , name , settings );
81+ // Check for updateable flag - enables hot-reload support (same pattern as SynonymTokenFilterFactory)
82+ boolean updateable = settings .getAsBoolean ("updateable" , false );
83+ this .analysisMode = updateable ? AnalysisMode .SEARCH_TIME : AnalysisMode .ALL ;
5684
85+ // Get both ref_path and locale parameters
86+ String refPath = settings .get ("ref_path" ); // Package ID only (optional)
5787 String locale = settings .get ("locale" , settings .get ("language" , settings .get ("lang" , null )));
58- if (locale == null ) {
59- throw new IllegalArgumentException ("missing [locale | language | lang] configuration for hunspell token filter" );
60- }
6188
62- dictionary = hunspellService .getDictionary (locale );
63- if (dictionary == null ) {
64- throw new IllegalArgumentException (String .format (Locale .ROOT , "Unknown hunspell dictionary for locale [%s]" , locale ));
89+ if (refPath != null ) {
90+ // Package-based loading: ref_path (package ID) + locale (required)
91+ if (locale == null ) {
92+ throw new IllegalArgumentException ("When using ref_path, the 'locale' parameter is required for hunspell token filter" );
93+ }
94+
95+ // Validate ref_path and locale are safe package/locale identifiers
96+ validatePackageIdentifier (refPath , "ref_path" );
97+ validatePackageIdentifier (locale , "locale" );
98+
99+ // Load from package directory: config/packages/{ref_path}/hunspell/{locale}/
100+ dictionary = hunspellService .getDictionaryFromPackage (refPath , locale );
101+ if (dictionary == null ) {
102+ throw new IllegalArgumentException (
103+ String .format (Locale .ROOT , "Could not find hunspell dictionary for locale [%s] in package [%s]" , locale , refPath )
104+ );
105+ }
106+ } else if (locale != null ) {
107+ // Traditional locale-based loading (backward compatible)
108+ // Loads from config/hunspell/{locale}/
109+ // Validate locale to prevent path traversal and cache key ambiguity
110+ validatePackageIdentifier (locale , "locale" );
111+ dictionary = hunspellService .getDictionary (locale );
112+ if (dictionary == null ) {
113+ throw new IllegalArgumentException (String .format (Locale .ROOT , "Unknown hunspell dictionary for locale [%s]" , locale ));
114+ }
115+ } else {
116+ throw new IllegalArgumentException ("missing [locale | language | lang] configuration for hunspell token filter" );
65117 }
66118
67119 dedup = settings .getAsBoolean ("dedup" , true );
@@ -73,6 +125,16 @@ public TokenStream create(TokenStream tokenStream) {
73125 return new HunspellStemFilter (tokenStream , dictionary , dedup , longestOnly );
74126 }
75127
128+ /**
129+ * Returns the analysis mode for this filter.
130+ * When {@code updateable: true} is set, returns {@code SEARCH_TIME} which enables hot-reload
131+ * via the _reload_search_analyzers API.
132+ */
133+ @ Override
134+ public AnalysisMode getAnalysisMode () {
135+ return this .analysisMode ;
136+ }
137+
76138 public boolean dedup () {
77139 return dedup ;
78140 }
@@ -81,4 +143,64 @@ public boolean longestOnly() {
81143 return longestOnly ;
82144 }
83145
146+ /**
147+ * Validates that a package identifier or locale is safe and doesn't contain
148+ * path traversal sequences, separators, or other dangerous characters.
149+ *
150+ * @param value The value to validate (package ID or locale)
151+ * @param paramName The parameter name for error messages
152+ * @throws IllegalArgumentException if validation fails
153+ */
154+ private static void validatePackageIdentifier (String value , String paramName ) {
155+ if (value == null || value .isEmpty ()) {
156+ return ; // Null/empty handled elsewhere
157+ }
158+
159+ // Reject path traversal attempts
160+ if (value .equals ("." )
161+ || value .equals (".." )
162+ || value .contains ("./" )
163+ || value .contains ("../" )
164+ || value .contains ("\\ ." )
165+ || value .contains ("\\ .." )
166+ || value .startsWith ("." )
167+ || value .endsWith ("." )) {
168+ throw new IllegalArgumentException (
169+ String .format (Locale .ROOT , "Invalid %s: [%s]. Path traversal sequences (., ..) are not allowed." , paramName , value )
170+ );
171+ }
172+
173+ // Reject any path separators (Unix and Windows)
174+ if (value .contains ("/" ) || value .contains ("\\ " )) {
175+ throw new IllegalArgumentException (
176+ String .format (
177+ Locale .ROOT ,
178+ "Invalid %s: [%s]. Path separators (/, \\ ) are not allowed. "
179+ + "Use ref_path for package ID and locale for dictionary locale." ,
180+ paramName ,
181+ value
182+ )
183+ );
184+ }
185+
186+ // Reject cache key separator to prevent cache key injection
187+ if (value .contains (":" )) {
188+ throw new IllegalArgumentException (
189+ String .format (
190+ Locale .ROOT ,
191+ "Invalid %s: [%s]. Colon (:) is not allowed as it is used as cache key separator." ,
192+ paramName ,
193+ value
194+ )
195+ );
196+ }
197+
198+ // Reject null bytes (security)
199+ if (value .contains ("\0 " )) {
200+ throw new IllegalArgumentException (
201+ String .format (Locale .ROOT , "Invalid %s: [%s]. Null bytes are not allowed." , paramName , value )
202+ );
203+ }
204+ }
205+
84206}
0 commit comments