/hunspell/en_US}
- * directory.
- *
- * The following settings can be set for each dictionary:
+ * Serves as a node level registry for hunspell dictionaries. This service supports loading dictionaries from:
+ *
+ * - Traditional location: {@code config/hunspell/{locale}/} (e.g., config/hunspell/en_US/)
+ * - Package-based location: {@code config/analyzers/{packageId}/hunspell/{locale}/} (e.g., config/analyzers/pkg-1234/hunspell/en_US/)
+ *
+ *
+ * Cache Key Strategy:
+ *
+ * - Traditional dictionaries: Cache key = locale (e.g., "en_US")
+ * - Package-based dictionaries: Cache key = "{packageId}:{locale}" (e.g., "pkg-1234:en_US")
+ *
+ *
+ * The following settings can be set for each dictionary:
*
* - {@code ignore_case} - If true, dictionary matching will be case insensitive (defaults to {@code false})
* - {@code strict_affix_parsing} - Determines whether errors while reading an affix rules file will cause an exception or simply be ignored
* (defaults to {@code true})
*
- *
- * These settings can either be configured as node level configuration, such as:
- *
+ *
+ *
+ * These settings can either be configured as node level configuration, such as:
*
* indices.analysis.hunspell.dictionary.en_US.ignore_case: true
* indices.analysis.hunspell.dictionary.en_US.strict_affix_parsing: false
*
- *
- * or, as dedicated configuration per dictionary, placed in a {@code settings.yml} file under the dictionary directory. For
- * example, the following can be the content of the {@code /hunspell/en_US/settings.yml} file:
- *
- *
- * ignore_case: true
- * strict_affix_parsing: false
- *
+ *
+ * or, as dedicated configuration per dictionary, placed in a {@code settings.yml} file under the dictionary directory.
*
* @see org.opensearch.index.analysis.HunspellTokenFilterFactory
*
@@ -112,16 +113,18 @@ public class HunspellService {
private final Map knownDictionaries;
private final boolean defaultIgnoreCase;
private final Path hunspellDir;
+ private final Environment env;
private final Function loadingFunction;
public HunspellService(final Settings settings, final Environment env, final Map knownDictionaries)
throws IOException {
this.knownDictionaries = Collections.unmodifiableMap(knownDictionaries);
+ this.env = env;
this.hunspellDir = resolveHunspellDirectory(env);
this.defaultIgnoreCase = HUNSPELL_IGNORE_CASE.get(settings);
this.loadingFunction = (locale) -> {
try {
- return loadDictionary(locale, settings, env);
+ return loadDictionary(locale, settings, env, hunspellDir);
} catch (Exception e) {
logger.error("Failed to load hunspell dictionary for locale: " + locale, e);
throw new IllegalStateException("Failed to load hunspell dictionary for locale: " + locale);
@@ -135,8 +138,10 @@ public HunspellService(final Settings settings, final Environment env, final Map
/**
* Returns the hunspell dictionary for the given locale.
+ * Loads from traditional location: config/hunspell/{locale}/
*
- * @param locale The name of the locale
+ * @param locale The name of the locale (e.g., "en_US")
+ * @return The loaded Dictionary
*/
public Dictionary getDictionary(String locale) {
Dictionary dictionary = knownDictionaries.get(locale);
@@ -146,6 +151,141 @@ public Dictionary getDictionary(String locale) {
return dictionary;
}
+ /**
+ * Returns the hunspell dictionary from a package directory, loading it on first use.
+ * Loads from package location: config/analyzers/{packageId}/hunspell/{locale}/
+ *
+ * The dictionary is looked up in the {@code dictionaries} map under the cache key
+ * "{packageId}:{locale}" (e.g., "pkg-1234:en_US") and is only loaded when no entry exists yet,
+ * so repeated calls with the same arguments return the instance that was loaded first.
+ *
+ * @param packageId The package ID (e.g., "pkg-1234")
+ * @param locale The locale (e.g., "en_US")
+ * @return The loaded Dictionary
+ * @throws IllegalArgumentException if packageId or locale is null or empty
+ * @throws IllegalStateException if loading fails for any reason; every exception raised while loading
+ * (a rejected identifier, a missing package/hunspell/locale directory, or dictionary files that
+ * cannot be read) is wrapped and attached as the cause
+ */
+ public Dictionary getDictionaryFromPackage(String packageId, String locale) {
+ if (Strings.isNullOrEmpty(packageId)) {
+ throw new IllegalArgumentException("packageId cannot be null or empty");
+ }
+ if (Strings.isNullOrEmpty(locale)) {
+ throw new IllegalArgumentException("locale cannot be null or empty");
+ }
+
+ String cacheKey = buildPackageCacheKey(packageId, locale);
+
+ return dictionaries.computeIfAbsent(cacheKey, (key) -> {
+ try {
+ return loadDictionaryFromPackage(packageId, locale);
+ } catch (Exception e) {
+
+ throw new IllegalStateException(
+ String.format(Locale.ROOT, "Failed to load hunspell dictionary for package [%s] locale [%s]", packageId, locale),
+ e
+ );
+ }
+ });
+ }
+
+ /**
+ * Loads a hunspell dictionary from a package directory.
+ * Expects hunspell files at: config/analyzers/{packageId}/hunspell/{locale}/
+ *
+ * Both identifiers are validated twice: first as raw strings (no path separators, no ".."), then
+ * again after resolution, where the package directory must be a direct child of config/analyzers/
+ * and the locale directory must be a direct child of the package's hunspell directory.
+ *
+ * @param packageId The package identifier
+ * @param locale The locale (e.g., "en_US")
+ * @return The loaded Dictionary
+ * @throws IllegalArgumentException if packageId or locale contains a path separator or "..", or does not
+ * resolve to a direct child of its expected parent directory
+ * @throws OpenSearchException if the package, hunspell or locale directory is missing or not accessible
+ * @throws Exception if loading the dictionary files fails
+ */
+ private Dictionary loadDictionaryFromPackage(String packageId, String locale) throws Exception {
+ // Validate raw inputs before path resolution (defense-in-depth, caller should also validate)
+ if (packageId.contains("/") || packageId.contains("\\") || packageId.contains("..")) {
+ throw new IllegalArgumentException(
+ String.format(Locale.ROOT, "Invalid package ID: [%s]. Must not contain path separators or '..' sequences.", packageId)
+ );
+ }
+ if (locale.contains("/") || locale.contains("\\") || locale.contains("..")) {
+ throw new IllegalArgumentException(
+ String.format(Locale.ROOT, "Invalid locale: [%s]. Must not contain path separators or '..' sequences.", locale)
+ );
+ }
+
+ // Resolve analyzers base directory: config/analyzers/
+ Path analyzersBaseDir = env.configDir().resolve("analyzers");
+
+ // Resolve package directory: config/analyzers/{packageId}/
+ Path packageDir = analyzersBaseDir.resolve(packageId);
+
+ // Security check: ensure path stays under config/analyzers/ (prevent path traversal attacks)
+ // Both paths must be converted to absolute and normalized before comparison
+ // Defense-in-depth: raw input validation above should prevent this, but we verify
+ // the resolved path as a secondary safeguard against any future code path changes
+ Path analyzersBaseDirAbsolute = analyzersBaseDir.toAbsolutePath().normalize();
+ Path packageDirAbsolute = packageDir.toAbsolutePath().normalize();
+ if (!packageDirAbsolute.startsWith(analyzersBaseDirAbsolute)) {
+ throw new IllegalArgumentException(
+ String.format(Locale.ROOT, "Package path must be under config/analyzers directory. Package: [%s]", packageId)
+ );
+ }
+
+ // Additional check: ensure the resolved package directory is exactly one level under analyzers/
+ // This rejects values such as "." that normalize to the analyzers directory itself.
+ // The comparison is written base-first so a null parent cannot cause a NullPointerException.
+ if (!analyzersBaseDirAbsolute.equals(packageDirAbsolute.getParent())) {
+ throw new IllegalArgumentException(
+ String.format(Locale.ROOT, "Invalid package ID: [%s]. Package ID cannot contain path traversal sequences.", packageId)
+ );
+ }
+
+ // Check if package directory exists
+ if (!Files.isDirectory(packageDir)) {
+ throw new OpenSearchException(
+ String.format(Locale.ROOT, "Package directory not found: [%s]. Expected at: %s", packageId, packageDir)
+ );
+ }
+
+ // Locate the fixed 'hunspell' subdirectory within the package
+ Path packageHunspellDir = packageDir.resolve("hunspell");
+ if (!Files.isDirectory(packageHunspellDir)) {
+ throw new OpenSearchException(
+ String.format(
+ Locale.ROOT,
+ "Hunspell directory not found in package [%s]. " + "Expected 'hunspell' subdirectory at: %s",
+ packageId,
+ packageHunspellDir
+ )
+ );
+ }
+
+ // Resolve locale directory within hunspell
+ Path dicDir = packageHunspellDir.resolve(locale);
+
+ // Security check: ensure locale path doesn't escape hunspell directory (prevent path traversal)
+ Path hunspellDirAbsolute = packageHunspellDir.toAbsolutePath().normalize();
+ Path dicDirAbsolute = dicDir.toAbsolutePath().normalize();
+ if (!dicDirAbsolute.startsWith(hunspellDirAbsolute)) {
+ throw new IllegalArgumentException(
+ String.format(Locale.ROOT, "Locale path must be under hunspell directory. Locale: [%s]", locale)
+ );
+ }
+
+ // Additional check, mirroring the package ID check above: the locale directory must be exactly one
+ // level under hunspell/. Without it a locale of "." would normalize to the hunspell directory itself
+ // and still pass the startsWith test.
+ if (!hunspellDirAbsolute.equals(dicDirAbsolute.getParent())) {
+ throw new IllegalArgumentException(
+ String.format(Locale.ROOT, "Invalid locale: [%s]. Locale must name a directory directly under the hunspell directory.", locale)
+ );
+ }
+
+ if (logger.isDebugEnabled()) {
+ logger.debug("Loading hunspell dictionary from package [{}] locale [{}] at [{}]...", packageId, locale, dicDirAbsolute);
+ }
+
+ if (!FileSystemUtils.isAccessibleDirectory(dicDir, logger)) {
+ throw new OpenSearchException(
+ String.format(
+ Locale.ROOT,
+ "Locale [%s] not found in package [%s]. " + "Expected directory at: %s",
+ locale,
+ packageId,
+ dicDirAbsolute
+ )
+ );
+ }
+
+ // Delegate to loadDictionary with the package's hunspell directory as base.
+ // Settings.EMPTY is passed as the node settings for package-based loading.
+ return loadDictionary(locale, Settings.EMPTY, env, packageHunspellDir);
+ }
+
/**
* Resolves the traditional hunspell dictionary root: the {@code hunspell} directory under the
* node's config directory. The path is only computed here; its existence is not checked.
*
* @param env The node environment whose config directory is used
* @return The {@code hunspell} path below the config directory
*/
private Path resolveHunspellDirectory(Environment env) {
return env.configDir().resolve("hunspell");
}
@@ -179,29 +319,33 @@ private void scanAndLoadDictionaries() throws IOException {
}
/**
- * Loads the hunspell dictionary for the given local.
+ * Loads a hunspell dictionary from a base directory by resolving the locale subdirectory,
+ * finding .aff and .dic files, and creating the Dictionary object.
+ * Used by both traditional locale-based loading (baseDir=hunspellDir) and
+ * package-based loading (baseDir=packageHunspellDir).
*
- * @param locale The locale of the hunspell dictionary to be loaded.
- * @param nodeSettings The node level settings
- * @param env The node environment (from which the conf path will be resolved)
+ * @param locale The locale of the hunspell dictionary to be loaded
+ * @param nodeSettings The node level settings (pass Settings.EMPTY for package-based loading)
+ * @param env The node environment
+ * @param baseDir The base directory containing locale subdirectories with .aff/.dic files
* @return The loaded Hunspell dictionary
- * @throws Exception when loading fails (due to IO errors or malformed dictionary files)
+ * @throws Exception when loading fails
*/
- private Dictionary loadDictionary(String locale, Settings nodeSettings, Environment env) throws Exception {
+ private Dictionary loadDictionary(String locale, Settings nodeSettings, Environment env, Path baseDir) throws Exception {
if (logger.isDebugEnabled()) {
- logger.debug("Loading hunspell dictionary [{}]...", locale);
+ logger.debug("Loading hunspell dictionary [{}] from [{}]...", locale, baseDir);
}
- Path dicDir = hunspellDir.resolve(locale);
+ Path dicDir = baseDir.resolve(locale);
if (FileSystemUtils.isAccessibleDirectory(dicDir, logger) == false) {
throw new OpenSearchException(String.format(Locale.ROOT, "Could not find hunspell dictionary [%s]", locale));
}
- // merging node settings with hunspell dictionary specific settings
+ // Merge node settings with hunspell dictionary specific settings
Settings dictSettings = HUNSPELL_DICTIONARY_OPTIONS.get(nodeSettings);
nodeSettings = loadDictionarySettings(dicDir, dictSettings.getByPrefix(locale + "."));
-
boolean ignoreCase = nodeSettings.getAsBoolean("ignore_case", defaultIgnoreCase);
+ // Find and validate affix files
Path[] affixFiles = FileSystemUtils.files(dicDir, "*.aff");
if (affixFiles.length == 0) {
throw new OpenSearchException(String.format(Locale.ROOT, "Missing affix file for hunspell dictionary [%s]", locale));
@@ -209,22 +353,20 @@ private Dictionary loadDictionary(String locale, Settings nodeSettings, Environm
if (affixFiles.length != 1) {
throw new OpenSearchException(String.format(Locale.ROOT, "Too many affix files exist for hunspell dictionary [%s]", locale));
}
- InputStream affixStream = null;
+ // Load dictionary files and create Dictionary object
Path[] dicFiles = FileSystemUtils.files(dicDir, "*.dic");
List dicStreams = new ArrayList<>(dicFiles.length);
+ InputStream affixStream = null;
try {
-
- for (int i = 0; i < dicFiles.length; i++) {
- dicStreams.add(Files.newInputStream(dicFiles[i]));
+ for (Path dicFile : dicFiles) {
+ dicStreams.add(Files.newInputStream(dicFile));
}
-
affixStream = Files.newInputStream(affixFiles[0]);
try (Directory tmp = new NIOFSDirectory(env.tmpDir())) {
return new Dictionary(tmp, "hunspell", affixStream, dicStreams, ignoreCase);
}
-
} catch (Exception e) {
logger.error(() -> new ParameterizedMessage("Could not load hunspell dictionary [{}]", locale), e);
throw e;
@@ -255,4 +397,17 @@ private static Settings loadDictionarySettings(Path dir, Settings defaults) thro
return defaults;
}
+
+ /**
+ * Builds the cache key for a package-based dictionary by joining the two identifiers with a colon.
+ * Format: "{packageId}:{locale}" (e.g., "pkg-1234:en_US")
+ *
+ * The arguments are concatenated as given; no validation or escaping is performed here.
+ *
+ * @param packageId The package ID
+ * @param locale The locale
+ * @return The cache key
+ */
+ public static String buildPackageCacheKey(String packageId, String locale) {
+ return packageId + ":" + locale;
+ }
+
}
diff --git a/server/src/test/java/org/opensearch/index/analysis/HunspellTokenFilterFactoryTests.java b/server/src/test/java/org/opensearch/index/analysis/HunspellTokenFilterFactoryTests.java
index 665235b01b88f..7878bc72b6d2a 100644
--- a/server/src/test/java/org/opensearch/index/analysis/HunspellTokenFilterFactoryTests.java
+++ b/server/src/test/java/org/opensearch/index/analysis/HunspellTokenFilterFactoryTests.java
@@ -37,10 +37,12 @@
import java.io.IOException;
+import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.instanceOf;
import static org.hamcrest.Matchers.is;
public class HunspellTokenFilterFactoryTests extends OpenSearchTestCase {
+
public void testDedup() throws IOException {
Settings settings = Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
@@ -67,4 +69,233 @@ public void testDedup() throws IOException {
hunspellTokenFilter = (HunspellTokenFilterFactory) tokenFilter;
assertThat(hunspellTokenFilter.dedup(), is(false));
}
+
+ /**
+ * Verifies that the {@code dedup} and {@code longest_only} settings are honoured when the filter is
+ * configured with {@code ref_path} ("test-pkg") together with {@code locale} ("en_US").
+ */
+ public void testRefPathWithDedupAndLongestOnly() throws IOException {
+ Settings settings = Settings.builder()
+ .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
+ .put("index.analysis.filter.my_hunspell.type", "hunspell")
+ .put("index.analysis.filter.my_hunspell.ref_path", "test-pkg")
+ .put("index.analysis.filter.my_hunspell.locale", "en_US")
+ .put("index.analysis.filter.my_hunspell.dedup", false)
+ .put("index.analysis.filter.my_hunspell.longest_only", true)
+ .build();
+
+ TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings, getDataPath("/indices/analyze/conf_dir"));
+ TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_hunspell");
+ assertThat(tokenFilter, instanceOf(HunspellTokenFilterFactory.class));
+ HunspellTokenFilterFactory hunspellTokenFilter = (HunspellTokenFilterFactory) tokenFilter;
+
+ assertThat(hunspellTokenFilter.dedup(), is(false));
+ assertThat(hunspellTokenFilter.longestOnly(), is(true));
+ }
+
+ /**
+ * Verifies backward compatibility: a filter configured with only {@code locale} ("en_US") and no
+ * {@code ref_path} is still created as a {@code HunspellTokenFilterFactory}.
+ */
+ public void testTraditionalLocaleOnlyLoadingStillWorks() throws IOException {
+ Settings settings = Settings.builder()
+ .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
+ .put("index.analysis.filter.my_hunspell.type", "hunspell")
+ .put("index.analysis.filter.my_hunspell.locale", "en_US")
+ // No ref_path: the dictionary is expected to come from the traditional hunspell/en_US/ location
+ .build();
+
+ TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings, getDataPath("/indices/analyze/conf_dir"));
+ TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_hunspell");
+ assertThat(tokenFilter, instanceOf(HunspellTokenFilterFactory.class));
+ }
+
+ /**
+ * Verifies that a hunspell filter configured with neither {@code ref_path} nor {@code locale} is
+ * rejected with an {@code IllegalArgumentException} whose message mentions "locale".
+ */
+ public void testMissingBothRefPathAndLocaleThrowsException() throws IOException {
+ Settings settings = Settings.builder()
+ .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
+ .put("index.analysis.filter.my_hunspell.type", "hunspell")
+ .build();
+
+ IllegalArgumentException e = expectThrows(
+ IllegalArgumentException.class,
+ () -> AnalysisTestsHelper.createTestAnalysisFromSettings(settings, getDataPath("/indices/analyze/conf_dir"))
+ );
+ assertThat(e.getMessage(), containsString("locale"));
+ }
+
+ /**
+ * Verifies that {@code validatePackageIdentifier} accepts identifiers made of letters, digits,
+ * hyphens and underscores, including one- and two-character values. The test passes when none of
+ * the calls throws.
+ */
+ public void testValidatePackageIdentifierAcceptsValid() {
+ // These should not throw
+ HunspellTokenFilterFactory.validatePackageIdentifier("pkg-1234", "ref_path");
+ HunspellTokenFilterFactory.validatePackageIdentifier("en_US", "locale");
+ HunspellTokenFilterFactory.validatePackageIdentifier("my-package-v2", "ref_path");
+ HunspellTokenFilterFactory.validatePackageIdentifier("en_US_custom", "locale");
+ HunspellTokenFilterFactory.validatePackageIdentifier("a", "ref_path"); // single char
+ HunspellTokenFilterFactory.validatePackageIdentifier("AB", "ref_path"); // two chars
+ }
+
+ /**
+ * Verifies that {@code validatePackageIdentifier} rejects a null identifier with an
+ * {@code IllegalArgumentException} whose message contains "null or empty".
+ */
+ public void testValidatePackageIdentifierRejectsNull() {
+ IllegalArgumentException e = expectThrows(
+ IllegalArgumentException.class,
+ () -> HunspellTokenFilterFactory.validatePackageIdentifier(null, "ref_path")
+ );
+ assertThat(e.getMessage(), containsString("null or empty"));
+ }
+
+ /**
+ * Verifies that {@code validatePackageIdentifier} rejects the empty string with an
+ * {@code IllegalArgumentException} whose message contains "null or empty".
+ */
+ public void testValidatePackageIdentifierRejectsEmpty() {
+ IllegalArgumentException e = expectThrows(
+ IllegalArgumentException.class,
+ () -> HunspellTokenFilterFactory.validatePackageIdentifier("", "ref_path")
+ );
+ assertThat(e.getMessage(), containsString("null or empty"));
+ }
+
+ /**
+ * Verifies that {@code validatePackageIdentifier} rejects an identifier containing a forward slash
+ * ("foo/bar") with an {@code IllegalArgumentException} whose message contains "Only alphanumeric".
+ */
+ public void testValidatePackageIdentifierRejectsSlash() {
+ IllegalArgumentException e = expectThrows(
+ IllegalArgumentException.class,
+ () -> HunspellTokenFilterFactory.validatePackageIdentifier("foo/bar", "ref_path")
+ );
+ assertThat(e.getMessage(), containsString("Only alphanumeric"));
+ }
+
+ /**
+ * Verifies that {@code validatePackageIdentifier} rejects an identifier containing a backslash
+ * ("foo\bar") with an {@code IllegalArgumentException} whose message contains "Only alphanumeric".
+ */
+ public void testValidatePackageIdentifierRejectsBackslash() {
+ IllegalArgumentException e = expectThrows(
+ IllegalArgumentException.class,
+ () -> HunspellTokenFilterFactory.validatePackageIdentifier("foo\\bar", "ref_path")
+ );
+ assertThat(e.getMessage(), containsString("Only alphanumeric"));
+ }
+
+ /**
+ * Verifies that {@code validatePackageIdentifier} rejects an identifier containing a colon
+ * ("pkg:inject"), the character used as the cache key separator, with an
+ * {@code IllegalArgumentException} whose message contains "Only alphanumeric".
+ */
+ public void testValidatePackageIdentifierRejectsColon() {
+ IllegalArgumentException e = expectThrows(
+ IllegalArgumentException.class,
+ () -> HunspellTokenFilterFactory.validatePackageIdentifier("pkg:inject", "ref_path")
+ );
+ assertThat(e.getMessage(), containsString("Only alphanumeric"));
+ }
+
+ /**
+ * Verifies that {@code validatePackageIdentifier} rejects an identifier containing a single dot
+ * ("pkg.v1") with an {@code IllegalArgumentException} whose message contains "Only alphanumeric".
+ */
+ public void testValidatePackageIdentifierRejectsDots() {
+ IllegalArgumentException e = expectThrows(
+ IllegalArgumentException.class,
+ () -> HunspellTokenFilterFactory.validatePackageIdentifier("pkg.v1", "ref_path")
+ );
+ assertThat(e.getMessage(), containsString("Only alphanumeric"));
+ }
+
+ /**
+ * Verifies that {@code validatePackageIdentifier} rejects an identifier with an embedded ".."
+ * sequence ("foo..bar") with an {@code IllegalArgumentException} whose message contains
+ * "Only alphanumeric".
+ */
+ public void testValidatePackageIdentifierRejectsDoubleDots() {
+ IllegalArgumentException e = expectThrows(
+ IllegalArgumentException.class,
+ () -> HunspellTokenFilterFactory.validatePackageIdentifier("foo..bar", "ref_path")
+ );
+ assertThat(e.getMessage(), containsString("Only alphanumeric"));
+ }
+
+ /**
+ * Verifies that {@code validatePackageIdentifier} rejects the bare parent-directory reference ".."
+ * with an {@code IllegalArgumentException} whose message contains "Only alphanumeric".
+ */
+ public void testValidatePackageIdentifierRejectsPureDotDot() {
+ IllegalArgumentException e = expectThrows(
+ IllegalArgumentException.class,
+ () -> HunspellTokenFilterFactory.validatePackageIdentifier("..", "ref_path")
+ );
+ assertThat(e.getMessage(), containsString("Only alphanumeric"));
+ }
+
+ /**
+ * Verifies that {@code validatePackageIdentifier} rejects an identifier containing a space
+ * ("my package") with an {@code IllegalArgumentException} whose message contains "Only alphanumeric".
+ */
+ public void testValidatePackageIdentifierRejectsSpaces() {
+ IllegalArgumentException e = expectThrows(
+ IllegalArgumentException.class,
+ () -> HunspellTokenFilterFactory.validatePackageIdentifier("my package", "ref_path")
+ );
+ assertThat(e.getMessage(), containsString("Only alphanumeric"));
+ }
+
+ /**
+ * Verifies that {@code validatePackageIdentifier} rejects an identifier containing a special
+ * character ("pkg@v1") with an {@code IllegalArgumentException} whose message contains
+ * "Only alphanumeric".
+ */
+ public void testValidatePackageIdentifierRejectsSpecialChars() {
+ IllegalArgumentException e = expectThrows(
+ IllegalArgumentException.class,
+ () -> HunspellTokenFilterFactory.validatePackageIdentifier("pkg@v1", "ref_path")
+ );
+ assertThat(e.getMessage(), containsString("Only alphanumeric"));
+ }
+
+ /**
+ * Verifies that {@code create()} on a filter configured with {@code ref_path} and {@code locale}
+ * returns a non-null token stream. Only non-nullness is asserted; the concrete stream type is not
+ * checked here.
+ */
+ public void testCreateProducesTokenStream() throws IOException {
+ Settings settings = Settings.builder()
+ .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
+ .put("index.analysis.filter.my_hunspell.type", "hunspell")
+ .put("index.analysis.filter.my_hunspell.ref_path", "test-pkg")
+ .put("index.analysis.filter.my_hunspell.locale", "en_US")
+ .build();
+
+ TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings, getDataPath("/indices/analyze/conf_dir"));
+ TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_hunspell");
+ assertThat(tokenFilter, instanceOf(HunspellTokenFilterFactory.class));
+
+ // Wrap an empty canned stream so that the factory's create() path is exercised
+ org.apache.lucene.analysis.TokenStream ts = tokenFilter.create(new org.apache.lucene.tests.analysis.CannedTokenStream());
+ assertNotNull(ts);
+ }
+
+ /**
+ * Verifies that {@code create()} also returns a non-null token stream when the filter is
+ * configured the traditional way, with {@code locale} only and no {@code ref_path}.
+ */
+ public void testCreateWithTraditionalLocale() throws IOException {
+ Settings settings = Settings.builder()
+ .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
+ .put("index.analysis.filter.my_hunspell.type", "hunspell")
+ .put("index.analysis.filter.my_hunspell.locale", "en_US")
+ .build();
+
+ TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings, getDataPath("/indices/analyze/conf_dir"));
+ TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_hunspell");
+
+ org.apache.lucene.analysis.TokenStream ts = tokenFilter.create(new org.apache.lucene.tests.analysis.CannedTokenStream());
+ assertNotNull(ts);
+ }
+
+ /**
+ * Verifies backward compatibility of the {@code language} setting: a filter configured with
+ * {@code language} ("en_US") instead of {@code locale} is still created as a
+ * {@code HunspellTokenFilterFactory}.
+ */
+ public void testLanguageAliasForLocale() throws IOException {
+ Settings settings = Settings.builder()
+ .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
+ .put("index.analysis.filter.my_hunspell.type", "hunspell")
+ .put("index.analysis.filter.my_hunspell.language", "en_US")
+ .build();
+
+ TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings, getDataPath("/indices/analyze/conf_dir"));
+ TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_hunspell");
+ assertThat(tokenFilter, instanceOf(HunspellTokenFilterFactory.class));
+ }
}
diff --git a/server/src/test/java/org/opensearch/indices/analyze/HunspellServiceTests.java b/server/src/test/java/org/opensearch/indices/analyze/HunspellServiceTests.java
index f66045898f4a3..12149661b278f 100644
--- a/server/src/test/java/org/opensearch/indices/analyze/HunspellServiceTests.java
+++ b/server/src/test/java/org/opensearch/indices/analyze/HunspellServiceTests.java
@@ -106,4 +106,285 @@ public void testDicWithTwoAffs() {
assertEquals("Failed to load hunspell dictionary for locale: en_US", e.getMessage());
assertNull(e.getCause());
}
+
+ // ========== REF_PATH (Package-based Dictionary) TESTS ==========
+
+ /**
+ * Verifies that a dictionary placed under config/analyzers/pkg-1234/hunspell/en_US/ can be loaded
+ * through {@code getDictionaryFromPackage}, with the lazy-load setting chosen at random.
+ */
+ public void testGetDictionaryFromPackage() throws Exception {
+ final Path home = createTempDir();
+ final Path configDir = home.resolve("config");
+
+ // Lay out the package on disk: config/analyzers/pkg-1234/hunspell/en_US/ with minimal hunspell files
+ final Path localeDir = configDir.resolve("analyzers").resolve("pkg-1234").resolve("hunspell").resolve("en_US");
+ java.nio.file.Files.createDirectories(localeDir);
+ createHunspellFiles(localeDir, "en_US");
+
+ final Settings nodeSettings = Settings.builder()
+ .put(HUNSPELL_LAZY_LOAD.getKey(), randomBoolean())
+ .put(Environment.PATH_HOME_SETTING.getKey(), home)
+ .build();
+ final HunspellService service = new HunspellService(nodeSettings, new Environment(nodeSettings, configDir), emptyMap());
+
+ // Looking the dictionary up by package ID and locale must yield an instance
+ final Dictionary loaded = service.getDictionaryFromPackage("pkg-1234", "en_US");
+ assertThat(loaded, notNullValue());
+ }
+
+ /**
+ * Verifies that a second request for the same package and locale returns the very same
+ * {@code Dictionary} instance as the first request.
+ */
+ public void testGetDictionaryFromPackageCaching() throws Exception {
+ final Path home = createTempDir();
+ final Path configDir = home.resolve("config");
+ final Path localeDir = configDir.resolve("analyzers").resolve("pkg-1234").resolve("hunspell").resolve("en_US");
+ java.nio.file.Files.createDirectories(localeDir);
+ createHunspellFiles(localeDir, "en_US");
+
+ final Settings nodeSettings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), home).build();
+ final HunspellService service = new HunspellService(nodeSettings, new Environment(nodeSettings, configDir), emptyMap());
+
+ // The first lookup has to load the dictionary
+ final Dictionary loaded = service.getDictionaryFromPackage("pkg-1234", "en_US");
+ assertThat(loaded, notNullValue());
+
+ // The repeated lookup must be answered with the identical object
+ final Dictionary again = service.getDictionaryFromPackage("pkg-1234", "en_US");
+ assertSame("Should return same cached instance", loaded, again);
+ }
+
+ /**
+ * Verifies that the same locale loaded from two different packages (pkg-1234 and pkg-5678)
+ * yields two distinct {@code Dictionary} instances.
+ */
+ public void testMultiplePackagesCaching() throws Exception {
+ final Path home = createTempDir();
+ final Path analyzersDir = home.resolve("config").resolve("analyzers");
+
+ // Two packages that both ship an en_US dictionary
+ final Path firstLocaleDir = analyzersDir.resolve("pkg-1234").resolve("hunspell").resolve("en_US");
+ final Path secondLocaleDir = analyzersDir.resolve("pkg-5678").resolve("hunspell").resolve("en_US");
+ java.nio.file.Files.createDirectories(firstLocaleDir);
+ java.nio.file.Files.createDirectories(secondLocaleDir);
+ createHunspellFiles(firstLocaleDir, "en_US");
+ createHunspellFiles(secondLocaleDir, "en_US");
+
+ final Settings nodeSettings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), home).build();
+ final HunspellService service = new HunspellService(nodeSettings, new Environment(nodeSettings, home.resolve("config")), emptyMap());
+
+ final Dictionary fromFirst = service.getDictionaryFromPackage("pkg-1234", "en_US");
+ final Dictionary fromSecond = service.getDictionaryFromPackage("pkg-5678", "en_US");
+
+ assertThat(fromFirst, notNullValue());
+ assertThat(fromSecond, notNullValue());
+ assertNotSame("Different package directories should have different Dictionary instances", fromFirst, fromSecond);
+ }
+
+ /**
+ * Verifies that package cache keys are formed as "{packageId}:{locale}".
+ */
+ public void testBuildPackageCacheKey() {
+ final String englishKey = HunspellService.buildPackageCacheKey("pkg-1234", "en_US");
+ assertEquals("pkg-1234:en_US", englishKey);
+
+ final String frenchKey = HunspellService.buildPackageCacheKey("my-package", "fr_FR");
+ assertEquals("my-package:fr_FR", frenchKey);
+ }
+
+ /**
+ * Verifies that requesting a dictionary from a package whose directory was never created fails
+ * with an {@code IllegalStateException} carrying the "Failed to load hunspell dictionary for package"
+ * message.
+ */
+ public void testGetDictionaryFromPackageNotFound() throws Exception {
+ // Only the temp home is created; config/analyzers/nonexistent-pkg is deliberately absent
+ final Path home = createTempDir();
+
+ final Settings nodeSettings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), home).build();
+ final HunspellService service = new HunspellService(nodeSettings, new Environment(nodeSettings, home.resolve("config")), emptyMap());
+
+ final IllegalStateException failure = expectThrows(
+ IllegalStateException.class,
+ () -> service.getDictionaryFromPackage("nonexistent-pkg", "en_US")
+ );
+ assertTrue(failure.getMessage().contains("Failed to load hunspell dictionary for package"));
+ }
+
+ /**
+ * Verifies that a traditional dictionary (config/hunspell/en_US) and a package dictionary
+ * (config/analyzers/pkg-1234/hunspell/en_US) for the same locale are served as two distinct
+ * instances.
+ */
+ public void testMixedCacheKeysTraditionalAndPackage() throws Exception {
+ final Path home = createTempDir();
+ final Path configDir = home.resolve("config");
+
+ // The same locale, once in the traditional location ...
+ final Path traditionalLocaleDir = configDir.resolve("hunspell").resolve("en_US");
+ java.nio.file.Files.createDirectories(traditionalLocaleDir);
+ createHunspellFiles(traditionalLocaleDir, "en_US");
+
+ // ... and once inside a package
+ final Path packageLocaleDir = configDir.resolve("analyzers").resolve("pkg-1234").resolve("hunspell").resolve("en_US");
+ java.nio.file.Files.createDirectories(packageLocaleDir);
+ createHunspellFiles(packageLocaleDir, "en_US");
+
+ final Settings nodeSettings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), home).build();
+ final HunspellService service = new HunspellService(nodeSettings, new Environment(nodeSettings, configDir), emptyMap());
+
+ final Dictionary fromLocale = service.getDictionary("en_US");
+ final Dictionary fromPackage = service.getDictionaryFromPackage("pkg-1234", "en_US");
+
+ assertThat(fromLocale, notNullValue());
+ assertThat(fromPackage, notNullValue());
+ assertNotSame("Traditional and package dictionaries should be different instances", fromLocale, fromPackage);
+ }
+
+ /**
+ * Verifies that a null package ID is rejected with an {@code IllegalArgumentException} whose
+ * message names "packageId".
+ */
+ public void testGetDictionaryFromPackageWithNullPackageId() throws Exception {
+ final Settings nodeSettings = Settings.builder()
+ .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
+ .put(HUNSPELL_LAZY_LOAD.getKey(), true)
+ .build();
+ final Path confDir = getDataPath("/indices/analyze/conf_dir");
+ final HunspellService service = new HunspellService(nodeSettings, new Environment(nodeSettings, confDir), emptyMap());
+
+ final IllegalArgumentException rejected = expectThrows(
+ IllegalArgumentException.class,
+ () -> service.getDictionaryFromPackage(null, "en_US")
+ );
+ assertThat(rejected.getMessage(), org.hamcrest.Matchers.containsString("packageId"));
+ }
+
+ /**
+ * Verifies that an empty package ID is rejected with an {@code IllegalArgumentException} whose
+ * message names "packageId".
+ */
+ public void testGetDictionaryFromPackageWithEmptyPackageId() throws Exception {
+ final Settings nodeSettings = Settings.builder()
+ .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
+ .put(HUNSPELL_LAZY_LOAD.getKey(), true)
+ .build();
+ final Path confDir = getDataPath("/indices/analyze/conf_dir");
+ final HunspellService service = new HunspellService(nodeSettings, new Environment(nodeSettings, confDir), emptyMap());
+
+ final IllegalArgumentException rejected = expectThrows(
+ IllegalArgumentException.class,
+ () -> service.getDictionaryFromPackage("", "en_US")
+ );
+ assertThat(rejected.getMessage(), org.hamcrest.Matchers.containsString("packageId"));
+ }
+
+ /**
+ * Verifies that a null locale is rejected with an {@code IllegalArgumentException} whose message
+ * names "locale".
+ */
+ public void testGetDictionaryFromPackageWithNullLocale() throws Exception {
+ final Settings nodeSettings = Settings.builder()
+ .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
+ .put(HUNSPELL_LAZY_LOAD.getKey(), true)
+ .build();
+ final Path confDir = getDataPath("/indices/analyze/conf_dir");
+ final HunspellService service = new HunspellService(nodeSettings, new Environment(nodeSettings, confDir), emptyMap());
+
+ final IllegalArgumentException rejected = expectThrows(
+ IllegalArgumentException.class,
+ () -> service.getDictionaryFromPackage("test-pkg", null)
+ );
+ assertThat(rejected.getMessage(), org.hamcrest.Matchers.containsString("locale"));
+ }
+
+ /**
+ * Verifies that an empty locale is rejected with an {@code IllegalArgumentException} whose message
+ * names "locale".
+ */
+ public void testGetDictionaryFromPackageWithEmptyLocale() throws Exception {
+ final Settings nodeSettings = Settings.builder()
+ .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
+ .put(HUNSPELL_LAZY_LOAD.getKey(), true)
+ .build();
+ final Path confDir = getDataPath("/indices/analyze/conf_dir");
+ final HunspellService service = new HunspellService(nodeSettings, new Environment(nodeSettings, confDir), emptyMap());
+
+ final IllegalArgumentException rejected = expectThrows(
+ IllegalArgumentException.class,
+ () -> service.getDictionaryFromPackage("test-pkg", "")
+ );
+ assertThat(rejected.getMessage(), org.hamcrest.Matchers.containsString("locale"));
+ }
+
+ /**
+ * Verifies that a package directory without a 'hunspell' subdirectory is rejected, and that the
+ * failure is attributed to the missing subdirectory rather than to some other loading problem.
+ */
+ public void testPackageWithMissingHunspellSubdir() throws Exception {
+ Path tempDir = createTempDir();
+ // Create package dir WITHOUT hunspell subdirectory
+ Path packageDir = tempDir.resolve("config").resolve("analyzers").resolve("bad-pkg");
+ java.nio.file.Files.createDirectories(packageDir);
+
+ Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), tempDir).build();
+ Environment environment = new Environment(settings, tempDir.resolve("config"));
+ HunspellService hunspellService = new HunspellService(settings, environment, emptyMap());
+
+ IllegalStateException e = expectThrows(
+ IllegalStateException.class,
+ () -> hunspellService.getDictionaryFromPackage("bad-pkg", "en_US")
+ );
+ assertThat(e.getMessage(), org.hamcrest.Matchers.containsString("bad-pkg"));
+ // The wrapper message always names the package, so only the cause proves which failure occurred
+ assertNotNull(e.getCause());
+ assertThat(
+ e.getCause().getMessage(),
+ org.hamcrest.Matchers.containsString("Hunspell directory not found in package [bad-pkg]")
+ );
+ }
+
+    // Verifies that a package whose hunspell directory has no locale subdirectory fails, naming the locale or the package.
+    public void testPackageMissingLocaleDir() throws Exception {
+        final Path home = createTempDir();
+        // Stop at the hunspell directory: the en_US locale directory is deliberately absent.
+        java.nio.file.Files.createDirectories(home.resolve("config").resolve("analyzers").resolve("pkg-empty").resolve("hunspell"));
+        final Settings nodeSettings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), home).build();
+        final Environment env = new Environment(nodeSettings, home.resolve("config"));
+        final HunspellService service = new HunspellService(nodeSettings, env, emptyMap());
+
+        final Exception failure = expectThrows(Exception.class, () -> service.getDictionaryFromPackage("pkg-empty", "en_US"));
+        final String message = failure.getMessage();
+        assertTrue(message.contains("en_US") || message.contains("pkg-empty"));
+    }
+
+    // Verifies that a locale directory holding a .dic but no .aff file fails, mentioning "affix" or the package id.
+    public void testPackageMissingAffFile() throws Exception {
+        final Path home = createTempDir();
+        final Path dictionaryDir = home.resolve("config").resolve("analyzers").resolve("pkg-noaff").resolve("hunspell").resolve("en_US");
+        java.nio.file.Files.createDirectories(dictionaryDir);
+        // Write the word list only; the matching en_US.aff is deliberately left out.
+        java.nio.file.Files.write(dictionaryDir.resolve("en_US.dic"), java.util.Arrays.asList("1", "test"));
+        final Settings nodeSettings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), home).build();
+        final Environment env = new Environment(nodeSettings, home.resolve("config"));
+        final HunspellService service = new HunspellService(nodeSettings, env, emptyMap());
+
+        final Exception failure = expectThrows(Exception.class, () -> service.getDictionaryFromPackage("pkg-noaff", "en_US"));
+        assertTrue(failure.getMessage().contains("affix") || failure.getMessage().contains("pkg-noaff"));
+    }
+
+    // Verifies that a packageId of ".." (parent-directory traversal) makes getDictionaryFromPackage throw.
+    public void testPathTraversalInPackageId() throws Exception {
+        final Settings nodeSettings = Settings.builder()
+            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
+            .put(HUNSPELL_LAZY_LOAD.getKey(), true)
+            .build();
+        final Environment env = new Environment(nodeSettings, getDataPath("/indices/analyze/conf_dir"));
+        final HunspellService service = new HunspellService(nodeSettings, env, emptyMap());
+        // NOTE(review): only "some exception" is checked; assertNotNull looks redundant if expectThrows never returns null - confirm.
+        assertNotNull(expectThrows(Exception.class, () -> service.getDictionaryFromPackage("..", "en_US")));
+    }
+
+    // Verifies that a locale of "../en_US" (parent-directory traversal) makes getDictionaryFromPackage throw.
+    public void testPathTraversalInLocale() throws Exception {
+        final Settings nodeSettings = Settings.builder()
+            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
+            .put(HUNSPELL_LAZY_LOAD.getKey(), true)
+            .build();
+        final Environment env = new Environment(nodeSettings, getDataPath("/indices/analyze/conf_dir"));
+        final HunspellService service = new HunspellService(nodeSettings, env, emptyMap());
+        // NOTE(review): only "some exception" is checked; assertNotNull looks redundant if expectThrows never returns null - confirm.
+        assertNotNull(expectThrows(Exception.class, () -> service.getDictionaryFromPackage("test-pkg", "../en_US")));
+    }
+
+    // Verifies that a packageId containing a forward slash ("foo/bar") makes getDictionaryFromPackage throw.
+    public void testSlashInPackageId() throws Exception {
+        final Settings nodeSettings = Settings.builder()
+            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
+            .put(HUNSPELL_LAZY_LOAD.getKey(), true)
+            .build();
+        final Environment env = new Environment(nodeSettings, getDataPath("/indices/analyze/conf_dir"));
+        final HunspellService service = new HunspellService(nodeSettings, env, emptyMap());
+        // NOTE(review): only "some exception" is checked; assertNotNull looks redundant if expectThrows never returns null - confirm.
+        assertNotNull(expectThrows(Exception.class, () -> service.getDictionaryFromPackage("foo/bar", "en_US")));
+    }
+
+    // Verifies that a locale containing a backslash makes getDictionaryFromPackage throw.
+    public void testBackslashInLocale() throws Exception {
+        final Settings nodeSettings = Settings.builder()
+            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
+            .put(HUNSPELL_LAZY_LOAD.getKey(), true)
+            .build();
+        final Environment env = new Environment(nodeSettings, getDataPath("/indices/analyze/conf_dir"));
+        final HunspellService service = new HunspellService(nodeSettings, env, emptyMap());
+        // NOTE(review): only "some exception" is checked; assertNotNull looks redundant if expectThrows never returns null - confirm.
+        assertNotNull(expectThrows(Exception.class, () -> service.getDictionaryFromPackage("test-pkg", "en\\US")));
+    }
+
+ // Helper method to create minimal hunspell files for testing
+ private void createHunspellFiles(Path directory, String locale) throws java.io.IOException {
+ // Create .aff file
+ Path affFile = directory.resolve(locale + ".aff");
+ java.nio.file.Files.write(affFile, java.util.Arrays.asList("SET UTF-8", "SFX S Y 1", "SFX S 0 s ."));
+
+ // Create .dic file
+ Path dicFile = directory.resolve(locale + ".dic");
+ java.nio.file.Files.write(dicFile, java.util.Arrays.asList("3", "test/S", "word/S", "hello"));
+ }
}
diff --git a/server/src/test/resources/indices/analyze/conf_dir/analyzers/test-pkg/hunspell/en_US/en_US.aff b/server/src/test/resources/indices/analyze/conf_dir/analyzers/test-pkg/hunspell/en_US/en_US.aff
new file mode 100644
index 0000000000000..2ddd985437187
--- /dev/null
+++ b/server/src/test/resources/indices/analyze/conf_dir/analyzers/test-pkg/hunspell/en_US/en_US.aff
@@ -0,0 +1,201 @@
+SET ISO8859-1
+TRY esianrtolcdugmphbyfvkwzESIANRTOLCDUGMPHBYFVKWZ'
+NOSUGGEST !
+
+# ordinal numbers
+COMPOUNDMIN 1
+# only in compounds: 1th, 2th, 3th
+ONLYINCOMPOUND c
+# compound rules:
+# 1. [0-9]*1[0-9]th (10th, 11th, 12th, 56714th, etc.)
+# 2. [0-9]*[02-9](1st|2nd|3rd|[4-9]th) (21st, 22nd, 123rd, 1234th, etc.)
+COMPOUNDRULE 2
+COMPOUNDRULE n*1t
+COMPOUNDRULE n*mp
+WORDCHARS 0123456789
+
+PFX A Y 1
+PFX A 0 re .
+
+PFX I Y 1
+PFX I 0 in .
+
+PFX U Y 1
+PFX U 0 un .
+
+PFX C Y 1
+PFX C 0 de .
+
+PFX E Y 1
+PFX E 0 dis .
+
+PFX F Y 1
+PFX F 0 con .
+
+PFX K Y 1
+PFX K 0 pro .
+
+SFX V N 2
+SFX V e ive e
+SFX V 0 ive [^e]
+
+SFX N Y 3
+SFX N e ion e
+SFX N y ication y
+SFX N 0 en [^ey]
+
+SFX X Y 3
+SFX X e ions e
+SFX X y ications y
+SFX X 0 ens [^ey]
+
+SFX H N 2
+SFX H y ieth y
+SFX H 0 th [^y]
+
+SFX Y Y 1
+SFX Y 0 ly .
+
+SFX G Y 2
+SFX G e ing e
+SFX G 0 ing [^e]
+
+SFX J Y 2
+SFX J e ings e
+SFX J 0 ings [^e]
+
+SFX D Y 4
+SFX D 0 d e
+SFX D y ied [^aeiou]y
+SFX D 0 ed [^ey]
+SFX D 0 ed [aeiou]y
+
+SFX T N 4
+SFX T 0 st e
+SFX T y iest [^aeiou]y
+SFX T 0 est [aeiou]y
+SFX T 0 est [^ey]
+
+SFX R Y 4
+SFX R 0 r e
+SFX R y ier [^aeiou]y
+SFX R 0 er [aeiou]y
+SFX R 0 er [^ey]
+
+SFX Z Y 4
+SFX Z 0 rs e
+SFX Z y iers [^aeiou]y
+SFX Z 0 ers [aeiou]y
+SFX Z 0 ers [^ey]
+
+SFX S Y 4
+SFX S y ies [^aeiou]y
+SFX S 0 s [aeiou]y
+SFX S 0 es [sxzh]
+SFX S 0 s [^sxzhy]
+
+SFX P Y 3
+SFX P y iness [^aeiou]y
+SFX P 0 ness [aeiou]y
+SFX P 0 ness [^y]
+
+SFX M Y 1
+SFX M 0 's .
+
+SFX B Y 3
+SFX B 0 able [^aeiou]
+SFX B 0 able ee
+SFX B e able [^aeiou]e
+
+SFX L Y 1
+SFX L 0 ment .
+
+REP 88
+REP a ei
+REP ei a
+REP a ey
+REP ey a
+REP ai ie
+REP ie ai
+REP are air
+REP are ear
+REP are eir
+REP air are
+REP air ere
+REP ere air
+REP ere ear
+REP ere eir
+REP ear are
+REP ear air
+REP ear ere
+REP eir are
+REP eir ere
+REP ch te
+REP te ch
+REP ch ti
+REP ti ch
+REP ch tu
+REP tu ch
+REP ch s
+REP s ch
+REP ch k
+REP k ch
+REP f ph
+REP ph f
+REP gh f
+REP f gh
+REP i igh
+REP igh i
+REP i uy
+REP uy i
+REP i ee
+REP ee i
+REP j di
+REP di j
+REP j gg
+REP gg j
+REP j ge
+REP ge j
+REP s ti
+REP ti s
+REP s ci
+REP ci s
+REP k cc
+REP cc k
+REP k qu
+REP qu k
+REP kw qu
+REP o eau
+REP eau o
+REP o ew
+REP ew o
+REP oo ew
+REP ew oo
+REP ew ui
+REP ui ew
+REP oo ui
+REP ui oo
+REP ew u
+REP u ew
+REP oo u
+REP u oo
+REP u oe
+REP oe u
+REP u ieu
+REP ieu u
+REP ue ew
+REP ew ue
+REP uff ough
+REP oo ieu
+REP ieu oo
+REP ier ear
+REP ear ier
+REP ear air
+REP air ear
+REP w qu
+REP qu w
+REP z ss
+REP ss z
+REP shun tion
+REP shun sion
+REP shun cion
diff --git a/server/src/test/resources/indices/analyze/conf_dir/analyzers/test-pkg/hunspell/en_US/en_US.dic b/server/src/test/resources/indices/analyze/conf_dir/analyzers/test-pkg/hunspell/en_US/en_US.dic
new file mode 100644
index 0000000000000..d278da593c573
--- /dev/null
+++ b/server/src/test/resources/indices/analyze/conf_dir/analyzers/test-pkg/hunspell/en_US/en_US.dic
@@ -0,0 +1,106 @@
+100
+test/S
+word/S
+hello
+world/S
+example/S
+package/S
+dictionary/S
+hunspell
+analysis
+search/S
+index/S
+document/S
+cluster/S
+node/S
+shard/S
+replica/S
+query/S
+filter/S
+token/S
+analyzer/S
+mapping/S
+setting/S
+request/S
+response/S
+action/S
+cache/S
+locale
+config
+plugin/S
+module/S
+server/S
+client/S
+service/S
+manager/S
+factory/S
+handler/S
+transport/S
+network/S
+thread/S
+pool/S
+memory
+storage
+engine/S
+snapshot/S
+restore
+backup/S
+monitor/S
+metric/S
+health
+status
+version/S
+update/S
+delete
+create
+read
+write
+merge
+refresh
+flush
+commit
+recover
+replicate
+allocate
+balance
+route
+forward
+ingest
+process
+transform
+validate
+authenticate
+authorize
+encrypt
+decrypt
+compress
+decompress
+serialize
+deserialize
+compute
+execute
+invoke
+dispatch
+publish
+subscribe
+notify
+broadcast
+stream
+buffer/S
+pipeline/S
+workflow/S
+template/S
+pattern/S
+schema/S
+format/S
+protocol/S
+endpoint/S
+interface/S
+abstract
+concrete
+virtual
+static
+dynamic
+public
+private
+secure
\ No newline at end of file