Skip to content

Commit 8b62851

Browse files
Load synonyms from system index for analyzers (#96674)
- Add a new "synonyms_set" option to the synonym token filters that specifies which synonyms set is loaded from the ".synonyms" system index.
- On index creation, load the synonyms set named by this option from the index.
- If the synonyms set doesn't exist, the index creation request still succeeds, but the shards are not allocated, so the cluster state will be red.
Note: this is a temporary solution, because:
- No check is done on the master node, as fake synonyms are provided there.
- On shard creation during index creation, we use a blocking operation in the cluster applier thread.
1 parent 946ffa3 commit 8b62851

File tree

9 files changed

+264
-15
lines changed

9 files changed

+264
-15
lines changed

modules/analysis-common/build.gradle

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
* in compliance with, at your election, the Elastic License 2.0 or the Server
66
* Side Public License, v 1.
77
*/
8+
import org.elasticsearch.gradle.Version
9+
810
apply plugin: 'elasticsearch.legacy-yaml-rest-test'
911
apply plugin: 'elasticsearch.legacy-yaml-rest-compat-test'
1012
apply plugin: 'elasticsearch.internal-cluster-test'
@@ -17,10 +19,16 @@ esplugin {
1719

1820
restResources {
1921
restApi {
20-
include '_common', 'indices', 'index', 'cluster', 'search', 'nodes', 'bulk', 'termvectors', 'explain', 'count'
22+
include '_common', 'indices', 'index', 'cluster', 'search', 'nodes', 'bulk', 'termvectors', 'explain', 'count', 'synonyms.put'
2123
}
2224
}
2325

26+
testClusters.configureEach {
27+
module ':modules:reindex'
28+
module ':modules:mapper-extras'
29+
requiresFeature 'es.synonyms_feature_flag_enabled', Version.fromString("8.9.0")
30+
}
31+
2432
dependencies {
2533
compileOnly project(':modules:lang-painless:spi')
2634
}

modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,7 @@
129129
import org.elasticsearch.repositories.RepositoriesService;
130130
import org.elasticsearch.script.ScriptContext;
131131
import org.elasticsearch.script.ScriptService;
132+
import org.elasticsearch.synonyms.SynonymsManagementAPIService;
132133
import org.elasticsearch.threadpool.ThreadPool;
133134
import org.elasticsearch.tracing.Tracer;
134135
import org.elasticsearch.watcher.ResourceWatcherService;
@@ -151,6 +152,8 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin, Scri
151152
private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(CommonAnalysisPlugin.class);
152153

153154
private final SetOnce<ScriptService> scriptServiceHolder = new SetOnce<>();
155+
private final SetOnce<SynonymsManagementAPIService> synonymsManagementServiceHolder = new SetOnce<>();
156+
private final SetOnce<ThreadPool> threadPoolHolder = new SetOnce<>();
154157

155158
@Override
156159
public Collection<Object> createComponents(
@@ -169,6 +172,8 @@ public Collection<Object> createComponents(
169172
AllocationService allocationService
170173
) {
171174
this.scriptServiceHolder.set(scriptService);
175+
this.synonymsManagementServiceHolder.set(new SynonymsManagementAPIService(client));
176+
this.threadPoolHolder.set(threadPool);
172177
return Collections.emptyList();
173178
}
174179

@@ -332,8 +337,25 @@ public TokenStream create(TokenStream tokenStream) {
332337
filters.put("sorani_normalization", SoraniNormalizationFilterFactory::new);
333338
filters.put("stemmer_override", requiresAnalysisSettings(StemmerOverrideTokenFilterFactory::new));
334339
filters.put("stemmer", StemmerTokenFilterFactory::new);
335-
filters.put("synonym", requiresAnalysisSettings(SynonymTokenFilterFactory::new));
336-
filters.put("synonym_graph", requiresAnalysisSettings(SynonymGraphTokenFilterFactory::new));
340+
filters.put(
341+
"synonym",
342+
requiresAnalysisSettings(
343+
(i, e, n, s) -> new SynonymTokenFilterFactory(i, e, n, s, synonymsManagementServiceHolder.get(), threadPoolHolder.get())
344+
)
345+
);
346+
filters.put(
347+
"synonym_graph",
348+
requiresAnalysisSettings(
349+
(i, e, n, s) -> new SynonymGraphTokenFilterFactory(
350+
i,
351+
e,
352+
n,
353+
s,
354+
synonymsManagementServiceHolder.get(),
355+
threadPoolHolder.get()
356+
)
357+
)
358+
);
337359
filters.put("trim", TrimTokenFilterFactory::new);
338360
filters.put("truncate", requiresAnalysisSettings(TruncateTokenFilterFactory::new));
339361
filters.put("unique", UniqueTokenFilterFactory::new);

modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/SynonymGraphTokenFilterFactory.java

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,23 @@
1919
import org.elasticsearch.index.analysis.CharFilterFactory;
2020
import org.elasticsearch.index.analysis.TokenFilterFactory;
2121
import org.elasticsearch.index.analysis.TokenizerFactory;
22+
import org.elasticsearch.synonyms.SynonymsManagementAPIService;
23+
import org.elasticsearch.threadpool.ThreadPool;
2224

2325
import java.util.List;
2426
import java.util.function.Function;
2527

2628
public class SynonymGraphTokenFilterFactory extends SynonymTokenFilterFactory {
2729

28-
SynonymGraphTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
29-
super(indexSettings, env, name, settings);
30+
SynonymGraphTokenFilterFactory(
31+
IndexSettings indexSettings,
32+
Environment env,
33+
String name,
34+
Settings settings,
35+
SynonymsManagementAPIService synonymsManagementAPIService,
36+
ThreadPool threadPool
37+
) {
38+
super(indexSettings, env, name, settings, synonymsManagementAPIService, threadPool);
3039
}
3140

3241
@Override

modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/SynonymTokenFilterFactory.java

Lines changed: 37 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import org.apache.lucene.analysis.TokenStream;
1313
import org.apache.lucene.analysis.synonym.SynonymFilter;
1414
import org.apache.lucene.analysis.synonym.SynonymMap;
15+
import org.elasticsearch.cluster.service.MasterService;
1516
import org.elasticsearch.common.logging.DeprecationCategory;
1617
import org.elasticsearch.common.logging.DeprecationLogger;
1718
import org.elasticsearch.common.settings.Settings;
@@ -24,6 +25,9 @@
2425
import org.elasticsearch.index.analysis.CustomAnalyzer;
2526
import org.elasticsearch.index.analysis.TokenFilterFactory;
2627
import org.elasticsearch.index.analysis.TokenizerFactory;
28+
import org.elasticsearch.synonyms.SynonymsAPI;
29+
import org.elasticsearch.synonyms.SynonymsManagementAPIService;
30+
import org.elasticsearch.threadpool.ThreadPool;
2731

2832
import java.io.Reader;
2933
import java.io.StringReader;
@@ -40,8 +44,17 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
4044
protected final Settings settings;
4145
protected final Environment environment;
4246
protected final AnalysisMode analysisMode;
43-
44-
SynonymTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
47+
private final SynonymsManagementAPIService synonymsManagementAPIService;
48+
private final ThreadPool threadPool;
49+
50+
SynonymTokenFilterFactory(
51+
IndexSettings indexSettings,
52+
Environment env,
53+
String name,
54+
Settings settings,
55+
SynonymsManagementAPIService synonymsManagementAPIService,
56+
ThreadPool threadPool
57+
) {
4558
super(name, settings);
4659
this.settings = settings;
4760

@@ -53,13 +66,14 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
5366
+ "Instead, insert a lowercase filter in the filter chain before the synonym_graph filter."
5467
);
5568
}
56-
5769
this.expand = settings.getAsBoolean("expand", true);
5870
this.lenient = settings.getAsBoolean("lenient", false);
5971
this.format = settings.get("format", "");
6072
boolean updateable = settings.getAsBoolean("updateable", false);
6173
this.analysisMode = updateable ? AnalysisMode.SEARCH_TIME : AnalysisMode.ALL;
6274
this.environment = env;
75+
this.synonymsManagementAPIService = synonymsManagementAPIService;
76+
this.threadPool = threadPool;
6377
}
6478

6579
@Override
@@ -145,11 +159,30 @@ protected ReaderWithOrigin getRulesFromSettings(Environment env) {
145159
sb.append(line).append(System.lineSeparator());
146160
}
147161
return new ReaderWithOrigin(new StringReader(sb.toString()), "'" + name() + "' analyzer settings");
162+
} else if ((settings.get("synonyms_set") != null) && SynonymsAPI.isEnabled()) {
163+
if (analysisMode != AnalysisMode.SEARCH_TIME) {
164+
throw new IllegalArgumentException(
165+
"Can't apply [synonyms_set]! " + "Loading synonyms from index is supported only for search time synonyms!"
166+
);
167+
}
168+
String synonymsSet = settings.get("synonyms_set", null);
169+
// provide fake synonyms on master thread, as on Master an analyzer is built for validation only
170+
if (MasterService.isMasterUpdateThread()) {
171+
return new ReaderWithOrigin(
172+
new StringReader("fake rule => fake"),
173+
"fake [" + synonymsSet + "] synonyms_set in .synonyms index"
174+
);
175+
}
176+
return new ReaderWithOrigin(
177+
Analysis.getReaderFromIndex(synonymsSet, threadPool, synonymsManagementAPIService),
178+
"[" + synonymsSet + "] synonyms_set in .synonyms index"
179+
);
148180
} else if (settings.get("synonyms_path") != null) {
149181
String synonyms_path = settings.get("synonyms_path", null);
150182
return new ReaderWithOrigin(Analysis.getReaderFromFile(env, synonyms_path, "synonyms_path"), synonyms_path);
151183
} else {
152-
throw new IllegalArgumentException("synonym requires either `synonyms` or `synonyms_path` to be configured");
184+
String err = SynonymsAPI.isEnabled() ? "`synonyms_set`," : "";
185+
throw new IllegalArgumentException("synonym requires either `synonyms`," + err + " or `synonyms_path` to be configured");
153186
}
154187
}
155188

modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PredicateTokenScriptFilterTests.java

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,12 @@
99
package org.elasticsearch.analysis.common;
1010

1111
import org.elasticsearch.Version;
12+
import org.elasticsearch.action.ActionListener;
13+
import org.elasticsearch.action.ActionRequest;
14+
import org.elasticsearch.action.ActionResponse;
15+
import org.elasticsearch.action.ActionType;
16+
import org.elasticsearch.client.internal.Client;
17+
import org.elasticsearch.client.internal.support.AbstractClient;
1218
import org.elasticsearch.cluster.metadata.IndexMetadata;
1319
import org.elasticsearch.common.settings.Settings;
1420
import org.elasticsearch.env.Environment;
@@ -23,6 +29,7 @@
2329
import org.elasticsearch.script.ScriptService;
2430
import org.elasticsearch.test.ESTokenStreamTestCase;
2531
import org.elasticsearch.test.IndexSettingsModule;
32+
import org.elasticsearch.threadpool.ThreadPool;
2633
import org.elasticsearch.tracing.Tracer;
2734

2835
import java.io.IOException;
@@ -58,9 +65,9 @@ public <FactoryType> FactoryType compile(Script script, ScriptContext<FactoryTyp
5865
return (FactoryType) factory;
5966
}
6067
};
61-
68+
Client client = new MockClient(Settings.EMPTY, null);
6269
CommonAnalysisPlugin plugin = new CommonAnalysisPlugin();
63-
plugin.createComponents(null, null, null, null, scriptService, null, null, null, null, null, null, Tracer.NOOP, null);
70+
plugin.createComponents(client, null, null, null, scriptService, null, null, null, null, null, null, Tracer.NOOP, null);
6471
AnalysisModule module = new AnalysisModule(
6572
TestEnvironment.newEnvironment(settings),
6673
Collections.singletonList(plugin),
@@ -76,4 +83,20 @@ public <FactoryType> FactoryType compile(Script script, ScriptContext<FactoryTyp
7683

7784
}
7885

86+
private class MockClient extends AbstractClient {
87+
MockClient(Settings settings, ThreadPool threadPool) {
88+
super(settings, threadPool);
89+
}
90+
91+
@Override
92+
public void close() {}
93+
94+
@Override
95+
protected <Request extends ActionRequest, Response extends ActionResponse> void doExecute(
96+
ActionType<Response> action,
97+
Request request,
98+
ActionListener<Response> listener
99+
) {}
100+
}
101+
79102
}

modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ScriptedConditionTokenFilterTests.java

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,12 @@
99
package org.elasticsearch.analysis.common;
1010

1111
import org.elasticsearch.Version;
12+
import org.elasticsearch.action.ActionListener;
13+
import org.elasticsearch.action.ActionRequest;
14+
import org.elasticsearch.action.ActionResponse;
15+
import org.elasticsearch.action.ActionType;
16+
import org.elasticsearch.client.internal.Client;
17+
import org.elasticsearch.client.internal.support.AbstractClient;
1218
import org.elasticsearch.cluster.metadata.IndexMetadata;
1319
import org.elasticsearch.common.settings.Settings;
1420
import org.elasticsearch.env.Environment;
@@ -23,6 +29,7 @@
2329
import org.elasticsearch.script.ScriptService;
2430
import org.elasticsearch.test.ESTokenStreamTestCase;
2531
import org.elasticsearch.test.IndexSettingsModule;
32+
import org.elasticsearch.threadpool.ThreadPool;
2633
import org.elasticsearch.tracing.Tracer;
2734

2835
import java.util.Collections;
@@ -58,9 +65,9 @@ public <FactoryType> FactoryType compile(Script script, ScriptContext<FactoryTyp
5865
return (FactoryType) factory;
5966
}
6067
};
61-
68+
Client client = new MockClient(Settings.EMPTY, null);
6269
CommonAnalysisPlugin plugin = new CommonAnalysisPlugin();
63-
plugin.createComponents(null, null, null, null, scriptService, null, null, null, null, null, null, Tracer.NOOP, null);
70+
plugin.createComponents(client, null, null, null, scriptService, null, null, null, null, null, null, Tracer.NOOP, null);
6471
AnalysisModule module = new AnalysisModule(
6572
TestEnvironment.newEnvironment(settings),
6673
Collections.singletonList(plugin),
@@ -76,4 +83,20 @@ public <FactoryType> FactoryType compile(Script script, ScriptContext<FactoryTyp
7683

7784
}
7885

86+
private class MockClient extends AbstractClient {
87+
MockClient(Settings settings, ThreadPool threadPool) {
88+
super(settings, threadPool);
89+
}
90+
91+
@Override
92+
public void close() {}
93+
94+
@Override
95+
protected <Request extends ActionRequest, Response extends ActionResponse> void doExecute(
96+
ActionType<Response> action,
97+
Request request,
98+
ActionListener<Response> listener
99+
) {}
100+
}
101+
79102
}

modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/SynonymsAnalysisTests.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -268,7 +268,7 @@ public void testTokenFiltersBypassSynonymAnalysis() throws IOException {
268268
for (String factory : bypassingFactories) {
269269
TokenFilterFactory tff = plugin.getTokenFilters().get(factory).get(idxSettings, null, factory, settings);
270270
TokenizerFactory tok = new KeywordTokenizerFactory(idxSettings, null, "keyword", settings);
271-
SynonymTokenFilterFactory stff = new SynonymTokenFilterFactory(idxSettings, null, "synonym", settings);
271+
SynonymTokenFilterFactory stff = new SynonymTokenFilterFactory(idxSettings, null, "synonym", settings, null, null);
272272
Analyzer analyzer = SynonymTokenFilterFactory.buildSynonymAnalyzer(
273273
tok,
274274
Collections.emptyList(),
@@ -338,7 +338,7 @@ public void testDisallowedTokenFilters() throws IOException {
338338
for (String factory : disallowedFactories) {
339339
TokenFilterFactory tff = plugin.getTokenFilters().get(factory).get(idxSettings, null, factory, settings);
340340
TokenizerFactory tok = new KeywordTokenizerFactory(idxSettings, null, "keyword", settings);
341-
SynonymTokenFilterFactory stff = new SynonymTokenFilterFactory(idxSettings, null, "synonym", settings);
341+
SynonymTokenFilterFactory stff = new SynonymTokenFilterFactory(idxSettings, null, "synonym", settings, null, null);
342342

343343
IllegalArgumentException e = expectThrows(
344344
IllegalArgumentException.class,

0 commit comments

Comments
 (0)