Skip to content

Commit 7625490

Browse files
kderusso and elasticsearchmachine authored
Refactor ChunkingSettings classes from Inference plugin to XPack core (#136219)
* Refactor ChunkingSettings classes from Inference plugin to XPack core * [CI] Auto commit changes from spotless * Move tests from ServiceUtils to InferenceUtils * Refactor/move chunking settings tests * [CI] Auto commit changes from spotless --------- Co-authored-by: elasticsearchmachine <[email protected]>
1 parent fe44986 commit 7625490

File tree

88 files changed

+1091
-868
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

88 files changed

+1091
-868
lines changed

x-pack/plugin/core/src/main/java/module-info.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,7 @@
237237
exports org.elasticsearch.xpack.core.watcher.watch;
238238
exports org.elasticsearch.xpack.core.watcher;
239239
exports org.elasticsearch.xpack.core.common.chunks;
240+
exports org.elasticsearch.xpack.core.inference.chunking;
240241

241242
provides org.elasticsearch.action.admin.cluster.node.info.ComponentVersionNumber
242243
with

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java

Lines changed: 215 additions & 175 deletions
Large diffs are not rendered by default.
Lines changed: 256 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,256 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
package org.elasticsearch.xpack.core.inference;
9+
10+
import org.elasticsearch.common.ValidationException;
11+
import org.elasticsearch.core.Strings;
12+
13+
import java.util.Arrays;
14+
import java.util.EnumSet;
15+
import java.util.List;
16+
import java.util.Locale;
17+
import java.util.Map;
18+
19+
import static org.elasticsearch.core.Strings.format;
20+
21+
public class InferenceUtils {
22+
23+
/** Private constructor: this class only exposes static helpers and is not meant to be instantiated. */
private InferenceUtils() {}
24+
25+
/**
26+
* Remove the object from the map and cast to the expected type.
27+
* If the object cannot be cast to type and error is added to the
28+
* {@code validationException} parameter
29+
*
30+
* @param sourceMap Map containing fields
31+
* @param key The key of the object to remove
32+
* @param type The expected type of the removed object
33+
* @param validationException If the value is not of type {@code type}
34+
* @return {@code null} if not present else the object cast to type T
35+
* @param <T> The expected type
36+
*/
37+
@SuppressWarnings("unchecked")
38+
public static <T> T removeAsType(Map<String, Object> sourceMap, String key, Class<T> type, ValidationException validationException) {
39+
if (sourceMap == null) {
40+
validationException.addValidationError(Strings.format("Encountered a null input map while parsing field [%s]", key));
41+
return null;
42+
}
43+
44+
Object o = sourceMap.remove(key);
45+
if (o == null) {
46+
return null;
47+
}
48+
49+
if (type.isAssignableFrom(o.getClass())) {
50+
return (T) o;
51+
} else {
52+
validationException.addValidationError(invalidTypeErrorMsg(key, o, type.getSimpleName()));
53+
return null;
54+
}
55+
}
56+
57+
public static String extractOptionalString(
58+
Map<String, Object> map,
59+
String settingName,
60+
String scope,
61+
ValidationException validationException
62+
) {
63+
int initialValidationErrorCount = validationException.validationErrors().size();
64+
String optionalField = removeAsType(map, settingName, String.class, validationException);
65+
66+
if (validationException.validationErrors().size() > initialValidationErrorCount) {
67+
// new validation error occurred
68+
return null;
69+
}
70+
71+
if (optionalField != null && optionalField.isEmpty()) {
72+
validationException.addValidationError(mustBeNonEmptyString(settingName, scope));
73+
}
74+
75+
if (validationException.validationErrors().size() > initialValidationErrorCount) {
76+
return null;
77+
}
78+
79+
return optionalField;
80+
}
81+
82+
public static Integer extractRequiredPositiveInteger(
83+
Map<String, Object> map,
84+
String settingName,
85+
String scope,
86+
ValidationException validationException
87+
) {
88+
int initialValidationErrorCount = validationException.validationErrors().size();
89+
Integer field = InferenceUtils.removeAsType(map, settingName, Integer.class, validationException);
90+
91+
if (validationException.validationErrors().size() > initialValidationErrorCount) {
92+
return null;
93+
}
94+
95+
if (field == null) {
96+
validationException.addValidationError(InferenceUtils.missingSettingErrorMsg(settingName, scope));
97+
} else if (field <= 0) {
98+
validationException.addValidationError(InferenceUtils.mustBeAPositiveIntegerErrorMessage(settingName, scope, field));
99+
}
100+
101+
if (validationException.validationErrors().size() > initialValidationErrorCount) {
102+
return null;
103+
}
104+
105+
return field;
106+
}
107+
108+
public static Integer extractRequiredPositiveIntegerGreaterThanOrEqualToMin(
109+
Map<String, Object> map,
110+
String settingName,
111+
int minValue,
112+
String scope,
113+
ValidationException validationException
114+
) {
115+
Integer field = extractRequiredPositiveInteger(map, settingName, scope, validationException);
116+
117+
if (field != null && field < minValue) {
118+
validationException.addValidationError(
119+
InferenceUtils.mustBeGreaterThanOrEqualNumberErrorMessage(settingName, scope, field, minValue)
120+
);
121+
return null;
122+
}
123+
124+
return field;
125+
}
126+
127+
public static Integer extractRequiredPositiveIntegerLessThanOrEqualToMax(
128+
Map<String, Object> map,
129+
String settingName,
130+
int maxValue,
131+
String scope,
132+
ValidationException validationException
133+
) {
134+
Integer field = extractRequiredPositiveInteger(map, settingName, scope, validationException);
135+
136+
if (field != null && field > maxValue) {
137+
validationException.addValidationError(
138+
InferenceUtils.mustBeLessThanOrEqualNumberErrorMessage(settingName, scope, field, maxValue)
139+
);
140+
}
141+
142+
return field;
143+
}
144+
145+
@SuppressWarnings("unchecked")
146+
public static <T> List<T> extractOptionalList(
147+
Map<String, Object> map,
148+
String settingName,
149+
Class<T> type,
150+
ValidationException validationException
151+
) {
152+
int initialValidationErrorCount = validationException.validationErrors().size();
153+
var optionalField = InferenceUtils.removeAsType(map, settingName, List.class, validationException);
154+
155+
if (validationException.validationErrors().size() > initialValidationErrorCount) {
156+
return null;
157+
}
158+
159+
if (optionalField != null) {
160+
for (Object o : optionalField) {
161+
if (o.getClass().equals(type) == false) {
162+
validationException.addValidationError(InferenceUtils.invalidTypeErrorMsg(settingName, o, "String"));
163+
}
164+
}
165+
}
166+
167+
if (validationException.validationErrors().size() > initialValidationErrorCount) {
168+
return null;
169+
}
170+
171+
return (List<T>) optionalField;
172+
}
173+
174+
public static <E extends Enum<E>> E extractOptionalEnum(
175+
Map<String, Object> map,
176+
String settingName,
177+
String scope,
178+
EnumConstructor<E> constructor,
179+
EnumSet<E> validValues,
180+
ValidationException validationException
181+
) {
182+
var enumString = extractOptionalString(map, settingName, scope, validationException);
183+
if (enumString == null) {
184+
return null;
185+
}
186+
187+
try {
188+
var createdEnum = constructor.apply(enumString);
189+
validateEnumValue(createdEnum, validValues);
190+
191+
return createdEnum;
192+
} catch (IllegalArgumentException e) {
193+
var validValuesAsStrings = validValues.stream().map(value -> value.toString().toLowerCase(Locale.ROOT)).toArray(String[]::new);
194+
validationException.addValidationError(invalidValue(settingName, scope, enumString, validValuesAsStrings));
195+
}
196+
197+
return null;
198+
}
199+
200+
private static <E extends Enum<E>> void validateEnumValue(E enumValue, EnumSet<E> validValues) {
201+
if (validValues.contains(enumValue) == false) {
202+
throw new IllegalArgumentException(Strings.format("Enum value [%s] is not one of the acceptable values", enumValue.toString()));
203+
}
204+
}
205+
206+
public static String mustBeNonEmptyString(String settingName, String scope) {
207+
return Strings.format("[%s] Invalid value empty string. [%s] must be a non-empty string", scope, settingName);
208+
}
209+
210+
public static String invalidValue(String settingName, String scope, String invalidType, String[] requiredValues) {
211+
var copyOfRequiredValues = requiredValues.clone();
212+
Arrays.sort(copyOfRequiredValues);
213+
214+
return Strings.format(
215+
"[%s] Invalid value [%s] received. [%s] must be one of [%s]",
216+
scope,
217+
invalidType,
218+
settingName,
219+
String.join(", ", copyOfRequiredValues)
220+
);
221+
}
222+
223+
public static String invalidTypeErrorMsg(String settingName, Object foundObject, String expectedType) {
224+
return Strings.format(
225+
"field [%s] is not of the expected type. The value [%s] cannot be converted to a [%s]",
226+
settingName,
227+
foundObject,
228+
expectedType
229+
);
230+
}
231+
232+
public static String missingSettingErrorMsg(String settingName, String scope) {
233+
return Strings.format("[%s] does not contain the required setting [%s]", scope, settingName);
234+
}
235+
236+
public static String mustBeGreaterThanOrEqualNumberErrorMessage(String settingName, String scope, double value, double minValue) {
237+
return format("[%s] Invalid value [%s]. [%s] must be a greater than or equal to [%s]", scope, value, settingName, minValue);
238+
}
239+
240+
public static String mustBeLessThanOrEqualNumberErrorMessage(String settingName, String scope, double value, double maxValue) {
241+
return format("[%s] Invalid value [%s]. [%s] must be a less than or equal to [%s]", scope, value, settingName, maxValue);
242+
}
243+
244+
public static String mustBeAPositiveIntegerErrorMessage(String settingName, String scope, int value) {
245+
return format("[%s] Invalid value [%s]. [%s] must be a positive integer", scope, value, settingName);
246+
}
247+
248+
/**
 * Converts the textual form of a setting into a constant of the enum type {@code E}.
 *
 * @param <E> the enum type produced by this constructor
 */
@FunctionalInterface
public interface EnumConstructor<E extends Enum<E>> {
    /**
     * @param name the string to convert
     * @return the enum constant corresponding to {@code name}
     * @throws IllegalArgumentException if {@code name} cannot be converted
     */
    E apply(String name) throws IllegalArgumentException;
}
256+
}
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
* 2.0.
66
*/
77

8-
package org.elasticsearch.xpack.inference.chunking;
8+
package org.elasticsearch.xpack.core.inference.chunking;
99

1010
import org.elasticsearch.inference.ChunkingSettings;
1111
import org.elasticsearch.inference.ChunkingStrategy;
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
* 2.0.
66
*/
77

8-
package org.elasticsearch.xpack.inference.chunking;
8+
package org.elasticsearch.xpack.core.inference.chunking;
99

1010
public enum ChunkingSettingsOptions {
1111
STRATEGY("strategy"),
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
* 2.0.
66
*/
77

8-
package org.elasticsearch.xpack.inference.chunking;
8+
package org.elasticsearch.xpack.core.inference.chunking;
99

1010
import org.elasticsearch.TransportVersion;
1111
import org.elasticsearch.common.Strings;
Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
* 2.0.
66
*/
77

8-
package org.elasticsearch.xpack.inference.chunking;
8+
package org.elasticsearch.xpack.core.inference.chunking;
99

1010
import org.elasticsearch.TransportVersion;
1111
import org.elasticsearch.common.Strings;
@@ -16,7 +16,7 @@
1616
import org.elasticsearch.inference.ChunkingStrategy;
1717
import org.elasticsearch.inference.ModelConfigurations;
1818
import org.elasticsearch.xcontent.XContentBuilder;
19-
import org.elasticsearch.xpack.inference.services.ServiceUtils;
19+
import org.elasticsearch.xpack.core.inference.InferenceUtils;
2020

2121
import java.io.IOException;
2222
import java.util.Arrays;
@@ -81,15 +81,15 @@ public static RecursiveChunkingSettings fromMap(Map<String, Object> map) {
8181
);
8282
}
8383

84-
Integer maxChunkSize = ServiceUtils.extractRequiredPositiveIntegerGreaterThanOrEqualToMin(
84+
Integer maxChunkSize = InferenceUtils.extractRequiredPositiveIntegerGreaterThanOrEqualToMin(
8585
map,
8686
ChunkingSettingsOptions.MAX_CHUNK_SIZE.toString(),
8787
MAX_CHUNK_SIZE_LOWER_LIMIT,
8888
ModelConfigurations.CHUNKING_SETTINGS,
8989
validationException
9090
);
9191

92-
SeparatorGroup separatorGroup = ServiceUtils.extractOptionalEnum(
92+
SeparatorGroup separatorGroup = InferenceUtils.extractOptionalEnum(
9393
map,
9494
ChunkingSettingsOptions.SEPARATOR_GROUP.toString(),
9595
ModelConfigurations.CHUNKING_SETTINGS,
@@ -98,7 +98,7 @@ public static RecursiveChunkingSettings fromMap(Map<String, Object> map) {
9898
validationException
9999
);
100100

101-
List<String> separators = ServiceUtils.extractOptionalList(
101+
List<String> separators = InferenceUtils.extractOptionalList(
102102
map,
103103
ChunkingSettingsOptions.SEPARATORS.toString(),
104104
String.class,
Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
* 2.0.
66
*/
77

8-
package org.elasticsearch.xpack.inference.chunking;
8+
package org.elasticsearch.xpack.core.inference.chunking;
99

1010
import org.elasticsearch.TransportVersion;
1111
import org.elasticsearch.TransportVersions;
@@ -18,7 +18,7 @@
1818
import org.elasticsearch.inference.ChunkingStrategy;
1919
import org.elasticsearch.inference.ModelConfigurations;
2020
import org.elasticsearch.xcontent.XContentBuilder;
21-
import org.elasticsearch.xpack.inference.services.ServiceUtils;
21+
import org.elasticsearch.xpack.core.inference.InferenceUtils;
2222

2323
import java.io.IOException;
2424
import java.util.Arrays;
@@ -57,6 +57,10 @@ public Integer maxChunkSize() {
5757
return maxChunkSize;
5858
}
5959

60+
public int sentenceOverlap() {
61+
return sentenceOverlap;
62+
}
63+
6064
@Override
6165
public void validate() {
6266
ValidationException validationException = new ValidationException();
@@ -100,15 +104,15 @@ public static SentenceBoundaryChunkingSettings fromMap(Map<String, Object> map)
100104
);
101105
}
102106

103-
Integer maxChunkSize = ServiceUtils.extractRequiredPositiveIntegerGreaterThanOrEqualToMin(
107+
Integer maxChunkSize = InferenceUtils.extractRequiredPositiveIntegerGreaterThanOrEqualToMin(
104108
map,
105109
ChunkingSettingsOptions.MAX_CHUNK_SIZE.toString(),
106110
MAX_CHUNK_SIZE_LOWER_LIMIT,
107111
ModelConfigurations.CHUNKING_SETTINGS,
108112
validationException
109113
);
110114

111-
Integer sentenceOverlap = ServiceUtils.removeAsType(
115+
Integer sentenceOverlap = InferenceUtils.removeAsType(
112116
map,
113117
ChunkingSettingsOptions.SENTENCE_OVERLAP.toString(),
114118
Integer.class,
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
* 2.0.
66
*/
77

8-
package org.elasticsearch.xpack.inference.chunking;
8+
package org.elasticsearch.xpack.core.inference.chunking;
99

1010
import java.util.List;
1111
import java.util.Locale;

0 commit comments

Comments
 (0)