Skip to content

Commit e265931

Browse files
Finishing tests
1 parent a5718ec commit e265931

File tree

2 files changed

+335
-37
lines changed

2 files changed

+335
-37
lines changed

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/common/MapPathExtractor.java

Lines changed: 153 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -7,17 +7,125 @@
77

88
package org.elasticsearch.xpack.inference.common;
99

10+
import org.elasticsearch.common.Strings;
11+
1012
import java.util.ArrayList;
1113
import java.util.List;
1214
import java.util.Map;
13-
import java.util.regex.Matcher;
1415
import java.util.regex.Pattern;
1516

17+
/**
18+
* Extracts fields from a {@link Map}.
19+
*
20+
* Uses a subset of the JSONPath syntax to extract fields from a map.
21+
* For more information, see the <a href="https://en.wikipedia.org/wiki/JSONPath">JSONPath article on Wikipedia</a>.
22+
*
23+
* This implementation differs in how it handles lists in that JSONPath will flatten inner lists. This implementation
24+
* preserves inner lists.
25+
*
26+
* Examples of supported paths:
27+
*
28+
* <pre>
29+
* {@code
30+
* $.field1.array[*].field2
31+
* $.field1.field2
32+
* }
33+
* </pre>
34+
*
35+
* Given the map
36+
* <pre>
37+
* {@code
38+
* {
39+
* "request_id": "B4AB89C8-B135-xxxx-A6F8-2BAB801A2CE4",
40+
* "latency": 38,
41+
* "usage": {
42+
* "token_count": 3072
43+
* },
44+
* "result": {
45+
* "embeddings": [
46+
* {
47+
* "index": 0,
48+
* "embedding": [
49+
* 2,
50+
* 4
51+
* ]
52+
* },
53+
* {
54+
* "index": 1,
55+
* "embedding": [
56+
* 1,
57+
* 2
58+
* ]
59+
* }
60+
* ]
61+
* }
62+
* }
63+
* }
64+
* </pre>
65+
*
66+
* <pre>
67+
* {@code
68+
* var embeddings = MapPathExtractor.extract(map, "$.result.embeddings[*].embedding");
69+
* }
70+
* </pre>
71+
*
72+
* Will result in:
73+
*
74+
* <pre>
75+
* {@code
76+
* [
77+
* [2, 4],
78+
* [1, 2]
79+
* ]
80+
* }
81+
* </pre>
82+
*
83+
* This implementation differs from JSONPath when handling a list of maps. JSONPath will flatten the result and return a single array.
84+
* Whereas this implementation will preserve each nested list while gathering the results.
85+
*
86+
* For example:
87+
*
88+
* <pre>
89+
* {@code
90+
* {
91+
* "result": [
92+
* {
93+
* "key": [
94+
* {
95+
* "a": 1.1
96+
* },
97+
* {
98+
* "a": 2.2
99+
* }
100+
* ]
101+
* },
102+
* {
103+
* "key": [
104+
* {
105+
* "a": 3.3
106+
* },
107+
* {
108+
* "a": 4.4
109+
* }
110+
* ]
111+
* }
112+
* ]
113+
* }
114+
* }
115+
* {@code var embeddings = MapPathExtractor.extract(map, "$.result[*].key[*].a");}
116+
*
117+
* JSONPath: {@code [1.1, 2.2, 3.3, 4.4]}
118+
* This implementation: {@code [[1.1, 2.2], [3.3, 4.4]]}
119+
* </pre>
120+
*/
16121
public class MapPathExtractor {
17122

18-
private static final String DOLLAR_DOT = "$.";
19123
private static final String DOLLAR = "$";
20124

125+
// package-private (default access) for testing
126+
static final Pattern dotFieldPattern = Pattern.compile("^\\.([^.\\[]+)(.*)");
127+
static final Pattern arrayWildcardPattern = Pattern.compile("^\\[\\*\\](.*)");
128+
21129
public static Object extract(Map<String, Object> data, String path) {
22130
if (data == null || data.isEmpty() || path == null || path.trim().isEmpty()) {
23131
return null;
@@ -26,67 +134,75 @@ public static Object extract(Map<String, Object> data, String path) {
26134
var cleanedPath = path.trim();
27135

28136
// Remove the prefix if it exists
29-
if (cleanedPath.startsWith(DOLLAR_DOT)) {
30-
cleanedPath = cleanedPath.substring(DOLLAR_DOT.length());
31-
} else if (cleanedPath.startsWith(DOLLAR)) {
137+
if (cleanedPath.startsWith(DOLLAR)) {
32138
cleanedPath = cleanedPath.substring(DOLLAR.length());
33139
}
34140

35141
return navigate(data, cleanedPath);
36142
}
37143

38144
private static Object navigate(Object current, String remainingPath) {
39-
if (remainingPath == null || remainingPath.isEmpty()) {
145+
if (current == null || remainingPath == null || remainingPath.isEmpty()) {
40146
return current;
41147
}
42148

43-
var dotFieldPattern = Pattern.compile("^\\.([^.\\[]+)(.*)");
44-
// var arrayIndexPattern = Pattern.compile("^\\[(\\d+)\\](.*)");
45-
var arrayWildcardPattern = Pattern.compile("^\\[\\*\\](.*)");
46-
47-
Matcher dotFieldMatcher = dotFieldPattern.matcher(remainingPath);
48-
// Matcher arrayIndexMatcher = arrayIndexPattern.matcher(remainingPath);
49-
Matcher arrayWildcardMatcher = arrayWildcardPattern.matcher(remainingPath);
149+
var dotFieldMatcher = dotFieldPattern.matcher(remainingPath);
150+
var arrayWildcardMatcher = arrayWildcardPattern.matcher(remainingPath);
50151

51152
if (dotFieldMatcher.matches()) {
52153
String field = dotFieldMatcher.group(1);
53-
String nextPath = dotFieldMatcher.group(2);
54-
if (current instanceof Map) {
55-
return navigate(((Map<?, ?>) current).get(field), nextPath);
154+
if (field == null || field.isEmpty()) {
155+
throw new IllegalArgumentException(
156+
Strings.format(
157+
"Unable to extract field from remaining path [%s]. Fields must be delimited by a dot character.",
158+
remainingPath
159+
)
160+
);
56161
}
57-
} else if (arrayIndexMatcher.matches()) {
58-
String indexStr = arrayIndexMatcher.group(1);
59-
String nextPath = arrayIndexMatcher.group(2);
60-
try {
61-
int index = Integer.parseInt(indexStr);
62-
if (current instanceof List) {
63-
List<?> list = (List<?>) current;
64-
if (index >= 0 && index < list.size()) {
65-
return navigate(list.get(index), nextPath);
66-
}
162+
163+
String nextPath = dotFieldMatcher.group(2);
164+
if (current instanceof Map<?, ?> currentMap) {
165+
var fieldFromMap = currentMap.get(field);
166+
if (fieldFromMap == null) {
167+
throw new IllegalArgumentException(Strings.format("Unable to find field [%s] in map", field));
67168
}
68-
} catch (NumberFormatException e) {
69-
// Ignore invalid index
169+
170+
return navigate(currentMap.get(field), nextPath);
171+
} else {
172+
throw new IllegalArgumentException(
173+
Strings.format(
174+
"Current path [%s] matched the dot field pattern but the current object is not a map, "
175+
+ "found invalid type [%s] instead.",
176+
remainingPath,
177+
current.getClass().getSimpleName()
178+
)
179+
);
70180
}
71181
} else if (arrayWildcardMatcher.matches()) {
72182
String nextPath = arrayWildcardMatcher.group(1);
73-
if (current instanceof List) {
74-
List<?> list = (List<?>) current;
183+
if (current instanceof List<?> list) {
75184
List<Object> results = new ArrayList<>();
185+
76186
for (Object item : list) {
77187
Object result = navigate(item, nextPath);
78188
if (result != null) {
79-
if (result instanceof List) {
80-
results.addAll((List<?>) result);
81-
} else {
82-
results.add(result);
83-
}
189+
results.add(result);
84190
}
85191
}
86-
return results.isEmpty() ? null : results;
192+
193+
return results;
194+
} else {
195+
throw new IllegalArgumentException(
196+
Strings.format(
197+
"Current path [%s] matched the array field pattern but the current object is not a list, "
198+
+ "found invalid type [%s] instead.",
199+
remainingPath,
200+
current.getClass().getSimpleName()
201+
)
202+
);
87203
}
88204
}
89205

90-
return null; // Path not found or invalid
206+
throw new IllegalArgumentException(Strings.format("Invalid path received [%s], unable to extract a field name.", remainingPath));
91207
}
92208
}

0 commit comments

Comments
 (0)