77
88package org .elasticsearch .xpack .inference .common ;
99
10+ import org .elasticsearch .common .Strings ;
11+
1012import java .util .ArrayList ;
1113import java .util .List ;
1214import java .util .Map ;
13- import java .util .regex .Matcher ;
1415import java .util .regex .Pattern ;
1516
17+ /**
18+ * Extracts fields from a {@link Map}.
19+ *
20+ * Uses a subset of the JSONPath schema to extract fields from a map.
21+ * For more information <a href="https://en.wikipedia.org/wiki/JSONPath">see here</a>.
22+ *
23+ * This implementation differs from JSONPath in how it handles lists: JSONPath flattens inner lists, whereas this
24+ * implementation preserves them.
25+ *
26+ * Examples of the schema:
27+ *
28+ * <pre>
29+ * {@code
30+ * $.field1.array[*].field2
31+ * $.field1.field2
32+ * }
33+ * </pre>
34+ *
35+ * Given the map
36+ * <pre>
37+ * {@code
38+ * {
39+ * "request_id": "B4AB89C8-B135-xxxx-A6F8-2BAB801A2CE4",
40+ * "latency": 38,
41+ * "usage": {
42+ * "token_count": 3072
43+ * },
44+ * "result": {
45+ * "embeddings": [
46+ * {
47+ * "index": 0,
48+ * "embedding": [
49+ * 2,
50+ * 4
51+ * ]
52+ * },
53+ * {
54+ * "index": 1,
55+ * "embedding": [
56+ * 1,
57+ * 2
58+ * ]
59+ * }
60+ * ]
61+ * }
62+ * }
63+ * }
64+ * </pre>
65+ *
66+ * <pre>
67+ * {@code
68+ * var embeddings = MapPathExtractor.extract(map, "$.result.embeddings[*].embedding");
69+ * }
70+ * </pre>
71+ *
72+ * Will result in:
73+ *
74+ * <pre>
75+ * {@code
76+ * [
77+ * [2, 4],
78+ * [1, 2]
79+ * ]
80+ * }
81+ * </pre>
82+ *
83+ * This implementation differs from JSONPath when handling a list of maps. JSONPath will flatten the result and return a single array.
84+ * This implementation preserves each nested list while gathering the results.
85+ *
86+ * For example
87+ *
88+ * <pre>
89+ * {@code
90+ * {
91+ * "result": [
92+ * {
93+ * "key": [
94+ * {
95+ * "a": 1.1
96+ * },
97+ * {
98+ * "a": 2.2
99+ * }
100+ * ]
101+ * },
102+ * {
103+ * "key": [
104+ * {
105+ * "a": 3.3
106+ * },
107+ * {
108+ * "a": 4.4
109+ * }
110+ * ]
111+ * }
112+ * ]
113+ * }
114+ * }
115+ * {@code var embeddings = MapPathExtractor.extract(map, "$.result[*].key[*].a");}
116+ *
117+ * JSONPath: {@code [1.1, 2.2, 3.3, 4.4]}
118+ * This implementation: {@code [[1.1, 2.2], [3.3, 4.4]]}
119+ * </pre>
120+ */
16121public class MapPathExtractor {
17122
18- private static final String DOLLAR_DOT = "$." ;
19123 private static final String DOLLAR = "$" ;
20124
125+ // default for testing
126+ static final Pattern dotFieldPattern = Pattern .compile ("^\\ .([^.\\ []+)(.*)" );
127+ static final Pattern arrayWildcardPattern = Pattern .compile ("^\\ [\\ *\\ ](.*)" );
128+
21129 public static Object extract (Map <String , Object > data , String path ) {
22130 if (data == null || data .isEmpty () || path == null || path .trim ().isEmpty ()) {
23131 return null ;
@@ -26,67 +134,75 @@ public static Object extract(Map<String, Object> data, String path) {
26134 var cleanedPath = path .trim ();
27135
28136 // Remove the prefix if it exists
29- if (cleanedPath .startsWith (DOLLAR_DOT )) {
30- cleanedPath = cleanedPath .substring (DOLLAR_DOT .length ());
31- } else if (cleanedPath .startsWith (DOLLAR )) {
137+ if (cleanedPath .startsWith (DOLLAR )) {
32138 cleanedPath = cleanedPath .substring (DOLLAR .length ());
33139 }
34140
35141 return navigate (data , cleanedPath );
36142 }
37143
38144 private static Object navigate (Object current , String remainingPath ) {
39- if (remainingPath == null || remainingPath .isEmpty ()) {
145+ if (current == null || remainingPath == null || remainingPath .isEmpty ()) {
40146 return current ;
41147 }
42148
43- var dotFieldPattern = Pattern .compile ("^\\ .([^.\\ []+)(.*)" );
44- // var arrayIndexPattern = Pattern.compile("^\\[(\\d+)\\](.*)");
45- var arrayWildcardPattern = Pattern .compile ("^\\ [\\ *\\ ](.*)" );
46-
47- Matcher dotFieldMatcher = dotFieldPattern .matcher (remainingPath );
48- // Matcher arrayIndexMatcher = arrayIndexPattern.matcher(remainingPath);
49- Matcher arrayWildcardMatcher = arrayWildcardPattern .matcher (remainingPath );
149+ var dotFieldMatcher = dotFieldPattern .matcher (remainingPath );
150+ var arrayWildcardMatcher = arrayWildcardPattern .matcher (remainingPath );
50151
51152 if (dotFieldMatcher .matches ()) {
52153 String field = dotFieldMatcher .group (1 );
53- String nextPath = dotFieldMatcher .group (2 );
54- if (current instanceof Map ) {
55- return navigate (((Map <?, ?>) current ).get (field ), nextPath );
154+ if (field == null || field .isEmpty ()) {
155+ throw new IllegalArgumentException (
156+ Strings .format (
157+ "Unable to extract field from remaining path [%s]. Fields must be delimited by a dot character." ,
158+ remainingPath
159+ )
160+ );
56161 }
57- } else if (arrayIndexMatcher .matches ()) {
58- String indexStr = arrayIndexMatcher .group (1 );
59- String nextPath = arrayIndexMatcher .group (2 );
60- try {
61- int index = Integer .parseInt (indexStr );
62- if (current instanceof List ) {
63- List <?> list = (List <?>) current ;
64- if (index >= 0 && index < list .size ()) {
65- return navigate (list .get (index ), nextPath );
66- }
162+
163+ String nextPath = dotFieldMatcher .group (2 );
164+ if (current instanceof Map <?, ?> currentMap ) {
165+ var fieldFromMap = currentMap .get (field );
166+ if (fieldFromMap == null ) {
167+ throw new IllegalArgumentException (Strings .format ("Unable to find field [%s] in map" , field ));
67168 }
68- } catch (NumberFormatException e ) {
69- // Ignore invalid index
169+
170+ return navigate (currentMap .get (field ), nextPath );
171+ } else {
172+ throw new IllegalArgumentException (
173+ Strings .format (
174+ "Current path [%s] matched the dot field pattern but the current object is not a map, "
175+ + "found invalid type [%s] instead." ,
176+ remainingPath ,
177+ current .getClass ().getSimpleName ()
178+ )
179+ );
70180 }
71181 } else if (arrayWildcardMatcher .matches ()) {
72182 String nextPath = arrayWildcardMatcher .group (1 );
73- if (current instanceof List ) {
74- List <?> list = (List <?>) current ;
183+ if (current instanceof List <?> list ) {
75184 List <Object > results = new ArrayList <>();
185+
76186 for (Object item : list ) {
77187 Object result = navigate (item , nextPath );
78188 if (result != null ) {
79- if (result instanceof List ) {
80- results .addAll ((List <?>) result );
81- } else {
82- results .add (result );
83- }
189+ results .add (result );
84190 }
85191 }
86- return results .isEmpty () ? null : results ;
192+
193+ return results ;
194+ } else {
195+ throw new IllegalArgumentException (
196+ Strings .format (
197+ "Current path [%s] matched the array field pattern but the current object is not a list, "
198+ + "found invalid type [%s] instead." ,
199+ remainingPath ,
200+ current .getClass ().getSimpleName ()
201+ )
202+ );
87203 }
88204 }
89205
90- return null ; // Path not found or invalid
206+ throw new IllegalArgumentException ( Strings . format ( "Invalid path received [%s], unable to extract a field name." , remainingPath ));
91207 }
92208}
0 commit comments