Skip to content

Commit 3de1466

Browse files
committed
add some tests and stuff
1 parent 0a8bdf3 commit 3de1466

File tree

8 files changed

+94
-86
lines changed

8 files changed

+94
-86
lines changed

libs/grok/src/main/java/org/elasticsearch/grok/Grok.java

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,4 +257,32 @@ public Regex getCompiledExpression() {
257257
return compiledExpression;
258258
}
259259

260+
public static String combinePatterns(List<String> patterns) {
261+
return combinePatterns(patterns, null);
262+
}
263+
264+
public static String combinePatterns(List<String> patterns, String traceMatchKey) {
265+
String combinedPattern;
266+
if (patterns.size() > 1) {
267+
combinedPattern = "";
268+
for (int i = 0; i < patterns.size(); i++) {
269+
String pattern = patterns.get(i);
270+
String valueWrap;
271+
if (traceMatchKey != null) {
272+
valueWrap = "(?<" + traceMatchKey + "." + i + ">" + pattern + ")";
273+
} else {
274+
valueWrap = "(?:" + patterns.get(i) + ")";
275+
}
276+
if (combinedPattern.isEmpty()) {
277+
combinedPattern = valueWrap;
278+
} else {
279+
combinedPattern = combinedPattern + "|" + valueWrap;
280+
}
281+
}
282+
} else {
283+
combinedPattern = patterns.getFirst();
284+
}
285+
286+
return combinedPattern;
287+
}
260288
}

libs/grok/src/test/java/org/elasticsearch/grok/GrokTests.java

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -927,6 +927,25 @@ private void testLogCallBack(boolean ecsCompatibility) {
927927
assertThat(message.get(), containsString("regular expression has redundant nested repeat operator"));
928928
}
929929

930+
public void testCombinedPatterns() {
931+
String matchKey = "_ingest._grok_match_index";
932+
String combined;
933+
combined = Grok.combinePatterns(List.of(""), null);
934+
assertThat(combined, equalTo(""));
935+
combined = Grok.combinePatterns(List.of(""), matchKey);
936+
assertThat(combined, equalTo(""));
937+
combined = Grok.combinePatterns(List.of("foo"), null);
938+
assertThat(combined, equalTo("foo"));
939+
combined = Grok.combinePatterns(List.of("foo"), matchKey);
940+
assertThat(combined, equalTo("foo"));
941+
combined = Grok.combinePatterns(List.of("foo", "bar"), null);
942+
assertThat(combined, equalTo("(?:foo)|(?:bar)"));
943+
combined = Grok.combinePatterns(List.of("foo", "bar"));
944+
assertThat(combined, equalTo("(?:foo)|(?:bar)"));
945+
combined = Grok.combinePatterns(List.of("foo", "bar"), matchKey);
946+
assertThat(combined, equalTo("(?<_ingest._grok_match_index.0>foo)|(?<_ingest._grok_match_index.1>bar)"));
947+
}
948+
930949
private void assertGrokedField(String fieldName) {
931950
String line = "foo";
932951
// test both with and without ECS compatibility

modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/GrokProcessor.java

Lines changed: 3 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -53,14 +53,15 @@ public final class GrokProcessor extends AbstractProcessor {
5353
MatcherWatchdog matcherWatchdog
5454
) {
5555
super(tag, description);
56+
String combinedPattern = Grok.combinePatterns(matchPatterns, traceMatch ? PATTERN_MATCH_KEY : null);
5657
this.matchField = matchField;
5758
this.matchPatterns = matchPatterns;
58-
this.grok = new Grok(patternBank, combinePatterns(matchPatterns, traceMatch), matcherWatchdog, logger::debug);
59+
this.grok = new Grok(patternBank, combinedPattern, matcherWatchdog, logger::debug);
5960
this.traceMatch = traceMatch;
6061
this.ignoreMissing = ignoreMissing;
6162
// Joni warnings are only emitted on an attempt to match, and the warning emitted for every call to match which is too verbose
6263
// so here we emit a warning (if there is one) to the logfile at warn level on construction / processor creation.
63-
new Grok(patternBank, combinePatterns(matchPatterns, traceMatch), matcherWatchdog, logger::warn).match("___nomatch___");
64+
new Grok(patternBank, combinedPattern, matcherWatchdog, logger::warn).match("___nomatch___");
6465
}
6566

6667
@Override
@@ -113,31 +114,6 @@ List<String> getMatchPatterns() {
113114
return matchPatterns;
114115
}
115116

116-
static String combinePatterns(List<String> patterns, boolean traceMatch) {
117-
String combinedPattern;
118-
if (patterns.size() > 1) {
119-
combinedPattern = "";
120-
for (int i = 0; i < patterns.size(); i++) {
121-
String pattern = patterns.get(i);
122-
String valueWrap;
123-
if (traceMatch) {
124-
valueWrap = "(?<" + PATTERN_MATCH_KEY + "." + i + ">" + pattern + ")";
125-
} else {
126-
valueWrap = "(?:" + patterns.get(i) + ")";
127-
}
128-
if (combinedPattern.equals("")) {
129-
combinedPattern = valueWrap;
130-
} else {
131-
combinedPattern = combinedPattern + "|" + valueWrap;
132-
}
133-
}
134-
} else {
135-
combinedPattern = patterns.get(0);
136-
}
137-
138-
return combinedPattern;
139-
}
140-
141117
public static final class Factory implements Processor.Factory {
142118

143119
private final MatcherWatchdog matcherWatchdog;

modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/GrokProcessorTests.java

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -296,21 +296,7 @@ public void testTraceWithOnePattern() throws Exception {
296296
assertThat(doc.getFieldValue("_ingest._grok_match_index", String.class), equalTo("0"));
297297
}
298298

299-
public void testCombinedPatterns() {
300-
String combined;
301-
combined = GrokProcessor.combinePatterns(List.of(""), false);
302-
assertThat(combined, equalTo(""));
303-
combined = GrokProcessor.combinePatterns(List.of(""), true);
304-
assertThat(combined, equalTo(""));
305-
combined = GrokProcessor.combinePatterns(List.of("foo"), false);
306-
assertThat(combined, equalTo("foo"));
307-
combined = GrokProcessor.combinePatterns(List.of("foo"), true);
308-
assertThat(combined, equalTo("foo"));
309-
combined = GrokProcessor.combinePatterns(List.of("foo", "bar"), false);
310-
assertThat(combined, equalTo("(?:foo)|(?:bar)"));
311-
combined = GrokProcessor.combinePatterns(List.of("foo", "bar"), true);
312-
assertThat(combined, equalTo("(?<_ingest._grok_match_index.0>foo)|(?<_ingest._grok_match_index.1>bar)"));
313-
}
299+
314300

315301
public void testCombineSamePatternNameAcrossPatterns() throws Exception {
316302
String fieldName = RandomDocumentPicks.randomFieldName(random());

x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,13 @@ emp_no:integer | a:keyword | b:keyword
290290
10005 | null | null | null | null
291291
;
292292

293+
multiPatterns
294+
row text = "XYZ extractme" | grok text "ABC %{WORD:foo}","XYZ %{WORD:bar}" | keep foo, bar;
295+
296+
foo:keyword | bar:keyword
297+
null | extractme
298+
;
299+
293300
overwriteInputName
294301
required_capability: grok_dissect_masking
295302
row text = "123 abc", int = 5 | sort int asc | grok text "%{NUMBER:text:int} %{WORD:description}" | keep text, int, description;

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/LogicalPlanBuilder.java

Lines changed: 24 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -221,31 +221,46 @@ public PlanFactory visitGrokCommand(EsqlBaseParser.GrokCommandContext ctx) {
221221
.map(stringContext -> BytesRefs.toString(visitString(stringContext).fold(FoldContext.small() /* TODO remove me */)))
222222
.toList();
223223

224-
String pattern = Grok.combinePatterns(patterns);
224+
String combinePattern = org.elasticsearch.grok.Grok.combinePatterns(patterns);
225225

226226
Grok.Parser grokParser;
227227
try {
228-
grokParser = Grok.pattern(source, pattern);
228+
grokParser = Grok.pattern(source, combinePattern);
229229
} catch (SyntaxException e) {
230-
throw new ParsingException(source, "Invalid grok pattern [{}]: [{}]", patterns, e.getMessage());
230+
if (patterns.size() == 1) {
231+
throw new ParsingException(source, "Invalid GROK pattern [{}]: [{}]", patterns.getFirst(), e.getMessage());
232+
} else {
233+
throw new ParsingException(source, "Invalid GROK patterns {}: [{}]", patterns, e.getMessage());
234+
}
231235
}
232-
validateGrokPattern(source, grokParser, pattern);
236+
validateGrokPattern(source, grokParser, combinePattern, patterns);
233237
Grok result = new Grok(source(ctx), p, expression(ctx.primaryExpression()), grokParser);
234238
return result;
235239
};
236240
}
237241

238-
private void validateGrokPattern(Source source, Grok.Parser grokParser, String pattern) {
242+
private void validateGrokPattern(Source source, Grok.Parser grokParser, String pattern, List<String> originalPatterns) {
239243
Map<String, DataType> definedAttributes = new HashMap<>();
240244
for (Attribute field : grokParser.extractedFields()) {
241245
String name = field.name();
242246
DataType type = field.dataType();
243247
DataType prev = definedAttributes.put(name, type);
244248
if (prev != null) {
245-
throw new ParsingException(
246-
source,
247-
"Invalid GROK pattern [" + pattern + "]: the attribute [" + name + "] is defined multiple times with different types"
248-
);
249+
if (originalPatterns.size() == 1) {
250+
throw new ParsingException(
251+
source,
252+
"Invalid GROK pattern [{}]: the attribute [{}] is defined multiple times with different types",
253+
originalPatterns.getFirst(),
254+
name
255+
);
256+
} else {
257+
throw new ParsingException(
258+
source,
259+
"Invalid GROK patterns {}: the attribute [{}] is defined multiple times with different types",
260+
originalPatterns,
261+
name
262+
);
263+
}
249264
}
250265
}
251266
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Grok.java

Lines changed: 0 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -71,17 +71,6 @@ public int hashCode() {
7171
}
7272
}
7373

74-
public static Parser pattern(Source source, List<String> pattern) {
75-
String combinedPattern = combinePatterns(pattern);
76-
try {
77-
var builtinPatterns = GrokBuiltinPatterns.get(true);
78-
org.elasticsearch.grok.Grok grok = new org.elasticsearch.grok.Grok(builtinPatterns, combinedPattern, logger::warn);
79-
return new Parser(combinedPattern, grok);
80-
} catch (IllegalArgumentException e) {
81-
throw new ParsingException(source, "Invalid pattern [{}] for grok: {}", combinedPattern, e.getMessage());
82-
}
83-
}
84-
8574
public static Parser pattern(Source source, String pattern) {
8675
try {
8776
var builtinPatterns = GrokBuiltinPatterns.get(true);
@@ -91,29 +80,6 @@ public static Parser pattern(Source source, String pattern) {
9180
throw new ParsingException(source, "Invalid pattern [{}] for grok: {}", pattern, e.getMessage());
9281
}
9382
}
94-
95-
public static String combinePatterns(List<String> patterns) {
96-
String combinedPattern;
97-
if (patterns.size() > 1) {
98-
combinedPattern = "";
99-
for (int i = 0; i < patterns.size(); i++) {
100-
String pattern = patterns.get(i);
101-
String valueWrap;
102-
valueWrap = "(?:" + patterns.get(i) + ")";
103-
104-
if (combinedPattern.equals("")) {
105-
combinedPattern = valueWrap;
106-
} else {
107-
combinedPattern = combinedPattern + "|" + valueWrap;
108-
}
109-
}
110-
} else {
111-
combinedPattern = patterns.get(0);
112-
}
113-
114-
return combinedPattern;
115-
}
116-
11783
private static final Logger logger = LogManager.getLogger(Grok.class);
11884

11985
private final Parser parser;

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1293,7 +1293,18 @@ public void testGrokPattern() {
12931293

12941294
expectError(
12951295
"row a = \"foo\" | GROK a \"(?P<justification>.+)\"",
1296-
"line 1:17: Invalid grok pattern [(?P<justification>.+)]: [undefined group option]"
1296+
"line 1:17: Invalid GROK pattern [(?P<justification>.+)]: [undefined group option]"
1297+
);
1298+
1299+
expectError(
1300+
"row a = \"foo bar\" | GROK a \"%{NUMBER:foo}\", \"%{WORD:foo}\"",
1301+
"line 1:21: Invalid GROK patterns [%{NUMBER:foo}, %{WORD:foo}]:"
1302+
+ " the attribute [foo] is defined multiple times with different types"
1303+
);
1304+
1305+
expectError(
1306+
"row a = \"foo\" | GROK a \"%{WORD:foo}\", \"(?P<justification>.+)\"",
1307+
"line 1:17: Invalid GROK patterns [%{WORD:foo}, (?P<justification>.+)]: [undefined group option]"
12971308
);
12981309
}
12991310

0 commit comments

Comments
 (0)