Skip to content

Commit e053f21

Browse files
author
Pablo Alcantar Morales
authored
Grok returns a list of matches for repeated pattern names elastic#92092 (elastic#92586)
Grok returns a list of matches for repeated pattern names This change makes the Elasticsearch Grok processor behaves in the same way that Logstash's grok, when handling repeated pattern names, returning a list of matches instead only the first only Closes elastic#92092
1 parent 8fc2d6a commit e053f21

File tree

5 files changed

+52
-24
lines changed

5 files changed

+52
-24
lines changed

docs/changelog/92586.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 92586
2+
summary: "Grok returns a list of matches for repeated pattern names #92092"
3+
area: Ingest Node
4+
type: bug
5+
issues:
6+
- 92092

libs/grok/src/main/java/org/elasticsearch/grok/GrokCaptureExtracter.java

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,34 +20,55 @@
2020
/**
2121
* How to extract matches.
2222
*/
23-
public abstract class GrokCaptureExtracter {
23+
public interface GrokCaptureExtracter {
24+
2425
/**
2526
* Extract {@link Map} results. This implementation of {@link GrokCaptureExtracter}
2627
* is mutable and should be discarded after collecting a single result.
2728
*/
28-
static class MapExtracter extends GrokCaptureExtracter {
29+
class MapExtracter implements GrokCaptureExtracter {
2930
private final Map<String, Object> result;
3031
private final List<GrokCaptureExtracter> fieldExtracters;
3132

33+
@SuppressWarnings("unchecked")
3234
MapExtracter(List<GrokCaptureConfig> captureConfig) {
3335
result = captureConfig.isEmpty() ? emptyMap() : new HashMap<>();
3436
fieldExtracters = new ArrayList<>(captureConfig.size());
3537
for (GrokCaptureConfig config : captureConfig) {
36-
fieldExtracters.add(config.objectExtracter(v -> result.put(config.name(), v)));
38+
fieldExtracters.add(config.objectExtracter(value -> {
39+
var key = config.name();
40+
41+
// Logstash's Grok processor flattens the list of values to a single value in case there's only 1 match,
42+
// so we have to do the same to be compatible.
43+
// e.g.:
44+
// pattern = `%{SINGLEDIGIT:name}(%{SINGLEDIGIT:name})?`
45+
// - GROK(pattern, "1") => { name: 1 }
46+
// - GROK(pattern, "12") => { name: [1, 2] }
47+
if (result.containsKey(key)) {
48+
if (result.get(key)instanceof List<?> values) {
49+
((ArrayList<Object>) values).add(value);
50+
} else {
51+
var values = new ArrayList<>();
52+
values.add(result.get(key));
53+
values.add(value);
54+
result.put(key, values);
55+
}
56+
} else {
57+
result.put(key, value);
58+
}
59+
}));
3760
}
3861
}
3962

4063
@Override
41-
void extract(byte[] utf8Bytes, int offset, Region region) {
42-
for (GrokCaptureExtracter extracter : fieldExtracters) {
43-
extracter.extract(utf8Bytes, offset, region);
44-
}
64+
public void extract(byte[] utf8Bytes, int offset, Region region) {
65+
fieldExtracters.forEach(extracter -> extracter.extract(utf8Bytes, offset, region));
4566
}
4667

4768
Map<String, Object> result() {
4869
return result;
4970
}
5071
}
5172

52-
abstract void extract(byte[] utf8Bytes, int offset, Region region);
73+
void extract(byte[] utf8Bytes, int offset, Region region);
5374
}

libs/grok/src/main/java/org/elasticsearch/grok/GrokCaptureType.java

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
package org.elasticsearch.grok;
1010

1111
import org.elasticsearch.grok.GrokCaptureConfig.NativeExtracterMap;
12-
import org.joni.Region;
1312

1413
import java.nio.charset.StandardCharsets;
1514
import java.util.function.Consumer;
@@ -70,16 +69,12 @@ static GrokCaptureType fromString(String str) {
7069
}
7170

7271
protected final GrokCaptureExtracter rawExtracter(int[] backRefs, Consumer<? super String> emit) {
73-
return new GrokCaptureExtracter() {
74-
@Override
75-
void extract(byte[] utf8Bytes, int offset, Region region) {
76-
for (int number : backRefs) {
77-
if (region.beg[number] >= 0) {
78-
int matchOffset = offset + region.beg[number];
79-
int matchLength = region.end[number] - region.beg[number];
80-
emit.accept(new String(utf8Bytes, matchOffset, matchLength, StandardCharsets.UTF_8));
81-
return; // Capture only the first value.
82-
}
72+
return (utf8Bytes, offset, region) -> {
73+
for (int number : backRefs) {
74+
if (region.beg[number] >= 0) {
75+
int matchOffset = offset + region.beg[number];
76+
int matchLength = region.end[number] - region.beg[number];
77+
emit.accept(new String(utf8Bytes, matchOffset, matchLength, StandardCharsets.UTF_8));
8378
}
8479
}
8580
};

libs/grok/src/test/java/org/elasticsearch/grok/GrokTests.java

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -801,9 +801,15 @@ public void testMultipleNamedCapturesWithSameName() {
801801
Grok grok = new Grok(bank, "%{SINGLEDIGIT:num}%{SINGLEDIGIT:num}", logger::warn);
802802
assertCaptureConfig(grok, Map.of("num", STRING));
803803

804-
Map<String, Object> expected = new HashMap<>();
805-
expected.put("num", "1");
806-
assertThat(grok.captures("12"), equalTo(expected));
804+
assertThat(grok.captures("12"), equalTo(Map.of("num", List.of("1", "2"))));
805+
806+
grok = new Grok(bank, "%{SINGLEDIGIT:num:int}(%{SINGLEDIGIT:num:int})?", logger::warn);
807+
assertCaptureConfig(grok, Map.of("num", INTEGER));
808+
assertEquals(grok.captures("1"), Map.of("num", 1));
809+
assertEquals(grok.captures("1a"), Map.of("num", 1));
810+
assertEquals(grok.captures("a1"), Map.of("num", 1));
811+
assertEquals(grok.captures("12"), Map.of("num", List.of(1, 2)));
812+
assertEquals(grok.captures("123"), Map.of("num", List.of(1, 2)));
807813
}
808814

809815
public void testExponentialExpressions() {

modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/GrokProcessorTests.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -333,7 +333,7 @@ public void testCombineSamePatternNameAcrossPatterns() throws Exception {
333333
assertThat(doc.getFieldValue("second", String.class), equalTo("3"));
334334
}
335335

336-
public void testFirstWinNamedCapture() throws Exception {
336+
public void testShouldCaptureAllSameNameGroupsAsList() throws Exception {
337337
String fieldName = RandomDocumentPicks.randomFieldName(random());
338338
IngestDocument doc = RandomDocumentPicks.randomIngestDocument(random(), new HashMap<>());
339339
doc.setFieldValue(fieldName, "12");
@@ -350,7 +350,7 @@ public void testFirstWinNamedCapture() throws Exception {
350350
MatcherWatchdog.noop()
351351
);
352352
processor.execute(doc);
353-
assertThat(doc.getFieldValue("first", String.class), equalTo("1"));
353+
assertEquals(doc.getFieldValue("first", List.class), List.of("1", "2"));
354354
}
355355

356356
public void testUnmatchedNamesNotIncludedInDocument() throws Exception {

0 commit comments

Comments
 (0)