Skip to content

Commit b51951f

Browse files
Pablo Alcantar Morales and joegallo authored
New GrokPatternBank data structure (#95269)
This refactor introduces a new data structure called `PatternBank`, an abstraction over the old `Map<String, String>` that was used all over the place. It provides handy methods to extend a pattern bank with new patterns and centralizes the validation of pattern banks in one place. Thanks to this, the code that creates Grok pattern banks is no longer duplicated. --------- Co-authored-by: Joe Gallo <[email protected]>
1 parent e560b81 commit b51951f

File tree

12 files changed

+331
-258
lines changed

12 files changed

+331
-258
lines changed

libs/grok/src/main/java/org/elasticsearch/grok/Grok.java

Lines changed: 6 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -51,38 +51,34 @@ public final class Grok {
5151

5252
private static final int MAX_TO_REGEX_ITERATIONS = 100_000; // sanity limit
5353

54-
private final Map<String, String> patternBank;
5554
private final boolean namedCaptures;
5655
private final Regex compiledExpression;
5756
private final MatcherWatchdog matcherWatchdog;
5857
private final List<GrokCaptureConfig> captureConfig;
5958

60-
public Grok(Map<String, String> patternBank, String grokPattern, Consumer<String> logCallBack) {
59+
public Grok(PatternBank patternBank, String grokPattern, Consumer<String> logCallBack) {
6160
this(patternBank, grokPattern, true, MatcherWatchdog.noop(), logCallBack);
6261
}
6362

64-
public Grok(Map<String, String> patternBank, String grokPattern, MatcherWatchdog matcherWatchdog, Consumer<String> logCallBack) {
63+
public Grok(PatternBank patternBank, String grokPattern, MatcherWatchdog matcherWatchdog, Consumer<String> logCallBack) {
6564
this(patternBank, grokPattern, true, matcherWatchdog, logCallBack);
6665
}
6766

68-
Grok(Map<String, String> patternBank, String grokPattern, boolean namedCaptures, Consumer<String> logCallBack) {
67+
Grok(PatternBank patternBank, String grokPattern, boolean namedCaptures, Consumer<String> logCallBack) {
6968
this(patternBank, grokPattern, namedCaptures, MatcherWatchdog.noop(), logCallBack);
7069
}
7170

7271
private Grok(
73-
Map<String, String> patternBank,
72+
PatternBank patternBank,
7473
String grokPattern,
7574
boolean namedCaptures,
7675
MatcherWatchdog matcherWatchdog,
7776
Consumer<String> logCallBack
7877
) {
79-
this.patternBank = patternBank;
8078
this.namedCaptures = namedCaptures;
8179
this.matcherWatchdog = matcherWatchdog;
8280

83-
forbidCircularReferences();
84-
85-
String expression = toRegex(grokPattern);
81+
String expression = toRegex(patternBank, grokPattern);
8682
byte[] expressionBytes = expression.getBytes(StandardCharsets.UTF_8);
8783
this.compiledExpression = new Regex(
8884
expressionBytes,
@@ -100,78 +96,6 @@ private Grok(
10096
this.captureConfig = List.copyOf(grokCaptureConfigs);
10197
}
10298

103-
/**
104-
* Checks whether patterns reference each other in a circular manner and if so fail with an exception
105-
*
106-
* In a pattern, anything between <code>%{</code> and <code>}</code> or <code>:</code> is considered
107-
* a reference to another named pattern. This method will navigate to all these named patterns and
108-
* check for a circular reference.
109-
*/
110-
private void forbidCircularReferences() {
111-
112-
// first ensure that the pattern bank contains no simple circular references (i.e., any pattern
113-
// containing an immediate reference to itself) as those can cause the remainder of this algorithm
114-
// to recurse infinitely
115-
for (Map.Entry<String, String> entry : patternBank.entrySet()) {
116-
if (patternReferencesItself(entry.getValue(), entry.getKey())) {
117-
throw new IllegalArgumentException("circular reference in pattern [" + entry.getKey() + "][" + entry.getValue() + "]");
118-
}
119-
}
120-
121-
// next, recursively check any other pattern names referenced in each pattern
122-
for (Map.Entry<String, String> entry : patternBank.entrySet()) {
123-
String name = entry.getKey();
124-
String pattern = entry.getValue();
125-
innerForbidCircularReferences(name, new ArrayList<>(), pattern);
126-
}
127-
}
128-
129-
private void innerForbidCircularReferences(String patternName, List<String> path, String pattern) {
130-
if (patternReferencesItself(pattern, patternName)) {
131-
String message;
132-
if (path.isEmpty()) {
133-
message = "circular reference in pattern [" + patternName + "][" + pattern + "]";
134-
} else {
135-
message = "circular reference in pattern ["
136-
+ path.remove(path.size() - 1)
137-
+ "]["
138-
+ pattern
139-
+ "] back to pattern ["
140-
+ patternName
141-
+ "]";
142-
// add rest of the path:
143-
if (path.isEmpty() == false) {
144-
message += " via patterns [" + String.join("=>", path) + "]";
145-
}
146-
}
147-
throw new IllegalArgumentException(message);
148-
}
149-
150-
// next check any other pattern names found in the pattern
151-
for (int i = pattern.indexOf("%{"); i != -1; i = pattern.indexOf("%{", i + 1)) {
152-
int begin = i + 2;
153-
int bracketIndex = pattern.indexOf('}', begin);
154-
int columnIndex = pattern.indexOf(':', begin);
155-
int end;
156-
if (bracketIndex != -1 && columnIndex == -1) {
157-
end = bracketIndex;
158-
} else if (columnIndex != -1 && bracketIndex == -1) {
159-
end = columnIndex;
160-
} else if (bracketIndex != -1 && columnIndex != -1) {
161-
end = Math.min(bracketIndex, columnIndex);
162-
} else {
163-
throw new IllegalArgumentException("pattern [" + pattern + "] has circular references to other pattern definitions");
164-
}
165-
String otherPatternName = pattern.substring(begin, end);
166-
path.add(otherPatternName);
167-
innerForbidCircularReferences(patternName, path, patternBank.get(otherPatternName));
168-
}
169-
}
170-
171-
private static boolean patternReferencesItself(String pattern, String patternName) {
172-
return pattern.contains("%{" + patternName + "}") || pattern.contains("%{" + patternName + ":");
173-
}
174-
17599
private String groupMatch(String name, Region region, String pattern) {
176100
int number = GROK_PATTERN_REGEX.nameToBackrefNumber(
177101
name.getBytes(StandardCharsets.UTF_8),
@@ -192,7 +116,7 @@ private String groupMatch(String name, Region region, String pattern) {
192116
*
193117
* @return named regex expression
194118
*/
195-
protected String toRegex(String grokPattern) {
119+
protected String toRegex(PatternBank patternBank, String grokPattern) {
196120
StringBuilder res = new StringBuilder();
197121
for (int i = 0; i < MAX_TO_REGEX_ITERATIONS; i++) {
198122
byte[] grokPatternBytes = grokPattern.getBytes(StandardCharsets.UTF_8);

libs/grok/src/main/java/org/elasticsearch/grok/GrokBuiltinPatterns.java

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
import java.io.InputStream;
1414
import java.io.InputStreamReader;
1515
import java.nio.charset.StandardCharsets;
16-
import java.util.Collections;
1716
import java.util.LinkedHashMap;
1817
import java.util.List;
1918
import java.util.Map;
@@ -27,21 +26,21 @@ public class GrokBuiltinPatterns {
2726
/**
2827
* Patterns built in to the grok library.
2928
*/
30-
private static Map<String, String> LEGACY_PATTERNS;
31-
private static Map<String, String> ECS_V1_PATTERNS;
29+
private static PatternBank LEGACY_PATTERNS;
30+
private static PatternBank ECS_V1_PATTERNS;
3231

33-
public static synchronized Map<String, String> legacyPatterns() {
32+
public static synchronized PatternBank legacyPatterns() {
3433
return get(false);
3534
}
3635

37-
public static synchronized Map<String, String> ecsV1Patterns() {
36+
public static synchronized PatternBank ecsV1Patterns() {
3837
return get(true);
3938
}
4039

4140
/**
4241
* Load built-in patterns.
4342
*/
44-
public static synchronized Map<String, String> get(boolean ecsCompatibility) {
43+
public static synchronized PatternBank get(boolean ecsCompatibility) {
4544
if (ecsCompatibility) {
4645
if (ECS_V1_PATTERNS == null) {
4746
ECS_V1_PATTERNS = loadEcsPatterns();
@@ -55,7 +54,7 @@ public static synchronized Map<String, String> get(boolean ecsCompatibility) {
5554
}
5655
}
5756

58-
public static Map<String, String> get(String ecsCompatibility) {
57+
public static PatternBank get(String ecsCompatibility) {
5958
if (isValidEcsCompatibilityMode(ecsCompatibility)) {
6059
return get(ECS_COMPATIBILITY_V1.equals(ecsCompatibility));
6160
} else {
@@ -67,7 +66,7 @@ public static boolean isValidEcsCompatibilityMode(String ecsCompatibility) {
6766
return ECS_COMPATIBILITY_MODES.contains(ecsCompatibility);
6867
}
6968

70-
private static Map<String, String> loadLegacyPatterns() {
69+
private static PatternBank loadLegacyPatterns() {
7170
var patternNames = List.of(
7271
"aws",
7372
"bacula",
@@ -94,7 +93,7 @@ private static Map<String, String> loadLegacyPatterns() {
9493
return loadPatternsFromDirectory(patternNames, "/patterns/legacy/");
9594
}
9695

97-
private static Map<String, String> loadEcsPatterns() {
96+
private static PatternBank loadEcsPatterns() {
9897
var patternNames = List.of(
9998
"aws",
10099
"bacula",
@@ -122,7 +121,7 @@ private static Map<String, String> loadEcsPatterns() {
122121
return loadPatternsFromDirectory(patternNames, "/patterns/ecs-v1/");
123122
}
124123

125-
private static Map<String, String> loadPatternsFromDirectory(List<String> patternNames, String directory) {
124+
private static PatternBank loadPatternsFromDirectory(List<String> patternNames, String directory) {
126125
Map<String, String> builtinPatterns = new LinkedHashMap<>();
127126
for (String pattern : patternNames) {
128127
try {
@@ -133,7 +132,7 @@ private static Map<String, String> loadPatternsFromDirectory(List<String> patter
133132
throw new RuntimeException("failed to load built-in patterns", e);
134133
}
135134
}
136-
return Collections.unmodifiableMap(builtinPatterns);
135+
return new PatternBank(builtinPatterns);
137136
}
138137

139138
private static void loadPatternsFromFile(Map<String, String> patternBank, InputStream inputStream) throws IOException {
Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0 and the Server Side Public License, v 1; you may not use this file except
5+
* in compliance with, at your election, the Elastic License 2.0 or the Server
6+
* Side Public License, v 1.
7+
*/
8+
9+
package org.elasticsearch.grok;
10+
11+
import java.util.ArrayList;
12+
import java.util.Collections;
13+
import java.util.LinkedHashMap;
14+
import java.util.List;
15+
import java.util.Map;
16+
import java.util.Objects;
17+
18+
/**
 * An immutable, validated collection of named grok patterns.
 * <p>
 * A bank maps a pattern name to its definition. A definition may reference other patterns of the same bank
 * using the <code>%{NAME}</code> or <code>%{NAME:field}</code> syntax. Every bank is validated on construction:
 * all referenced patterns must exist in the bank and no pattern may (directly or transitively) reference itself.
 */
public class PatternBank {

    /** A pattern bank that contains no patterns. */
    public static final PatternBank EMPTY = new PatternBank(Map.of());

    private final Map<String, String> bank;

    /**
     * Creates a pattern bank from the given name-to-definition map.
     *
     * @param bank the patterns of the bank, must not be null
     * @throws NullPointerException     if {@code bank} is null
     * @throws IllegalArgumentException if a pattern has an invalid reference syntax, references a pattern that is
     *                                  not part of the bank, or takes part in a circular reference
     */
    public PatternBank(Map<String, String> bank) {
        Objects.requireNonNull(bank, "bank must not be null");

        // take the defensive copy first and validate the copy, so that what was validated is exactly what is stored;
        // a LinkedHashMap maintains the iteration order of the passed-in bank (assuming there was a meaningful order)
        Map<String, String> copy = new LinkedHashMap<>(bank);
        forbidCircularReferences(copy);
        this.bank = Collections.unmodifiableMap(copy);
    }

    /**
     * Looks up the definition of a pattern.
     *
     * @param patternName the name of the pattern
     * @return the pattern definition, or null if the bank has no pattern with that name
     */
    public String get(String patternName) {
        return bank.get(patternName);
    }

    /**
     * @return an unmodifiable view of all the patterns of this bank, keyed by pattern name
     */
    public Map<String, String> bank() {
        return bank;
    }

    /**
     * Extends a pattern bank with extra patterns, returning a new pattern bank.
     * <p>
     * The returned bank will be the same reference as the original pattern bank if the extra patterns map is null or empty.
     * Extra patterns replace patterns of this bank that have the same name.
     *
     * @param extraPatterns the patterns to extend this bank with (may be empty or null)
     * @return the extended pattern bank
     * @throws IllegalArgumentException if the extended bank is not valid, see {@link #PatternBank(Map)}
     */
    public PatternBank extendWith(Map<String, String> extraPatterns) {
        if (extraPatterns == null || extraPatterns.isEmpty()) {
            return this;
        }

        Map<String, String> extendedBank = new LinkedHashMap<>(bank);
        extendedBank.putAll(extraPatterns);
        return new PatternBank(extendedBank);
    }

    /**
     * Checks whether patterns reference each other in a circular manner and if so fail with an exception.
     * <p>
     * In a pattern, anything between <code>%{</code> and <code>}</code> or <code>:</code> is considered
     * a reference to another named pattern. This method will navigate to all these named patterns and
     * check for a circular reference.
     *
     * @param bank the patterns to validate, keyed by pattern name
     * @throws IllegalArgumentException if a circular reference is found, a referenced pattern does not exist,
     *                                  or a reference is not terminated by <code>}</code> or <code>:</code>
     */
    static void forbidCircularReferences(Map<String, String> bank) {
        // first ensure that the pattern bank contains no simple circular references (i.e., any pattern
        // containing an immediate reference to itself) so that those are reported with the short message
        for (Map.Entry<String, String> entry : bank.entrySet()) {
            if (patternReferencesItself(entry.getValue(), entry.getKey())) {
                throw new IllegalArgumentException("circular reference in pattern [" + entry.getKey() + "][" + entry.getValue() + "]");
            }
        }

        // next, starting from each pattern, walk everything it references (transitively) and fail as soon as
        // one of the reached patterns references the start pattern
        for (Map.Entry<String, String> entry : bank.entrySet()) {
            // Each walk tracks the patterns it has already expanded. Without this, a start pattern that merely
            // reaches a cycle it is not part of (A => B => C => B) would be walked forever, and patterns that are
            // reachable via several paths would be expanded over and over again.
            // (fully qualified because the file's import list does not include Set/HashSet)
            java.util.Set<String> visited = new java.util.HashSet<>();
            innerForbidCircularReferences(bank, entry.getKey(), new ArrayList<>(), visited, entry.getValue());
        }
    }

    /**
     * Recursive step of {@link #forbidCircularReferences(Map)}.
     *
     * @param bank        the patterns being validated
     * @param patternName the name of the pattern the walk started from
     * @param path        the names of the patterns leading from the start pattern to {@code pattern} (start excluded)
     * @param visited     the names of the patterns already expanded during this walk
     * @param pattern     the definition of the pattern currently being inspected
     */
    private static void innerForbidCircularReferences(
        Map<String, String> bank,
        String patternName,
        List<String> path,
        java.util.Set<String> visited,
        String pattern
    ) {
        if (patternReferencesItself(pattern, patternName)) {
            throw new IllegalArgumentException(circularReferenceMessage(patternName, path, pattern));
        }

        // next check any other pattern names found in the pattern
        for (int i = pattern.indexOf("%{"); i != -1; i = pattern.indexOf("%{", i + 1)) {
            int begin = i + 2;
            int bracketIndex = pattern.indexOf('}', begin);
            int columnIndex = pattern.indexOf(':', begin);
            int end;
            if (bracketIndex != -1 && columnIndex == -1) {
                end = bracketIndex;
            } else if (columnIndex != -1 && bracketIndex == -1) {
                end = columnIndex;
            } else if (bracketIndex != -1 && columnIndex != -1) {
                end = Math.min(bracketIndex, columnIndex);
            } else {
                throw new IllegalArgumentException("pattern [" + pattern + "] has an invalid syntax");
            }
            String otherPatternName = pattern.substring(begin, end);
            String otherPattern = bank.get(otherPatternName);
            if (otherPattern == null) {
                throw new IllegalArgumentException(
                    "pattern [" + patternName + "] is referencing a non-existent pattern [" + otherPatternName + "]"
                );
            }

            if (visited.add(otherPatternName) == false) {
                // already expanded (or being expanded) during this walk, it cannot reveal anything new
                continue;
            }

            path.add(otherPatternName);
            innerForbidCircularReferences(bank, patternName, path, visited, otherPattern);
            // backtrack, so that the path only ever contains the patterns that actually lead to the current one
            path.remove(path.size() - 1);
        }
    }

    /**
     * Builds the error message for a circular reference back to {@code patternName} found in {@code pattern}.
     */
    private static String circularReferenceMessage(String patternName, List<String> path, String pattern) {
        if (path.isEmpty()) {
            return "circular reference in pattern [" + patternName + "][" + pattern + "]";
        }
        // the last element of the path is the name of the pattern that contains the reference back to the start
        int last = path.size() - 1;
        String message = "circular reference in pattern [" + path.get(last) + "][" + pattern + "] back to pattern [" + patternName + "]";
        // add rest of the path:
        if (last > 0) {
            message += " via patterns [" + String.join("=>", path.subList(0, last)) + "]";
        }
        return message;
    }

    /**
     * @return true if {@code pattern} contains a <code>%{patternName}</code> or <code>%{patternName:...}</code> reference
     */
    private static boolean patternReferencesItself(String pattern, String patternName) {
        return pattern.contains("%{" + patternName + "}") || pattern.contains("%{" + patternName + ":");
    }
}

0 commit comments

Comments
 (0)