Skip to content

Commit 009d1ac

Browse files
committed
Rewrite the header processing to avoid regexes
This uses constant stack space, and it's about 20% faster overall for the entire processor. The latter is impressive considering that the vast majority of the time in the CefParser is spent in processExtensions (the other regex-heavy code, which I didn't touch).
1 parent 2fecc52 commit 009d1ac

File tree

1 file changed

+21
-12
lines changed
  • modules/ingest-common/src/main/java/org/elasticsearch/ingest/common

1 file changed

+21
-12
lines changed

modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/CefParser.java

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -58,10 +58,6 @@ final class CefParser {
5858
this.timezone = timezone;
5959
}
6060

61-
private static final Pattern HEADER_PATTERN = Pattern.compile("(?:\\\\\\||\\\\\\\\|[^|])*?");
62-
private static final Pattern HEADER_NEXT_FIELD_PATTERN = Pattern.compile("(" + HEADER_PATTERN.pattern() + ")\\|");
63-
private static final Pattern HEADER_ESCAPE_CAPTURE = Pattern.compile("\\\\([\\\\|])");
64-
6561
// New patterns for extension parsing
6662
private static final String EXTENSION_KEY_PATTERN = "(?:[\\w-]+(?:\\.[^\\.=\\s\\|\\\\\\[\\]]+)*(?:\\[[0-9]+\\])?(?==))";
6763
private static final Pattern EXTENSION_KEY_ARRAY_CAPTURE = Pattern.compile("^([^\\[\\]]+)((?:\\[[0-9]+\\])+)$");
@@ -302,14 +298,27 @@ enum DataType {
302298

303299
CefEvent process(String cefString) {
304300
List<String> headers = new ArrayList<>();
305-
Matcher matcher = HEADER_NEXT_FIELD_PATTERN.matcher(cefString);
306-
int extensionStart = 0;
307-
308-
for (int i = 0; i < 7 && matcher.find(); i++) {
309-
String field = matcher.group(1);
310-
field = HEADER_ESCAPE_CAPTURE.matcher(field).replaceAll("$1");
311-
headers.add(field);
312-
extensionStart = matcher.end();
301+
int extensionStart = -1;
302+
final StringBuilder buffer = new StringBuilder();
303+
for (int i = 0; i < cefString.length(); i++) {
304+
char curr = cefString.charAt(i);
305+
char next = i < cefString.length() - 1 ? cefString.charAt(i + 1) : '\0';
306+
if (curr == '\\' && next == '\\') { // an escaped backslash
307+
buffer.append('\\'); // emit a backslash
308+
i++; // and skip the next character
309+
} else if (curr == '\\' && next == '|') { // an escaped pipe
310+
buffer.append('|'); // emit a pipe
311+
i++; // and skip the next character
312+
} else if (curr == '|') { // a pipe, it's the end of a header
313+
headers.add(buffer.toString()); // emit the header
314+
buffer.setLength(0); // and reset the buffer
315+
if (headers.size() == 7) {
316+
extensionStart = i + 1; // the extensions begin after this pipe
317+
break; // we've processed all the headers, so exit the loop
318+
}
319+
} else { // any other character
320+
buffer.append(curr); // is just added to the header
321+
}
313322
}
314323

315324
if (headers.isEmpty() == false && headers.getFirst().startsWith("CEF:")) {

0 commit comments

Comments
 (0)