Skip to content

Commit 99e74d0

Browse files
committed
Extracting api package and add javadoc
1 parent 96d920a commit 99e74d0

File tree

21 files changed

+497
-273
lines changed

21 files changed

+497
-273
lines changed

benchmarks/src/main/java/org/elasticsearch/benchmark/index/mapper/PatternedTextParserBenchmark.java

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,13 @@
99

1010
package org.elasticsearch.benchmark.index.mapper;
1111

12-
import org.elasticsearch.xpack.logsdb.patternedtext.charparser.compiler.SchemaCompiler;
13-
import org.elasticsearch.xpack.logsdb.patternedtext.charparser.parser.Parser;
14-
import org.elasticsearch.xpack.logsdb.patternedtext.charparser.parser.TimestampFormat;
15-
import org.elasticsearch.xpack.logsdb.patternedtext.charparser.patterned.Argument;
16-
import org.elasticsearch.xpack.logsdb.patternedtext.charparser.patterned.IPv4Argument;
17-
import org.elasticsearch.xpack.logsdb.patternedtext.charparser.patterned.IntegerArgument;
18-
import org.elasticsearch.xpack.logsdb.patternedtext.charparser.patterned.PatternedMessage;
19-
import org.elasticsearch.xpack.logsdb.patternedtext.charparser.patterned.Timestamp;
20-
import org.elasticsearch.xpack.logsdb.patternedtext.charparser.schema.Schema;
12+
import org.elasticsearch.xpack.logsdb.patternedtext.charparser.api.Parser;
13+
import org.elasticsearch.xpack.logsdb.patternedtext.charparser.api.ParserFactory;
14+
import org.elasticsearch.xpack.logsdb.patternedtext.charparser.api.Argument;
15+
import org.elasticsearch.xpack.logsdb.patternedtext.charparser.api.IPv4Argument;
16+
import org.elasticsearch.xpack.logsdb.patternedtext.charparser.api.IntegerArgument;
17+
import org.elasticsearch.xpack.logsdb.patternedtext.charparser.api.PatternedMessage;
18+
import org.elasticsearch.xpack.logsdb.patternedtext.charparser.api.Timestamp;
2119
import org.openjdk.jmh.annotations.Benchmark;
2220
import org.openjdk.jmh.annotations.BenchmarkMode;
2321
import org.openjdk.jmh.annotations.Fork;
@@ -51,7 +49,7 @@
5149
@Fork(1)
5250
public class PatternedTextParserBenchmark {
5351

54-
private Parser charParser;
52+
private Parser parser;
5553
private RegexParser regexParser;
5654
private String testMessageWithComma;
5755
private String testMessageNoComma;
@@ -60,7 +58,7 @@ public class PatternedTextParserBenchmark {
6058

6159
@Setup
6260
public void setup() {
63-
charParser = new Parser(SchemaCompiler.compile(Schema.getInstance()));
61+
parser = ParserFactory.createParser();
6462
regexParser = new RegexParser();
6563
testMessageWithComma = "Oct 05, 2023 02:48:00 PM INFO Response from 127.0.0.1 took 2000 ms";
6664
testMessageNoComma = "Oct 05 2023 02:48:00 PM INFO Response from 127.0.0.1 took 2000 ms";
@@ -69,7 +67,7 @@ public void setup() {
6967

7068
@Benchmark
7169
public void parseWithCharParser(Blackhole blackhole) {
72-
PatternedMessage result = charParser.parse(testMessageNoComma);
70+
PatternedMessage result = parser.parse(testMessageNoComma);
7371
blackhole.consume(result);
7472
// long timestamp = TimestampFormat.parseTimestamp(dateTimeFormatter, "Oct 05 2023 02:48:00 PM");
7573
// blackhole.consume(timestamp);
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
package org.elasticsearch.xpack.logsdb.patternedtext.charparser.api;
9+
10+
import org.elasticsearch.xpack.logsdb.patternedtext.charparser.common.EncodingType;
11+
12+
/**
13+
* Represents a typed argument extracted from a text message.
14+
* <p>
15+
* An argument holds the original value and its encoding type, and can provide a string representation of the value.
16+
*
17+
* @param <T> the type of the argument's value
18+
*/
19+
public interface Argument<T> {
20+
/**
21+
* Returns the original value of the argument.
22+
*
23+
* @return the argument's value
24+
*/
25+
T value();
26+
27+
/**
28+
* Returns the encoding type of the argument.
29+
*
30+
* @return the encoding type
31+
*/
32+
EncodingType type();
33+
34+
/**
35+
* Returns a string representation of the argument's value.
36+
*
37+
* @return the string representation of the value
38+
*/
39+
String encode();
40+
}

x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patternedtext/charparser/patterned/ByteEncodedArgument.java renamed to x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patternedtext/charparser/api/ByteEncodedArgument.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,16 @@
55
* 2.0.
66
*/
77

8-
package org.elasticsearch.xpack.logsdb.patternedtext.charparser.patterned;
8+
package org.elasticsearch.xpack.logsdb.patternedtext.charparser.api;
99

1010
import java.util.Base64;
1111

12+
/**
13+
* An abstract class for arguments that are encoded as a byte array.
14+
* <p>
15+
* This class provides a base implementation for arguments that are represented as a byte array.
16+
* It handles the storage of the byte array and provides a Base64 encoder (without padding) for the {@code encode()} method.
17+
*/
1218
public abstract class ByteEncodedArgument implements Argument<byte[]> {
1319
protected final byte[] encodedBytes;
1420
protected final Base64.Encoder encoder = Base64.getEncoder().withoutPadding();

x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patternedtext/charparser/patterned/HexadecimalArgument.java renamed to x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patternedtext/charparser/api/HexadecimalArgument.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,15 @@
55
* 2.0.
66
*/
77

8-
package org.elasticsearch.xpack.logsdb.patternedtext.charparser.patterned;
8+
package org.elasticsearch.xpack.logsdb.patternedtext.charparser.api;
99

1010
import org.elasticsearch.xpack.logsdb.patternedtext.charparser.common.EncodingType;
1111

12+
/**
13+
* Represents a hexadecimal argument extracted from a text message.
14+
* <p>
15+
* The value is a byte array decoded from a hexadecimal string.
16+
*/
1217
public final class HexadecimalArgument extends ByteEncodedArgument {
1318

1419
public HexadecimalArgument(String s, int start, int length) {

x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patternedtext/charparser/patterned/IPv4Argument.java renamed to x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patternedtext/charparser/api/IPv4Argument.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,15 @@
55
* 2.0.
66
*/
77

8-
package org.elasticsearch.xpack.logsdb.patternedtext.charparser.patterned;
8+
package org.elasticsearch.xpack.logsdb.patternedtext.charparser.api;
99

1010
import org.elasticsearch.xpack.logsdb.patternedtext.charparser.common.EncodingType;
1111

12+
/**
13+
* Represents an IPv4 address argument extracted from a text message.
14+
* <p>
15+
* The value is a byte array of the four octets of the IPv4 address.
16+
*/
1217
public final class IPv4Argument extends ByteEncodedArgument {
1318

1419
public IPv4Argument(int[] octets) {

x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patternedtext/charparser/patterned/IntegerArgument.java renamed to x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patternedtext/charparser/api/IntegerArgument.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,16 @@
55
* 2.0.
66
*/
77

8-
package org.elasticsearch.xpack.logsdb.patternedtext.charparser.patterned;
8+
package org.elasticsearch.xpack.logsdb.patternedtext.charparser.api;
99

1010
import org.elasticsearch.common.util.ByteUtils;
1111
import org.elasticsearch.xpack.logsdb.patternedtext.charparser.common.EncodingType;
1212

1313
import java.util.Base64;
1414

15+
/**
16+
* Represents an integer argument extracted from a text message.
17+
*/
1518
public final class IntegerArgument implements Argument<Integer> {
1619
private final int value;
1720

x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patternedtext/charparser/patterned/KeywordArgument.java renamed to x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patternedtext/charparser/api/KeywordArgument.java

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,20 @@
55
* 2.0.
66
*/
77

8-
package org.elasticsearch.xpack.logsdb.patternedtext.charparser.patterned;
8+
package org.elasticsearch.xpack.logsdb.patternedtext.charparser.api;
99

1010
import org.elasticsearch.xpack.logsdb.patternedtext.charparser.common.EncodingType;
1111

12+
/**
13+
* Represents a keyword argument extracted from a text message.
14+
* <p>
15+
* A keyword differs from a simple text token in that, although it is encoded as a string,
16+
* it represents a message argument rather than a static token.
17+
* Ideally, only arguments with low cardinality should be represented by a Keyword.
18+
* High cardinality ones (like UUIDs for example) should be represented by a different type, as much as possible.
19+
* Since arguments are identified using a generic schema, this type must also be expected to hold
20+
* high cardinality arguments.
21+
*/
1222
public final class KeywordArgument implements Argument<String> {
1323
private final StringBuilder value;
1424

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
package org.elasticsearch.xpack.logsdb.patternedtext.charparser.api;
9+
10+
/**
11+
* Interface for parsing raw text messages into structured patterns with typed arguments.
12+
*
13+
* <p>Implementations of this interface are responsible for analyzing input text and extracting
14+
* meaningful patterns, timestamps, and parameter values.
15+
* The parsing process converts unstructured log messages into a standardized format that
16+
* separates the static template from the variable data.
17+
*
18+
* <p>The parser operates by recognizing tokens and sub-tokens within the input text,
19+
* matching them against configured patterns, and producing a structured representation
20+
* that includes:
21+
* <ul>
22+
* <li>A template string with parameter placeholders</li>
23+
* <li>Extracted timestamp information (if present)</li>
24+
* <li>Typed arguments corresponding to the template parameters</li>
25+
* </ul>
26+
*/
27+
public interface Parser {
28+
29+
/**
30+
* Parses a raw text message into a structured pattern with extracted components.
31+
*
32+
* <p>This method analyzes the input message and extracts variable data while preserving the overall structure as a template.
33+
*
34+
* <p><strong>Example:</strong>
35+
* <pre>
36+
* Input: "2023-10-05 14:30:25 INFO received 305 packets from 135.122.123.222"
37+
* Output: PatternedMessage with:
38+
* - template: "%T INFO received %I packets from %4"
39+
* - timestamp: parsed datetime value
40+
* - arguments: [Integer:{305}, IPv4:{135.122.123.222}]
41+
* </pre>
42+
*
43+
* @param rawMessage the input text message to parse, must not be null
44+
* @return a {@link PatternedMessage} containing the extracted template, timestamp, and typed arguments
45+
* @throws IllegalArgumentException if rawMessage is null
46+
*/
47+
PatternedMessage parse(String rawMessage);
48+
}
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
package org.elasticsearch.xpack.logsdb.patternedtext.charparser.api;
9+
10+
import org.elasticsearch.xpack.logsdb.patternedtext.charparser.compiler.CompiledSchema;
11+
import org.elasticsearch.xpack.logsdb.patternedtext.charparser.compiler.SchemaCompiler;
12+
import org.elasticsearch.xpack.logsdb.patternedtext.charparser.parser.CharParser;
13+
import org.elasticsearch.xpack.logsdb.patternedtext.charparser.schema.Schema;
14+
15+
/**
16+
* Factory for creating {@link Parser} instances with a pre-compiled schema.
17+
*
18+
* <p>All parser instances share the same compiled schema for efficiency.
19+
* This factory is thread-safe.
20+
*/
21+
public class ParserFactory {
22+
23+
private static final CompiledSchema compiledSchema = SchemaCompiler.compile(Schema.getInstance());
24+
25+
private ParserFactory() {
26+
// Prevent instantiation
27+
}
28+
29+
/**
30+
* Creates a new parser instance.
31+
*
32+
* @return a new {@link Parser} instance
33+
*/
34+
public static Parser createParser() {
35+
return new CharParser(compiledSchema);
36+
}
37+
}

x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patternedtext/charparser/patterned/PatternedMessage.java renamed to x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patternedtext/charparser/api/PatternedMessage.java

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,18 @@
55
* 2.0.
66
*/
77

8-
package org.elasticsearch.xpack.logsdb.patternedtext.charparser.patterned;
8+
package org.elasticsearch.xpack.logsdb.patternedtext.charparser.api;
99

10+
/**
11+
* Represents a structured message parsed from raw text.
12+
* <p>
13+
* A patterned message contains the original text's structure as a pattern,
14+
* an extracted timestamp, and an array of typed arguments.
15+
*
16+
* @param pattern the pattern of the message, with placeholders for arguments
17+
* @param timestamp the timestamp extracted from the message
18+
* @param arguments an array of typed arguments extracted from the message
19+
*/
1020
public record PatternedMessage(String pattern, Timestamp timestamp, Argument<?>[] arguments) {
1121

1222
@Override

0 commit comments

Comments
 (0)