Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.xpack.logsdb.patternedtext;

import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Base64;
import java.util.List;

/**
 * Static helpers for describing the arguments of a patterned text value and for
 * converting them to and from their stored string forms.
 *
 * <p>Two encodings live here:
 * <ul>
 *   <li>the argument <em>schema</em> (a list of {@link Schema}), serialized as Lucene
 *       variable-length ints and then URL-safe Base64 without padding;</li>
 *   <li>the argument <em>values</em>, joined into a single space-separated string.</li>
 * </ul>
 */
public class Arg {

    /** Separator used when joining and splitting the argument values. */
    private static final String SPACE = " ";

    /**
     * Maximum number of bytes a single Lucene VInt can occupy. A VInt stores 7 payload
     * bits per byte, so a 32-bit value needs up to 5 bytes (not {@code Integer.BYTES}).
     */
    private static final int MAX_VINT_BYTES = 5;

    /**
     * Kind of an argument. Each constant carries the numeric code that is written by
     * {@link Schema#writeTo} and read back by {@link Schema#readFrom}; the codes must
     * stay dense and start at zero because they index the lookup table directly.
     */
    public enum Type {
        GENERAL(0),
        IP4(1),
        INTEGER(2);

        private final int code;
        // Indexed by code; populated once all constants have been constructed.
        private static final Type[] lookup = new Type[values().length];
        static {
            for (var type : values()) {
                lookup[type.code] = type;
            }
        }

        Type(int code) {
            this.code = code;
        }

        /** Returns the numeric code used to serialize this type. */
        public int toCode() {
            return code;
        }

        /**
         * Resolves a serialized code back to its {@link Type}.
         *
         * @param code the value previously produced by {@link #toCode()}
         * @return the matching type
         * @throws IllegalArgumentException if {@code code} does not correspond to any type
         */
        public static Type fromCode(int code) {
            if (code < 0 || code >= lookup.length) {
                throw new IllegalArgumentException("unknown arg type code [" + code + "]");
            }
            return lookup[code];
        }
    }

    /**
     * Schema entry for one argument: its {@link Type} and an offset relative to the
     * previous argument.
     * NOTE(review): the unit and origin of {@code offsetFromPrevArg} are defined by the
     * caller that builds the schema and are not visible here - confirm before relying on them.
     */
    record Schema(Type type, int offsetFromPrevArg) {

        /** Writes the type code followed by the offset, each as a VInt. */
        void writeTo(ByteArrayDataOutput out) throws IOException {
            out.writeVInt(type.toCode());
            out.writeVInt(offsetFromPrevArg);
        }

        /** Reads one entry in the same order {@link #writeTo} wrote it: type code, then offset. */
        static Schema readFrom(ByteArrayDataInput in) {
            Type type = Type.fromCode(in.readVInt());
            int offsetFromPrevArg = in.readVInt();
            return new Schema(type, offsetFromPrevArg);
        }
    }

    private static final Base64.Decoder DECODER = Base64.getUrlDecoder();
    private static final Base64.Encoder ENCODER = Base64.getUrlEncoder().withoutPadding();

    /**
     * Serializes a schema list to a URL-safe Base64 string (no padding).
     *
     * <p>Layout before Base64: the entry count as a VInt, followed by each entry as
     * written by {@link Schema#writeTo}.
     *
     * @param arguments the schema entries, in order
     * @return the encoded schema
     * @throws IOException declared by the underlying Lucene writer
     */
    public static String encodeSchema(List<Schema> arguments) throws IOException {
        // One VInt for the count plus two VInts per entry, each sized for the worst case
        // so that large offsets cannot overrun the buffer.
        int maxSize = MAX_VINT_BYTES * (1 + 2 * arguments.size());
        byte[] buffer = new byte[maxSize];
        var out = new ByteArrayDataOutput(buffer);
        out.writeVInt(arguments.size());
        for (var arg : arguments) {
            arg.writeTo(out);
        }

        // Only the bytes actually written are encoded; the rest of the buffer is slack.
        byte[] data = Arrays.copyOf(buffer, out.getPosition());
        return ENCODER.encodeToString(data);
    }

    /**
     * Inverse of {@link #encodeSchema}: decodes the Base64 string and reads the entry
     * count followed by that many schema entries.
     *
     * @param encoded a string produced by {@link #encodeSchema}
     * @return the decoded schema entries, in their original order
     */
    public static List<Schema> decodeSchema(String encoded) {
        byte[] encodedBytes = DECODER.decode(encoded);
        var input = new ByteArrayDataInput(encodedBytes);

        int numArgs = input.readVInt();
        List<Schema> arguments = new ArrayList<>(numArgs);
        for (int i = 0; i < numArgs; i++) {
            arguments.add(Schema.readFrom(input));
        }
        return arguments;
    }

    /**
     * Reports whether a token is treated as an argument, which is the case when it
     * contains at least one digit character.
     *
     * @param text the token to inspect
     * @return {@code true} if any character of {@code text} is a digit
     */
    static boolean isArg(String text) {
        for (int i = 0; i < text.length(); i++) {
            if (Character.isDigit(text.charAt(i))) {
                return true;
            }
        }
        return false;
    }

    /** Joins the argument values of {@code parts} into one space-separated string. */
    static String encodeRemainingArgs(PatternedTextValueProcessor.Parts parts) {
        return String.join(SPACE, parts.args());
    }

    /**
     * Splits a string produced by {@link #encodeRemainingArgs} back into its values.
     * Note that {@link String#split(String)} drops trailing empty strings.
     */
    static String[] decodeRemainingArgs(String mergedArgs) {
        return mergedArgs.split(SPACE);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,35 +14,42 @@
import org.apache.lucene.util.BytesRef;

import java.io.IOException;
import java.util.List;

public class PatternedTextDocValues extends BinaryDocValues {
private final SortedSetDocValues templateDocValues;
private final SortedSetDocValues argsDocValues;
private final SortedSetDocValues argsSchemaDocValues;

PatternedTextDocValues(SortedSetDocValues templateDocValues, SortedSetDocValues argsDocValues) {
PatternedTextDocValues(SortedSetDocValues templateDocValues, SortedSetDocValues argsDocValues, SortedSetDocValues argsSchemaDocValues) {
this.templateDocValues = templateDocValues;
this.argsDocValues = argsDocValues;
this.argsSchemaDocValues = argsSchemaDocValues;
}

static PatternedTextDocValues from(LeafReader leafReader, String templateFieldName, String argsFieldName) throws IOException {
static PatternedTextDocValues from(LeafReader leafReader, String templateFieldName, String argsFieldName, String argsSchemaFieldName)
throws IOException {
SortedSetDocValues templateDocValues = DocValues.getSortedSet(leafReader, templateFieldName);
if (templateDocValues.getValueCount() == 0) {
return null;
}

SortedSetDocValues argsDocValues = DocValues.getSortedSet(leafReader, argsFieldName);
return new PatternedTextDocValues(templateDocValues, argsDocValues);
SortedSetDocValues argsSchemaDocValues = DocValues.getSortedSet(leafReader, argsSchemaFieldName);
return new PatternedTextDocValues(templateDocValues, argsDocValues, argsSchemaDocValues);
}

private String getNextStringValue() throws IOException {
assert templateDocValues.docValueCount() == 1;
String template = templateDocValues.lookupOrd(templateDocValues.nextOrd()).utf8ToString();
int argsCount = PatternedTextValueProcessor.countArgs(template);
if (argsCount > 0) {
List<Arg.Schema> argsSchema = Arg.decodeSchema(argsSchemaDocValues.lookupOrd(argsSchemaDocValues.nextOrd()).utf8ToString());

if (argsSchema.isEmpty() == false) {
assert argsDocValues.docValueCount() == 1;
assert argsSchemaDocValues.docValueCount() == 1;
var mergedArgs = argsDocValues.lookupOrd(argsDocValues.nextOrd());
var args = PatternedTextValueProcessor.decodeRemainingArgs(mergedArgs.utf8ToString());
return PatternedTextValueProcessor.merge(new PatternedTextValueProcessor.Parts(template, args));
var args = Arg.decodeRemainingArgs(mergedArgs.utf8ToString());
return PatternedTextValueProcessor.merge(template, args, argsSchema);
} else {
return template;
}
Expand All @@ -56,6 +63,7 @@ public BytesRef binaryValue() throws IOException {
@Override
public boolean advanceExact(int i) throws IOException {
argsDocValues.advanceExact(i);
argsSchemaDocValues.advanceExact(i);
// If template has a value, then message has a value. We don't have to check args here, since there may not be args for the doc
return templateDocValues.advanceExact(i);
}
Expand All @@ -69,20 +77,24 @@ public int docID() {
public int nextDoc() throws IOException {
int templateNext = templateDocValues.nextDoc();
var argsAdvance = argsDocValues.advance(templateNext);
var argsSchemaAdvance = argsSchemaDocValues.advance(templateNext);
assert argsAdvance >= templateNext;
assert argsSchemaAdvance == templateNext;
return templateNext;
}

@Override
public int advance(int i) throws IOException {
int templateAdvance = templateDocValues.advance(i);
var argsAdvance = argsDocValues.advance(templateAdvance);
var argsSchemaAdvance = argsSchemaDocValues.advance(templateAdvance);
assert argsAdvance >= templateAdvance;
assert argsSchemaAdvance == templateAdvance;
return templateAdvance;
}

@Override
public long cost() {
return templateDocValues.cost() + argsDocValues.cost();
return templateDocValues.cost() + argsDocValues.cost() + argsSchemaDocValues.cost();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -184,9 +184,13 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio
// Add template_id doc_values
context.doc().add(templateIdMapper.buildKeywordField(new BytesRef(parts.templateId())));

// Add args schema
String argsSchemaEncoded = Arg.encodeSchema(parts.schemas());
context.doc().add(new SortedSetDocValuesField(fieldType().argsSchemaFieldName(), new BytesRef(argsSchemaEncoded)));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the schema field could be stored using SortedDocValuesField, given that encodeSchema(...) stores the schemas as one value, so there is only one value per document?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ideally, they should be able to be stored as regular SortedDocValues. This is true for all the doc values columns in the patterned_text type. But I ran into an issue where a mapper test class defined in Lucene did not handle SortedDocValues correctly. I submitted this fix: apache/lucene#14839, and it has been merged. If we're using 10.3, I could go ahead and update all doc values in this type to SortedDocValues. But I'm inclined to do it in a separate PR

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Cool, and thanks for fixing this in Lucene 🚀


// Add args doc_values
if (parts.args().isEmpty() == false) {
String remainingArgs = PatternedTextValueProcessor.encodeRemainingArgs(parts);
String remainingArgs = Arg.encodeRemainingArgs(parts);
context.doc().add(new SortedSetDocValuesField(fieldType().argsFieldName(), new BytesRef(remainingArgs)));
}
}
Expand All @@ -207,7 +211,12 @@ protected SyntheticSourceSupport syntheticSourceSupport() {
() -> new CompositeSyntheticFieldLoader(
leafName(),
fullPath(),
new PatternedTextSyntheticFieldLoaderLayer(fieldType().name(), fieldType().templateFieldName(), fieldType().argsFieldName())
new PatternedTextSyntheticFieldLoaderLayer(
fieldType().name(),
fieldType().templateFieldName(),
fieldType().argsFieldName(),
fieldType().argsSchemaFieldName()
)
)
);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ public class PatternedTextFieldType extends StringFieldType {
private static final String TEMPLATE_SUFFIX = ".template";
private static final String TEMPLATE_ID_SUFFIX = ".template_id";
private static final String ARGS_SUFFIX = ".args";
private static final String ARGS_SCHEMA_SUFFIX = ".args_schema";

public static final String CONTENT_TYPE = "patterned_text";

Expand Down Expand Up @@ -272,4 +273,8 @@ String argsFieldName() {
return name() + ARGS_SUFFIX;
}

String argsSchemaFieldName() {
return name() + ARGS_SCHEMA_SUFFIX;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,8 @@ public LeafFieldData loadDirect(LeafReaderContext context) throws IOException {
PatternedTextDocValues docValues = PatternedTextDocValues.from(
leafReader,
fieldType.templateFieldName(),
fieldType.argsFieldName()
fieldType.argsFieldName(),
fieldType.argsSchemaFieldName()
);
return new LeafFieldData() {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,14 @@ class PatternedTextSyntheticFieldLoaderLayer implements CompositeSyntheticFieldL
private final String name;
private final String templateFieldName;
private final String argsFieldName;
private final String argsSchemaFieldName;
private PatternedTextSyntheticFieldLoader loader;

PatternedTextSyntheticFieldLoaderLayer(String name, String templateFieldName, String argsFieldName) {
PatternedTextSyntheticFieldLoaderLayer(String name, String templateFieldName, String argsFieldName, String argsSchemaFieldName) {
this.name = name;
this.templateFieldName = templateFieldName;
this.argsFieldName = argsFieldName;
this.argsSchemaFieldName = argsSchemaFieldName;
}

@Override
Expand All @@ -34,7 +36,7 @@ public long valueCount() {

@Override
public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf) throws IOException {
var docValues = PatternedTextDocValues.from(leafReader, templateFieldName, argsFieldName);
var docValues = PatternedTextDocValues.from(leafReader, templateFieldName, argsFieldName, argsSchemaFieldName);
if (docValues == null) {
return null;
}
Expand Down
Loading