Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.xpack.logsdb.patternedtext;

import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.DataInput;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Base64;
import java.util.List;

/**
 * Describes the type and location of an argument in the template. A list of argument infos is encoded and stored in a doc value
 * column; this is used to re-combine the template and argument columns. Documents with identical templates share the same
 * list of argument infos, and since indices are sorted by template_id, this doc value column compresses very well.
 */
public class Arg {

    /** Separator placed between arguments when they are merged into a single string. */
    private static final String SPACE = " ";
    private static final Base64.Decoder DECODER = Base64.getUrlDecoder();
    private static final Base64.Encoder ENCODER = Base64.getUrlEncoder().withoutPadding();
    /** Number of bytes reserved per variable-length int when sizing the encode buffer. */
    private static final int VINT_MAX_BYTES = 5;

    /**
     * Kind of an argument. Each constant carries a numeric code which is what gets written to the encoded form.
     */
    public enum Type {
        GENERIC(0);

        private final int code;
        // Indexed by code, so every code must lie in [0, values().length).
        private static final Type[] lookup = new Type[values().length];
        static {
            for (var type : values()) {
                lookup[type.code] = type;
            }
        }

        Type(int code) {
            this.code = code;
        }

        /** Returns the numeric code that identifies this type in the encoded form. */
        public int toCode() {
            return code;
        }

        /**
         * Returns the type registered for {@code code}.
         *
         * @throws ArrayIndexOutOfBoundsException if {@code code} is negative or not smaller than the number of types
         */
        public static Type fromCode(int code) {
            return lookup[code];
        }
    }

    /**
     * Type of a single argument together with its offset in the template.
     *
     * @param type             the kind of argument
     * @param offsetInTemplate offset of the argument in the template, asserted to be non-negative
     */
    record Info(Type type, int offsetInTemplate) {
        public Info {
            assert offsetInTemplate >= 0;
        }

        /**
         * Writes the type code followed by the offset, the latter as the difference from {@code previousOffset}.
         *
         * @param out            destination of the two variable-length ints
         * @param previousOffset offset of the preceding argument, or 0 for the first one
         */
        void writeTo(ByteArrayDataOutput out, int previousOffset) throws IOException {
            out.writeVInt(type.toCode());
            // Store the delta rather than the absolute offset.
            out.writeVInt(offsetInTemplate - previousOffset);
        }

        /**
         * Reads one info written by {@link #writeTo}, turning the stored difference back into an absolute offset.
         *
         * @param in             source positioned at the start of an encoded info
         * @param previousOffset offset of the preceding argument, or 0 for the first one
         */
        static Info readFrom(DataInput in, int previousOffset) throws IOException {
            var type = Type.fromCode(in.readVInt());
            int diffFromPrevious = in.readVInt();
            int offsetInTemplate = previousOffset + diffFromPrevious;
            return new Info(type, offsetInTemplate);
        }
    }

    /**
     * Returns {@code true} if {@code text} contains at least one digit character, which is the criterion used here for
     * treating a piece of text as an argument.
     */
    static boolean isArg(String text) {
        for (int i = 0; i < text.length(); i++) {
            if (Character.isDigit(text.charAt(i))) {
                return true;
            }
        }
        return false;
    }

    /**
     * Encodes the infos as a URL-safe, unpadded Base64 string. The binary layout is the number of infos followed by each
     * info as written by {@link Info#writeTo}, with every offset stored relative to the offset of the preceding info.
     *
     * @param arguments infos to encode, in the order they should be decoded
     * @return the Base64 text of the encoded infos
     */
    static String encodeInfo(List<Info> arguments) throws IOException {
        // One vint for the count, then a type vint and an offset vint per argument.
        int maxSize = VINT_MAX_BYTES + arguments.size() * (VINT_MAX_BYTES + VINT_MAX_BYTES);
        byte[] buffer = new byte[maxSize];
        var out = new ByteArrayDataOutput(buffer);
        out.writeVInt(arguments.size());
        int previousOffset = 0;
        for (var arg : arguments) {
            arg.writeTo(out, previousOffset);
            previousOffset = arg.offsetInTemplate;
        }

        // Only the bytes actually written are encoded, not the whole over-sized buffer.
        byte[] data = Arrays.copyOf(buffer, out.getPosition());
        return ENCODER.encodeToString(data);
    }

    /**
     * Decodes a string produced by {@link #encodeInfo} back into the list of infos.
     *
     * @param encoded URL-safe Base64 text of the encoded infos
     * @return the decoded infos, in encoding order
     */
    static List<Info> decodeInfo(String encoded) throws IOException {
        byte[] encodedBytes = DECODER.decode(encoded);
        var input = new ByteArrayDataInput(encodedBytes);

        int numArgs = input.readVInt();
        int previousOffset = 0;
        List<Info> arguments = new ArrayList<>(numArgs);
        for (int i = 0; i < numArgs; i++) {
            var argInfo = Info.readFrom(input, previousOffset);
            arguments.add(argInfo);
            previousOffset = argInfo.offsetInTemplate;
        }
        return arguments;
    }

    /** Joins the args of {@code parts} into one string, separated by a single space. */
    static String encodeRemainingArgs(PatternedTextValueProcessor.Parts parts) {
        return String.join(SPACE, parts.args());
    }

    /**
     * Splits a string produced by {@link #encodeRemainingArgs} on single spaces. This relies on {@link String#split(String)},
     * so trailing empty strings are not part of the result.
     */
    static String[] decodeRemainingArgs(String mergedArgs) {
        return mergedArgs.split(SPACE);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,35 +14,42 @@
import org.apache.lucene.util.BytesRef;

import java.io.IOException;
import java.util.List;

public class PatternedTextDocValues extends BinaryDocValues {
private final SortedSetDocValues templateDocValues;
private final SortedSetDocValues argsDocValues;
private final SortedSetDocValues argsInfoDocValues;

PatternedTextDocValues(SortedSetDocValues templateDocValues, SortedSetDocValues argsDocValues) {
PatternedTextDocValues(SortedSetDocValues templateDocValues, SortedSetDocValues argsDocValues, SortedSetDocValues argsInfoDocValues) {
this.templateDocValues = templateDocValues;
this.argsDocValues = argsDocValues;
this.argsInfoDocValues = argsInfoDocValues;
}

static PatternedTextDocValues from(LeafReader leafReader, String templateFieldName, String argsFieldName) throws IOException {
static PatternedTextDocValues from(LeafReader leafReader, String templateFieldName, String argsFieldName, String argsInfoFieldName)
throws IOException {
SortedSetDocValues templateDocValues = DocValues.getSortedSet(leafReader, templateFieldName);
if (templateDocValues.getValueCount() == 0) {
return null;
}

SortedSetDocValues argsDocValues = DocValues.getSortedSet(leafReader, argsFieldName);
return new PatternedTextDocValues(templateDocValues, argsDocValues);
SortedSetDocValues argsInfoDocValues = DocValues.getSortedSet(leafReader, argsInfoFieldName);
return new PatternedTextDocValues(templateDocValues, argsDocValues, argsInfoDocValues);
}

private String getNextStringValue() throws IOException {
assert templateDocValues.docValueCount() == 1;
String template = templateDocValues.lookupOrd(templateDocValues.nextOrd()).utf8ToString();
int argsCount = PatternedTextValueProcessor.countArgs(template);
if (argsCount > 0) {
List<Arg.Info> argsInfo = Arg.decodeInfo(argsInfoDocValues.lookupOrd(argsInfoDocValues.nextOrd()).utf8ToString());

if (argsInfo.isEmpty() == false) {
assert argsDocValues.docValueCount() == 1;
assert argsInfoDocValues.docValueCount() == 1;
var mergedArgs = argsDocValues.lookupOrd(argsDocValues.nextOrd());
var args = PatternedTextValueProcessor.decodeRemainingArgs(mergedArgs.utf8ToString());
return PatternedTextValueProcessor.merge(new PatternedTextValueProcessor.Parts(template, args));
var args = Arg.decodeRemainingArgs(mergedArgs.utf8ToString());
return PatternedTextValueProcessor.merge(template, args, argsInfo);
} else {
return template;
}
Expand All @@ -56,6 +63,7 @@ public BytesRef binaryValue() throws IOException {
@Override
public boolean advanceExact(int i) throws IOException {
    // Position the two argument columns on the requested document first; their results are deliberately ignored
    // because a document may have no args.
    argsDocValues.advanceExact(i);
    argsInfoDocValues.advanceExact(i);
    // The template column alone decides whether the document has a value: if the template has one, so does the message.
    final boolean templatePresent = templateDocValues.advanceExact(i);
    return templatePresent;
}
Expand All @@ -69,20 +77,24 @@ public int docID() {
public int nextDoc() throws IOException {
    // The template column drives iteration; the args and args-info columns are moved along to the same document.
    int templateNext = templateDocValues.nextDoc();
    // A document may have no args, so the args iterator can already sit on or beyond templateNext after an earlier
    // call. Lucene leaves advance(target) undefined for target <= docID(), so only move an iterator that is behind.
    int argsAdvance = argsDocValues.docID() < templateNext ? argsDocValues.advance(templateNext) : argsDocValues.docID();
    int argsInfoAdvance = argsInfoDocValues.docID() < templateNext
        ? argsInfoDocValues.advance(templateNext)
        : argsInfoDocValues.docID();
    assert argsAdvance >= templateNext;
    assert argsInfoAdvance == templateNext;
    return templateNext;
}

@Override
public int advance(int i) throws IOException {
    // The template column drives iteration; the args and args-info columns are moved along to the same document.
    int templateAdvance = templateDocValues.advance(i);
    // A document may have no args, so the args iterator can already sit on or beyond templateAdvance after an earlier
    // call. Lucene leaves advance(target) undefined for target <= docID(), so only move an iterator that is behind.
    int argsAdvance = argsDocValues.docID() < templateAdvance ? argsDocValues.advance(templateAdvance) : argsDocValues.docID();
    int argsInfoAdvance = argsInfoDocValues.docID() < templateAdvance
        ? argsInfoDocValues.advance(templateAdvance)
        : argsInfoDocValues.docID();
    assert argsAdvance >= templateAdvance;
    assert argsInfoAdvance == templateAdvance;
    return templateAdvance;
}

@Override
public long cost() {
return templateDocValues.cost() + argsDocValues.cost();
return templateDocValues.cost() + argsDocValues.cost() + argsInfoDocValues.cost();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -184,9 +184,13 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio
// Add template_id doc_values
context.doc().add(templateIdMapper.buildKeywordField(new BytesRef(parts.templateId())));

// Add args Info
String argsInfoEncoded = Arg.encodeInfo(parts.argsInfo());
context.doc().add(new SortedSetDocValuesField(fieldType().argsInfoFieldName(), new BytesRef(argsInfoEncoded)));

// Add args doc_values
if (parts.args().isEmpty() == false) {
String remainingArgs = PatternedTextValueProcessor.encodeRemainingArgs(parts);
String remainingArgs = Arg.encodeRemainingArgs(parts);
context.doc().add(new SortedSetDocValuesField(fieldType().argsFieldName(), new BytesRef(remainingArgs)));
}
}
Expand All @@ -207,7 +211,12 @@ protected SyntheticSourceSupport syntheticSourceSupport() {
() -> new CompositeSyntheticFieldLoader(
leafName(),
fullPath(),
new PatternedTextSyntheticFieldLoaderLayer(fieldType().name(), fieldType().templateFieldName(), fieldType().argsFieldName())
new PatternedTextSyntheticFieldLoaderLayer(
fieldType().name(),
fieldType().templateFieldName(),
fieldType().argsFieldName(),
fieldType().argsInfoFieldName()
)
)
);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ public class PatternedTextFieldType extends StringFieldType {
private static final String TEMPLATE_SUFFIX = ".template";
private static final String TEMPLATE_ID_SUFFIX = ".template_id";
private static final String ARGS_SUFFIX = ".args";
private static final String ARGS_INFO_SUFFIX = ".args_info";

public static final String CONTENT_TYPE = "patterned_text";

Expand Down Expand Up @@ -272,4 +273,8 @@ String argsFieldName() {
return name() + ARGS_SUFFIX;
}

/** Returns the name of the args-info sub-field: this field's name followed by {@code ARGS_INFO_SUFFIX}. */
String argsInfoFieldName() {
    final String baseName = name();
    return baseName + ARGS_INFO_SUFFIX;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,8 @@ public LeafFieldData loadDirect(LeafReaderContext context) throws IOException {
PatternedTextDocValues docValues = PatternedTextDocValues.from(
leafReader,
fieldType.templateFieldName(),
fieldType.argsFieldName()
fieldType.argsFieldName(),
fieldType.argsInfoFieldName()
);
return new LeafFieldData() {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,14 @@ class PatternedTextSyntheticFieldLoaderLayer implements CompositeSyntheticFieldL
private final String name;
private final String templateFieldName;
private final String argsFieldName;
private final String argsInfoFieldName;
private PatternedTextSyntheticFieldLoader loader;

PatternedTextSyntheticFieldLoaderLayer(String name, String templateFieldName, String argsFieldName) {
PatternedTextSyntheticFieldLoaderLayer(String name, String templateFieldName, String argsFieldName, String argsInfoFieldName) {
this.name = name;
this.templateFieldName = templateFieldName;
this.argsFieldName = argsFieldName;
this.argsInfoFieldName = argsInfoFieldName;
}

@Override
Expand All @@ -34,7 +36,7 @@ public long valueCount() {

@Override
public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf) throws IOException {
var docValues = PatternedTextDocValues.from(leafReader, templateFieldName, argsFieldName);
var docValues = PatternedTextDocValues.from(leafReader, templateFieldName, argsFieldName, argsInfoFieldName);
if (docValues == null) {
return null;
}
Expand Down
Loading