@@ -886,12 +886,10 @@ public AllReader reader(LeafReaderContext context) throws IOException {
         }
     }
 
-    private static class BytesRefsFromBinary extends BlockDocValuesReader {
-        private final BinaryDocValues docValues;
-        private final ByteArrayStreamInput in = new ByteArrayStreamInput();
-        private final BytesRef scratch = new BytesRef();
+    abstract static class AbstractBytesRefsFromBinary extends BlockDocValuesReader {
+        protected final BinaryDocValues docValues;
 
-        BytesRefsFromBinary(BinaryDocValues docValues) {
+        AbstractBytesRefsFromBinary(BinaryDocValues docValues) {
             this.docValues = docValues;
         }
 
@@ -911,7 +909,24 @@ public void read(int docId, BlockLoader.StoredFields storedFields, Builder build
             read(docId, (BytesRefBuilder) builder);
         }
 
-        private void read(int doc, BytesRefBuilder builder) throws IOException {
+        @Override
+        public int docId() {
+            return docValues.docID();
+        }
+
+        abstract void read(int docId, BytesRefBuilder builder) throws IOException;
+    }
+
+    static class BytesRefsFromBinary extends AbstractBytesRefsFromBinary {
Member:
Maybe rename this BytesRefsFromBinary to BytesRefsFromCustomBinary? Given that it only works with bytes created by CustomBinaryDocValuesField?

+        private final ByteArrayStreamInput in = new ByteArrayStreamInput();
+        private final BytesRef scratch = new BytesRef();
+
+        BytesRefsFromBinary(BinaryDocValues docValues) {
+            super(docValues);
+        }
+
+        @Override
+        void read(int doc, BytesRefBuilder builder) throws IOException {
             if (false == docValues.advanceExact(doc)) {
                 builder.appendNull();
                 return;
@@ -937,15 +952,30 @@ private void read(int doc, BytesRefBuilder builder) throws IOException {
             }
             builder.endPositionEntry();
         }
 
-        @Override
-        public int docId() {
-            return docValues.docID();
-        }
-
         @Override
         public String toString() {
             return "BlockDocValuesReader.Bytes";
         }
     }
 
+    public static class BytesRefsFromSimpleBinary extends AbstractBytesRefsFromBinary {
Contributor Author:
The existing BytesRefsFromBinary reader assumes some structure within the BytesRefs provided by the BinaryDocValues. Specifically, a count of values, plus a length and offset for each value. The BinaryDocValues created for patterned_text has no such structure: the returned BytesRef contains a single value, which is the message string.

Contributor:
As far as I can tell, BytesRefsFromBinary is expecting the bytes read out of the BinaryDocValues to be encoded in the format used by BinaryFieldMapper.CustomBinaryDocValuesField.

Maybe add comments to both BytesRefsFromBinary and BytesRefsFromSimpleBinary explaining which format each expects?

Contributor Author:
Ahh, that's where it's from, thanks for tracking it down! Yeah, definitely makes sense to document how they are encoded. Thanks for the review!

Member:
And then let's just rename this BytesRefsFromBinary?
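
For illustration, here is a rough, self-contained sketch of the two layouts discussed in this thread. It deliberately uses plain DataOutputStream/DataInputStream with fixed-width ints so it runs on its own; the real CustomBinaryDocValuesField encoding is written with Lucene's variable-length VInts through the project's stream abstractions, so treat the exact wire format below as an assumption for readability, not a reference.

```java
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

public class BinaryLayoutSketch {

    // "Custom" layout: a value count, then each value as (length, bytes).
    // This is the kind of structure BytesRefsFromBinary expects to parse.
    static byte[] encodeMultiValue(List<String> values) throws IOException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        DataOutputStream out = new DataOutputStream(bytes);
        out.writeInt(values.size());
        for (String value : values) {
            byte[] utf8 = value.getBytes(StandardCharsets.UTF_8);
            out.writeInt(utf8.length);
            out.write(utf8);
        }
        return bytes.toByteArray();
    }

    static List<String> decodeMultiValue(byte[] blob) throws IOException {
        DataInputStream in = new DataInputStream(new ByteArrayInputStream(blob));
        int count = in.readInt();
        List<String> values = new ArrayList<>(count);
        for (int i = 0; i < count; i++) {
            byte[] utf8 = new byte[in.readInt()];
            in.readFully(utf8);
            values.add(new String(utf8, StandardCharsets.UTF_8));
        }
        return values;
    }

    // "Simple" layout: the blob is the single value itself, with no header.
    // This is what the patterned_text doc values return, hence BytesRefsFromSimpleBinary.
    static String decodeSimple(byte[] blob) {
        return new String(blob, StandardCharsets.UTF_8);
    }

    public static void main(String[] args) throws IOException {
        byte[] custom = encodeMultiValue(List.of("a", "bb"));
        System.out.println(decodeMultiValue(custom));  // [a, bb]

        byte[] simple = "Found 5 errors for service [cheddar1]".getBytes(StandardCharsets.UTF_8);
        System.out.println(decodeSimple(simple));      // the message string, as-is
    }
}
```

The point is only the structural difference: the first layout needs a header to be parsed before values can be appended, while the second is the value itself and can be appended to the builder directly.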

+        public BytesRefsFromSimpleBinary(BinaryDocValues docValues) {
+            super(docValues);
+        }
+
+        @Override
+        void read(int doc, BytesRefBuilder builder) throws IOException {
+            if (false == docValues.advanceExact(doc)) {
+                builder.appendNull();
+                return;
+            }
+            BytesRef bytes = docValues.binaryValue();
+            builder.appendBytesRef(bytes);
+        }
+
+        @Override
+        public String toString() {
+            return "BlockDocValuesReader.BytesSimple";
+        }
+    }

@@ -0,0 +1,40 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.xpack.logsdb.patternedtext;

import org.apache.lucene.index.LeafReaderContext;
import org.elasticsearch.index.mapper.BlockDocValuesReader;

import java.io.IOException;

public class PatternedTextBlockLoader extends BlockDocValuesReader.DocValuesBlockLoader {

private final String templateFieldName;
private final String argsFieldName;
private final String argsInfoFieldName;

PatternedTextBlockLoader(String templateFieldName, String argsFieldName, String argsInfoFieldName) {
this.templateFieldName = templateFieldName;
this.argsFieldName = argsFieldName;
this.argsInfoFieldName = argsInfoFieldName;
}

@Override
public BytesRefBuilder builder(BlockFactory factory, int expectedCount) {
return factory.bytesRefs(expectedCount);
}

@Override
public AllReader reader(LeafReaderContext context) throws IOException {
var docValues = PatternedTextDocValues.from(context.reader(), templateFieldName, argsFieldName, argsInfoFieldName);
if (docValues == null) {
return new ConstantNullsReader();
}
return new BlockDocValuesReader.BytesRefsFromSimpleBinary(docValues);
}
}
@@ -252,7 +252,7 @@ public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions,
 
     @Override
     public BlockLoader blockLoader(BlockLoaderContext blContext) {
-        return new BlockDocValuesReader.BytesRefsFromBinaryBlockLoader(name());
+        return new PatternedTextBlockLoader(templateFieldName(), argsFieldName(), argsInfoFieldName());
     }
 
     @Override
@@ -0,0 +1,108 @@
---
setup:
- requires:
cluster_features: [ "mapper.patterned_text" ]
reason: "patterned_text mappings are used in this test"

- do:
indices.create:
index: test
body:
settings:
index:
mapping.source.mode: synthetic
mappings:
properties:
"@timestamp":
type: date
message:
type: patterned_text
# defaults to index_options: docs

- do:
bulk:
index: test
refresh: true
body:
- { "index": { "_id": "1" } }
- { "@timestamp": "2025-07-17T00:00:01Z" }
- { "index": { "_id": "2" } }
- { "message": "Found 5 errors for service [cheddar1]", "@timestamp": "2025-07-17T00:00:02Z" }
# template_id: mOVsnxlxdac
- { "index": { "_id": "3" } }
- { "message": "[2020-08-18T00:58:56] Found 123 errors for service [cheddar1]", "@timestamp": "2025-07-17T00:00:03Z" }
# template_id: 1l_PtCLQ5xY
- { "index": { "_id": "4" } }
- { "message": "Found some errors for cheddar data service", "@timestamp": "2025-07-17T00:00:04Z"}
# template_id: k-2qtjujOCw
- { "index": { "_id": "5" } }
- { "message": "Found 123 errors for service [gorgonzola-24]", "@timestamp": "2025-07-17T00:00:05Z" }
# template_id: mOVsnxlxdac

---
teardown:
- do:
indices.delete:
index: test

---
"Simple from":
- do:
esql.query:
body:
query: 'FROM test | SORT @timestamp | KEEP message, message.template_id | LIMIT 10'

- match: {columns.0.name: "message"}
- match: {columns.0.type: "text"}
Contributor Author:
Should this be updated to be patterned_text? This comes from the familyType, which is text. So I think leaving as text is correct 🤔

- match: {columns.1.name: "message.template_id"}
- match: {columns.1.type: "keyword"}

- length: {values: 5}
- match: {values.0.0: null }
- match: {values.0.1: null }
- match: {values.1.0: "Found 5 errors for service [cheddar1]" }
- match: {values.1.1: "mOVsnxlxdac" }
- match: {values.2.0: "[2020-08-18T00:58:56] Found 123 errors for service [cheddar1]" }
- match: {values.2.1: "1l_PtCLQ5xY" }
- match: {values.3.0: "Found some errors for cheddar data service" }
- match: {values.3.1: "k-2qtjujOCw" }
- match: {values.4.0: "Found 123 errors for service [gorgonzola-24]" }
- match: {values.4.1: "mOVsnxlxdac" }
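
On the column-type question in the comment above: ESQL reports the field's family type rather than its concrete mapping type, which is why the column shows up as text instead of patterned_text. A minimal sketch of the idea, with a hypothetical class name and assuming patterned_text exposes its family type the same way other text-family mappers do:

```java
// Hypothetical illustration only; the real field type lives in the patterned_text
// mapper. The point is just that the family type ("text"), not the concrete type
// ("patterned_text"), is what the ESQL column type reflects.
public class PatternedTextFamilyTypeSketch {

    /** Concrete mapping type, as it appears in the index mapping. */
    public String typeName() {
        return "patterned_text";
    }

    /** Family type, which is what the ESQL column type reports. */
    public String familyTypeName() {
        return "text";
    }

    public static void main(String[] args) {
        PatternedTextFamilyTypeSketch type = new PatternedTextFamilyTypeSketch();
        System.out.println("mapping type:     " + type.typeName());        // patterned_text
        System.out.println("esql column type: " + type.familyTypeName());  // text
    }
}
```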

---
"match query":
- do:
esql.query:
body:
query: 'FROM test | WHERE MATCH(message, "gorgonzola-24") | KEEP message | LIMIT 10'

- length: {values: 1}
- match: {values.0.0: "Found 123 errors for service [gorgonzola-24]" }

---
"match phrase query":
- do:
esql.query:
body:
query: 'FROM test | WHERE MATCH_PHRASE(message, "123 errors") | SORT @timestamp | KEEP message | LIMIT 10'

- length: {values: 2}
- match: {values.0.0: "[2020-08-18T00:58:56] Found 123 errors for service [cheddar1]" }
- match: {values.1.0: "Found 123 errors for service [gorgonzola-24]" }

---
"template_id stats":
- do:
esql.query:
body:
query: 'FROM test | STATS count(*) BY message.template_id | SORT message.template_id | LIMIT 10'

- length: {values: 4}
- match: {values.0.0: 1 }
- match: {values.0.1: "1l_PtCLQ5xY" }
- match: {values.1.0: 1 }
- match: {values.1.1: "k-2qtjujOCw" }
- match: {values.2.0: 2 }
- match: {values.2.1: "mOVsnxlxdac" }
- match: {values.3.0: 1 }
- match: {values.3.1: null }