elastic · parkertimmins · Aug 28, 2025 · Aug 13, 2025 · Aug 13, 2025 · Aug 14, 2025
diff --git a/...db/src/main/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextDocValues.java b/...db/src/main/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextDocValues.java
@@ -14,35 +14,44 @@
 import org.apache.lucene.util.BytesRef;
 
 import java.io.IOException;
+import java.util.List;
 
 public class PatternedTextDocValues extends BinaryDocValues {
     private final SortedSetDocValues templateDocValues;
     private final SortedSetDocValues argsDocValues;
+    private final SortedSetDocValues argsSchemaDocValues;
 
-    PatternedTextDocValues(SortedSetDocValues templateDocValues, SortedSetDocValues argsDocValues) {
+    PatternedTextDocValues(SortedSetDocValues templateDocValues, SortedSetDocValues argsDocValues, SortedSetDocValues argsSchemaDocValues) {
         this.templateDocValues = templateDocValues;
         this.argsDocValues = argsDocValues;
+        this.argsSchemaDocValues = argsSchemaDocValues;
     }
 
-    static PatternedTextDocValues from(LeafReader leafReader, String templateFieldName, String argsFieldName) throws IOException {
+    static PatternedTextDocValues from(LeafReader leafReader, String templateFieldName, String argsFieldName, String argsSchemaFieldName)
+        throws IOException {
         SortedSetDocValues templateDocValues = DocValues.getSortedSet(leafReader, templateFieldName);
         if (templateDocValues.getValueCount() == 0) {
             return null;
         }
 
         SortedSetDocValues argsDocValues = DocValues.getSortedSet(leafReader, argsFieldName);
-        return new PatternedTextDocValues(templateDocValues, argsDocValues);
+        SortedSetDocValues argsSchemaDocValues = DocValues.getSortedSet(leafReader, argsSchemaFieldName);
+        return new PatternedTextDocValues(templateDocValues, argsDocValues, argsSchemaDocValues);
     }
 
     private String getNextStringValue() throws IOException {
         assert templateDocValues.docValueCount() == 1;
         String template = templateDocValues.lookupOrd(templateDocValues.nextOrd()).utf8ToString();
-        int argsCount = PatternedTextValueProcessor.countArgs(template);
-        if (argsCount > 0) {
+        List<PatternedTextValueProcessor.ArgSchema> argsSchema = PatternedTextValueProcessor.decodeArgumentSchema(
+            argsSchemaDocValues.lookupOrd(argsSchemaDocValues.nextOrd()).utf8ToString()
+        );
+
+        if (argsSchema.isEmpty() == false) {
             assert argsDocValues.docValueCount() == 1;
+            assert argsSchemaDocValues.docValueCount() == 1;
             var mergedArgs = argsDocValues.lookupOrd(argsDocValues.nextOrd());
             var args = PatternedTextValueProcessor.decodeRemainingArgs(mergedArgs.utf8ToString());
-            return PatternedTextValueProcessor.merge(new PatternedTextValueProcessor.Parts(template, args));
+            return PatternedTextValueProcessor.merge(new PatternedTextValueProcessor.Parts(template, args, argsSchema));
         } else {
             return template;
         }
@@ -56,6 +65,7 @@ public BytesRef binaryValue() throws IOException {
     @Override
     public boolean advanceExact(int i) throws IOException {
         argsDocValues.advanceExact(i);
+        argsSchemaDocValues.advanceExact(i);
         // If template has a value, then message has a value. We don't have to check args here, since there may not be args for the doc
         return templateDocValues.advanceExact(i);
     }
@@ -69,20 +79,24 @@ public int docID() {
     public int nextDoc() throws IOException {
         int templateNext = templateDocValues.nextDoc();
         var argsAdvance = argsDocValues.advance(templateNext);
+        var argsSchemaAdvance = argsSchemaDocValues.advance(templateNext);
         assert argsAdvance >= templateNext;
+        assert argsSchemaAdvance == templateNext;
         return templateNext;
     }
 
     @Override
     public int advance(int i) throws IOException {
         int templateAdvance = templateDocValues.advance(i);
         var argsAdvance = argsDocValues.advance(templateAdvance);
+        var argsSchemaAdvance = argsSchemaDocValues.advance(templateAdvance);
         assert argsAdvance >= templateAdvance;
+        assert argsSchemaAdvance == templateAdvance;
         return templateAdvance;
     }
 
     @Override
     public long cost() {
-        return templateDocValues.cost() + argsDocValues.cost();
+        return templateDocValues.cost() + argsDocValues.cost() + argsSchemaDocValues.cost();
     }
 }
diff --git a/.../src/main/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextFieldMapper.java b/.../src/main/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextFieldMapper.java
@@ -184,6 +184,10 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio
         // Add template_id doc_values
         context.doc().add(templateIdMapper.buildKeywordField(new BytesRef(parts.templateId())));
 
+        // Add args schema
+        String argsSchemaEncoded = PatternedTextValueProcessor.encodeArgumentSchema(parts.schemas());
+        context.doc().add(new SortedSetDocValuesField(fieldType().argsSchemaFieldName(), new BytesRef(argsSchemaEncoded)));
+
         // Add args doc_values
         if (parts.args().isEmpty() == false) {
             String remainingArgs = PatternedTextValueProcessor.encodeRemainingArgs(parts);
@@ -207,7 +211,12 @@ protected SyntheticSourceSupport syntheticSourceSupport() {
             () -> new CompositeSyntheticFieldLoader(
                 leafName(),
                 fullPath(),
-                new PatternedTextSyntheticFieldLoaderLayer(fieldType().name(), fieldType().templateFieldName(), fieldType().argsFieldName())
+                new PatternedTextSyntheticFieldLoaderLayer(
+                    fieldType().name(),
+                    fieldType().templateFieldName(),
+                    fieldType().argsFieldName(),
+                    fieldType().argsSchemaFieldName()
+                )
             )
         );
     }

diff --git a/...db/src/main/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextFieldType.java b/...db/src/main/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextFieldType.java
@@ -57,6 +57,7 @@ public class PatternedTextFieldType extends StringFieldType {
     private static final String TEMPLATE_SUFFIX = ".template";
     private static final String TEMPLATE_ID_SUFFIX = ".template_id";
     private static final String ARGS_SUFFIX = ".args";
+    private static final String ARGS_SCHEMA_SUFFIX = ".args_schema";
 
     public static final String CONTENT_TYPE = "patterned_text";
 
@@ -272,4 +273,8 @@ String argsFieldName() {
         return name() + ARGS_SUFFIX;
     }
 
+    String argsSchemaFieldName() {
+        return name() + ARGS_SCHEMA_SUFFIX;
+    }
+
 }
diff --git a/...c/main/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextIndexFieldData.java b/...c/main/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextIndexFieldData.java
@@ -75,7 +75,8 @@ public LeafFieldData loadDirect(LeafReaderContext context) throws IOException {
         PatternedTextDocValues docValues = PatternedTextDocValues.from(
             leafReader,
             fieldType.templateFieldName(),
-            fieldType.argsFieldName()
+            fieldType.argsFieldName(),
+            fieldType.argsSchemaFieldName()
         );
         return new LeafFieldData() {
 

diff --git a/.../org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextSyntheticFieldLoaderLayer.java b/.../org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextSyntheticFieldLoaderLayer.java
@@ -19,12 +19,14 @@ class PatternedTextSyntheticFieldLoaderLayer implements CompositeSyntheticFieldL
     private final String name;
     private final String templateFieldName;
     private final String argsFieldName;
+    private final String argsSchemaFieldName;
     private PatternedTextSyntheticFieldLoader loader;
 
-    PatternedTextSyntheticFieldLoaderLayer(String name, String templateFieldName, String argsFieldName) {
+    PatternedTextSyntheticFieldLoaderLayer(String name, String templateFieldName, String argsFieldName, String argsSchemaFieldName) {
         this.name = name;
         this.templateFieldName = templateFieldName;
         this.argsFieldName = argsFieldName;
+        this.argsSchemaFieldName = argsSchemaFieldName;
     }
 
     @Override
@@ -34,7 +36,7 @@ public long valueCount() {
 
     @Override
     public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf) throws IOException {
-        var docValues = PatternedTextDocValues.from(leafReader, templateFieldName, argsFieldName);
+        var docValues = PatternedTextDocValues.from(leafReader, templateFieldName, argsFieldName, argsSchemaFieldName);
         if (docValues == null) {
             return null;
         }

diff --git a/...c/main/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextValueProcessor.java b/...c/main/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextValueProcessor.java
@@ -7,26 +7,96 @@
 
 package org.elasticsearch.xpack.logsdb.patternedtext;
 
+import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.store.ByteArrayDataOutput;
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.hash.MurmurHash3;
 import org.elasticsearch.common.util.ByteUtils;
 
+import java.io.IOException;
 import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Base64;
 import java.util.List;
 
 public class PatternedTextValueProcessor {
-    private static final String TEXT_ARG_PLACEHOLDER = "%W";
+    private static final String ARG_PLACEHOLDER = "%";
     private static final String DELIMITER = "[\\s\\[\\]]";
     private static final String SPACE = " ";
 
-    record Parts(String template, String templateId, List<String> args) {
-        Parts(String template, List<String> args) {
-            this(template, PatternedTextValueProcessor.templateId(template), args);
+    record Parts(String template, String templateId, List<String> args, List<ArgSchema> schemas) {
+        Parts(String template, List<String> args, List<ArgSchema> schemas) {
+            this(template, PatternedTextValueProcessor.templateId(template), args, schemas);
+        }
+
+        Parts(String template, List<String> args, String encodedArgsSchema) throws IOException {
+            this(template, PatternedTextValueProcessor.templateId(template), args, decodeArgumentSchema(encodedArgsSchema));
         }
     }
 
+    enum ArgType {
+        GENERAL(0),
+        IP4(1),
+        INTEGER(2);
+
+        private final int code;
+        private static final ArgType[] lookup = new ArgType[values().length];
+        static {
+            for (ArgType type : values()) {
+                lookup[type.code] = type;
+            }
+        }
+
+        ArgType(int code) {
+            this.code = code;
+        }
+
+        public int toCode() {
+            return code;
+        }
+
+        public static ArgType fromCode(int code) {
+            return lookup[code];
+        }
+    }
+
+    record ArgSchema(ArgType type, int offsetInTemplate) {
+
+    }
+
+    static String encodeArgumentSchema(List<ArgSchema> arguments) throws IOException {
+        int maxSize = Integer.BYTES + arguments.size() * (Integer.BYTES + Integer.BYTES);
+        byte[] buffer = new byte[maxSize];
+        var dataInput = new ByteArrayDataOutput(buffer);
+        dataInput.writeVInt(arguments.size());
+        for (var arg : arguments) {
+            dataInput.writeVInt(arg.type.toCode());
+            dataInput.writeVInt(arg.offsetInTemplate);
+        }
+
+        int size = dataInput.getPosition();
+        byte[] data = Arrays.copyOfRange(buffer, 0, size);
+        return Strings.BASE_64_NO_PADDING_URL_ENCODER.encodeToString(data);
+    }
+
+    private static final Base64.Decoder DECODER = Base64.getUrlDecoder();
+
+    static List<ArgSchema> decodeArgumentSchema(String encoded) throws IOException {
+        byte[] encodedBytes = DECODER.decode(encoded);
+        var input = new ByteArrayDataInput(encodedBytes);
+
+        int numArgs = input.readVInt();
+        List<ArgSchema> arguments = new ArrayList<>(numArgs);
+
+        for (int i = 0; i < numArgs; i++) {
+            var argType = ArgType.fromCode(input.readVInt());
+            int offsetInTemplate = input.readVInt();
+            arguments.add(new ArgSchema(argType, offsetInTemplate));
+        }
+        return arguments;
+    }
+
     static String templateId(String template) {
         byte[] bytes = template.getBytes(StandardCharsets.UTF_8);
         MurmurHash3.Hash128 hash = new MurmurHash3.Hash128();
@@ -36,33 +106,36 @@ static String templateId(String template) {
         return Strings.BASE_64_NO_PADDING_URL_ENCODER.encodeToString(hashBytes);
     }
 
-    static Parts split(String text) {
+    static Parts split(String text) throws IOException {
         StringBuilder template = new StringBuilder();
         List<String> args = new ArrayList<>();
+        List<ArgSchema> schemas = new ArrayList<>();
         String[] tokens = text.split(DELIMITER);
         int textIndex = 0;
         for (String token : tokens) {
             if (token.isEmpty()) {
+                // add the previous delimiter
                 if (textIndex < text.length() - 1) {
                     template.append(text.charAt(textIndex++));
                 }
-                continue;
-            }
-            if (isArg(token)) {
-                args.add(token);
-                template.append(TEXT_ARG_PLACEHOLDER);
             } else {
-                template.append(token);
-            }
-            textIndex += token.length();
-            if (textIndex < text.length()) {
-                template.append(text.charAt(textIndex++));
+                if (isArg(token)) {
+                    args.add(token);
+                    schemas.add(new ArgSchema(ArgType.GENERAL, template.length()));
+                    template.append(ARG_PLACEHOLDER);
+                } else {
+                    template.append(token);
+                }
+                textIndex += token.length();
+                if (textIndex < text.length()) {
+                    template.append(text.charAt(textIndex++));
+                }
             }
         }
         while (textIndex < text.length()) {
             template.append(text.charAt(textIndex++));
         }
-        return new Parts(template.toString(), args);
+        return new Parts(template.toString(), args, schemas);
     }
 
     private static boolean isArg(String text) {
@@ -76,25 +149,20 @@ private static boolean isArg(String text) {
 
     static String merge(Parts parts) {
         StringBuilder builder = new StringBuilder();
-        String[] templateParts = parts.template.split(DELIMITER);
-        int i = 0;
-        int templateIndex = 0;
-        for (String part : templateParts) {
-            if (part.equals(TEXT_ARG_PLACEHOLDER)) {
-                builder.append(parts.args.get(i++));
-                templateIndex += TEXT_ARG_PLACEHOLDER.length();
-            } else if (part.isEmpty() == false) {
-                builder.append(part);
-                templateIndex += part.length();
-            }
-            if (templateIndex < parts.template.length()) {
-                builder.append(parts.template.charAt(templateIndex++));
-            }
+        int numArgs = parts.args.size();
+
+        int lastWritten = 0;
+        for (int i = 0; i < numArgs; i++) {
+            String arg = parts.args.get(i);
+            ArgSchema argSchema = parts.schemas.get(i);
+
+            builder.append(parts.template, lastWritten, argSchema.offsetInTemplate);
+            builder.append(arg);
+            lastWritten = argSchema.offsetInTemplate + ARG_PLACEHOLDER.length();
         }
-        assert i == parts.args.size() : "expected " + i + " but got " + parts.args.size();
-        assert builder.toString().contains(TEXT_ARG_PLACEHOLDER) == false : builder.toString();
-        while (templateIndex < parts.template.length()) {
-            builder.append(parts.template.charAt(templateIndex++));
+
+        if (lastWritten < parts.template.length()) {
+            builder.append(parts.template.substring(lastWritten));
         }
         return builder.toString();
     }
@@ -106,18 +174,4 @@ static String encodeRemainingArgs(Parts parts) {
     static List<String> decodeRemainingArgs(String mergedArgs) {
         return Arrays.asList(mergedArgs.split(SPACE));
     }
-
-    static int countArgs(String template) {
-        int count = 0;
-        for (int i = 0; i < template.length() - 1; i++) {
-            if (template.charAt(i) == '%') {
-                char next = template.charAt(i + 1);
-                if (next == 'W') {
-                    count++;
-                    i++;
-                }
-            }
-        }
-        return count;
-    }
 }