Skip to content
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/133745.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 133745
summary: Supporting more timestamp formats in `_text_structure/find_structure`
area: Machine Learning
type: feature
issues: []
2 changes: 2 additions & 0 deletions libs/grok/src/main/resources/patterns/ecs-v1/grok-patterns
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ ISO8601_SECOND %{SECOND}
TIMESTAMP_ISO8601 %{YEAR}-%{MONTHNUM}-%{MONTHDAY}[T ]%{HOUR}:?%{MINUTE}(?::?%{SECOND})?%{ISO8601_TIMEZONE}?
DATE %{DATE_US}|%{DATE_EU}
DATESTAMP %{DATE}[- ]%{TIME}
# Year-first date. The two separators are matched independently, so each may be '.', '/' or '-'.
DATE_YMD %{YEAR}[./-]%{MONTHNUM2}[./-]%{MONTHDAY}
# A DATE_YMD followed by exactly one space and a TIME.
TIMESTAMP_YMD %{DATE_YMD}[ ]%{TIME}
TZ (?:[APMCE][SD]T|UTC)
DATESTAMP_RFC822 %{DAY} %{MONTH} %{MONTHDAY} %{YEAR} %{TIME} %{TZ}
DATESTAMP_RFC2822 %{DAY}, %{MONTHDAY} %{MONTH} %{YEAR} %{TIME} %{ISO8601_TIMEZONE}
Expand Down
2 changes: 2 additions & 0 deletions libs/grok/src/main/resources/patterns/legacy/grok-patterns
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ ISO8601_HOUR (?:2[0123]|[01][0-9])
TIMESTAMP_ISO8601 %{YEAR}-%{MONTHNUM}-%{MONTHDAY}[T ]%{ISO8601_HOUR}:?%{MINUTE}(?::?%{SECOND})?%{ISO8601_TIMEZONE}?
DATE %{DATE_US}|%{DATE_EU}
DATESTAMP %{DATE}[- ]%{TIME}
# Year-first date. The two separators are matched independently, so each may be '.', '/' or '-'.
DATE_YMD %{YEAR}[./-]%{MONTHNUM2}[./-]%{MONTHDAY}
# A DATE_YMD followed by exactly one space and a TIME.
TIMESTAMP_YMD %{DATE_YMD}[ ]%{TIME}
TZ (?:[APMCE][SD]T|UTC)
DATESTAMP_RFC822 %{DAY} %{MONTH} %{MONTHDAY} %{YEAR} %{TIME} %{TZ}
DATESTAMP_RFC2822 %{DAY}, %{MONTHDAY} %{MONTH} %{YEAR} %{TIME} %{ISO8601_TIMEZONE}
Expand Down
1 change: 1 addition & 0 deletions modules/ingest-common/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ tasks.named("thirdPartyAudit").configure {

// Tests skipped in the YAML REST compatibility run; the second argument of each skipTest call records why.
tasks.named("yamlRestCompatTestTransform").configure({ task ->
task.skipTest("ingest/30_date_processor/Test week based date parsing", "week-date behaviour has changed")
// This test asserts the total number of grok patterns, so it breaks whenever patterns are added.
task.skipTest("ingest/120_grok/Test Grok Patterns Retrieval", "only counting the number of patterns, which may change")
})

configurations {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ teardown:
"Test Grok Patterns Retrieval":
- do:
ingest.processor_grok: {}
- length: { patterns: 318 }
- length: { patterns: 320 }
- match: { patterns.PATH: "(?:%{UNIXPATH}|%{WINPATH})" }


Expand Down
14 changes: 14 additions & 0 deletions x-pack/plugin/text-structure/build.gradle
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
apply plugin: 'elasticsearch.internal-es-plugin'
apply plugin: 'elasticsearch.internal-java-rest-test'
apply plugin: 'elasticsearch.yaml-rest-compat-test'

esplugin {
name = 'x-pack-text-structure'
description = 'Elasticsearch Expanded Pack Plugin - Text Structure'
Expand All @@ -9,12 +12,23 @@ base {
archivesName = 'x-pack-text-structure'
}

// REST API spec groups made available to this project's REST tests.
restResources {
restApi {
include '_common', 'cluster', 'text_structure'
}
}

dependencies {
compileOnly project(path: xpackModule('core'))
testImplementation(testArtifact(project(xpackModule('core'))))
// NOTE(review): presumably supplies the ElasticsearchCluster test rule used by the Java REST tests - confirm.
testImplementation project(path: ':test:test-clusters')
api project(':libs:grok')
api "com.ibm.icu:icu4j:${versions.icu4j}"
api "net.sf.supercsv:super-csv:${versions.supercsv}"
}

addQaCheckDependencies(project)

tasks.named('javaRestTest') {
// NOTE(review): "to be triaged" is a placeholder; replace it with the concrete reason
// the default distribution is needed once that has been established.
usesDefaultDistribution("to be triaged")
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
package org.elasticsearch.xpack.textstructure.rest;

import com.carrotsearch.randomizedtesting.annotations.Name;
import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;

import org.apache.http.entity.ContentType;
import org.apache.http.entity.StringEntity;
import org.elasticsearch.client.Request;
import org.elasticsearch.client.Response;
import org.elasticsearch.test.cluster.ElasticsearchCluster;
import org.elasticsearch.test.cluster.local.distribution.DistributionType;
import org.elasticsearch.test.rest.ESRestTestCase;
import org.junit.ClassRule;

import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Map;

import static org.hamcrest.Matchers.containsInAnyOrder;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.hasKey;

/**
 * REST integration tests for the timestamp formats reported by
 * {@code POST /_text_structure/find_structure}.
 * <p>
 * Every test posts a small multi-line sample and then checks the mapped type of
 * {@code @timestamp}, the reported {@code java_timestamp_formats} and the
 * reported {@code grok_pattern}. The whole suite is repeated for each
 * {@code ecs_compatibility} value returned by {@link #parameters()}.
 */
public class TextStructureTimestampFormatsIT extends ESRestTestCase {

    /** Java formats expected when a sample is recognised as ISO 8601. */
    public static final String[] ISO_08601_JAVA_FORMATS = { "yyyy-MM-dd HH:mm:ss" };
    /** Grok fragment expected when a sample is recognised as ISO 8601. */
    public static final String ISO_08601_TIMESTAMP_GROK_PATTERN = "%{TIMESTAMP_ISO8601:timestamp}";

    /** Java formats expected for year-first timestamps without fractional seconds. */
    public static final String[] TIMESTAMP_YMD_JAVA_FORMATS = {
        "yyyy/MM/dd HH:mm:ss",
        "yyyy.MM.dd HH:mm:ss",
        "yyyy-MM-dd HH:mm:ss" };
    /** Grok fragment expected for year-first timestamps. */
    public static final String TIMESTAMP_YMD_TIMESTAMP_GROK_PATTERN = "%{TIMESTAMP_YMD:timestamp}";

    /** Java formats expected for dates written as abbreviated month name, day, year. */
    public static final String[] MONTH_EXPLICIT_NAME_JAVA_FORMATS = { "MMM d, yyyy" };

    /** Value sent as the {@code ecs_compatibility} request parameter by every test. */
    private final String ecsCompatibility;

    /** Local test cluster: default distribution, text-structure module, security switched off. */
    @ClassRule
    public static ElasticsearchCluster cluster = ElasticsearchCluster.local()
        .distribution(DistributionType.DEFAULT)
        .module("x-pack-text-structure")
        .setting("xpack.security.enabled", "false")
        .build();

    /**
     * @param ecsCompatibility the {@code ecs_compatibility} value this instance of the suite runs with
     */
    public TextStructureTimestampFormatsIT(@Name("ecs_compatibility") String ecsCompatibility) {
        this.ecsCompatibility = ecsCompatibility;
    }

    /** Points the REST client at the cluster started by {@link #cluster}. */
    @Override
    protected String getTestRestCluster() {
        return cluster.getHttpAddresses();
    }

    /**
     * Supplies one constructor argument list per {@code ecs_compatibility} mode under test.
     *
     * @return the argument lists {@code ["v1"]} and {@code ["disabled"]}
     */
    @ParametersFactory
    public static Iterable<Object[]> parameters() {
        Object[] ecsV1 = { "v1" };
        Object[] ecsDisabled = { "disabled" };
        return Arrays.asList(ecsV1, ecsDisabled);
    }

    /** Slash-separated year-first timestamps, whole seconds. */
    public void testTimestampYearYmdSlashFormat() throws IOException {
        // the sample spans several lines so that ndjson detection is exercised
        String sample = """
            "2025/07/10 10:30:35"
            "2025/07/10 10:31:42"
            "2025/07/10 10:32:15"
            """;
        Map<String, Object> structure = executeAndVerifyRequest(sample, ecsCompatibility);

        verifyTimestampDetected(structure, "date");
        verifyTimestampFormat(structure, TIMESTAMP_YMD_TIMESTAMP_GROK_PATTERN, TIMESTAMP_YMD_JAVA_FORMATS);
    }

    /** Slash-separated year-first timestamps with a dot followed by milliseconds. */
    public void testTimestampYearYmdSlashFormat_WithDotAndMillis() throws IOException {
        // the sample spans several lines so that ndjson detection is exercised
        String sample = """
            "2025/07/10 10:30:35.123"
            "2025/07/10 10:31:42.123"
            "2025/07/10 10:32:15.123"
            """;
        Map<String, Object> structure = executeAndVerifyRequest(sample, ecsCompatibility);

        verifyTimestampDetected(structure, "date");
        String[] millisFormats = { "yyyy/MM/dd HH:mm:ss.SSS", "yyyy.MM.dd HH:mm:ss.SSS", "yyyy-MM-dd HH:mm:ss.SSS" };
        verifyTimestampFormat(structure, TIMESTAMP_YMD_TIMESTAMP_GROK_PATTERN, millisFormats);
    }

    /** Slash-separated year-first timestamps with a comma followed by nanoseconds. */
    public void testTimestampYearYmdSlashFormat_WithSlashAndNanos() throws IOException {
        // the sample spans several lines so that ndjson detection is exercised
        String sample = """
            "2025/07/10 10:30:35,123456789"
            "2025/07/10 10:31:42,123456789"
            "2025/07/10 10:32:15,123456789"
            """;
        Map<String, Object> structure = executeAndVerifyRequest(sample, ecsCompatibility);

        verifyTimestampDetected(structure, "date_nanos");
        String[] nanosFormats = {
            "yyyy/MM/dd HH:mm:ss,SSSSSSSSS",
            "yyyy.MM.dd HH:mm:ss,SSSSSSSSS",
            "yyyy-MM-dd HH:mm:ss,SSSSSSSSS" };
        verifyTimestampFormat(structure, TIMESTAMP_YMD_TIMESTAMP_GROK_PATTERN, nanosFormats);
    }

    /** Dot-separated year-first timestamps, whole seconds. */
    public void testTimestampYearYmdDotFormat() throws IOException {
        // the sample spans several lines so that ndjson detection is exercised
        String sample = """
            "2025.07.10 10:30:35"
            "2025.07.10 10:31:42"
            "2025.07.10 10:32:15"
            """;
        Map<String, Object> structure = executeAndVerifyRequest(sample, ecsCompatibility);

        verifyTimestampDetected(structure, "date");
        verifyTimestampFormat(structure, TIMESTAMP_YMD_TIMESTAMP_GROK_PATTERN, TIMESTAMP_YMD_JAVA_FORMATS);
    }

    /** Dash-separated timestamps must be reported as ISO 8601 rather than as TIMESTAMP_YMD. */
    public void testIso08601TimestampFormat() throws IOException {
        // the sample spans several lines so that ndjson detection is exercised
        String sample = """
            "2025-07-10 10:30:35"
            "2025-07-10 10:31:42"
            "2025-07-10 10:32:15"
            """;
        Map<String, Object> structure = executeAndVerifyRequest(sample, ecsCompatibility);

        verifyTimestampDetected(structure, "date");
        // ISO 8601 is expected to take priority over TIMESTAMP_YMD for this input
        verifyTimestampFormat(structure, ISO_08601_TIMESTAMP_GROK_PATTERN, ISO_08601_JAVA_FORMATS);
    }

    /** Dates written as abbreviated month name, day of month, comma, year. */
    public void testMonthExplicitNameFormat() throws IOException {
        // the sample spans several lines so that ndjson detection is exercised
        String sample = """
            "Aug 9, 2025"
            "Aug 10, 2025"
            "Aug 11, 2025"
            """;
        Map<String, Object> structure = executeAndVerifyRequest(sample, ecsCompatibility);

        verifyTimestampDetected(structure, "date");
        verifyTimestampFormat(structure, "CUSTOM_TIMESTAMP", MONTH_EXPLICIT_NAME_JAVA_FORMATS);
    }

    /**
     * Posts {@code sample} to the find-structure endpoint and asserts an OK response.
     *
     * @param sample           request body to analyse
     * @param ecsCompatibility value of the {@code ecs_compatibility} request parameter
     * @return the response body parsed into a map
     * @throws IOException if the request fails
     */
    private static Map<String, Object> executeAndVerifyRequest(String sample, String ecsCompatibility) throws IOException {
        Request findStructure = new Request("POST", "/_text_structure/find_structure");
        findStructure.addParameter("ecs_compatibility", ecsCompatibility);
        findStructure.setEntity(new StringEntity(sample, ContentType.APPLICATION_JSON));

        Response findStructureResponse = client().performRequest(findStructure);
        assertOK(findStructureResponse);
        return entityAsMap(findStructureResponse);
    }

    /**
     * Asserts that {@code mappings.properties.@timestamp.type} in the response equals {@code expectedType}.
     *
     * @param responseMap  parsed find-structure response
     * @param expectedType expected mapping type of the {@code @timestamp} field
     */
    private static void verifyTimestampDetected(Map<String, Object> responseMap, String expectedType) {
        Map<String, Object> mappings = asMap(responseMap.get("mappings"));
        assertThat(mappings, hasKey("properties"));

        Map<String, Object> properties = asMap(mappings.get("properties"));
        assertThat(properties, hasKey("@timestamp"));

        Map<String, Object> timestamp = asMap(properties.get("@timestamp"));
        assertThat(timestamp.get("type"), equalTo(expectedType));
    }

    /**
     * Asserts the reported Java timestamp formats and grok pattern.
     *
     * @param responseMap         parsed find-structure response
     * @param expectedGrokPattern fragment that {@code grok_pattern} must contain
     * @param expectedJavaFormats exact contents of {@code java_timestamp_formats}, in any order
     */
    private static void verifyTimestampFormat(Map<String, Object> responseMap, String expectedGrokPattern, String... expectedJavaFormats) {
        assertThat(responseMap, hasKey("java_timestamp_formats"));
        @SuppressWarnings("unchecked")
        List<String> reportedFormats = (List<String>) responseMap.get("java_timestamp_formats");
        assertThat(reportedFormats, containsInAnyOrder(expectedJavaFormats));

        assertThat((String) responseMap.get("grok_pattern"), containsString(expectedGrokPattern));
    }

    /** Views a nested JSON object taken from a parsed response as a string-keyed map. */
    @SuppressWarnings("unchecked")
    private static Map<String, Object> asMap(Object jsonObject) {
        return (Map<String, Object>) jsonObject;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,28 @@ public final class TimestampFormatFinder {
Arrays.asList(" 11 1111 11 11 11 111", " 1 1111 11 11 11 111"),
0,
0
),
new CandidateTimestampFormat(
example -> Arrays.asList(
CandidateTimestampFormat.adjustFractionalSecondsFromEndOfExample(example, "yyyy/MM/dd HH:mm:ss"),
CandidateTimestampFormat.adjustFractionalSecondsFromEndOfExample(example, "yyyy.MM.dd HH:mm:ss"),
CandidateTimestampFormat.adjustFractionalSecondsFromEndOfExample(example, "yyyy-MM-dd HH:mm:ss")
),
"\\b\\d{4}[./-]\\d{2}[./-]\\d{2} \\d{2}:\\d{2}:\\d{2}(?:[.,]\\d+)?\\b",
"\\b%{TIMESTAMP_YMD}\\b",
"TIMESTAMP_YMD",
List.of("1111 11 11 11 11 11"),
0,
10
),
new CandidateTimestampFormat(
example -> Collections.singletonList("MMM d, yyyy"),
"\\b[A-Z][a-z]{2} \\d{1,2}, \\d{4}\\b",
"\\b%{MONTH} %{MONTHDAY}, %{YEAR}\\b",
CUSTOM_TIMESTAMP_GROK_NAME,
Arrays.asList(" 11 1111", " 1 1111"),
5,
0
)
);

Expand Down
Loading