Skip to content

Commit f3f19cd

Browse files
authored
Supporting more timestamp formats in _text_structure/find_structure (#133745)
1 parent 88f55db commit f3f19cd

File tree

10 files changed

+414
-7
lines changed

10 files changed

+414
-7
lines changed

docs/changelog/133745.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 133745
2+
summary: Supporting more timestamp formats in `_text_structure/find_structure`
3+
area: Machine Learning
4+
type: feature
5+
issues: []

libs/grok/src/main/resources/patterns/ecs-v1/grok-patterns

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,8 @@ ISO8601_SECOND %{SECOND}
7171
TIMESTAMP_ISO8601 %{YEAR}-%{MONTHNUM}-%{MONTHDAY}[T ]%{HOUR}:?%{MINUTE}(?::?%{SECOND})?%{ISO8601_TIMEZONE}?
7272
DATE %{DATE_US}|%{DATE_EU}
7373
DATESTAMP %{DATE}[- ]%{TIME}
74+
DATE_YMD %{YEAR}[./-]%{MONTHNUM2}[./-]%{MONTHDAY}
75+
TIMESTAMP_YMD %{DATE_YMD}[ ]%{TIME}
7476
TZ (?:[APMCE][SD]T|UTC)
7577
DATESTAMP_RFC822 %{DAY} %{MONTH} %{MONTHDAY} %{YEAR} %{TIME} %{TZ}
7678
DATESTAMP_RFC2822 %{DAY}, %{MONTHDAY} %{MONTH} %{YEAR} %{TIME} %{ISO8601_TIMEZONE}

libs/grok/src/main/resources/patterns/legacy/grok-patterns

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,8 @@ ISO8601_HOUR (?:2[0123]|[01][0-9])
7272
TIMESTAMP_ISO8601 %{YEAR}-%{MONTHNUM}-%{MONTHDAY}[T ]%{ISO8601_HOUR}:?%{MINUTE}(?::?%{SECOND})?%{ISO8601_TIMEZONE}?
7373
DATE %{DATE_US}|%{DATE_EU}
7474
DATESTAMP %{DATE}[- ]%{TIME}
75+
DATE_YMD %{YEAR}[./-]%{MONTHNUM2}[./-]%{MONTHDAY}
76+
TIMESTAMP_YMD %{DATE_YMD}[ ]%{TIME}
7577
TZ (?:[APMCE][SD]T|UTC)
7678
DATESTAMP_RFC822 %{DAY} %{MONTH} %{MONTHDAY} %{YEAR} %{TIME} %{TZ}
7779
DATESTAMP_RFC2822 %{DAY}, %{MONTHDAY} %{MONTH} %{YEAR} %{TIME} %{ISO8601_TIMEZONE}

modules/ingest-common/build.gradle

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ tasks.named("thirdPartyAudit").configure {
5353

5454
tasks.named("yamlRestCompatTestTransform").configure({ task ->
5555
task.skipTest("ingest/30_date_processor/Test week based date parsing", "week-date behaviour has changed")
56+
task.skipTest("ingest/120_grok/Test Grok Patterns Retrieval", "only counting the number of patterns, which may change")
5657
})
5758

5859
configurations {

modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/120_grok.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ teardown:
152152
"Test Grok Patterns Retrieval":
153153
- do:
154154
ingest.processor_grok: {}
155-
- length: { patterns: 318 }
155+
- length: { patterns: 320 }
156156
- match: { patterns.PATH: "(?:%{UNIXPATH}|%{WINPATH})" }
157157
158158

x-pack/plugin/text-structure/build.gradle

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
apply plugin: 'elasticsearch.internal-es-plugin'
2+
apply plugin: 'elasticsearch.internal-java-rest-test'
3+
apply plugin: 'elasticsearch.yaml-rest-compat-test'
4+
25
esplugin {
36
name = 'x-pack-text-structure'
47
description = 'Elasticsearch Expanded Pack Plugin - Text Structure'
@@ -9,12 +12,23 @@ base {
912
archivesName = 'x-pack-text-structure'
1013
}
1114

15+
restResources {
16+
restApi {
17+
include '_common', 'cluster', 'text_structure'
18+
}
19+
}
20+
1221
dependencies {
1322
compileOnly project(path: xpackModule('core'))
1423
testImplementation(testArtifact(project(xpackModule('core'))))
24+
testImplementation project(path: ':test:test-clusters')
1525
api project(':libs:grok')
1626
api "com.ibm.icu:icu4j:${versions.icu4j}"
1727
api "net.sf.supercsv:super-csv:${versions.supercsv}"
1828
}
1929

2030
addQaCheckDependencies(project)
31+
32+
tasks.named('javaRestTest') {
33+
usesDefaultDistribution("to be triaged")
34+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
package org.elasticsearch.xpack.textstructure.rest;
8+
9+
import com.carrotsearch.randomizedtesting.annotations.Name;
10+
import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
11+
12+
import org.apache.http.entity.ContentType;
13+
import org.apache.http.entity.StringEntity;
14+
import org.elasticsearch.client.Request;
15+
import org.elasticsearch.client.Response;
16+
import org.elasticsearch.test.cluster.ElasticsearchCluster;
17+
import org.elasticsearch.test.cluster.local.distribution.DistributionType;
18+
import org.elasticsearch.test.rest.ESRestTestCase;
19+
import org.junit.ClassRule;
20+
21+
import java.io.IOException;
22+
import java.util.Arrays;
23+
import java.util.List;
24+
import java.util.Map;
25+
26+
import static org.hamcrest.Matchers.containsInAnyOrder;
27+
import static org.hamcrest.Matchers.containsString;
28+
import static org.hamcrest.Matchers.equalTo;
29+
import static org.hamcrest.Matchers.hasKey;
30+
31+
public class TextStructureTimestampFormatsIT extends ESRestTestCase {
32+
33+
public static final String[] ISO_08601_JAVA_FORMATS = new String[] { "yyyy-MM-dd HH:mm:ss" };
34+
public static final String ISO_08601_TIMESTAMP_GROK_PATTERN = "%{TIMESTAMP_ISO8601:timestamp}";
35+
36+
public static final String[] TIMESTAMP_YMD_JAVA_FORMATS = new String[] {
37+
"yyyy/MM/dd HH:mm:ss",
38+
"yyyy.MM.dd HH:mm:ss",
39+
"yyyy-MM-dd HH:mm:ss" };
40+
public static final String TIMESTAMP_YMD_TIMESTAMP_GROK_PATTERN = "%{TIMESTAMP_YMD:timestamp}";
41+
42+
public static final String[] MONTH_EXPLICIT_NAME_JAVA_FORMATS = new String[] { "MMM d, yyyy" };
43+
44+
private final String ecsCompatibility;
45+
46+
@ClassRule
47+
public static ElasticsearchCluster cluster = ElasticsearchCluster.local()
48+
.distribution(DistributionType.DEFAULT)
49+
.module("x-pack-text-structure")
50+
.setting("xpack.security.enabled", "false")
51+
.build();
52+
53+
public TextStructureTimestampFormatsIT(@Name("ecs_compatibility") String ecsCompatibility) {
54+
this.ecsCompatibility = ecsCompatibility;
55+
}
56+
57+
@Override
58+
protected String getTestRestCluster() {
59+
return cluster.getHttpAddresses();
60+
}
61+
62+
@ParametersFactory
63+
public static Iterable<Object[]> parameters() {
64+
return Arrays.asList(new Object[] { "v1" }, new Object[] { "disabled" });
65+
}
66+
67+
public void testTimestampYearYmdSlashFormat() throws IOException {
68+
// use a multi-line sample to ensure we are detecting ndjson format
69+
Map<String, Object> responseMap = executeAndVerifyRequest("""
70+
"2025/07/10 10:30:35"
71+
"2025/07/10 10:31:42"
72+
"2025/07/10 10:32:15"
73+
""", ecsCompatibility);
74+
verifyTimestampDetected(responseMap, "date");
75+
verifyTimestampFormat(responseMap, TIMESTAMP_YMD_TIMESTAMP_GROK_PATTERN, TIMESTAMP_YMD_JAVA_FORMATS);
76+
}
77+
78+
public void testTimestampYearYmdSlashFormat_WithDotAndMillis() throws IOException {
79+
// use a multi-line sample to ensure we are detecting ndjson format
80+
Map<String, Object> responseMap = executeAndVerifyRequest("""
81+
"2025/07/10 10:30:35.123"
82+
"2025/07/10 10:31:42.123"
83+
"2025/07/10 10:32:15.123"
84+
""", ecsCompatibility);
85+
verifyTimestampDetected(responseMap, "date");
86+
verifyTimestampFormat(
87+
responseMap,
88+
TIMESTAMP_YMD_TIMESTAMP_GROK_PATTERN,
89+
"yyyy/MM/dd HH:mm:ss.SSS",
90+
"yyyy.MM.dd HH:mm:ss.SSS",
91+
"yyyy-MM-dd HH:mm:ss.SSS"
92+
);
93+
}
94+
95+
public void testTimestampYearYmdSlashFormat_WithSlashAndNanos() throws IOException {
96+
// use a multi-line sample to ensure we are detecting ndjson format
97+
Map<String, Object> responseMap = executeAndVerifyRequest("""
98+
"2025/07/10 10:30:35,123456789"
99+
"2025/07/10 10:31:42,123456789"
100+
"2025/07/10 10:32:15,123456789"
101+
""", ecsCompatibility);
102+
verifyTimestampDetected(responseMap, "date_nanos");
103+
verifyTimestampFormat(
104+
responseMap,
105+
TIMESTAMP_YMD_TIMESTAMP_GROK_PATTERN,
106+
"yyyy/MM/dd HH:mm:ss,SSSSSSSSS",
107+
"yyyy.MM.dd HH:mm:ss,SSSSSSSSS",
108+
"yyyy-MM-dd HH:mm:ss,SSSSSSSSS"
109+
);
110+
}
111+
112+
public void testTimestampYearYmdDotFormat() throws IOException {
113+
// use a multi-line sample to ensure we are detecting ndjson format
114+
Map<String, Object> responseMap = executeAndVerifyRequest("""
115+
"2025.07.10 10:30:35"
116+
"2025.07.10 10:31:42"
117+
"2025.07.10 10:32:15"
118+
""", ecsCompatibility);
119+
verifyTimestampDetected(responseMap, "date");
120+
verifyTimestampFormat(responseMap, TIMESTAMP_YMD_TIMESTAMP_GROK_PATTERN, TIMESTAMP_YMD_JAVA_FORMATS);
121+
}
122+
123+
public void testIso08601TimestampFormat() throws IOException {
124+
// use a multi-line sample to ensure we are detecting ndjson format
125+
Map<String, Object> responseMap = executeAndVerifyRequest("""
126+
"2025-07-10 10:30:35"
127+
"2025-07-10 10:31:42"
128+
"2025-07-10 10:32:15"
129+
""", ecsCompatibility);
130+
verifyTimestampDetected(responseMap, "date");
131+
// ISO_8601 should have higher priority than TIMESTAMP_YMD
132+
verifyTimestampFormat(responseMap, ISO_08601_TIMESTAMP_GROK_PATTERN, ISO_08601_JAVA_FORMATS);
133+
}
134+
135+
public void testMonthExplicitNameFormat() throws IOException {
136+
// use a multi-line sample to ensure we are detecting ndjson format
137+
Map<String, Object> responseMap = executeAndVerifyRequest("""
138+
"Aug 9, 2025"
139+
"Aug 10, 2025"
140+
"Aug 11, 2025"
141+
""", ecsCompatibility);
142+
verifyTimestampDetected(responseMap, "date");
143+
verifyTimestampFormat(responseMap, "CUSTOM_TIMESTAMP", MONTH_EXPLICIT_NAME_JAVA_FORMATS);
144+
}
145+
146+
private static Map<String, Object> executeAndVerifyRequest(String sample, String ecsCompatibility) throws IOException {
147+
Request request = new Request("POST", "/_text_structure/find_structure");
148+
request.addParameter("ecs_compatibility", ecsCompatibility);
149+
request.setEntity(new StringEntity(sample, ContentType.APPLICATION_JSON));
150+
Response response = client().performRequest(request);
151+
assertOK(response);
152+
return entityAsMap(response);
153+
}
154+
155+
private static void verifyTimestampDetected(Map<String, Object> responseMap, String expectedType) {
156+
@SuppressWarnings("unchecked")
157+
Map<String, Object> mappings = (Map<String, Object>) responseMap.get("mappings");
158+
assertThat(mappings, hasKey("properties"));
159+
@SuppressWarnings("unchecked")
160+
Map<String, Object> properties = (Map<String, Object>) mappings.get("properties");
161+
assertThat(properties, hasKey("@timestamp"));
162+
@SuppressWarnings("unchecked")
163+
Map<String, Object> timestamp = (Map<String, Object>) properties.get("@timestamp");
164+
assertThat(timestamp.get("type"), equalTo(expectedType));
165+
}
166+
167+
private static void verifyTimestampFormat(Map<String, Object> responseMap, String expectedGrokPattern, String... expectedJavaFormats) {
168+
assertThat(responseMap, hasKey("java_timestamp_formats"));
169+
@SuppressWarnings("unchecked")
170+
List<String> javaTimestampFormats = (List<String>) responseMap.get("java_timestamp_formats");
171+
assertThat(javaTimestampFormats, containsInAnyOrder(expectedJavaFormats));
172+
String grokPattern = (String) responseMap.get("grok_pattern");
173+
assertThat(grokPattern, containsString(expectedGrokPattern));
174+
}
175+
}

x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/structurefinder/TimestampFormatFinder.java

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,28 @@ public final class TimestampFormatFinder {
287287
Arrays.asList(" 11 1111 11 11 11 111", " 1 1111 11 11 11 111"),
288288
0,
289289
0
290+
),
291+
new CandidateTimestampFormat(
292+
example -> Arrays.asList(
293+
CandidateTimestampFormat.adjustFractionalSecondsFromEndOfExample(example, "yyyy/MM/dd HH:mm:ss"),
294+
CandidateTimestampFormat.adjustFractionalSecondsFromEndOfExample(example, "yyyy.MM.dd HH:mm:ss"),
295+
CandidateTimestampFormat.adjustFractionalSecondsFromEndOfExample(example, "yyyy-MM-dd HH:mm:ss")
296+
),
297+
"\\b\\d{4}[./-]\\d{2}[./-]\\d{2} \\d{2}:\\d{2}:\\d{2}(?:[.,]\\d+)?\\b",
298+
"\\b%{TIMESTAMP_YMD}\\b",
299+
"TIMESTAMP_YMD",
300+
List.of("1111 11 11 11 11 11"),
301+
0,
302+
10
303+
),
304+
new CandidateTimestampFormat(
305+
example -> Collections.singletonList("MMM d, yyyy"),
306+
"\\b[A-Z][a-z]{2} \\d{1,2}, \\d{4}\\b",
307+
"\\b%{MONTH} %{MONTHDAY}, %{YEAR}\\b",
308+
CUSTOM_TIMESTAMP_GROK_NAME,
309+
Arrays.asList(" 11 1111", " 1 1111"),
310+
5,
311+
0
290312
)
291313
);
292314

0 commit comments

Comments
 (0)