Skip to content

Commit 231f767

Browse files
committed
Fix ESQL load for unmapped numerics with synthetic source (#143916)
Add a forUnmappedLoad flag to KeywordFieldType so that only the unmapped-keyword path (created by ESQL for PotentiallyUnmappedKeywordEsField) converts decoded Object values to strings in the fallback synthetic source reader; mapped keyword fields keep assuming BytesRef.

Add synthetic-source test cases to unmapped-load and unmapped-nullify:
- partial_mapping_synthetic_sample_data (unmapped_event_duration as long)
- unmappedNumericFromK8sSyntheticTsIndex (TS k8s_unmapped, network.cost)

Made-with: Cursor
1 parent 554f7d0 commit 231f767

File tree

7 files changed

+126
-4
lines changed

7 files changed

+126
-4
lines changed

server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -556,14 +556,16 @@ public static final class KeywordFieldType extends TextFamilyFieldType {
556556
private final boolean isDimension;
557557
private final boolean usesBinaryDocValues;
558558
private final boolean usesBinaryDocValuesForIgnoredFields;
559+
private final boolean forUnmappedLoad;
559560

560561
public KeywordFieldType(
561562
String name,
562563
IndexType indexType,
563564
TextSearchInfo textSearchInfo,
564565
NamedAnalyzer normalizer,
565566
Builder builder,
566-
boolean isSyntheticSource
567+
boolean isSyntheticSource,
568+
boolean forUnmappedLoad
567569
) {
568570
super(
569571
name,
@@ -587,6 +589,18 @@ public KeywordFieldType(
587589
this.usesBinaryDocValues = builder.usesBinaryDocValues();
588590
this.usesBinaryDocValuesForIgnoredFields = builder.indexSettings.getIndexVersionCreated()
589591
.onOrAfter(IndexVersions.STORE_IGNORED_KEYWORDS_IN_BINARY_DOC_VALUES);
592+
this.forUnmappedLoad = forUnmappedLoad;
593+
}
594+
595+
public KeywordFieldType(
596+
String name,
597+
IndexType indexType,
598+
TextSearchInfo textSearchInfo,
599+
NamedAnalyzer normalizer,
600+
Builder builder,
601+
boolean isSyntheticSource
602+
) {
603+
this(name, indexType, textSearchInfo, normalizer, builder, isSyntheticSource, false);
590604
}
591605

592606
public KeywordFieldType(String name) {
@@ -613,6 +627,7 @@ public KeywordFieldType(
613627
this.isDimension = false;
614628
this.usesBinaryDocValues = usesBinaryDocValues;
615629
this.usesBinaryDocValuesForIgnoredFields = false;
630+
this.forUnmappedLoad = false;
616631
}
617632

618633
public KeywordFieldType(String name, FieldType fieldType, boolean isSyntheticSource) {
@@ -633,6 +648,7 @@ public KeywordFieldType(String name, FieldType fieldType, boolean isSyntheticSou
633648
this.isDimension = false;
634649
this.usesBinaryDocValues = false;
635650
this.usesBinaryDocValuesForIgnoredFields = false;
651+
this.forUnmappedLoad = false;
636652
}
637653

638654
public KeywordFieldType(String name, NamedAnalyzer analyzer) {
@@ -653,6 +669,7 @@ public KeywordFieldType(String name, NamedAnalyzer analyzer) {
653669
this.isDimension = false;
654670
this.usesBinaryDocValues = false;
655671
this.usesBinaryDocValuesForIgnoredFields = false;
672+
this.forUnmappedLoad = false;
656673
}
657674

658675
public boolean usesBinaryDocValues() {
@@ -933,10 +950,20 @@ private FallbackSyntheticSourceBlockLoader.Reader<?> fallbackSyntheticSourceBloc
933950
return new FallbackSyntheticSourceBlockLoader.SingleValueReader<BytesRef>(nullValueBytes) {
934951
@Override
935952
public void convertValue(Object value, List<BytesRef> accumulator) {
936-
String stringValue = ((BytesRef) value).utf8ToString();
953+
final String stringValue;
954+
if (forUnmappedLoad) {
955+
if (value == null) {
956+
if (nullValueBytes != null) {
957+
accumulator.add(nullValueBytes);
958+
}
959+
return;
960+
}
961+
stringValue = value instanceof BytesRef br ? br.utf8ToString() : value.toString();
962+
} else {
963+
stringValue = ((BytesRef) value).utf8ToString();
964+
}
937965
String adjusted = applyIgnoreAboveAndNormalizer(stringValue);
938966
if (adjusted != null) {
939-
// TODO what if the value didn't change?
940967
accumulator.add(new BytesRef(adjusted));
941968
}
942969
}

x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,11 @@ public class CsvTestsDataLoader {
108108
new TestDataset("ul_logs"),
109109
new TestDataset("sample_data"),
110110
new TestDataset("partial_mapping_sample_data"),
111+
new TestDataset(
112+
"partial_mapping_synthetic_sample_data",
113+
"mapping-partial_mapping_sample_data.json",
114+
"partial_mapping_synthetic_sample_data.csv"
115+
).withSetting("partial-mapping-synthetic-settings.json"),
111116
new TestDataset("no_mapping_sample_data", "mapping-no_mapping_sample_data.json", "partial_mapping_sample_data.csv").withTypeMapping(
112117
Stream.of("timestamp", "client_ip", "event_duration").collect(toMap(k -> k, k -> "keyword"))
113118
),
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
@timestamp:date,client_ip:ip,event_duration:long,message:keyword,unmapped_message:keyword,unmapped_event_duration:long,unmapped.nested:keyword
2+
2024-10-23T13:55:01.543Z,173.21.3.15,1756466,Connected to 10.1.0.1!,Disconnected from 10.1.0.1,1756468,a
3+
2024-10-23T13:53:55.832Z,173.21.3.15,5033754,Connection error?,Disconnection error,5033756,b
4+
2024-10-23T13:52:55.015Z,173.21.3.15,8268152,Connection error?,Disconnection error,8268154,c
5+
2024-10-23T13:51:54.732Z,173.21.3.15,725447,Connection error?,Disconnection error,725449,d
6+
2024-10-23T13:33:34.937Z,173.21.0.5,1232381,42,43,1232383,e
7+
2024-10-23T12:27:28.948Z,173.21.2.113,2764888,Connected to 10.1.0.2!,Disconnected from 10.1.0.2,2764890,f
8+
2024-10-23T12:15:03.360Z,173.21.2.162,3450232,Connected to 10.1.0.3!,Disconnected from 10.1.0.3,3450234,g
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
{
2+
"index": {
3+
"mapping": {
4+
"source": {
5+
"mode": "synthetic"
6+
}
7+
}
8+
}
9+
}

x-pack/plugin/esql/qa/testFixtures/src/main/resources/unmapped-load.csv-spec

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,23 @@ FROM partial_mapping_no_source_sample_data
195195
2024-10-23T12:15:03.360Z | null
196196
;
197197

198+
unmappedNumericFromSyntheticSourceSingleIndex
199+
required_capability: optional_fields_v2
200+
201+
SET unmapped_fields="load"\;
202+
FROM partial_mapping_synthetic_sample_data
203+
| KEEP @timestamp, unmapped_event_duration
204+
| SORT @timestamp DESC
205+
| LIMIT 4
206+
;
207+
208+
@timestamp:date | unmapped_event_duration:keyword
209+
2024-10-23T13:55:01.543Z | 1756468
210+
2024-10-23T13:53:55.832Z | 5033756
211+
2024-10-23T13:52:55.015Z | 8268154
212+
2024-10-23T13:51:54.732Z | 725449
213+
;
214+
198215
statsByUnmappedFieldExistsInSource
199216
required_capability: optional_fields_v2
200217

@@ -249,6 +266,25 @@ FROM k8s_unmapped
249266
2024-05-10T00:01:25.000Z | qa | null
250267
;
251268

269+
unmappedNumericFromK8sSyntheticTsIndex
270+
required_capability: optional_fields_v2
271+
required_capability: ts_command_v0
272+
273+
SET unmapped_fields="load"\;
274+
TS k8s_unmapped
275+
| KEEP @timestamp, cluster, network.cost
276+
| SORT @timestamp
277+
| LIMIT 5
278+
;
279+
280+
@timestamp:date | cluster:keyword | network.cost:keyword
281+
2024-05-10T00:00:29.000Z | staging | 9.375
282+
2024-05-10T00:00:33.000Z | staging | 1.25
283+
2024-05-10T00:00:51.000Z | prod | 9.25
284+
2024-05-10T00:00:57.000Z | prod | 12.125
285+
2024-05-10T00:01:25.000Z | qa | 5.375
286+
;
287+
252288
statsImplicitLastOverTimeUnmappedTsIndex
253289
required_capability: optional_fields_v2
254290
required_capability: ts_command_v0

x-pack/plugin/esql/qa/testFixtures/src/main/resources/unmapped-nullify.csv-spec

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,23 @@ FROM partial_mapping_excluded_source_sample_data
4949
2024-10-23T12:15:03.360Z | null
5050
;
5151

52+
unmappedNumericFromSyntheticSourceSingleIndex
53+
required_capability: optional_fields_nullify_tech_preview
54+
55+
SET unmapped_fields="nullify"\;
56+
FROM partial_mapping_synthetic_sample_data
57+
| KEEP @timestamp, unmapped_event_duration
58+
| SORT @timestamp DESC
59+
| LIMIT 4
60+
;
61+
62+
@timestamp:date | unmapped_event_duration:null
63+
2024-10-23T13:55:01.543Z | null
64+
2024-10-23T13:53:55.832Z | null
65+
2024-10-23T13:52:55.015Z | null
66+
2024-10-23T13:51:54.732Z | null
67+
;
68+
5269
fieldUnmappedInSourceButSourceDisabledSingleIndex
5370
required_capability: optional_fields_nullify_tech_preview
5471
required_capability: source_field_mapping
@@ -86,6 +103,25 @@ nanos:date_nanos
86103
2023-01-23T13:55:01.543123456Z
87104
;
88105

106+
unmappedNumericFromK8sSyntheticTsIndex
107+
required_capability: optional_fields_nullify_tech_preview
108+
required_capability: ts_command_v0
109+
110+
SET unmapped_fields="nullify"\;
111+
TS k8s_unmapped
112+
| KEEP @timestamp, cluster, network.cost
113+
| SORT @timestamp
114+
| LIMIT 5
115+
;
116+
117+
@timestamp:date | cluster:keyword | network.cost:null
118+
2024-05-10T00:00:29.000Z | staging | null
119+
2024-05-10T00:00:33.000Z | staging | null
120+
2024-05-10T00:00:51.000Z | prod | null
121+
2024-05-10T00:00:57.000Z | prod | null
122+
2024-05-10T00:01:25.000Z | qa | null
123+
;
124+
89125
keepStar
90126
required_capability: optional_fields_nullify_tech_preview
91127

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -325,7 +325,8 @@ static MappedFieldType createUnmappedFieldType(String name, DefaultShardContext
325325
new TextSearchInfo(UNMAPPED_FIELD_TYPE, builder.similarity(), Lucene.KEYWORD_ANALYZER, Lucene.KEYWORD_ANALYZER),
326326
Lucene.KEYWORD_ANALYZER,
327327
builder,
328-
context.ctx.isSourceSynthetic()
328+
context.ctx.isSourceSynthetic(),
329+
true
329330
);
330331
}
331332
}

0 commit comments

Comments
 (0)