4747 * {@link DataType#OBJECT})
4848 *
4949 * <h2>Process for adding a new data type</h2>
50+ * We assume that the data type is already supported in ES indices, but not in
51+ * ES|QL. Types that aren't yet enabled in ES will require some adjustments to
52+ * the process.
53+ * <p>
5054 * Note: it is not expected that all the following steps be done in a single PR.
5155 * Use capabilities to gate tests as you go, and use as many PRs as you think
5256 * appropriate. New data types are complex, and smaller PRs will make reviews
5357 * easier.
5458 * <ul>
5559 * <li>
5660 * Create a new data type and mark it as under construction using
57- * {@link Builder#underConstruction()}. This makes the type available on
61+ * {@link Builder#underConstruction(TransportVersion)}. This makes the type available on
5862 * SNAPSHOT builds only, prevents some tests from running, and prevents documentation
5963 * for the new type from being built.</li>
6064 * <li>
6165 * New tests using the type will require a new {@code EsqlCapabilities} entry,
6266 * otherwise bwc tests will fail (even in SNAPSHOT builds) because old nodes don't
63- * know about the new type.</li>
67+ * know about the new type. This capability needs to be SNAPSHOT-only as long as
68+ * the type is under construction.</li>
6469 * <li>
6570 * Create a new CSV test file for the new type. You'll either need to
6671 * create a new data file as well, or add values of the new type to
115120 * EsqlDataTypeConverter#commonType, individual function type checking, the
116121 * verifier rules, or other places. We suggest starting with CSV tests and
117122 * seeing where they fail.</li>
123+ * <li>
124+ * Ensure the new type doesn't break {@code FROM idx | KEEP *} queries by
125+ * updating AllSupportedFieldsTestCase. Make sure to run this test in bwc
126+ * configurations in release mode.
127+ * </li>
118128 * </ul>
119- * There are some additional steps that should be taken when removing the
120- * feature flag and getting ready for a release:
129+ * There are some additional steps that should be taken when getting ready for a release:
121130 * <ul>
122131 * <li>
123132 * Ensure the capabilities for this type are always enabled.</li>
124133 * <li>
125134 * Mark the type with a new transport version via
126- * {@link Builder#supportedSince(TransportVersion)}. This will enable the type on
127- * non-SNAPSHOT builds as long as all nodes in the cluster (and remote clusters)
128- * support it.</li>
135+ * {@link Builder#supportedSince(TransportVersion, TransportVersion)}.
136+ * This will enable the type on non-SNAPSHOT builds as long as all nodes in the cluster
137+ * (and remote clusters) support it.
138+ * Use the under-construction transport version for the {@code createdVersion} here so that
139+ * existing tests continue to run.
140+ * </li>
129141 * <li>
130142 * Fix new test failures related to declared function types.</li>
131143 * <li>
144+ * Update the expectations in AllSupportedFieldsTestCase and make sure it
145+ * passes in release builds.</li>
146+ * <li>
132147 * Make sure to run the full test suite locally via gradle to generate
133148 * the function type tables and helper files with the new type. Ensure all
134149 * the functions that support the type have appropriate docs for it.</li>
@@ -299,12 +314,23 @@ public enum DataType implements Writeable {
299314 // mixed/multi clusters with remotes that don't support these types. This is low-ish risk because these types require specific
300315 // geo functions to turn up in the query, and those types aren't available before 9.2.0 either.
301316 GEOHASH (
302- builder ().esType ("geohash" ).typeName ("GEOHASH" ).estimatedSize (Long .BYTES ).supportedSince (DataTypesTransportVersions .INDEX_SOURCE )
317+ builder ().esType ("geohash" )
318+ .typeName ("GEOHASH" )
319+ .estimatedSize (Long .BYTES )
320+ .supportedSince (DataTypesTransportVersions .INDEX_SOURCE , DataTypesTransportVersions .INDEX_SOURCE )
303321 ),
304322 GEOTILE (
305- builder ().esType ("geotile" ).typeName ("GEOTILE" ).estimatedSize (Long .BYTES ).supportedSince (DataTypesTransportVersions .INDEX_SOURCE )
323+ builder ().esType ("geotile" )
324+ .typeName ("GEOTILE" )
325+ .estimatedSize (Long .BYTES )
326+ .supportedSince (DataTypesTransportVersions .INDEX_SOURCE , DataTypesTransportVersions .INDEX_SOURCE )
327+ ),
328+ GEOHEX (
329+ builder ().esType ("geohex" )
330+ .typeName ("GEOHEX" )
331+ .estimatedSize (Long .BYTES )
332+ .supportedSince (DataTypesTransportVersions .INDEX_SOURCE , DataTypesTransportVersions .INDEX_SOURCE )
306333 ),
307- GEOHEX (builder ().esType ("geohex" ).typeName ("GEOHEX" ).estimatedSize (Long .BYTES ).supportedSince (DataTypesTransportVersions .INDEX_SOURCE )),
308334
309335 /**
310336 * Fields with this type represent a Lucene doc id. This field is a bit magic in that:
@@ -328,7 +354,10 @@ public enum DataType implements Writeable {
328354 // mixed/multi clusters with remotes that don't support these types. This is low-ish risk because _tsid requires specifically being
329355 // used in `FROM idx METADATA _tsid` or in the `TS` command, which both weren't available before 9.2.0.
330356 TSID_DATA_TYPE (
331- builder ().esType ("_tsid" ).estimatedSize (Long .BYTES * 2 ).docValues ().supportedSince (DataTypesTransportVersions .INDEX_SOURCE )
357+ builder ().esType ("_tsid" )
358+ .estimatedSize (Long .BYTES * 2 )
359+ .docValues ()
360+ .supportedSince (DataTypesTransportVersions .INDEX_SOURCE , DataTypesTransportVersions .INDEX_SOURCE )
332361 ),
333362 /**
334363 * Fields with this type are the partial result of running a non-time-series aggregation
@@ -339,14 +368,17 @@ public enum DataType implements Writeable {
339368 AGGREGATE_METRIC_DOUBLE (
340369 builder ().esType ("aggregate_metric_double" )
341370 .estimatedSize (Double .BYTES * 3 + Integer .BYTES )
342- .supportedSince (DataTypesTransportVersions .ESQL_AGGREGATE_METRIC_DOUBLE_CREATED_VERSION )
371+ .supportedSince (
372+ DataTypesTransportVersions .ESQL_AGGREGATE_METRIC_DOUBLE_CREATED_VERSION ,
373+ DataTypesTransportVersions .ESQL_AGGREGATE_METRIC_DOUBLE_CREATED_VERSION
374+ )
343375 ),
344376
345377 EXPONENTIAL_HISTOGRAM (
346378 builder ().esType ("exponential_histogram" )
347379 .estimatedSize (16 * 160 )// guess 160 buckets (OTEL default for positive values only histograms) with 16 bytes per bucket
348380 .docValues ()
349- .underConstruction ()
381+ .underConstruction (DataTypesTransportVersions . RESOLVE_FIELDS_RESPONSE_USED_TV )
350382 ),
351383
352384 /*
@@ -363,11 +395,16 @@ public enum DataType implements Writeable {
363395 * Fields with this type are dense vectors, represented as an array of float values.
364396 */
365397 DENSE_VECTOR (
366- builder ().esType ("dense_vector" ).estimatedSize (4096 ).supportedSince (DataTypesTransportVersions .ESQL_DENSE_VECTOR_CREATED_VERSION )
398+ builder ().esType ("dense_vector" )
399+ .estimatedSize (4096 )
400+ .supportedSince (
401+ DataTypesTransportVersions .ESQL_DENSE_VECTOR_CREATED_VERSION ,
402+ DataTypesTransportVersions .ESQL_DENSE_VECTOR_CREATED_VERSION
403+ )
367404 );
368405
369406 public static final Set <DataType > UNDER_CONSTRUCTION = Arrays .stream (DataType .values ())
370- .filter (t -> t .supportedVersion () == SupportedVersion . UNDER_CONSTRUCTION )
407+ .filter (t -> t .supportedVersion (). underConstruction () )
371408 .collect (Collectors .toSet ());
372409
373410 private final String typeName ;
@@ -970,13 +1007,15 @@ Builder counter(DataType counter) {
9701007 }
9711008
9721009 /**
973- * The version from when on a {@link DataType} is supported. When a query tries to use a data type
974- * not supported on any of the nodes it runs on, it is invalid .
1010+ * Marks a type that is supported in production since {@code supportedVersion}.
1011+ * When a query tries to use a data type not supported on the nodes it runs on, this is a bug.
9751012 * <p>
976- * Generally, we should add a dedicated transport version when a type is enabled on release builds.
1013+ * On snapshot builds, the {@code createdVersion} is used instead, so that existing tests continue
1014+ * to work after release if the type was previously {@link #underConstruction(TransportVersion)};
1015+ * the under-construction version should be used as the {@code createdVersion}.
9771016 */
978- Builder supportedSince (TransportVersion supportedVersion ) {
979- this .supportedVersion = SupportedVersion .supportedSince (supportedVersion );
1017+ Builder supportedSince (TransportVersion createdVersion , TransportVersion supportedVersion ) {
1018+ this .supportedVersion = SupportedVersion .supportedSince (createdVersion , supportedVersion );
9801019 return this ;
9811020 }
9821021
@@ -985,8 +1024,12 @@ Builder supportedOnAllNodes() {
9851024 return this ;
9861025 }
9871026
988- Builder underConstruction () {
989- this .supportedVersion = SupportedVersion .UNDER_CONSTRUCTION ;
1027+ /**
1028+ * Marks a type that is not supported in production yet, but is supported in snapshot builds
1029+ * starting with the given version.
1030+ */
1031+ Builder underConstruction (TransportVersion createdVersion ) {
1032+ this .supportedVersion = SupportedVersion .underConstruction (createdVersion );
9901033 return this ;
9911034 }
9921035 }
@@ -1007,5 +1050,12 @@ public static class DataTypesTransportVersions {
10071050 public static final TransportVersion ESQL_AGGREGATE_METRIC_DOUBLE_CREATED_VERSION = TransportVersion .fromName (
10081051 "esql_aggregate_metric_double_created_version"
10091052 );
1053+
1054+ /**
1055+ * First transport version after the PR that introduced the exponential histogram data type.
1056+ */
1057+ public static final TransportVersion RESOLVE_FIELDS_RESPONSE_USED_TV = TransportVersion .fromName (
1058+ "esql_resolve_fields_response_used"
1059+ );
10101060 }
10111061}
0 commit comments