27
27
import java .util .List ;
28
28
import java .util .Locale ;
29
29
import java .util .Map ;
30
- import java .util .Optional ;
30
+ import java .util .Objects ;
31
31
import java .util .Set ;
32
32
import java .util .function .Function ;
33
33
@@ -145,7 +145,7 @@ public enum DataType {
145
145
* Fields of this type are unsupported by any functions and are always
146
146
* rendered as {@code null} in the response.
147
147
*/
148
- UNSUPPORTED (builder ().typeName ("UNSUPPORTED" ).unknownSize ( )),
148
+ UNSUPPORTED (builder ().typeName ("UNSUPPORTED" ).estimatedSize ( 1024 )),
149
149
/**
150
150
* Fields that are always {@code null}, usually created with constant
151
151
* {@code null} values.
@@ -238,15 +238,15 @@ public enum DataType {
238
238
* Generally ESQL uses {@code keyword} fields as raw strings. So things like
239
239
* {@code TO_STRING} will make a {@code keyword} field.
240
240
*/
241
- KEYWORD (builder ().esType ("keyword" ).unknownSize ( ).docValues ()),
241
+ KEYWORD (builder ().esType ("keyword" ).estimatedSize ( 50 ).docValues ()),
242
242
/**
243
243
* String fields that are analyzed when the document is received and may be
244
244
* cut into more than one token. Generally ESQL only sees {@code text} fields
245
245
* when loaded from the index and ESQL will load these fields
246
246
* <strong>without</strong> analysis. The {@code MATCH} operator can be used
247
247
* to query these fields with analysis.
248
248
*/
249
- TEXT (builder ().esType ("text" ).unknownSize ( )),
249
+ TEXT (builder ().esType ("text" ).estimatedSize ( 1024 )),
250
250
/**
251
251
* Millisecond precision date, stored as a 64-bit signed number.
252
252
*/
@@ -267,8 +267,8 @@ public enum DataType {
267
267
*/
268
268
// 8.15.2-SNAPSHOT is 15 bytes, most are shorter, some can be longer
269
269
VERSION (builder ().esType ("version" ).estimatedSize (15 ).docValues ()),
270
- OBJECT (builder ().esType ("object" ).unknownSize ( )),
271
- SOURCE (builder ().esType (SourceFieldMapper .NAME ).unknownSize ( )),
270
+ OBJECT (builder ().esType ("object" ).estimatedSize ( 1024 )),
271
+ SOURCE (builder ().esType (SourceFieldMapper .NAME ).estimatedSize ( 10 * 1024 )),
272
272
DATE_PERIOD (builder ().typeName ("DATE_PERIOD" ).estimatedSize (3 * Integer .BYTES )),
273
273
TIME_DURATION (builder ().typeName ("TIME_DURATION" ).estimatedSize (Integer .BYTES + Long .BYTES )),
274
274
// WKB for points is typically 21 bytes.
@@ -298,20 +298,20 @@ public enum DataType {
298
298
* Every document in {@link IndexMode#TIME_SERIES} index will have a single value
299
299
* for this field and the segments themselves are sorted on this value.
300
300
*/
301
- TSID_DATA_TYPE (builder ().esType ("_tsid" ).unknownSize ( ).docValues ()),
301
+ TSID_DATA_TYPE (builder ().esType ("_tsid" ).estimatedSize ( Long . BYTES * 2 ).docValues ()),
302
302
/**
303
303
* Fields with this type are the partial result of running a non-time-series aggregation
304
304
* alongside time-series aggregations. These fields are not parsable from the
305
305
* mapping and should be hidden from users.
306
306
*/
307
- PARTIAL_AGG (builder ().esType ("partial_agg" ).unknownSize ( )),
307
+ PARTIAL_AGG (builder ().esType ("partial_agg" ).estimatedSize ( 1024 )),
308
308
309
309
AGGREGATE_METRIC_DOUBLE (builder ().esType ("aggregate_metric_double" ).estimatedSize (Double .BYTES * 3 + Integer .BYTES )),
310
310
311
311
/**
312
312
* Fields with this type are dense vectors, represented as an array of double values.
313
313
*/
314
- DENSE_VECTOR (builder ().esType ("dense_vector" ).unknownSize ( ));
314
+ DENSE_VECTOR (builder ().esType ("dense_vector" ).estimatedSize ( 4096 ));
315
315
316
316
/**
317
317
* Types that are actively being built. These types are
@@ -341,7 +341,7 @@ public enum DataType {
341
341
342
342
private final String esType ;
343
343
344
- private final Optional < Integer > estimatedSize ;
344
+ private final int estimatedSize ;
345
345
346
346
/**
347
347
* True if the type represents a "whole number", as in, does <strong>not</strong> have a decimal part.
@@ -377,11 +377,10 @@ public enum DataType {
377
377
378
378
DataType (Builder builder ) {
379
379
String typeString = builder .typeName != null ? builder .typeName : builder .esType ;
380
- assert builder .estimatedSize != null : "Missing size for type " + typeString ;
381
380
this .typeName = typeString .toLowerCase (Locale .ROOT );
382
381
this .name = typeString .toUpperCase (Locale .ROOT );
383
382
this .esType = builder .esType ;
384
- this .estimatedSize = builder .estimatedSize ;
383
+ this .estimatedSize = Objects . requireNonNull ( builder .estimatedSize , "estimated size is required" ) ;
385
384
this .isWholeNumber = builder .isWholeNumber ;
386
385
this .isRationalNumber = builder .isRationalNumber ;
387
386
this .docValues = builder .docValues ;
@@ -683,10 +682,21 @@ public boolean isNumeric() {
683
682
}
684
683
685
684
/**
686
- * @return the estimated size, in bytes, of this data type. If there's no reasonable way to estimate the size,
687
- * the optional will be empty.
685
+ * An estimate of the size of values of this type in a Block. All types must have an
686
+ * estimate, and estimates generally follow these rules:
687
+ * <ol>
688
+ * <li>
689
+ * If you know the precise size of a single element of this type, use that.
690
+ * For example {@link #INTEGER} uses {@link Integer#BYTES}.
691
+ * </li>
692
+ * <li>
693
+ * Overestimates are better than underestimates. Overestimates make operations
694
+ * less efficient, but underestimates cause circuit breaker errors.
695
+ * </li>
696
+ * </ol>
697
+ * @return the estimated size of this data type in bytes
688
698
*/
689
- public Optional < Integer > estimatedSize () {
699
+ public int estimatedSize () {
690
700
return estimatedSize ;
691
701
}
692
702
@@ -801,7 +811,7 @@ private static class Builder {
801
811
802
812
private String typeName ;
803
813
804
- private Optional < Integer > estimatedSize ;
814
+ private Integer estimatedSize ;
805
815
806
816
/**
807
817
* True if the type represents a "whole number", as in, does <strong>not</strong> have a decimal part.
@@ -848,13 +858,11 @@ Builder typeName(String typeName) {
848
858
return this ;
849
859
}
850
860
861
+ /**
862
+ * Sets the estimated size, in bytes, of values of the type being built. See {@link DataType#estimatedSize}.
863
+ */
851
864
Builder estimatedSize (int size ) {
852
- this .estimatedSize = Optional .of (size );
853
- return this ;
854
- }
855
-
856
- Builder unknownSize () {
857
- this .estimatedSize = Optional .empty ();
865
+ this .estimatedSize = size ;
858
866
return this ;
859
867
}
860
868
0 commit comments