Skip to content

Commit da18b38

Browse files
committed
Incremental improvements to parquet metadata
1 parent 384bedd commit da18b38

File tree

1 file changed

+24
-22
lines changed

1 file changed

+24
-22
lines changed

src/main/thrift/parquet.thrift

Lines changed: 24 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -242,43 +242,42 @@ struct SizeStatistics {
242242
* All fields are optional.
243243
*/
244244
struct Statistics {
245-
/**
246-
* DEPRECATED: min and max value of the column. Use min_value and max_value.
247-
*
248-
* Values are encoded using PLAIN encoding, except that variable-length byte
249-
* arrays do not include a length prefix.
250-
*
251-
* These fields encode min and max values determined by signed comparison
252-
* only. New files should use the correct order for a column's logical type
253-
* and store the values in the min_value and max_value fields.
254-
*
255-
* To support older readers, these may be set when the column order is
256-
* signed.
257-
*/
245+
/* DEPRECATED: do not use; use min_value/max_value or the fixed-width min/max pairs (fields 9-16) instead */
258246
1: optional binary max;
259247
2: optional binary min;
260248
/** count of null values in the column */
261249
3: optional i64 null_count;
262250
/** count of distinct values occurring */
263251
4: optional i64 distinct_count;
264252
/**
265-
* Lower and upper bound values for the column, determined by its ColumnOrder.
253+
* Only one pair of max_value/min_value, max1/min1, max2/min2, max4/min4,
254+
* max8/min8 can be set. The pair is determined by the physical type of the
255+
* column. Floating-point values are bit-cast to integers. Variable-length
256+
* values are set in min_value/max_value.
257+
*
258+
* Min and Max are the lower and upper bound values for the column,
259+
* respectively, as determined by its ColumnOrder.
266260
*
267261
* These may be the actual minimum and maximum values found on a page or column
268262
* chunk, but can also be (more compact) values that do not exist on a page or
269263
* column chunk. For example, instead of storing "Blart Versenwald III", a writer
270264
* may set min_value="B", max_value="C". Such more compact values must still be
271265
* valid values within the column's logical type.
272-
*
273-
* Values are encoded using PLAIN encoding, except that variable-length byte
274-
* arrays do not include a length prefix.
275266
*/
276267
5: optional binary max_value;
277268
6: optional binary min_value;
278269
/** If true, max_value is the actual maximum value for a column */
279270
7: optional bool is_max_value_exact;
280271
/** If true, min_value is the actual minimum value for a column */
281272
8: optional bool is_min_value_exact;
273+
9: optional byte max1;
274+
10: optional byte min1;
275+
11: optional i16 max2;
276+
12: optional i16 min2;
277+
13: optional i32 max4;
278+
14: optional i32 min4;
279+
15: optional i64 max8;
280+
16: optional i64 min8;
282281
}
283282

284283
/** Empty structs to use as logical type annotations */
@@ -490,7 +489,7 @@ enum Encoding {
490489
// GROUP_VAR_INT = 1;
491490

492491
/**
493-
* Deprecated: Dictionary encoding. The values in the dictionary are encoded in the
492+
* DEPRECATED: Dictionary encoding. The values in the dictionary are encoded in the
494493
* plain type.
495494
* In a data page, use RLE_DICTIONARY instead.
496495
* In a dictionary page, use PLAIN instead.
@@ -772,15 +771,15 @@ struct PageEncodingStats {
772771
* Description for column metadata
773772
*/
774773
struct ColumnMetaData {
775-
/** Type of this column **/
776-
1: required Type type
774+
/* DEPRECATED: can be found in SchemaElement */
775+
1: optional Type type
777776

778777
/** Set of all encodings used for this column. The purpose is to validate
779778
* whether we can decode those pages. **/
780779
2: required list<Encoding> encodings
781780

782-
/** Path in schema **/
783-
3: required list<string> path_in_schema
781+
/* DEPRECATED: can be found in SchemaElement */
782+
3: optional list<string> path_in_schema
784783

785784
/** Compression codec **/
786785
4: required CompressionCodec codec
@@ -833,6 +832,9 @@ struct ColumnMetaData {
833832
* filter pushdown.
834833
*/
835834
16: optional SizeStatistics size_statistics;
835+
836+
/* The index into FileMetaData.schema (list<SchemaElement>) for this column */
837+
17: optional i32 schema_index;
836838
}
837839

838840
struct EncryptionWithFooterKey {

0 commit comments

Comments
 (0)