@@ -242,43 +242,36 @@ struct SizeStatistics {
242242 * All fields are optional.
243243 */
244244struct Statistics {
245- /**
246- * DEPRECATED: min and max value of the column. Use min_value and max_value.
247- *
248- * Values are encoded using PLAIN encoding, except that variable-length byte
249- * arrays do not include a length prefix.
250- *
251- * These fields encode min and max values determined by signed comparison
252- * only. New files should use the correct order for a column's logical type
253- * and store the values in the min_value and max_value fields.
254- *
255- * To support older readers, these may be set when the column order is
256- * signed.
257- */
245+ /** DEPRECATED: do not use; superseded by the min/max fields declared below. */
258246 1: optional binary max ;
259247 2: optional binary min ;
260248 /** count of null values in the column */
261249 3: optional i64 null_count ;
262250 /** count of distinct values occurring */
263251 4: optional i64 distinct_count ;
264252 /**
265- * Lower and upper bound values for the column, determined by its ColumnOrder.
253+ * Only one pair of max_value/min_value, max1/min1, max2/min2, max4/min4,
254+ * max8/min8 may be set. The pair is determined by the physical type of the
255+ * column. Floating-point values are bit-cast to integers. Variable-length
256+ * values are stored in min_value/max_value.
257+ *
258+ * Min and Max are the lower and upper bound values for the column,
259+ * respectively, as determined by its ColumnOrder.
266260 *
267261 * These may be the actual minimum and maximum values found on a page or column
268262 * chunk, but can also be (more compact) values that do not exist on a page or
269263 * column chunk. For example, instead of storing "Blart Versenwald III", a writer
270264 * may set min_value="B", max_value="C". Such more compact values must still be
271265 * valid values within the column's logical type.
272- *
273- * Values are encoded using PLAIN encoding, except that variable-length byte
274- * arrays do not include a length prefix.
275266 */
276267 5: optional binary max_value ;
277268 6: optional binary min_value ;
278269 /** If true, max_value is the actual maximum value for a column */
279270 7: optional bool is_max_value_exact ;
280271 /** If true, min_value is the actual minimum value for a column */
281272 8: optional bool is_min_value_exact ;
273+ 9: optional i64 max8 ;
274+ 10: optional i64 min8 ;
282275}
283276
284277/** Empty structs to use as logical type annotations */
@@ -810,9 +803,13 @@ struct ColumnMetaData {
810803 /** optional statistics for this column chunk */
811804 12: optional Statistics statistics ;
812805
813- /** Set of all encodings used for pages in this column chunk.
806+ /**
807+ * DEPRECATED: use is_fully_dict_encoded instead
808+ *
809+ * Set of all encodings used for pages in this column chunk.
814810 * This information can be used to determine if all data pages are
815- * dictionary encoded for example **/
811+ * dictionary encoded for example
812+ */
816813 13: optional list<PageEncodingStats> encoding_stats ;
817814
818815 /** Byte offset from beginning of file to Bloom filter data. **/
@@ -833,6 +830,9 @@ struct ColumnMetaData {
833830 * filter pushdown.
834831 */
835832 16: optional SizeStatistics size_statistics ;
833+
834+ /** If true, all data pages are dictionary encoded **/
835+ 17: optional bool is_fully_dict_encoded ;
836836}
837837
838838struct EncryptionWithFooterKey {
0 commit comments