Skip to content

Commit 63c2ebc

Browse files
committed
schema_force_view_types to false (try true after df49)
1 parent 9b2fbbb commit 63c2ebc

File tree

9 files changed

+76
-112
lines changed

9 files changed

+76
-112
lines changed

datafusion/common/src/config.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -457,7 +457,7 @@ config_namespace! {
457457

458458
/// (reading) If true, parquet reader will read columns of `Utf8/LargeUtf8` with `Utf8View`,
459459
/// and `Binary/LargeBinary` with `BinaryView`.
460-
pub schema_force_view_types: bool, default = true
460+
pub schema_force_view_types: bool, default = false
461461

462462
/// (reading) If true, parquet reader will read columns of
463463
/// `Binary/LargeBinary` with `Utf8`, and `BinaryView` with `Utf8View`.

datafusion/sqllogictest/test_files/describe.slt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,8 @@ int_col Int32 YES
8181
bigint_col Int64 YES
8282
float_col Float32 YES
8383
double_col Float64 YES
84-
date_string_col Utf8View YES
85-
string_col Utf8View YES
84+
date_string_col Utf8 YES
85+
string_col Utf8 YES
8686
timestamp_col Timestamp(Nanosecond, None) YES
8787
year Int32 YES
8888
month Int32 YES

datafusion/sqllogictest/test_files/explain.slt

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -297,8 +297,8 @@ initial_physical_plan
297297
01)GlobalLimitExec: skip=0, fetch=10, statistics=[Rows=Exact(8), Bytes=Exact(671), [(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:),(Col[4]:),(Col[5]:),(Col[6]:),(Col[7]:),(Col[8]:),(Col[9]:),(Col[10]:)]]
298298
02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, file_type=parquet, statistics=[Rows=Exact(8), Bytes=Exact(671), [(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:),(Col[4]:),(Col[5]:),(Col[6]:),(Col[7]:),(Col[8]:),(Col[9]:),(Col[10]:)]]
299299
initial_physical_plan_with_schema
300-
01)GlobalLimitExec: skip=0, fetch=10, schema=[id:Int32;N, bool_col:Boolean;N, tinyint_col:Int32;N, smallint_col:Int32;N, int_col:Int32;N, bigint_col:Int64;N, float_col:Float32;N, double_col:Float64;N, date_string_col:BinaryView;N, string_col:BinaryView;N, timestamp_col:Timestamp(Nanosecond, None);N]
301-
02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, file_type=parquet, schema=[id:Int32;N, bool_col:Boolean;N, tinyint_col:Int32;N, smallint_col:Int32;N, int_col:Int32;N, bigint_col:Int64;N, float_col:Float32;N, double_col:Float64;N, date_string_col:BinaryView;N, string_col:BinaryView;N, timestamp_col:Timestamp(Nanosecond, None);N]
300+
01)GlobalLimitExec: skip=0, fetch=10, schema=[id:Int32;N, bool_col:Boolean;N, tinyint_col:Int32;N, smallint_col:Int32;N, int_col:Int32;N, bigint_col:Int64;N, float_col:Float32;N, double_col:Float64;N, date_string_col:Binary;N, string_col:Binary;N, timestamp_col:Timestamp(Nanosecond, None);N]
301+
02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, file_type=parquet, schema=[id:Int32;N, bool_col:Boolean;N, tinyint_col:Int32;N, smallint_col:Int32;N, int_col:Int32;N, bigint_col:Int64;N, float_col:Float32;N, double_col:Float64;N, date_string_col:Binary;N, string_col:Binary;N, timestamp_col:Timestamp(Nanosecond, None);N]
302302
physical_plan after OutputRequirements
303303
01)OutputRequirementExec, statistics=[Rows=Exact(8), Bytes=Exact(671), [(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:),(Col[4]:),(Col[5]:),(Col[6]:),(Col[7]:),(Col[8]:),(Col[9]:),(Col[10]:)]]
304304
02)--GlobalLimitExec: skip=0, fetch=10, statistics=[Rows=Exact(8), Bytes=Exact(671), [(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:),(Col[4]:),(Col[5]:),(Col[6]:),(Col[7]:),(Col[8]:),(Col[9]:),(Col[10]:)]]
@@ -321,7 +321,7 @@ physical_plan after LimitPushdown DataSourceExec: file_groups={1 group: [[WORKSP
321321
physical_plan after ProjectionPushdown SAME TEXT AS ABOVE
322322
physical_plan after SanityCheckPlan SAME TEXT AS ABOVE
323323
physical_plan DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, file_type=parquet, statistics=[Rows=Exact(8), Bytes=Exact(671), [(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:),(Col[4]:),(Col[5]:),(Col[6]:),(Col[7]:),(Col[8]:),(Col[9]:),(Col[10]:)]]
324-
physical_plan_with_schema DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, file_type=parquet, schema=[id:Int32;N, bool_col:Boolean;N, tinyint_col:Int32;N, smallint_col:Int32;N, int_col:Int32;N, bigint_col:Int64;N, float_col:Float32;N, double_col:Float64;N, date_string_col:BinaryView;N, string_col:BinaryView;N, timestamp_col:Timestamp(Nanosecond, None);N]
324+
physical_plan_with_schema DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, file_type=parquet, schema=[id:Int32;N, bool_col:Boolean;N, tinyint_col:Int32;N, smallint_col:Int32;N, int_col:Int32;N, bigint_col:Int64;N, float_col:Float32;N, double_col:Float64;N, date_string_col:Binary;N, string_col:Binary;N, timestamp_col:Timestamp(Nanosecond, None);N]
325325

326326

327327
statement ok
@@ -338,8 +338,8 @@ initial_physical_plan_with_stats
338338
01)GlobalLimitExec: skip=0, fetch=10, statistics=[Rows=Exact(8), Bytes=Exact(671), [(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:),(Col[4]:),(Col[5]:),(Col[6]:),(Col[7]:),(Col[8]:),(Col[9]:),(Col[10]:)]]
339339
02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, file_type=parquet, statistics=[Rows=Exact(8), Bytes=Exact(671), [(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:),(Col[4]:),(Col[5]:),(Col[6]:),(Col[7]:),(Col[8]:),(Col[9]:),(Col[10]:)]]
340340
initial_physical_plan_with_schema
341-
01)GlobalLimitExec: skip=0, fetch=10, schema=[id:Int32;N, bool_col:Boolean;N, tinyint_col:Int32;N, smallint_col:Int32;N, int_col:Int32;N, bigint_col:Int64;N, float_col:Float32;N, double_col:Float64;N, date_string_col:BinaryView;N, string_col:BinaryView;N, timestamp_col:Timestamp(Nanosecond, None);N]
342-
02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, file_type=parquet, schema=[id:Int32;N, bool_col:Boolean;N, tinyint_col:Int32;N, smallint_col:Int32;N, int_col:Int32;N, bigint_col:Int64;N, float_col:Float32;N, double_col:Float64;N, date_string_col:BinaryView;N, string_col:BinaryView;N, timestamp_col:Timestamp(Nanosecond, None);N]
341+
01)GlobalLimitExec: skip=0, fetch=10, schema=[id:Int32;N, bool_col:Boolean;N, tinyint_col:Int32;N, smallint_col:Int32;N, int_col:Int32;N, bigint_col:Int64;N, float_col:Float32;N, double_col:Float64;N, date_string_col:Binary;N, string_col:Binary;N, timestamp_col:Timestamp(Nanosecond, None);N]
342+
02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, file_type=parquet, schema=[id:Int32;N, bool_col:Boolean;N, tinyint_col:Int32;N, smallint_col:Int32;N, int_col:Int32;N, bigint_col:Int64;N, float_col:Float32;N, double_col:Float64;N, date_string_col:Binary;N, string_col:Binary;N, timestamp_col:Timestamp(Nanosecond, None);N]
343343
physical_plan after OutputRequirements
344344
01)OutputRequirementExec
345345
02)--GlobalLimitExec: skip=0, fetch=10
@@ -363,7 +363,7 @@ physical_plan after ProjectionPushdown SAME TEXT AS ABOVE
363363
physical_plan after SanityCheckPlan SAME TEXT AS ABOVE
364364
physical_plan DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, file_type=parquet
365365
physical_plan_with_stats DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, file_type=parquet, statistics=[Rows=Exact(8), Bytes=Exact(671), [(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:),(Col[4]:),(Col[5]:),(Col[6]:),(Col[7]:),(Col[8]:),(Col[9]:),(Col[10]:)]]
366-
physical_plan_with_schema DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, file_type=parquet, schema=[id:Int32;N, bool_col:Boolean;N, tinyint_col:Int32;N, smallint_col:Int32;N, int_col:Int32;N, bigint_col:Int64;N, float_col:Float32;N, double_col:Float64;N, date_string_col:BinaryView;N, string_col:BinaryView;N, timestamp_col:Timestamp(Nanosecond, None);N]
366+
physical_plan_with_schema DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, file_type=parquet, schema=[id:Int32;N, bool_col:Boolean;N, tinyint_col:Int32;N, smallint_col:Int32;N, int_col:Int32;N, bigint_col:Int64;N, float_col:Float32;N, double_col:Float64;N, date_string_col:Binary;N, string_col:Binary;N, timestamp_col:Timestamp(Nanosecond, None);N]
367367

368368

369369
statement ok

datafusion/sqllogictest/test_files/explain_tree.slt

Lines changed: 36 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -1185,42 +1185,24 @@ physical_plan
11851185
21)│ -------------------- │
11861186
22)│ on: │
11871187
23)│ (int_col = int_col), ├──────────────┐
1188-
24)│ (string_col = CAST │ │
1189-
25)│ (table1.string_col AS │ │
1190-
26)│ Utf8View)) │ │
1191-
27)└─────────────┬─────────────┘ │
1192-
28)┌─────────────┴─────────────┐┌─────────────┴─────────────┐
1193-
29)│ DataSourceExec ││ ProjectionExec │
1194-
30)│ -------------------- ││ -------------------- │
1195-
31)│ files: 1 ││ CAST(table1.string_col AS │
1196-
32)│ format: parquet ││ Utf8View): │
1197-
33)│ ││ CAST(string_col AS │
1198-
34)│ ││ Utf8View) │
1199-
35)│ ││ │
1200-
36)│ ││ bigint_col: │
1201-
37)│ ││ bigint_col │
1202-
38)│ ││ │
1203-
39)│ ││ date_col: date_col │
1204-
40)│ ││ int_col: int_col │
1205-
41)│ ││ │
1206-
42)│ ││ string_col: │
1207-
43)│ ││ string_col │
1208-
44)└───────────────────────────┘└─────────────┬─────────────┘
1209-
45)-----------------------------┌─────────────┴─────────────┐
1210-
46)-----------------------------│ RepartitionExec │
1211-
47)-----------------------------│ -------------------- │
1212-
48)-----------------------------│ partition_count(in->out): │
1213-
49)-----------------------------│ 1 -> 4 │
1214-
50)-----------------------------│ │
1215-
51)-----------------------------│ partitioning_scheme: │
1216-
52)-----------------------------│ RoundRobinBatch(4) │
1217-
53)-----------------------------└─────────────┬─────────────┘
1218-
54)-----------------------------┌─────────────┴─────────────┐
1219-
55)-----------------------------│ DataSourceExec │
1220-
56)-----------------------------│ -------------------- │
1221-
57)-----------------------------│ files: 1 │
1222-
58)-----------------------------│ format: csv │
1223-
59)-----------------------------└───────────────────────────┘
1188+
24)│ (string_col = │ │
1189+
25)│ string_col) │ │
1190+
26)└─────────────┬─────────────┘ │
1191+
27)┌─────────────┴─────────────┐┌─────────────┴─────────────┐
1192+
28)│ DataSourceExec ││ RepartitionExec │
1193+
29)│ -------------------- ││ -------------------- │
1194+
30)│ files: 1 ││ partition_count(in->out): │
1195+
31)│ format: parquet ││ 1 -> 4 │
1196+
32)│ ││ │
1197+
33)│ ││ partitioning_scheme: │
1198+
34)│ ││ RoundRobinBatch(4) │
1199+
35)└───────────────────────────┘└─────────────┬─────────────┘
1200+
36)-----------------------------┌─────────────┴─────────────┐
1201+
37)-----------------------------│ DataSourceExec │
1202+
38)-----------------------------│ -------------------- │
1203+
39)-----------------------------│ files: 1 │
1204+
40)-----------------------------│ format: csv │
1205+
41)-----------------------------└───────────────────────────┘
12241206

12251207
# Query with outer hash join.
12261208
query TT
@@ -1252,42 +1234,24 @@ physical_plan
12521234
23)│ │
12531235
24)│ on: ├──────────────┐
12541236
25)│ (int_col = int_col), │ │
1255-
26)│ (string_col = CAST │ │
1256-
27)│ (table1.string_col AS │ │
1257-
28)│ Utf8View)) │ │
1258-
29)└─────────────┬─────────────┘ │
1259-
30)┌─────────────┴─────────────┐┌─────────────┴─────────────┐
1260-
31)│ DataSourceExec ││ ProjectionExec │
1261-
32)│ -------------------- ││ -------------------- │
1262-
33)│ files: 1 ││ CAST(table1.string_col AS │
1263-
34)│ format: parquet ││ Utf8View): │
1264-
35)│ ││ CAST(string_col AS │
1265-
36)│ ││ Utf8View) │
1266-
37)│ ││ │
1267-
38)│ ││ bigint_col: │
1268-
39)│ ││ bigint_col │
1269-
40)│ ││ │
1270-
41)│ ││ date_col: date_col │
1271-
42)│ ││ int_col: int_col │
1272-
43)│ ││ │
1273-
44)│ ││ string_col: │
1274-
45)│ ││ string_col │
1275-
46)└───────────────────────────┘└─────────────┬─────────────┘
1276-
47)-----------------------------┌─────────────┴─────────────┐
1277-
48)-----------------------------│ RepartitionExec │
1278-
49)-----------------------------│ -------------------- │
1279-
50)-----------------------------│ partition_count(in->out): │
1280-
51)-----------------------------│ 1 -> 4 │
1281-
52)-----------------------------│ │
1282-
53)-----------------------------│ partitioning_scheme: │
1283-
54)-----------------------------│ RoundRobinBatch(4) │
1284-
55)-----------------------------└─────────────┬─────────────┘
1285-
56)-----------------------------┌─────────────┴─────────────┐
1286-
57)-----------------------------│ DataSourceExec │
1287-
58)-----------------------------│ -------------------- │
1288-
59)-----------------------------│ files: 1 │
1289-
60)-----------------------------│ format: csv │
1290-
61)-----------------------------└───────────────────────────┘
1237+
26)│ (string_col = │ │
1238+
27)│ string_col) │ │
1239+
28)└─────────────┬─────────────┘ │
1240+
29)┌─────────────┴─────────────┐┌─────────────┴─────────────┐
1241+
30)│ DataSourceExec ││ RepartitionExec │
1242+
31)│ -------------------- ││ -------------------- │
1243+
32)│ files: 1 ││ partition_count(in->out): │
1244+
33)│ format: parquet ││ 1 -> 4 │
1245+
34)│ ││ │
1246+
35)│ ││ partitioning_scheme: │
1247+
36)│ ││ RoundRobinBatch(4) │
1248+
37)└───────────────────────────┘└─────────────┬─────────────┘
1249+
38)-----------------------------┌─────────────┴─────────────┐
1250+
39)-----------------------------│ DataSourceExec │
1251+
40)-----------------------------│ -------------------- │
1252+
41)-----------------------------│ files: 1 │
1253+
42)-----------------------------│ format: csv │
1254+
43)-----------------------------└───────────────────────────┘
12911255

12921256
# Query with nested loop join.
12931257
query TT

0 commit comments

Comments
 (0)