Skip to content

Commit 0946f04

Browse files
authored
fix: handle array/map types in ffi schema example and test (#1497)
## What changes are proposed in this pull request? We had a bug where we used the _output_ size to try and constrain the _input_ size when using `snprintf`. This caused us to read out of bounds on the name string. This fixes that. ## How was this change tested? Added a new test that we can read `nested_types`, which was failing before.
1 parent 59333e5 commit 0946f04

File tree

3 files changed

+124
-4
lines changed

3 files changed

+124
-4
lines changed

ffi/examples/read-table/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ set(DatPath "../../../../acceptance/tests/dat/out/reader_tests/generated")
1616
set(ExpectedPath "../../../tests/read-table-testing/expected-data")
1717
set(KernelTestPath "../../../../kernel/tests/data")
1818
add_test(NAME read_and_print_all_prim COMMAND ${TestRunner} ${DatPath}/all_primitive_types/delta/ ${ExpectedPath}/all-prim-types.expected)
19+
add_test(NAME read_and_print_nested COMMAND ${TestRunner} ${DatPath}/nested_types/delta/ ${ExpectedPath}/nested-types.expected)
1920
add_test(NAME read_and_print_basic_partitioned COMMAND ${TestRunner} ${DatPath}/basic_partitioned/delta/ ${ExpectedPath}/basic-partitioned.expected)
2021
add_test(NAME read_and_print_with_dv_small COMMAND ${TestRunner} ${KernelTestPath}/table-with-dv-small/ ${ExpectedPath}/table-with-dv-small.expected)
2122

ffi/examples/read-table/schema.h

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -148,8 +148,10 @@ void visit_array(
148148
{
149149
SchemaBuilder* builder = data;
150150
char* name_ptr = malloc(sizeof(char) * (name.len + 22));
151-
snprintf(name_ptr, name.len + 1, "%s", name.ptr);
152-
snprintf(name_ptr + name.len, 22, " (is nullable: %s)", is_nullable ? "true" : "false");
151+
// NOTE: we truncate to the max int size because the format specifier "%.*s" requires an int length specifier
152+
int name_chars = name.len > INT_MAX ? INT_MAX : (int)name.len; // handle _REALLY_ long names by truncation
153+
int wrote = snprintf(name_ptr, name.len + 1, "%.*s", name_chars, name.ptr);
154+
snprintf(name_ptr + wrote, 22, " (is nullable: %s)", is_nullable ? "true" : "false");
153155
print_physical_name(name_ptr, metadata);
154156
PRINT_CHILD_VISIT("array", name_ptr, sibling_list_id, "Types", child_list_id);
155157
SchemaItem* array_item = add_to_list(&builder->lists[sibling_list_id], name_ptr, "array", is_nullable);
@@ -166,8 +168,10 @@ void visit_map(
166168
{
167169
SchemaBuilder* builder = data;
168170
char* name_ptr = malloc(sizeof(char) * (name.len + 22));
169-
snprintf(name_ptr, name.len + 1, "%s", name.ptr);
170-
snprintf(name_ptr + name.len, 22, " (is nullable: %s)", is_nullable ? "true" : "false");
171+
// NOTE: we truncate to the max int size because the format specifier "%.*s" requires an int length specifier
172+
int name_chars = name.len > INT_MAX ? INT_MAX : (int)name.len; // handle _REALLY_ long names by truncation
173+
int wrote = snprintf(name_ptr, name.len + 1, "%.*s", name_chars, name.ptr);
174+
snprintf(name_ptr + wrote, 22, " (is nullable: %s)", is_nullable ? "true" : "false");
171175
print_physical_name(name_ptr, metadata);
172176
PRINT_CHILD_VISIT("map", name_ptr, sibling_list_id, "Types", child_list_id);
173177
SchemaItem* map_item = add_to_list(&builder->lists[sibling_list_id], name_ptr, "map", is_nullable);
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
Reading table at ../../../../acceptance/tests/dat/out/reader_tests/generated/nested_types/delta/
2+
version: 0
3+
4+
Schema:
5+
├─ pk: integer
6+
├─ struct: struct
7+
│ ├─ float64: double
8+
│ └─ bool: boolean
9+
├─ array (is nullable: true): array
10+
│ └─ array_element: short
11+
└─ map (is nullable: true): map
12+
├─ map_key: string
13+
└─ map_value: integer
14+
15+
pk: [
16+
0,
17+
1,
18+
2,
19+
3,
20+
4
21+
]
22+
struct: -- is_valid: all not null
23+
-- child 0 type: double
24+
[
25+
0,
26+
1,
27+
2,
28+
3,
29+
4
30+
]
31+
-- child 1 type: bool
32+
[
33+
true,
34+
false,
35+
true,
36+
false,
37+
true
38+
]
39+
array: [
40+
[
41+
0
42+
],
43+
[
44+
0,
45+
1
46+
],
47+
[
48+
0,
49+
1,
50+
2
51+
],
52+
[
53+
0,
54+
1,
55+
2,
56+
3
57+
],
58+
[
59+
0,
60+
1,
61+
2,
62+
3,
63+
4
64+
]
65+
]
66+
map: [
67+
keys:
68+
[]
69+
values:
70+
[],
71+
keys:
72+
[
73+
"0"
74+
]
75+
values:
76+
[
77+
0
78+
],
79+
keys:
80+
[
81+
"0",
82+
"1"
83+
]
84+
values:
85+
[
86+
0,
87+
1
88+
],
89+
keys:
90+
[
91+
"0",
92+
"1",
93+
"2"
94+
]
95+
values:
96+
[
97+
0,
98+
1,
99+
2
100+
],
101+
keys:
102+
[
103+
"0",
104+
"1",
105+
"2",
106+
"3"
107+
]
108+
values:
109+
[
110+
0,
111+
1,
112+
2,
113+
3
114+
]
115+
]

0 commit comments

Comments
 (0)