Skip to content

Commit 2fff4d1

Browse files
lidavidmzeroshade
authored andcommitted
GH-36669: [Go] Guard against garbage in C Data structures (#36670)
### Rationale for this change Prevent hard to debug crashes when using Go code with other code via C Data Interface. ### What changes are included in this PR? In the C Stream Interface implementation, jump through a trampoline that zeroes the out parameters before letting Go see them. Note that this can only guard against the issue when the C Stream Interface is used. Also, fix other issues in the C Data Interface tests with invalid pointers and uninitialized memory that were turned up by the new test here (because it calls `runtime.GC` very frequently). ### Are these changes tested? Yes ### Are there any user-facing changes? No **This PR contains a "Critical Fix".** * Closes: #36669 Lead-authored-by: David Li <[email protected]> Co-authored-by: Matt Topol <[email protected]> Signed-off-by: David Li <[email protected]>
1 parent 137e50d commit 2fff4d1

File tree

7 files changed

+156
-19
lines changed

7 files changed

+156
-19
lines changed

go/arrow/cdata/cdata_exports.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -283,7 +283,7 @@ func (exp *schemaExporter) export(field arrow.Field) {
283283

284284
func allocateArrowSchemaArr(n int) (out []CArrowSchema) {
285285
s := (*reflect.SliceHeader)(unsafe.Pointer(&out))
286-
s.Data = uintptr(C.malloc(C.sizeof_struct_ArrowSchema * C.size_t(n)))
286+
s.Data = uintptr(C.calloc(C.size_t(n), C.sizeof_struct_ArrowSchema))
287287
s.Len = n
288288
s.Cap = n
289289

@@ -292,7 +292,7 @@ func allocateArrowSchemaArr(n int) (out []CArrowSchema) {
292292

293293
func allocateArrowSchemaPtrArr(n int) (out []*CArrowSchema) {
294294
s := (*reflect.SliceHeader)(unsafe.Pointer(&out))
295-
s.Data = uintptr(C.malloc(C.size_t(unsafe.Sizeof((*CArrowSchema)(nil))) * C.size_t(n)))
295+
s.Data = uintptr(C.calloc(C.size_t(n), C.size_t(unsafe.Sizeof((*CArrowSchema)(nil)))))
296296
s.Len = n
297297
s.Cap = n
298298

@@ -301,7 +301,7 @@ func allocateArrowSchemaPtrArr(n int) (out []*CArrowSchema) {
301301

302302
func allocateArrowArrayArr(n int) (out []CArrowArray) {
303303
s := (*reflect.SliceHeader)(unsafe.Pointer(&out))
304-
s.Data = uintptr(C.malloc(C.sizeof_struct_ArrowArray * C.size_t(n)))
304+
s.Data = uintptr(C.calloc(C.size_t(n), C.sizeof_struct_ArrowArray))
305305
s.Len = n
306306
s.Cap = n
307307

@@ -310,7 +310,7 @@ func allocateArrowArrayArr(n int) (out []CArrowArray) {
310310

311311
func allocateArrowArrayPtrArr(n int) (out []*CArrowArray) {
312312
s := (*reflect.SliceHeader)(unsafe.Pointer(&out))
313-
s.Data = uintptr(C.malloc(C.size_t(unsafe.Sizeof((*CArrowArray)(nil))) * C.size_t(n)))
313+
s.Data = uintptr(C.calloc(C.size_t(n), C.size_t(unsafe.Sizeof((*CArrowArray)(nil)))))
314314
s.Len = n
315315
s.Cap = n
316316

@@ -319,7 +319,7 @@ func allocateArrowArrayPtrArr(n int) (out []*CArrowArray) {
319319

320320
func allocateBufferPtrArr(n int) (out []*C.void) {
321321
s := (*reflect.SliceHeader)(unsafe.Pointer(&out))
322-
s.Data = uintptr(C.malloc(C.size_t(unsafe.Sizeof((*C.void)(nil))) * C.size_t(n)))
322+
s.Data = uintptr(C.calloc(C.size_t(n), C.size_t(unsafe.Sizeof((*C.void)(nil)))))
323323
s.Len = n
324324
s.Cap = n
325325

go/arrow/cdata/cdata_fulltest.c

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -404,6 +404,7 @@ void setup_array_stream_test(const int n_batches, struct ArrowArrayStream* out)
404404
int test_exported_stream(struct ArrowArrayStream* stream) {
405405
while (1) {
406406
struct ArrowArray array;
407+
memset(&array, 0, sizeof(array));
407408
// Garbage - implementation should not try to call it, though!
408409
array.release = (void*)0xDEADBEEF;
409410
int rc = stream->get_next(stream, &array);
@@ -447,3 +448,35 @@ void test_stream_schema_fallible(struct ArrowArrayStream* stream) {
447448
stream->private_data = &kFallibleStream;
448449
stream->release = FallibleRelease;
449450
}
451+
452+
int confuse_go_gc(struct ArrowArrayStream* stream, unsigned int seed) {
453+
struct ArrowSchema schema;
454+
// Try to confuse the Go GC by putting what looks like a Go pointer here.
455+
#ifdef _WIN32
456+
// Thread-safe on Windows with the multithread CRT
457+
#define DORAND rand()
458+
#else
459+
#define DORAND rand_r(&seed)
460+
#endif
461+
schema.name = (char*)(0xc000000000L + (DORAND % 0x2000));
462+
schema.format = (char*)(0xc000000000L + (DORAND % 0x2000));
463+
int rc = stream->get_schema(stream, &schema);
464+
if (rc != 0) return rc;
465+
schema.release(&schema);
466+
467+
while (1) {
468+
struct ArrowArray array;
469+
array.release = (void*)(0xc000000000L + (DORAND % 0x2000));
470+
array.private_data = (void*)(0xc000000000L + (DORAND % 0x2000));
471+
int rc = stream->get_next(stream, &array);
472+
if (rc != 0) return rc;
473+
474+
if (array.release == NULL) {
475+
stream->release(stream);
476+
break;
477+
}
478+
array.release(&array);
479+
}
480+
return 0;
481+
#undef DORAND
482+
}

go/arrow/cdata/cdata_test.go

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ import (
2929
"io"
3030
"runtime"
3131
"runtime/cgo"
32+
"sync"
3233
"testing"
3334
"time"
3435
"unsafe"
@@ -940,3 +941,28 @@ func TestRecordReaderImportError(t *testing.T) {
940941
}
941942
assert.Contains(t, err.Error(), "Expected error message")
942943
}
944+
945+
func TestConfuseGoGc(t *testing.T) {
946+
// Regression test for https://github.com/apache/arrow-adbc/issues/729
947+
reclist := arrdata.Records["primitives"]
948+
949+
var wg sync.WaitGroup
950+
concurrency := 32
951+
wg.Add(concurrency)
952+
953+
// XXX: this test is a bit expensive
954+
for i := 0; i < concurrency; i++ {
955+
go func() {
956+
for i := 0; i < 256; i++ {
957+
rdr, err := array.NewRecordReader(reclist[0].Schema(), reclist)
958+
assert.NoError(t, err)
959+
runtime.GC()
960+
assert.NoError(t, confuseGoGc(rdr))
961+
runtime.GC()
962+
}
963+
wg.Done()
964+
}()
965+
}
966+
967+
wg.Wait()
968+
}

go/arrow/cdata/cdata_test_framework.go

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,16 @@ package cdata
2121

2222
// #include <stdlib.h>
2323
// #include <stdint.h>
24+
// #include <string.h>
2425
// #include "arrow/c/abi.h"
2526
// #include "arrow/c/helpers.h"
2627
//
2728
// void setup_array_stream_test(const int n_batches, struct ArrowArrayStream* out);
28-
// struct ArrowArray* get_test_arr() { return (struct ArrowArray*)(malloc(sizeof(struct ArrowArray))); }
29+
// struct ArrowArray* get_test_arr() {
30+
// struct ArrowArray* array = (struct ArrowArray*)malloc(sizeof(struct ArrowArray));
31+
// memset(array, 0, sizeof(*array));
32+
// return array;
33+
// }
2934
// struct ArrowArrayStream* get_test_stream() {
3035
// struct ArrowArrayStream* out = (struct ArrowArrayStream*)malloc(sizeof(struct ArrowArrayStream));
3136
// memset(out, 0, sizeof(struct ArrowArrayStream));
@@ -56,11 +61,13 @@ package cdata
5661
// struct ArrowSchema** test_union(const char** fmts, const char** names, int64_t* flags, const int n);
5762
// int test_exported_stream(struct ArrowArrayStream* stream);
5863
// void test_stream_schema_fallible(struct ArrowArrayStream* stream);
64+
// int confuse_go_gc(struct ArrowArrayStream* stream, unsigned int seed);
5965
import "C"
6066
import (
6167
"errors"
6268
"fmt"
6369
"io"
70+
"math/rand"
6471
"unsafe"
6572

6673
"github.com/apache/arrow/go/v13/arrow"
@@ -271,15 +278,17 @@ func createCArr(arr arrow.Array) *CArrowArray {
271278
carr.null_count = C.int64_t(arr.NullN())
272279
carr.offset = C.int64_t(arr.Data().Offset())
273280
buffers := arr.Data().Buffers()
274-
cbuf := []unsafe.Pointer{}
275-
for _, b := range buffers {
281+
cbufs := allocateBufferPtrArr(len(buffers))
282+
for i, b := range buffers {
276283
if b != nil {
277-
cbuf = append(cbuf, C.CBytes(b.Bytes()))
284+
cbufs[i] = (*C.void)(C.CBytes(b.Bytes()))
285+
} else {
286+
cbufs[i] = nil
278287
}
279288
}
280-
carr.n_buffers = C.int64_t(len(cbuf))
281-
if len(cbuf) > 0 {
282-
carr.buffers = &cbuf[0]
289+
carr.n_buffers = C.int64_t(len(cbufs))
290+
if len(cbufs) > 0 {
291+
carr.buffers = (*unsafe.Pointer)(unsafe.Pointer(&cbufs[0]))
283292
}
284293
carr.release = (*[0]byte)(C.release_test_arr)
285294

@@ -350,3 +359,14 @@ func fallibleSchemaTest() error {
350359
}
351360
return nil
352361
}
362+
363+
func confuseGoGc(reader array.RecordReader) error {
364+
out := C.get_test_stream()
365+
ExportRecordReader(reader, out)
366+
rc := C.confuse_go_gc(out, C.uint(rand.Int()))
367+
C.free(unsafe.Pointer(out))
368+
if rc == 0 {
369+
return nil
370+
}
371+
return fmt.Errorf("Exported stream test failed with return code %d", int(rc))
372+
}

go/arrow/cdata/exports.go

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,14 @@ import (
2828
// #include <stdlib.h>
2929
// #include "arrow/c/helpers.h"
3030
//
31-
// typedef const char cchar_t;
32-
// extern int streamGetSchema(struct ArrowArrayStream*, struct ArrowSchema*);
33-
// extern int streamGetNext(struct ArrowArrayStream*, struct ArrowArray*);
34-
// extern const char* streamGetError(struct ArrowArrayStream*);
35-
// extern void streamRelease(struct ArrowArrayStream*);
31+
// typedef const char cchar_t;
32+
// extern int streamGetSchema(struct ArrowArrayStream*, struct ArrowSchema*);
33+
// extern int streamGetNext(struct ArrowArrayStream*, struct ArrowArray*);
34+
// extern const char* streamGetError(struct ArrowArrayStream*);
35+
// extern void streamRelease(struct ArrowArrayStream*);
36+
// // XXX(https://github.com/apache/arrow-adbc/issues/729)
37+
// int streamGetSchemaTrampoline(struct ArrowArrayStream* stream, struct ArrowSchema* out);
38+
// int streamGetNextTrampoline(struct ArrowArrayStream* stream, struct ArrowArray* out);
3639
//
3740
import "C"
3841

@@ -154,10 +157,11 @@ func streamRelease(handle *CArrowArrayStream) {
154157
}
155158

156159
func exportStream(rdr array.RecordReader, out *CArrowArrayStream) {
157-
out.get_schema = (*[0]byte)(C.streamGetSchema)
158-
out.get_next = (*[0]byte)(C.streamGetNext)
160+
out.get_schema = (*[0]byte)(C.streamGetSchemaTrampoline)
161+
out.get_next = (*[0]byte)(C.streamGetNextTrampoline)
159162
out.get_last_error = (*[0]byte)(C.streamGetError)
160163
out.release = (*[0]byte)(C.streamRelease)
164+
rdr.Retain()
161165
h := cgo.NewHandle(cRecordReader{rdr: rdr, err: nil})
162166
out.private_data = createHandle(h)
163167
}

go/arrow/cdata/interface.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,11 @@ func ImportCRecordReader(stream *CArrowArrayStream, schema *arrow.Schema) (arrio
198198
// the populating of the struct. Any memory allocated will be allocated using malloc
199199
// which means that it is invisible to the Go Garbage Collector and must be freed manually
200200
// using the callback on the CArrowSchema object.
201+
//
202+
// WARNING: the output ArrowSchema MUST BE ZERO INITIALIZED, or the Go garbage collector
203+
// may error at runtime, due to CGO rules ("the current implementation may sometimes
204+
// cause a runtime error if the contents of the C memory appear to be a Go pointer").
205+
// You have been warned!
201206
func ExportArrowSchema(schema *arrow.Schema, out *CArrowSchema) {
202207
dummy := arrow.Field{Type: arrow.StructOf(schema.Fields()...), Metadata: schema.Metadata()}
203208
exportField(dummy, out)
@@ -220,6 +225,11 @@ func ExportArrowSchema(schema *arrow.Schema, out *CArrowSchema) {
220225
// The release function on the populated CArrowArray will properly decrease the reference counts,
221226
// and release the memory if the record has already been released. But since this must be explicitly
222227
// done, make sure it is released so that you do not create a memory leak.
228+
//
229+
// WARNING: the output ArrowArray MUST BE ZERO INITIALIZED, or the Go garbage collector
230+
// may error at runtime, due to CGO rules ("the current implementation may sometimes
231+
// cause a runtime error if the contents of the C memory appear to be a Go pointer").
232+
// You have been warned!
223233
func ExportArrowRecordBatch(rb arrow.Record, out *CArrowArray, outSchema *CArrowSchema) {
224234
children := make([]arrow.ArrayData, rb.NumCols())
225235
for i := range rb.Columns() {
@@ -243,6 +253,11 @@ func ExportArrowRecordBatch(rb arrow.Record, out *CArrowArray, outSchema *CArrow
243253
// being used by the arrow.Array passed in, in order to share with zero-copy across the C
244254
// Data Interface. See the documentation for ExportArrowRecordBatch for details on how to ensure
245255
// you do not leak memory and prevent unwanted, undefined or strange behaviors.
256+
//
257+
// WARNING: the output ArrowArray MUST BE ZERO INITIALIZED, or the Go garbage collector
258+
// may error at runtime, due to CGO rules ("the current implementation may sometimes
259+
// cause a runtime error if the contents of the C memory appear to be a Go pointer").
260+
// You have been warned!
246261
func ExportArrowArray(arr arrow.Array, out *CArrowArray, outSchema *CArrowSchema) {
247262
exportArray(arr, out, outSchema)
248263
}
@@ -252,6 +267,11 @@ func ExportArrowArray(arr arrow.Array, out *CArrowArray, outSchema *CArrowSchema
252267
// CArrowArrayStream takes ownership of the RecordReader until the consumer calls the release
253268
// callback, as such it is unnecesary to call Release on the passed in reader unless it has
254269
// previously been retained.
270+
//
271+
// WARNING: the output ArrowArrayStream MUST BE ZERO INITIALIZED, or the Go garbage
272+
// collector may error at runtime, due to CGO rules ("the current implementation may
273+
// sometimes cause a runtime error if the contents of the C memory appear to be a Go
274+
// pointer"). You have been warned!
255275
func ExportRecordReader(reader array.RecordReader, out *CArrowArrayStream) {
256276
exportStream(reader, out)
257277
}

go/arrow/cdata/trampoline.c

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing, software
12+
// distributed under the License is distributed on an "AS IS" BASIS,
13+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
// See the License for the specific language governing permissions and
15+
// limitations under the License.
16+
17+
#include <string.h>
18+
19+
#include "arrow/c/abi.h"
20+
21+
int streamGetSchema(struct ArrowArrayStream*, struct ArrowSchema*);
22+
int streamGetNext(struct ArrowArrayStream*, struct ArrowArray*);
23+
24+
int streamGetSchemaTrampoline(struct ArrowArrayStream* stream, struct ArrowSchema* out) {
25+
// XXX(https://github.com/apache/arrow-adbc/issues/729)
26+
memset(out, 0, sizeof(*out));
27+
return streamGetSchema(stream, out);
28+
}
29+
30+
int streamGetNextTrampoline(struct ArrowArrayStream* stream, struct ArrowArray* out) {
31+
// XXX(https://github.com/apache/arrow-adbc/issues/729)
32+
memset(out, 0, sizeof(*out));
33+
return streamGetNext(stream, out);
34+
}

0 commit comments

Comments
 (0)