@@ -80,7 +80,7 @@ const char *col_names[] = {
80
80
" 198" , " 199" ,
81
81
};
82
82
#define COLS 200
83
- #define ROWS 5
83
+ #define ROWS 20
84
84
85
85
// We use alignas(8) because Arrow mandates all buffers to be 8-byte aligned
86
86
struct RecordBatchBody {
@@ -300,19 +300,19 @@ void arrow_flatcc_build_schema(flatcc_builder_t *b) {
300
300
org_apache_arrow_flatbuf_Field_nullable_add (b, false );
301
301
org_apache_arrow_flatbuf_Field_type_Int_create (b, 32 , true );
302
302
303
- // org_apache_arrow_flatbuf_Field_children_start(b);
304
303
// // Our columns are super simple (primitives only); no children
304
+ // org_apache_arrow_flatbuf_Field_children_start(b);
305
305
// org_apache_arrow_flatbuf_Field_children_end(b);
306
- // org_apache_arrow_flatbuf_Field_custom_metadata_start(b);
307
306
// // Our fields don't have any custom metadata afaik
307
+ // org_apache_arrow_flatbuf_Field_custom_metadata_start(b);
308
308
// org_apache_arrow_flatbuf_Field_custom_metadata_end(b);
309
309
310
310
org_apache_arrow_flatbuf_Schema_fields_push_end (b);
311
311
}
312
312
313
313
org_apache_arrow_flatbuf_Schema_fields_end (b);
314
- // org_apache_arrow_flatbuf_Schema_custom_metadata_start(b);
315
314
// // we don't have any custom metadata afaik
315
+ // org_apache_arrow_flatbuf_Schema_custom_metadata_start(b);
316
316
// org_apache_arrow_flatbuf_Schema_custom_metadata_end(b);
317
317
org_apache_arrow_flatbuf_Schema_features_start (b);
318
318
org_apache_arrow_flatbuf_Schema_features_end (b);
@@ -330,22 +330,25 @@ void arrow_flatcc_encode_schema_message(flatcc_builder_t *b) {
330
330
org_apache_arrow_flatbuf_Message_end_as_root (b);
331
331
}
332
332
333
- // Each column defined in the schema is represented by one Node struct here,
334
- // giving it's length and null count
335
- org_apache_arrow_flatbuf_FieldNode rb_nodes[COLS] = {};
336
- // Each column has 1-3+ buffers depending on its type; eg. data, validity,
337
- // offsets, etc. For primitives (which we're using exclusively), those are
338
- // validity followed by data. See buffer orders for different types here:
339
- // https://arrow.apache.org/docs/format/Columnar.html#buffer-listing-for-each-layout
340
- org_apache_arrow_flatbuf_Buffer rb_buffers[COLS * 2 ] = {};
341
-
342
333
// Note that the actual data buffers come after this flatbuffer message, in the
343
334
// messageBody. This contains only the metadata (lengths, offsets, etc.)
344
335
void arrow_flatcc_encode_record_batch_message (flatcc_builder_t *b) {
345
336
org_apache_arrow_flatbuf_Message_start_as_root (b);
346
337
org_apache_arrow_flatbuf_Message_version_add (b, org_apache_arrow_flatbuf_MetadataVersion_V5);
347
338
org_apache_arrow_flatbuf_Message_header_RecordBatch_start (b);
348
339
340
+ // We use heap allocated lists below as mbed-os RTOS threads are heavily
341
+ // stack-limited. These are cleaned up as soon as the message has been built.
342
+
343
+ // Each column defined in the schema is represented by one Node struct here,
344
+ // giving its length and null count
345
+ org_apache_arrow_flatbuf_FieldNode *rb_nodes = new org_apache_arrow_flatbuf_FieldNode[COLS];
346
+ // Each column has 1-3+ buffers depending on its type; eg. data, validity,
347
+ // offsets, etc. For primitives (which we're using exclusively atm), those
348
+ // are validity followed by data. Buffer orders for different types:
349
+ // https://arrow.apache.org/docs/format/Columnar.html#buffer-listing-for-each-layout
350
+ org_apache_arrow_flatbuf_Buffer *rb_buffers = new org_apache_arrow_flatbuf_Buffer[COLS*2 ];
351
+
349
352
int64_t offset = 0 ;
350
353
int buffer_index = 0 ;
351
354
for (uint i = 0 ; i < COLS; i++) {
@@ -358,7 +361,7 @@ void arrow_flatcc_encode_record_batch_message(flatcc_builder_t *b) {
358
361
359
362
// Since we're omitting validity buffers (enforcing non-null values)
360
363
// entirely for now, we set each validity buffer metadata to say
361
- // length=0 so Arrow decoders know to not look for them
364
+ // length=0 so Arrow decoders know not to look for them
362
365
const uint validity_buf_size = 0 ; // (int)(ceil(ROWS / 8.0f));
363
366
rb_buffers[buffer_index++] = {.offset = offset, .length = 0 };
364
367
if (rb_nodes[i].null_count > 0 ) // NOTE: ATM this will never happen
@@ -378,6 +381,10 @@ void arrow_flatcc_encode_record_batch_message(flatcc_builder_t *b) {
378
381
// metadata that contain the actual data buffers.
379
382
org_apache_arrow_flatbuf_Message_bodyLength_add (b, offset);
380
383
org_apache_arrow_flatbuf_Message_end_as_root (b);
384
+
385
+ // These take a significant amount of space, let's clean them up!
386
+ delete[] rb_nodes;
387
+ delete[] rb_buffers;
381
388
}
382
389
383
390
@@ -410,7 +417,22 @@ void arrow_stream_write_message_to_file(void *flatbuf, size_t flatbuf_size, void
410
417
}
411
418
}
412
419
413
- flatcc_builder_t b;
420
+ // TODO: experiment with a custom emitter that writes directly to a FILE*
421
+ int dbg_emitter (void *emit_context, const flatcc_iovec_t *iov, int iov_count, flatbuffers_soffset_t offset, size_t len) {
422
+ printf (" dbg: emit: iov_count: %d, offset: %d, len: %d\n " , iov_count, offset, len);
423
+
424
+ for (int i = 0 ; i < iov_count; ++i) {
425
+ if (iov[i].iov_base == flatcc_builder_padding_base) {
426
+ printf (" dbg: padding at: %d, len: %d\n " , offset, iov[i].iov_len );
427
+ }
428
+ if (iov[i].iov_base == 0 ) {
429
+ printf (" dbg: null vector reserved at: %d, len: %d\n " , offset, iov[i].iov_len );
430
+ }
431
+ offset += (flatbuffers_soffset_t )iov[i].iov_len ;
432
+ }
433
+ return 0 ;
434
+ }
435
+
414
436
RecordBatchBody values;
415
437
int32_t *cols[] = {
416
438
values.col0 , values.col1 , values.col2 , values.col3 ,
@@ -495,37 +517,48 @@ int main(int argc, char *argv[]) {
495
517
if (file == NULL )
496
518
error_quit (" Error opening file!" );
497
519
498
- flatcc_builder_init (&b);
520
+ flatcc_builder_t b, *B;
521
+ B = &b;
522
+
523
+ flatcc_builder_init (B);
524
+ // flatcc_builder_custom_init(B, dbg_emitter, 0, 0, 0);
499
525
500
526
print_mem_usage ();
501
527
502
- arrow_flatcc_encode_schema_message (&b );
528
+ arrow_flatcc_encode_schema_message (B );
503
529
size_t schema_size;
504
- void *schema = flatcc_builder_finalize_buffer (&b, &schema_size);
530
+ void *schema = flatcc_builder_finalize_buffer (B, &schema_size);
531
+ printf (" schema flatbuf size: %d" , schema_size);
505
532
MBED_ASSERT (schema_size != 0 && schema);
506
533
534
+ free (schema);
535
+
507
536
print_mem_usage ();
508
537
509
- flatcc_builder_reset (&b);
538
+ // A full clear/init cycle seems to free much more memory than just a reset.
539
+ flatcc_builder_clear (B);
540
+ flatcc_builder_init (B);
510
541
511
542
print_mem_usage ();
512
543
513
544
arrow_stream_write_message_to_file (schema, schema_size, nullptr , 0 , file);
514
545
515
546
print_mem_usage ();
516
547
517
- arrow_flatcc_encode_record_batch_message (&b );
548
+ arrow_flatcc_encode_record_batch_message (B );
518
549
519
550
print_mem_usage ();
520
551
521
552
size_t record_batch_flatbuf_size;
522
- // void *record_batch_flatbuf = flatcc_builder_finalize_buffer(&b, &record_batch_flatbuf_size);
523
- void *record_batch_flatbuf = flatcc_builder_get_direct_buffer (&b, &record_batch_flatbuf_size);
553
+ void *record_batch_flatbuf = flatcc_builder_finalize_buffer (B, &record_batch_flatbuf_size);
554
+ // void *record_batch_flatbuf = flatcc_builder_get_direct_buffer(B, &record_batch_flatbuf_size);
555
+
556
+ printf (" record batch flatbuf size: %d" , record_batch_flatbuf_size);
524
557
MBED_ASSERT (record_batch_flatbuf_size != 0 && record_batch_flatbuf);
525
558
526
559
print_mem_usage ();
527
560
528
- flatcc_builder_reset (&b );
561
+ flatcc_builder_clear (B );
529
562
530
563
int val = 0 ;
531
564
for (int i = 0 ; i < COLS; i++) {
0 commit comments