@@ -261,14 +261,25 @@ DEVICE auto fill_hash_join_buff_impl(int32_t* buff,
261
261
int32_t start = cpu_thread_idx;
262
262
int32_t step = cpu_thread_count;
263
263
#endif
264
+ // LOG(ERROR) << "fill_hash_join_buff_impl func cur th idx: " << cpu_thread_idx
265
+ // << " count: " << cpu_thread_count
266
+ // << " get th index: " << std::this_thread::get_id()
267
+ // << " chunk buff size: " << join_column.col_chunks_buff_sz
268
+ // << " num elems: " << join_column.num_elems
269
+ // << " num_chunks: " << join_column.num_chunks;
270
+ // INJECT_TIMER(fill_hash_join_buff_impl);
264
271
JoinColumnTyped col{&join_column, &type_info};
265
272
for (auto item : col.slice (start, step)) {
273
+ // LOG(ERROR) << "items: " << item.index;
266
274
const size_t index = item.index ;
267
275
int64_t elem = item.element ;
268
276
if (elem == type_info.null_val ) {
277
+ // LOG(ERROR) << "null val";
269
278
if (type_info.uses_bw_eq ) {
270
279
elem = type_info.translated_null_val ;
271
280
} else {
281
+ // LOG(ERROR) << "initOneToOneHashTableOnCpu(Threaded) cont elem: " << elem
282
+ // << " index: " << index;
272
283
continue ;
273
284
}
274
285
}
@@ -286,7 +297,13 @@ DEVICE auto fill_hash_join_buff_impl(int32_t* buff,
286
297
elem = outer_id;
287
298
}
288
299
#endif
300
+ // char line[1024];
301
+ // snprintf(line, sizeof(line), " entry ptr: %p", buff);
302
+ // LOG(ERROR) << "initOneToOneHashTableOnCpu(Threaded) elem: " << elem
303
+ // << " index: " << index << " invalid_slot_val: " << invalid_slot_val
304
+ // << line;
289
305
if (filling_func (elem, index)) {
306
+ // LOG(ERROR) << "errr";
290
307
return -1 ;
291
308
}
292
309
}
@@ -323,6 +340,138 @@ DEVICE int SUFFIX(fill_hash_join_buff_bucketized)(
323
340
hashtable_filling_func);
324
341
}
325
342
343
+ #ifndef __CUDACC__
344
+ DEVICE int SUFFIX (fill_hash_join_buff_bucketized_cpu)(
345
+ int32_t * cpu_hash_table_buff,
346
+ const int32_t hash_join_invalid_val,
347
+ const bool for_semi_join,
348
+ const JoinColumn& join_column,
349
+ const JoinColumnTypeInfo& type_info,
350
+ const int32_t * sd_inner_to_outer_translation_map,
351
+ const int32_t min_inner_elem,
352
+ const int64_t bucket_normalization) {
353
+ int partial_err = 0 ;
354
+ auto filling_func = for_semi_join ? SUFFIX (fill_hashtable_for_semi_join)
355
+ : SUFFIX (fill_one_to_one_hashtable);
356
+ auto hashtable_filling_func = [&](int64_t elem, size_t index) {
357
+ auto entry_ptr = SUFFIX (get_bucketized_hash_slot)(
358
+ cpu_hash_table_buff, elem, type_info.min_val , bucket_normalization);
359
+ // LOG(ERROR) << "filling index: " << index << " elem: " << elem
360
+ // << " entry_ptr: " << entry_ptr;
361
+ return filling_func (index, entry_ptr, hash_join_invalid_val);
362
+ };
363
+
364
+ // for some stupid reason int8* ptr is actually JoinChunk* Why?
365
+ auto join_chunk_array =
366
+ reinterpret_cast <const struct JoinChunk *>(join_column.col_chunks_buff );
367
+ // BTW it's vector with sz:
368
+ // join_column.num_chunks
369
+ const int8_t * chunk_mem_ptr = join_chunk_array->col_buff ;
370
+ size_t global_elem_index = 0 ;
371
+ // LOG(ERROR) << "fill_hash_join_buff_cpu chunk buff size: "
372
+ // << join_column.col_chunks_buff_sz << " num elems: " <<
373
+ // join_column.num_elems
374
+ // << " num_chunks: " << join_column.num_chunks;
375
+ for (size_t chunk_i = 0 ; chunk_i < join_column.num_chunks ; chunk_i++) {
376
+ // wtf 1 chunk, but 0 elements.
377
+ if (join_column.num_elems == 0 ) {
378
+ break ;
379
+ }
380
+ auto curr_chunk = join_chunk_array[chunk_i];
381
+ for (size_t elem_i = 0 ; elem_i < curr_chunk.num_elems ; elem_i++) {
382
+ chunk_mem_ptr = curr_chunk.col_buff ;
383
+
384
+ // char line[1024];
385
+ // snprintf(line, sizeof(line), " ptr: %p", chunk_mem_ptr);
386
+ // LOG(ERROR) << "initOneToOneHashTableOnCpu " << line
387
+ // << " type: " << type_info.column_type << " index in chunk: " << elem_i
388
+ // << " elem_sz: " << type_info.elem_sz
389
+ // << " invalid_slot_val: " << hash_join_invalid_val;
390
+
391
+ int64_t elem = 0 ;
392
+ switch (type_info.column_type ) {
393
+ case SmallDate: {
394
+ // LOG(ERROR) << "smallDate";
395
+ elem = fixed_width_small_date_decode_noinline (
396
+ chunk_mem_ptr,
397
+ type_info.elem_sz ,
398
+ type_info.elem_sz == 4 ? NULL_INT : NULL_SMALLINT,
399
+ type_info.elem_sz == 4 ? NULL_INT : NULL_SMALLINT,
400
+ elem_i);
401
+ break ;
402
+ }
403
+ case Signed: {
404
+ // char line[1024];
405
+ // snprintf(line, sizeof(line), " ptr: %p", chunk_mem_ptr);
406
+ // LOG(ERROR) << "Should call fixed_width_int_decode_noinline: " << line
407
+ // << " elem_i: " << elem_i << " without func: ";
408
+
409
+ // LOG(ERROR) << "int32_cast: "
410
+ // << *(reinterpret_cast<const int32_t*>(
411
+ // &chunk_mem_ptr[elem_i * type_info.elem_sz]));
412
+ elem =
413
+ fixed_width_int_decode_noinline (chunk_mem_ptr, type_info.elem_sz , elem_i);
414
+ break ;
415
+ }
416
+ case Unsigned: {
417
+ // LOG(ERROR) << "unsigned";
418
+ elem = fixed_width_unsigned_decode_noinline (
419
+ chunk_mem_ptr, type_info.elem_sz , elem_i);
420
+ break ;
421
+ }
422
+ case Double: {
423
+ // LOG(ERROR) << "double";
424
+ elem = fixed_width_double_decode_noinline (chunk_mem_ptr, elem_i);
425
+ break ;
426
+ }
427
+ default : {
428
+ // LOG(ERROR) << "default";
429
+ assert (0 );
430
+ }
431
+ }
432
+
433
+ if (elem == type_info.null_val ) {
434
+ // LOG(ERROR) << "null elem";
435
+ if (type_info.uses_bw_eq ) {
436
+ elem = type_info.translated_null_val ;
437
+ } else {
438
+ // LOG(ERROR) << "cont: elem_i - " << elem_i << " chunk_i - " << chunk_i;
439
+ break ;
440
+ }
441
+ }
442
+ if (sd_inner_to_outer_translation_map &&
443
+ (!type_info.uses_bw_eq || elem != type_info.translated_null_val )) {
444
+ const auto outer_id = map_str_id_to_outer_dict (elem,
445
+ min_inner_elem,
446
+ type_info.min_val ,
447
+ type_info.max_val ,
448
+ sd_inner_to_outer_translation_map);
449
+ if (outer_id == StringDictionary::INVALID_STR_ID) {
450
+ break ;
451
+ }
452
+ elem = outer_id;
453
+ }
454
+
455
+ // LOG(ERROR) << "initOneToOneHashTableOnCpu elem: " << elem
456
+ // << " index: " << global_elem_index << " chunk idx: " << chunk_i
457
+ // << " el_i: " << elem_i;
458
+
459
+ if (hashtable_filling_func (elem, global_elem_index)) {
460
+ partial_err = -1 ;
461
+ }
462
+
463
+ global_elem_index++;
464
+ if (partial_err != 0 ) {
465
+ // LOG(ERROR) << "error here! " << partial_err;
466
+ return partial_err;
467
+ }
468
+ partial_err = 0 ;
469
+ }
470
+ }
471
+ return 0 ;
472
+ }
473
+ #endif
474
+
326
475
DEVICE int SUFFIX (fill_hash_join_buff)(int32_t * buff,
327
476
const int32_t invalid_slot_val,
328
477
const bool for_semi_join,
0 commit comments