Skip to content

Commit e20b41b

Browse files
vtjnashIanButterworth
authored andcommitted
add array element mutex offset in print and gc (#58997)
The layout, printing, and gc logic need to correctly offset and align the inset fields to account for the per-element mutex of an atomic array with large elements. Fix #58993 (cherry picked from commit c5ba4e2)
1 parent 6134d9a commit e20b41b

File tree

8 files changed

+90
-53
lines changed

8 files changed

+90
-53
lines changed

base/runtime_internals.jl

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -528,6 +528,10 @@ struct DataTypeLayout
528528
# fielddesc_type : 2;
529529
# arrayelem_isboxed : 1;
530530
# arrayelem_isunion : 1;
531+
# arrayelem_isatomic : 1;
532+
# arrayelem_islocked : 1;
533+
# isbitsegal : 1;
534+
# padding : 8;
531535
end
532536

533537
"""
@@ -600,7 +604,7 @@ function datatype_isbitsegal(dt::DataType)
600604
@_foldable_meta
601605
dt.layout == C_NULL && throw(UndefRefError())
602606
flags = unsafe_load(convert(Ptr{DataTypeLayout}, dt.layout)).flags
603-
return (flags & (1<<5)) != 0
607+
return (flags & (1<<7)) != 0
604608
end
605609

606610
"""

src/cgutils.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3257,7 +3257,7 @@ static Value *emit_genericmemoryelsize(jl_codectx_t &ctx, Value *v, jl_value_t *
32573257
if (jl_is_genericmemoryref_type(sty))
32583258
sty = (jl_datatype_t*)jl_field_type_concrete(sty, 1);
32593259
size_t sz = sty->layout->size;
3260-
if (sty->layout->flags.arrayelem_isunion)
3260+
if (sty->layout->flags.arrayelem_isunion && add_isunion)
32613261
sz++;
32623262
auto elsize = ConstantInt::get(ctx.types().T_size, sz);
32633263
return elsize;
@@ -4711,9 +4711,10 @@ static jl_cgval_t emit_memoryref(jl_codectx_t &ctx, const jl_cgval_t &ref, jl_cg
47114711
setName(ctx.emission_context, ovflw, "memoryref_ovflw");
47124712
}
47134713
#endif
4714-
Type *elty = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, jl_tparam1(ref.typ));
4715-
newdata = ctx.builder.CreateGEP(elty, data, offset);
4716-
setName(ctx.emission_context, newdata, "memoryref_data_offset");
4714+
boffset = ctx.builder.CreateMul(offset, elsz);
4715+
setName(ctx.emission_context, boffset, "memoryref_byteoffset");
4716+
newdata = ctx.builder.CreateGEP(getInt8Ty(ctx.builder.getContext()), data, boffset);
4717+
setName(ctx.emission_context, newdata, "memoryref_data_byteoffset");
47174718
(void)boffset; // LLVM is very bad at handling GEP with types different from the load
47184719
if (bc) {
47194720
BasicBlock *failBB, *endBB;

src/codegen.cpp

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3869,8 +3869,8 @@ static bool emit_f_opmemory(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
38693869
const jl_datatype_layout_t *layout = ((jl_datatype_t*)mty_dt)->layout;
38703870
bool isboxed = layout->flags.arrayelem_isboxed;
38713871
bool isunion = layout->flags.arrayelem_isunion;
3872-
bool isatomic = kind == (jl_value_t*)jl_atomic_sym;
3873-
bool needlock = isatomic && layout->size > MAX_ATOMIC_SIZE;
3872+
bool isatomic = layout->flags.arrayelem_isatomic || layout->flags.arrayelem_islocked;
3873+
bool needlock = layout->flags.arrayelem_islocked;
38743874
size_t elsz = layout->size;
38753875
size_t al = layout->alignment;
38763876
if (al > JL_HEAP_ALIGNMENT)
@@ -4229,7 +4229,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
42294229
size_t al = layout->alignment;
42304230
if (al > JL_HEAP_ALIGNMENT)
42314231
al = JL_HEAP_ALIGNMENT;
4232-
bool needlock = isatomic && !isboxed && elsz > MAX_ATOMIC_SIZE;
4232+
bool needlock = layout->flags.arrayelem_islocked;
42334233
AtomicOrdering Order = (needlock || order <= jl_memory_order_notatomic)
42344234
? (isboxed ? AtomicOrdering::Unordered : AtomicOrdering::NotAtomic)
42354235
: get_llvm_atomic_order(order);
@@ -4315,7 +4315,8 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
43154315
*ret = jl_cgval_t(); // unreachable
43164316
return true;
43174317
}
4318-
bool isatomic = kind == (jl_value_t*)jl_atomic_sym;
4318+
const jl_datatype_layout_t *layout = ((jl_datatype_t*)mty_dt)->layout;
4319+
bool isatomic = layout->flags.arrayelem_isatomic || layout->flags.arrayelem_islocked;
43194320
if (!isatomic && order != jl_memory_order_notatomic && order != jl_memory_order_unspecified) {
43204321
emit_atomic_error(ctx, "memoryref_isassigned: non-atomic memory cannot be accessed atomically");
43214322
*ret = jl_cgval_t(); // unreachable
@@ -4331,13 +4332,12 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
43314332
}
43324333
jl_value_t *boundscheck = argv[3].constant;
43334334
emit_typecheck(ctx, argv[3], (jl_value_t*)jl_bool_type, fname);
4334-
const jl_datatype_layout_t *layout = ((jl_datatype_t*)mty_dt)->layout;
43354335
Value *mem = emit_memoryref_mem(ctx, ref, layout);
43364336
Value *mlen = emit_genericmemorylen(ctx, mem, ref.typ);
43374337
Value *oob = bounds_check_enabled(ctx, boundscheck) ? ctx.builder.CreateIsNull(mlen) : nullptr;
43384338
bool isboxed = layout->flags.arrayelem_isboxed;
43394339
if (isboxed || layout->first_ptr >= 0) {
4340-
bool needlock = isatomic && !isboxed && layout->size > MAX_ATOMIC_SIZE;
4340+
bool needlock = layout->flags.arrayelem_islocked;
43414341
AtomicOrdering Order = (needlock || order <= jl_memory_order_notatomic)
43424342
? (isboxed ? AtomicOrdering::Unordered : AtomicOrdering::NotAtomic)
43434343
: get_llvm_atomic_order(order);
@@ -4357,13 +4357,12 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
43574357
ctx.builder.SetInsertPoint(passBB);
43584358
}
43594359
Value *elem = emit_memoryref_ptr(ctx, ref, layout);
4360-
if (needlock) {
4360+
if (!isboxed)
4361+
elem = emit_ptrgep(ctx, elem, layout->first_ptr * sizeof(void*));
4362+
else if (needlock)
43614363
// n.b. no actual lock acquire needed, as the check itself only needs to load a single pointer and check for null
43624364
// elem += sizeof(lock);
43634365
elem = emit_ptrgep(ctx, elem, LLT_ALIGN(sizeof(jl_mutex_t), JL_SMALL_BYTE_ALIGNMENT));
4364-
}
4365-
if (!isboxed)
4366-
elem = emit_ptrgep(ctx, elem, layout->first_ptr * sizeof(void*));
43674366
// emit this using the same type as BUILTIN(memoryrefget)
43684367
// so that LLVM may be able to load-load forward them and fold the result
43694368
auto tbaa = isboxed ? ctx.tbaa().tbaa_ptrarraybuf : ctx.tbaa().tbaa_arraybuf;

src/datatype.c

Lines changed: 40 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -239,8 +239,10 @@ static jl_datatype_layout_t *jl_get_layout(uint32_t sz,
239239
flddesc->flags.haspadding = haspadding;
240240
flddesc->flags.isbitsegal = isbitsegal;
241241
flddesc->flags.fielddesc_type = fielddesc_type;
242-
flddesc->flags.arrayelem_isboxed = arrayelem == 1;
243-
flddesc->flags.arrayelem_isunion = arrayelem == 2;
242+
flddesc->flags.arrayelem_isboxed = (arrayelem & 1) != 0;
243+
flddesc->flags.arrayelem_isunion = (arrayelem & 2) != 0;
244+
flddesc->flags.arrayelem_isatomic = (arrayelem & 4) != 0;
245+
flddesc->flags.arrayelem_islocked = (arrayelem & 8) != 0;
244246
flddesc->flags.padding = 0;
245247
flddesc->npointers = npointers;
246248
flddesc->first_ptr = first_ptr;
@@ -537,6 +539,7 @@ void jl_get_genericmemory_layout(jl_datatype_t *st)
537539
uint32_t *pointers = &first_ptr;
538540
int needlock = 0;
539541

542+
const jl_datatype_layout_t *el_layout = NULL;
540543
if (isunboxed) {
541544
elsz = LLT_ALIGN(elsz, al);
542545
if (kind == (jl_value_t*)jl_atomic_sym) {
@@ -551,12 +554,12 @@ void jl_get_genericmemory_layout(jl_datatype_t *st)
551554
else {
552555
assert(jl_is_datatype(eltype));
553556
zi = ((jl_datatype_t*)eltype)->zeroinit;
554-
const jl_datatype_layout_t *layout = ((jl_datatype_t*)eltype)->layout;
555-
if (layout->first_ptr >= 0) {
556-
first_ptr = layout->first_ptr;
557-
npointers = layout->npointers;
558-
if (layout->flags.fielddesc_type == 2) {
559-
pointers = (uint32_t*)jl_dt_layout_ptrs(layout);
557+
el_layout = ((jl_datatype_t*)eltype)->layout;
558+
if (el_layout->first_ptr >= 0) {
559+
first_ptr = el_layout->first_ptr;
560+
npointers = el_layout->npointers;
561+
if (el_layout->flags.fielddesc_type == 2 && !needlock) {
562+
pointers = (uint32_t*)jl_dt_layout_ptrs(el_layout);
560563
}
561564
else {
562565
pointers = (uint32_t*)alloca(npointers * sizeof(uint32_t));
@@ -568,10 +571,22 @@ void jl_get_genericmemory_layout(jl_datatype_t *st)
568571
}
569572
if (needlock) {
570573
assert(al <= JL_SMALL_BYTE_ALIGNMENT);
571-
size_t offset = LLT_ALIGN(sizeof(jl_mutex_t), JL_SMALL_BYTE_ALIGNMENT);
572-
elsz += offset;
574+
size_t lock_offset = LLT_ALIGN(sizeof(jl_mutex_t), JL_SMALL_BYTE_ALIGNMENT);
575+
elsz += lock_offset;
576+
if (al < sizeof(void*)) {
577+
al = sizeof(void*);
578+
elsz = LLT_ALIGN(elsz, al);
579+
}
573580
haspadding = 1;
574581
zi = 1;
582+
// Adjust pointer offsets to account for the lock at the beginning
583+
if (first_ptr != -1) {
584+
uint32_t lock_offset_words = lock_offset / sizeof(void*);
585+
first_ptr += lock_offset_words;
586+
for (int j = 0; j < npointers; j++) {
587+
pointers[j] += lock_offset_words;
588+
}
589+
}
575590
}
576591
}
577592
else {
@@ -580,13 +595,17 @@ void jl_get_genericmemory_layout(jl_datatype_t *st)
580595
zi = 1;
581596
}
582597

583-
int arrayelem;
598+
// arrayelem is a bitfield: 1=isboxed, 2=isunion, 4=isatomic, 8=islocked
599+
int arrayelem = 0;
584600
if (!isunboxed)
585-
arrayelem = 1;
586-
else if (isunion)
587-
arrayelem = 2;
588-
else
589-
arrayelem = 0;
601+
arrayelem |= 1; // arrayelem_isboxed
602+
if (isunion)
603+
arrayelem |= 2; // arrayelem_isunion
604+
if (kind == (jl_value_t*)jl_atomic_sym) {
605+
arrayelem |= 4; // arrayelem_isatomic
606+
if (needlock)
607+
arrayelem |= 8; // arrayelem_islocked
608+
}
590609
assert(!st->layout);
591610
st->layout = jl_get_layout(elsz, nfields, npointers, al, haspadding, isbitsegal, arrayelem, NULL, pointers);
592611
st->zeroinit = zi;
@@ -647,17 +666,17 @@ void jl_compute_field_offsets(jl_datatype_t *st)
647666
// if we have no fields, we can trivially skip the rest
648667
if (st == jl_symbol_type || st == jl_string_type) {
649668
// opaque layout - heap-allocated blob
650-
static const jl_datatype_layout_t opaque_byte_layout = {0, 0, 1, -1, 1, { .haspadding = 0, .fielddesc_type=0, .isbitsegal=1, .arrayelem_isboxed=0, .arrayelem_isunion=0 }};
669+
static const jl_datatype_layout_t opaque_byte_layout = {0, 0, 1, -1, 1, { .isbitsegal=1 }};
651670
st->layout = &opaque_byte_layout;
652671
return;
653672
}
654673
else if (st == jl_simplevector_type || st == jl_module_type) {
655-
static const jl_datatype_layout_t opaque_ptr_layout = {0, 0, 1, -1, sizeof(void*), { .haspadding = 0, .fielddesc_type=0, .isbitsegal=1, .arrayelem_isboxed=0, .arrayelem_isunion=0 }};
674+
static const jl_datatype_layout_t opaque_ptr_layout = {0, 0, 1, -1, sizeof(void*), { .isbitsegal=1 }};
656675
st->layout = &opaque_ptr_layout;
657676
return;
658677
}
659678
else {
660-
static const jl_datatype_layout_t singleton_layout = {0, 0, 0, -1, 1, { .haspadding = 0, .fielddesc_type=0, .isbitsegal=1, .arrayelem_isboxed=0, .arrayelem_isunion=0 }};
679+
static const jl_datatype_layout_t singleton_layout = {0, 0, 0, -1, 1, { .isbitsegal=1 }};
661680
st->layout = &singleton_layout;
662681
}
663682
}
@@ -1001,6 +1020,8 @@ JL_DLLEXPORT jl_datatype_t * jl_new_foreign_type(jl_sym_t *name,
10011020
layout->flags.padding = 0;
10021021
layout->flags.arrayelem_isboxed = 0;
10031022
layout->flags.arrayelem_isunion = 0;
1023+
layout->flags.arrayelem_isatomic = 0;
1024+
layout->flags.arrayelem_islocked = 0;
10041025
jl_fielddescdyn_t * desc =
10051026
(jl_fielddescdyn_t *) ((char *)layout + sizeof(*layout));
10061027
desc->markfunc = markfunc;

src/genericmemory.c

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -302,8 +302,8 @@ JL_DLLEXPORT void jl_genericmemory_copyto(jl_genericmemory_t *dest, char* destda
302302

303303
JL_DLLEXPORT jl_value_t *jl_genericmemoryref(jl_genericmemory_t *mem, size_t i)
304304
{
305-
int isatomic = (jl_tparam0(jl_typetagof(mem)) == (jl_value_t*)jl_atomic_sym);
306305
const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(mem))->layout;
306+
int isatomic = layout->flags.arrayelem_isatomic || layout->flags.arrayelem_islocked;
307307
jl_genericmemoryref_t m;
308308
m.mem = mem;
309309
m.ptr_or_offset = (layout->flags.arrayelem_isunion || layout->size == 0) ? (void*)i : (void*)((char*)mem->ptr + layout->size * i);
@@ -379,8 +379,8 @@ static jl_value_t *jl_ptrmemrefget(jl_genericmemoryref_t m JL_PROPAGATES_ROOT, i
379379

380380
JL_DLLEXPORT jl_value_t *jl_memoryrefget(jl_genericmemoryref_t m, int isatomic)
381381
{
382-
assert(isatomic == (jl_tparam0(jl_typetagof(m.mem)) == (jl_value_t*)jl_atomic_sym));
383382
const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m.mem))->layout;
383+
assert(isatomic == (layout->flags.arrayelem_isatomic || layout->flags.arrayelem_islocked));
384384
if (layout->flags.arrayelem_isboxed)
385385
return jl_ptrmemrefget(m, isatomic);
386386
jl_value_t *eltype = jl_tparam1(jl_typetagof(m.mem));
@@ -402,7 +402,7 @@ JL_DLLEXPORT jl_value_t *jl_memoryrefget(jl_genericmemoryref_t m, int isatomic)
402402
assert(data - (char*)m.mem->ptr < layout->size * m.mem->length);
403403
jl_value_t *r;
404404
size_t fsz = jl_datatype_size(eltype);
405-
int needlock = isatomic && fsz > MAX_ATOMIC_SIZE;
405+
int needlock = layout->flags.arrayelem_islocked;
406406
if (isatomic && !needlock) {
407407
r = jl_atomic_new_bits(eltype, data);
408408
}
@@ -430,9 +430,6 @@ static int _jl_memoryref_isassigned(jl_genericmemoryref_t m, int isatomic)
430430
if (layout->flags.arrayelem_isboxed) {
431431
}
432432
else if (layout->first_ptr >= 0) {
433-
int needlock = isatomic && layout->size > MAX_ATOMIC_SIZE;
434-
if (needlock)
435-
elem = elem + LLT_ALIGN(sizeof(jl_mutex_t), JL_SMALL_BYTE_ALIGNMENT) / sizeof(jl_value_t*);
436433
elem = &elem[layout->first_ptr];
437434
}
438435
else {
@@ -448,15 +445,15 @@ JL_DLLEXPORT jl_value_t *jl_memoryref_isassigned(jl_genericmemoryref_t m, int is
448445

449446
JL_DLLEXPORT void jl_memoryrefset(jl_genericmemoryref_t m JL_ROOTING_ARGUMENT, jl_value_t *rhs JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED, int isatomic)
450447
{
451-
assert(isatomic == (jl_tparam0(jl_typetagof(m.mem)) == (jl_value_t*)jl_atomic_sym));
448+
const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m.mem))->layout;
449+
assert(isatomic == (layout->flags.arrayelem_isatomic || layout->flags.arrayelem_islocked));
452450
jl_value_t *eltype = jl_tparam1(jl_typetagof(m.mem));
453451
if (eltype != (jl_value_t*)jl_any_type && !jl_typeis(rhs, eltype)) {
454452
JL_GC_PUSH1(&rhs);
455453
if (!jl_isa(rhs, eltype))
456454
jl_type_error("memoryrefset!", eltype, rhs);
457455
JL_GC_POP();
458456
}
459-
const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m.mem))->layout;
460457
if (layout->flags.arrayelem_isboxed) {
461458
assert((char*)m.ptr_or_offset - (char*)m.mem->ptr < sizeof(jl_value_t*) * m.mem->length);
462459
if (isatomic)
@@ -486,7 +483,7 @@ JL_DLLEXPORT void jl_memoryrefset(jl_genericmemoryref_t m JL_ROOTING_ARGUMENT, j
486483
}
487484
if (layout->size != 0) {
488485
assert(data - (char*)m.mem->ptr < layout->size * m.mem->length);
489-
int needlock = isatomic && layout->size > MAX_ATOMIC_SIZE;
486+
int needlock = layout->flags.arrayelem_islocked;
490487
size_t fsz = jl_datatype_size((jl_datatype_t*)jl_typeof(rhs)); // need to shrink-wrap the final copy
491488
if (isatomic && !needlock) {
492489
jl_atomic_store_bits(data, rhs, fsz);

src/julia.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -583,10 +583,12 @@ typedef struct {
583583
// metadata bit only for GenericMemory eltype layout
584584
uint16_t arrayelem_isboxed : 1;
585585
uint16_t arrayelem_isunion : 1;
586+
uint16_t arrayelem_isatomic : 1;
587+
uint16_t arrayelem_islocked : 1;
586588
// If set, this type's egality can be determined entirely by comparing
587589
// the non-padding bits of this datatype.
588590
uint16_t isbitsegal : 1;
589-
uint16_t padding : 10;
591+
uint16_t padding : 8;
590592
} flags;
591593
// union {
592594
// jl_fielddesc8_t field8[nfields];
@@ -1689,6 +1691,8 @@ static inline int jl_field_isconst(jl_datatype_t *st, int i) JL_NOTSAFEPOINT
16891691
#define jl_is_addrspacecore(v) jl_typetagis(v,jl_addrspacecore_type)
16901692
#define jl_is_abioverride(v) jl_typetagis(v,jl_abioverride_type)
16911693
#define jl_genericmemory_isbitsunion(a) (((jl_datatype_t*)jl_typetagof(a))->layout->flags.arrayelem_isunion)
1694+
#define jl_genericmemory_isatomic(a) (((jl_datatype_t*)jl_typetagof(a))->layout->flags.arrayelem_isatomic)
1695+
#define jl_genericmemory_islocked(a) (((jl_datatype_t*)jl_typetagof(a))->layout->flags.arrayelem_islocked)
16921696
#define jl_is_array_any(v) jl_typetagis(v,jl_array_any_type)
16931697

16941698
JL_DLLEXPORT int jl_subtype(jl_value_t *a, jl_value_t *b);

src/rtutils.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1294,6 +1294,11 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
12941294
}
12951295
else {
12961296
char *ptr = ((char*)m->ptr) + j * layout->size;
1297+
if (layout->flags.arrayelem_islocked) {
1298+
// Skip the lock at the beginning for locked arrays
1299+
size_t lock_size = sizeof(jl_mutex_t);
1300+
ptr += lock_size;
1301+
}
12971302
n += jl_static_show_x_(out, (jl_value_t*)ptr,
12981303
(jl_datatype_t*)(typetagdata ? jl_nth_union_component(el_type, typetagdata[j]) : el_type),
12991304
depth, ctx);

0 commit comments

Comments
 (0)