Skip to content

Commit c5ba4e2

Browse files
authored
add array element mutex offset in print and gc (#58997)
The layout, printing, and gc logic need to correctly offset and align the inset fields to account for the per-element mutex of an atomic array with large elements. Fix #58993
1 parent 44dd95a commit c5ba4e2

File tree

8 files changed

+92
-55
lines changed

8 files changed

+92
-55
lines changed

base/runtime_internals.jl

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -565,6 +565,10 @@ struct DataTypeLayout
565565
# fielddesc_type : 2;
566566
# arrayelem_isboxed : 1;
567567
# arrayelem_isunion : 1;
568+
# arrayelem_isatomic : 1;
569+
# arrayelem_islocked : 1;
570+
# isbitsegal : 1;
571+
# padding : 8;
568572
end
569573

570574
"""
@@ -637,7 +641,7 @@ function datatype_isbitsegal(dt::DataType)
637641
@_foldable_meta
638642
dt.layout == C_NULL && throw(UndefRefError())
639643
flags = unsafe_load(convert(Ptr{DataTypeLayout}, dt.layout)).flags
640-
return (flags & (1<<5)) != 0
644+
return (flags & (1<<7)) != 0
641645
end
642646

643647
"""

src/cgutils.cpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3299,7 +3299,7 @@ static Value *emit_genericmemoryelsize(jl_codectx_t &ctx, Value *v, jl_value_t *
32993299
if (jl_is_genericmemoryref_type(sty))
33003300
sty = (jl_datatype_t*)jl_field_type_concrete(sty, 1);
33013301
size_t sz = sty->layout->size;
3302-
if (sty->layout->flags.arrayelem_isunion)
3302+
if (sty->layout->flags.arrayelem_isunion && add_isunion)
33033303
sz++;
33043304
auto elsize = ConstantInt::get(ctx.types().T_size, sz);
33053305
return elsize;
@@ -4716,8 +4716,8 @@ static jl_cgval_t emit_memoryref_direct(jl_codectx_t &ctx, const jl_cgval_t &mem
47164716

47174717
} else {
47184718
data = emit_genericmemoryptr(ctx, boxmem, layout, 0);
4719-
Type *elty = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, jl_tparam1(typ));
4720-
data = ctx.builder.CreateInBoundsGEP(elty, data, idx0);
4719+
idx0 = ctx.builder.CreateMul(idx0, emit_genericmemoryelsize(ctx, boxmem, mem.typ, false), "", true, true);
4720+
data = ctx.builder.CreatePtrAdd(data, idx0);
47214721
}
47224722

47234723
return _emit_memoryref(ctx, boxmem, data, layout, typ);
@@ -4820,9 +4820,10 @@ static jl_cgval_t emit_memoryref(jl_codectx_t &ctx, const jl_cgval_t &ref, jl_cg
48204820
setName(ctx.emission_context, ovflw, "memoryref_ovflw");
48214821
}
48224822
#endif
4823-
Type *elty = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, jl_tparam1(ref.typ));
4824-
newdata = ctx.builder.CreateGEP(elty, data, offset);
4825-
setName(ctx.emission_context, newdata, "memoryref_data_offset");
4823+
boffset = ctx.builder.CreateMul(offset, elsz);
4824+
setName(ctx.emission_context, boffset, "memoryref_byteoffset");
4825+
newdata = ctx.builder.CreateGEP(getInt8Ty(ctx.builder.getContext()), data, boffset);
4826+
setName(ctx.emission_context, newdata, "memoryref_data_byteoffset");
48264827
(void)boffset; // LLVM is very bad at handling GEP with types different from the load
48274828
if (bc) {
48284829
BasicBlock *failBB, *endBB;

src/codegen.cpp

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3862,8 +3862,8 @@ static bool emit_f_opmemory(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
38623862
const jl_datatype_layout_t *layout = ((jl_datatype_t*)mty_dt)->layout;
38633863
bool isboxed = layout->flags.arrayelem_isboxed;
38643864
bool isunion = layout->flags.arrayelem_isunion;
3865-
bool isatomic = kind == (jl_value_t*)jl_atomic_sym;
3866-
bool needlock = isatomic && layout->size > MAX_ATOMIC_SIZE;
3865+
bool isatomic = layout->flags.arrayelem_isatomic || layout->flags.arrayelem_islocked;
3866+
bool needlock = layout->flags.arrayelem_islocked;
38673867
size_t elsz = layout->size;
38683868
size_t al = layout->alignment;
38693869
if (al > JL_HEAP_ALIGNMENT)
@@ -4231,7 +4231,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
42314231
size_t al = layout->alignment;
42324232
if (al > JL_HEAP_ALIGNMENT)
42334233
al = JL_HEAP_ALIGNMENT;
4234-
bool needlock = isatomic && !isboxed && elsz > MAX_ATOMIC_SIZE;
4234+
bool needlock = layout->flags.arrayelem_islocked;
42354235
AtomicOrdering Order = (needlock || order <= jl_memory_order_notatomic)
42364236
? (isboxed ? AtomicOrdering::Unordered : AtomicOrdering::NotAtomic)
42374237
: get_llvm_atomic_order(order);
@@ -4317,7 +4317,8 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
43174317
*ret = jl_cgval_t(); // unreachable
43184318
return true;
43194319
}
4320-
bool isatomic = kind == (jl_value_t*)jl_atomic_sym;
4320+
const jl_datatype_layout_t *layout = ((jl_datatype_t*)mty_dt)->layout;
4321+
bool isatomic = layout->flags.arrayelem_isatomic || layout->flags.arrayelem_islocked;
43214322
if (!isatomic && order != jl_memory_order_notatomic && order != jl_memory_order_unspecified) {
43224323
emit_atomic_error(ctx, "memoryref_isassigned: non-atomic memory cannot be accessed atomically");
43234324
*ret = jl_cgval_t(); // unreachable
@@ -4333,13 +4334,12 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
43334334
}
43344335
jl_value_t *boundscheck = argv[3].constant;
43354336
emit_typecheck(ctx, argv[3], (jl_value_t*)jl_bool_type, fname);
4336-
const jl_datatype_layout_t *layout = ((jl_datatype_t*)mty_dt)->layout;
43374337
Value *mem = emit_memoryref_mem(ctx, ref, layout);
43384338
Value *mlen = emit_genericmemorylen(ctx, mem, ref.typ);
43394339
Value *oob = bounds_check_enabled(ctx, boundscheck) ? ctx.builder.CreateIsNull(mlen) : nullptr;
43404340
bool isboxed = layout->flags.arrayelem_isboxed;
43414341
if (isboxed || layout->first_ptr >= 0) {
4342-
bool needlock = isatomic && !isboxed && layout->size > MAX_ATOMIC_SIZE;
4342+
bool needlock = layout->flags.arrayelem_islocked;
43434343
AtomicOrdering Order = (needlock || order <= jl_memory_order_notatomic)
43444344
? (isboxed ? AtomicOrdering::Unordered : AtomicOrdering::NotAtomic)
43454345
: get_llvm_atomic_order(order);
@@ -4359,13 +4359,12 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
43594359
ctx.builder.SetInsertPoint(passBB);
43604360
}
43614361
Value *elem = emit_memoryref_ptr(ctx, ref, layout);
4362-
if (needlock) {
4362+
if (!isboxed)
4363+
elem = emit_ptrgep(ctx, elem, layout->first_ptr * sizeof(void*));
4364+
else if (needlock)
43634365
// n.b. no actual lock acquire needed, as the check itself only needs to load a single pointer and check for null
43644366
// elem += sizeof(lock);
43654367
elem = emit_ptrgep(ctx, elem, LLT_ALIGN(sizeof(jl_mutex_t), JL_SMALL_BYTE_ALIGNMENT));
4366-
}
4367-
if (!isboxed)
4368-
elem = emit_ptrgep(ctx, elem, layout->first_ptr * sizeof(void*));
43694368
// emit this using the same type as BUILTIN(memoryrefget)
43704369
// so that LLVM may be able to load-load forward them and fold the result
43714370
auto tbaa = isboxed ? ctx.tbaa().tbaa_ptrarraybuf : ctx.tbaa().tbaa_arraybuf;

src/datatype.c

Lines changed: 40 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -239,8 +239,10 @@ static jl_datatype_layout_t *jl_get_layout(uint32_t sz,
239239
flddesc->flags.haspadding = haspadding;
240240
flddesc->flags.isbitsegal = isbitsegal;
241241
flddesc->flags.fielddesc_type = fielddesc_type;
242-
flddesc->flags.arrayelem_isboxed = arrayelem == 1;
243-
flddesc->flags.arrayelem_isunion = arrayelem == 2;
242+
flddesc->flags.arrayelem_isboxed = (arrayelem & 1) != 0;
243+
flddesc->flags.arrayelem_isunion = (arrayelem & 2) != 0;
244+
flddesc->flags.arrayelem_isatomic = (arrayelem & 4) != 0;
245+
flddesc->flags.arrayelem_islocked = (arrayelem & 8) != 0;
244246
flddesc->flags.padding = 0;
245247
flddesc->npointers = npointers;
246248
flddesc->first_ptr = first_ptr;
@@ -537,6 +539,7 @@ void jl_get_genericmemory_layout(jl_datatype_t *st)
537539
uint32_t *pointers = &first_ptr;
538540
int needlock = 0;
539541

542+
const jl_datatype_layout_t *el_layout = NULL;
540543
if (isunboxed) {
541544
elsz = LLT_ALIGN(elsz, al);
542545
if (kind == (jl_value_t*)jl_atomic_sym) {
@@ -551,12 +554,12 @@ void jl_get_genericmemory_layout(jl_datatype_t *st)
551554
else {
552555
assert(jl_is_datatype(eltype));
553556
zi = ((jl_datatype_t*)eltype)->zeroinit;
554-
const jl_datatype_layout_t *layout = ((jl_datatype_t*)eltype)->layout;
555-
if (layout->first_ptr >= 0) {
556-
first_ptr = layout->first_ptr;
557-
npointers = layout->npointers;
558-
if (layout->flags.fielddesc_type == 2) {
559-
pointers = (uint32_t*)jl_dt_layout_ptrs(layout);
557+
el_layout = ((jl_datatype_t*)eltype)->layout;
558+
if (el_layout->first_ptr >= 0) {
559+
first_ptr = el_layout->first_ptr;
560+
npointers = el_layout->npointers;
561+
if (el_layout->flags.fielddesc_type == 2 && !needlock) {
562+
pointers = (uint32_t*)jl_dt_layout_ptrs(el_layout);
560563
}
561564
else {
562565
pointers = (uint32_t*)alloca(npointers * sizeof(uint32_t));
@@ -568,10 +571,22 @@ void jl_get_genericmemory_layout(jl_datatype_t *st)
568571
}
569572
if (needlock) {
570573
assert(al <= JL_SMALL_BYTE_ALIGNMENT);
571-
size_t offset = LLT_ALIGN(sizeof(jl_mutex_t), JL_SMALL_BYTE_ALIGNMENT);
572-
elsz += offset;
574+
size_t lock_offset = LLT_ALIGN(sizeof(jl_mutex_t), JL_SMALL_BYTE_ALIGNMENT);
575+
elsz += lock_offset;
576+
if (al < sizeof(void*)) {
577+
al = sizeof(void*);
578+
elsz = LLT_ALIGN(elsz, al);
579+
}
573580
haspadding = 1;
574581
zi = 1;
582+
// Adjust pointer offsets to account for the lock at the beginning
583+
if (first_ptr != -1) {
584+
uint32_t lock_offset_words = lock_offset / sizeof(void*);
585+
first_ptr += lock_offset_words;
586+
for (int j = 0; j < npointers; j++) {
587+
pointers[j] += lock_offset_words;
588+
}
589+
}
575590
}
576591
}
577592
else {
@@ -580,13 +595,17 @@ void jl_get_genericmemory_layout(jl_datatype_t *st)
580595
zi = 1;
581596
}
582597

583-
int arrayelem;
598+
// arrayelem is a bitfield: 1=isboxed, 2=isunion, 4=isatomic, 8=islocked
599+
int arrayelem = 0;
584600
if (!isunboxed)
585-
arrayelem = 1;
586-
else if (isunion)
587-
arrayelem = 2;
588-
else
589-
arrayelem = 0;
601+
arrayelem |= 1; // arrayelem_isboxed
602+
if (isunion)
603+
arrayelem |= 2; // arrayelem_isunion
604+
if (kind == (jl_value_t*)jl_atomic_sym) {
605+
arrayelem |= 4; // arrayelem_isatomic
606+
if (needlock)
607+
arrayelem |= 8; // arrayelem_islocked
608+
}
590609
assert(!st->layout);
591610
st->layout = jl_get_layout(elsz, nfields, npointers, al, haspadding, isbitsegal, arrayelem, NULL, pointers);
592611
st->zeroinit = zi;
@@ -647,17 +666,17 @@ void jl_compute_field_offsets(jl_datatype_t *st)
647666
// if we have no fields, we can trivially skip the rest
648667
if (st == jl_symbol_type || st == jl_string_type) {
649668
// opaque layout - heap-allocated blob
650-
static const jl_datatype_layout_t opaque_byte_layout = {0, 0, 1, -1, 1, { .haspadding = 0, .fielddesc_type=0, .isbitsegal=1, .arrayelem_isboxed=0, .arrayelem_isunion=0 }};
669+
static const jl_datatype_layout_t opaque_byte_layout = {0, 0, 1, -1, 1, { .isbitsegal=1 }};
651670
st->layout = &opaque_byte_layout;
652671
return;
653672
}
654673
else if (st == jl_simplevector_type || st == jl_module_type) {
655-
static const jl_datatype_layout_t opaque_ptr_layout = {0, 0, 1, -1, sizeof(void*), { .haspadding = 0, .fielddesc_type=0, .isbitsegal=1, .arrayelem_isboxed=0, .arrayelem_isunion=0 }};
674+
static const jl_datatype_layout_t opaque_ptr_layout = {0, 0, 1, -1, sizeof(void*), { .isbitsegal=1 }};
656675
st->layout = &opaque_ptr_layout;
657676
return;
658677
}
659678
else {
660-
static const jl_datatype_layout_t singleton_layout = {0, 0, 0, -1, 1, { .haspadding = 0, .fielddesc_type=0, .isbitsegal=1, .arrayelem_isboxed=0, .arrayelem_isunion=0 }};
679+
static const jl_datatype_layout_t singleton_layout = {0, 0, 0, -1, 1, { .isbitsegal=1 }};
661680
st->layout = &singleton_layout;
662681
}
663682
}
@@ -1001,6 +1020,8 @@ JL_DLLEXPORT jl_datatype_t * jl_new_foreign_type(jl_sym_t *name,
10011020
layout->flags.padding = 0;
10021021
layout->flags.arrayelem_isboxed = 0;
10031022
layout->flags.arrayelem_isunion = 0;
1023+
layout->flags.arrayelem_isatomic = 0;
1024+
layout->flags.arrayelem_islocked = 0;
10041025
jl_fielddescdyn_t * desc =
10051026
(jl_fielddescdyn_t *) ((char *)layout + sizeof(*layout));
10061027
desc->markfunc = markfunc;

src/genericmemory.c

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -262,8 +262,8 @@ JL_DLLEXPORT void jl_genericmemory_copyto(jl_genericmemory_t *dest, char* destda
262262

263263
JL_DLLEXPORT jl_value_t *jl_genericmemoryref(jl_genericmemory_t *mem, size_t i)
264264
{
265-
int isatomic = (jl_tparam0(jl_typetagof(mem)) == (jl_value_t*)jl_atomic_sym);
266265
const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(mem))->layout;
266+
int isatomic = layout->flags.arrayelem_isatomic || layout->flags.arrayelem_islocked;
267267
jl_genericmemoryref_t m;
268268
m.mem = mem;
269269
m.ptr_or_offset = (layout->flags.arrayelem_isunion || layout->size == 0) ? (void*)i : (void*)((char*)mem->ptr + layout->size * i);
@@ -342,8 +342,8 @@ static jl_value_t *jl_ptrmemrefget(jl_genericmemoryref_t m JL_PROPAGATES_ROOT, i
342342

343343
JL_DLLEXPORT jl_value_t *jl_memoryrefget(jl_genericmemoryref_t m, int isatomic)
344344
{
345-
assert(isatomic == (jl_tparam0(jl_typetagof(m.mem)) == (jl_value_t*)jl_atomic_sym));
346345
const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m.mem))->layout;
346+
assert(isatomic == (layout->flags.arrayelem_isatomic || layout->flags.arrayelem_islocked));
347347
if (layout->flags.arrayelem_isboxed)
348348
return jl_ptrmemrefget(m, isatomic);
349349
jl_value_t *eltype = jl_tparam1(jl_typetagof(m.mem));
@@ -365,7 +365,7 @@ JL_DLLEXPORT jl_value_t *jl_memoryrefget(jl_genericmemoryref_t m, int isatomic)
365365
assert(data - (char*)m.mem->ptr < layout->size * m.mem->length);
366366
jl_value_t *r;
367367
size_t fsz = jl_datatype_size(eltype);
368-
int needlock = isatomic && fsz > MAX_ATOMIC_SIZE;
368+
int needlock = layout->flags.arrayelem_islocked;
369369
if (isatomic && !needlock) {
370370
r = jl_atomic_new_bits(eltype, data);
371371
}
@@ -393,9 +393,6 @@ static int _jl_memoryref_isassigned(jl_genericmemoryref_t m, int isatomic)
393393
if (layout->flags.arrayelem_isboxed) {
394394
}
395395
else if (layout->first_ptr >= 0) {
396-
int needlock = isatomic && layout->size > MAX_ATOMIC_SIZE;
397-
if (needlock)
398-
elem = elem + LLT_ALIGN(sizeof(jl_mutex_t), JL_SMALL_BYTE_ALIGNMENT) / sizeof(jl_value_t*);
399396
elem = &elem[layout->first_ptr];
400397
}
401398
else {
@@ -411,15 +408,15 @@ JL_DLLEXPORT jl_value_t *jl_memoryref_isassigned(jl_genericmemoryref_t m, int is
411408

412409
JL_DLLEXPORT void jl_memoryrefset(jl_genericmemoryref_t m JL_ROOTING_ARGUMENT, jl_value_t *rhs JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED, int isatomic)
413410
{
414-
assert(isatomic == (jl_tparam0(jl_typetagof(m.mem)) == (jl_value_t*)jl_atomic_sym));
411+
const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m.mem))->layout;
412+
assert(isatomic == (layout->flags.arrayelem_isatomic || layout->flags.arrayelem_islocked));
415413
jl_value_t *eltype = jl_tparam1(jl_typetagof(m.mem));
416414
if (eltype != (jl_value_t*)jl_any_type && !jl_typeis(rhs, eltype)) {
417415
JL_GC_PUSH1(&rhs);
418416
if (!jl_isa(rhs, eltype))
419417
jl_type_error("memoryrefset!", eltype, rhs);
420418
JL_GC_POP();
421419
}
422-
const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m.mem))->layout;
423420
if (layout->flags.arrayelem_isboxed) {
424421
assert((char*)m.ptr_or_offset - (char*)m.mem->ptr < sizeof(jl_value_t*) * m.mem->length);
425422
if (isatomic)
@@ -449,7 +446,7 @@ JL_DLLEXPORT void jl_memoryrefset(jl_genericmemoryref_t m JL_ROOTING_ARGUMENT, j
449446
}
450447
if (layout->size != 0) {
451448
assert(data - (char*)m.mem->ptr < layout->size * m.mem->length);
452-
int needlock = isatomic && layout->size > MAX_ATOMIC_SIZE;
449+
int needlock = layout->flags.arrayelem_islocked;
453450
size_t fsz = jl_datatype_size((jl_datatype_t*)jl_typeof(rhs)); // need to shrink-wrap the final copy
454451
if (isatomic && !needlock) {
455452
jl_atomic_store_bits(data, rhs, fsz);

src/julia.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -575,10 +575,12 @@ typedef struct {
575575
// metadata bit only for GenericMemory eltype layout
576576
uint16_t arrayelem_isboxed : 1;
577577
uint16_t arrayelem_isunion : 1;
578+
uint16_t arrayelem_isatomic : 1;
579+
uint16_t arrayelem_islocked : 1;
578580
// If set, this type's egality can be determined entirely by comparing
579581
// the non-padding bits of this datatype.
580582
uint16_t isbitsegal : 1;
581-
uint16_t padding : 10;
583+
uint16_t padding : 8;
582584
} flags;
583585
// union {
584586
// jl_fielddesc8_t field8[nfields];
@@ -1668,6 +1670,8 @@ static inline int jl_field_isconst(jl_datatype_t *st, int i) JL_NOTSAFEPOINT
16681670
#define jl_is_addrspacecore(v) jl_typetagis(v,jl_addrspacecore_type)
16691671
#define jl_is_abioverride(v) jl_typetagis(v,jl_abioverride_type)
16701672
#define jl_genericmemory_isbitsunion(a) (((jl_datatype_t*)jl_typetagof(a))->layout->flags.arrayelem_isunion)
1673+
#define jl_genericmemory_isatomic(a) (((jl_datatype_t*)jl_typetagof(a))->layout->flags.arrayelem_isatomic)
1674+
#define jl_genericmemory_islocked(a) (((jl_datatype_t*)jl_typetagof(a))->layout->flags.arrayelem_islocked)
16711675
#define jl_is_array_any(v) jl_typetagis(v,jl_array_any_type)
16721676

16731677
JL_DLLEXPORT int jl_subtype(jl_value_t *a, jl_value_t *b);

src/rtutils.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1294,6 +1294,11 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
12941294
}
12951295
else {
12961296
char *ptr = ((char*)m->ptr) + j * layout->size;
1297+
if (layout->flags.arrayelem_islocked) {
1298+
// Skip the lock at the beginning for locked arrays
1299+
size_t lock_size = sizeof(jl_mutex_t);
1300+
ptr += lock_size;
1301+
}
12971302
n += jl_static_show_x_(out, (jl_value_t*)ptr,
12981303
(jl_datatype_t*)(typetagdata ? jl_nth_union_component(el_type, typetagdata[j]) : el_type),
12991304
depth, ctx);

0 commit comments

Comments
 (0)