Skip to content

Commit bce3d4d

Browse files
authored
Allow taking Matrix slices without an extra allocation (#56236)
Since changing Array to use Memory as the backing, we had the option of making non-Vector arrays more flexible, but had instead preserved the restriction that they must be zero offset and equal in length to the Memory. This results in extra complexity, restrictions, and allocations however, but doesn't gain many known benefits. Nanosoldier shows a decrease in performance on linear eachindex loops, which we theorize is due to a minor failure to CSE before SCEV or a lack of NUW/NSW on the length multiplication calculation.
1 parent ded3e3d commit bce3d4d

File tree

9 files changed

+36
-93
lines changed

9 files changed

+36
-93
lines changed

base/Base.jl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -354,14 +354,14 @@ include("set.jl")
354354
include("char.jl")
355355
function array_new_memory(mem::Memory{UInt8}, newlen::Int)
356356
# add an optimization to array_new_memory for StringVector
357-
if (@assume_effects :total @ccall jl_genericmemory_owner(mem::Any,)::Any) isa String
357+
if (@assume_effects :total @ccall jl_genericmemory_owner(mem::Any,)::Any) === mem
358+
# TODO: when implemented, this should use a memory growing call
359+
return typeof(mem)(undef, newlen)
360+
else
358361
# If data is in a String, keep it that way.
359362
# When implemented, this could use jl_gc_expand_string(oldstr, newlen) as an optimization
360363
str = _string_n(newlen)
361364
return (@assume_effects :total !:consistent @ccall jl_string_to_genericmemory(str::Any,)::Memory{UInt8})
362-
else
363-
# TODO: when implemented, this should use a memory growing call
364-
return typeof(mem)(undef, newlen)
365365
end
366366
end
367367
include("strings/basic.jl")

base/abstractarray.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1584,7 +1584,7 @@ their component parts. A typical definition for an array that wraps a parent is
15841584
`Base.dataids(C::CustomArray) = dataids(C.parent)`.
15851585
"""
15861586
dataids(A::AbstractArray) = (UInt(objectid(A)),)
1587-
dataids(A::Memory) = (B = ccall(:jl_genericmemory_owner, Any, (Any,), A); (UInt(pointer(B isa typeof(A) ? B : A)),))
1587+
dataids(A::Memory) = (UInt(A.ptr),)
15881588
dataids(A::Array) = dataids(A.ref.mem)
15891589
dataids(::AbstractRange) = ()
15901590
dataids(x) = ()

base/array.jl

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3141,10 +3141,6 @@ function _wrap(ref::MemoryRef{T}, dims::NTuple{N, Int}) where {T, N}
31413141
mem_len = length(mem) + 1 - memoryrefoffset(ref)
31423142
len = Core.checked_dims(dims...)
31433143
@boundscheck mem_len >= len || invalid_wrap_err(mem_len, dims, len)
3144-
if N != 1 && !(ref === GenericMemoryRef(mem) && len === mem_len)
3145-
mem = ccall(:jl_genericmemory_slice, Memory{T}, (Any, Ptr{Cvoid}, Int), mem, ref.ptr_or_offset, len)
3146-
ref = memoryref(mem)
3147-
end
31483144
return ref
31493145
end
31503146

base/essentials.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@ const Bottom = Union{}
99
# Define minimal array interface here to help code used in macros:
1010
length(a::Array{T, 0}) where {T} = 1
1111
length(a::Array{T, 1}) where {T} = getfield(a, :size)[1]
12-
length(a::Array) = getfield(getfield(getfield(a, :ref), :mem), :length)
12+
length(a::Array{T, 2}) where {T} = (sz = getfield(a, :size); sz[1] * sz[2])
13+
# other sizes are handled by generic prod definition for AbstractArray
1314
length(a::GenericMemory) = getfield(a, :length)
1415
throw_boundserror(A, I) = (@noinline; throw(BoundsError(A, I)))
1516

base/reshapedarray.jl

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -35,31 +35,33 @@ end
3535
length(R::ReshapedArrayIterator) = length(R.iter)
3636
eltype(::Type{<:ReshapedArrayIterator{I}}) where {I} = @isdefined(I) ? ReshapedIndex{eltype(I)} : Any
3737

38-
## reshape(::Array, ::Dims) returns an Array, except for isbitsunion eltypes (issue #28611)
38+
@noinline throw_dmrsa(dims, len) =
39+
throw(DimensionMismatch("new dimensions $(dims) must be consistent with array length $len"))
40+
41+
## reshape(::Array, ::Dims) returns a new Array (to avoid conditionally aliasing the structure, only the data)
3942
# reshaping to same # of dimensions
4043
@eval function reshape(a::Array{T,M}, dims::NTuple{N,Int}) where {T,N,M}
41-
throw_dmrsa(dims, len) =
42-
throw(DimensionMismatch("new dimensions $(dims) must be consistent with array length $len"))
4344
len = Core.checked_dims(dims...) # make sure prod(dims) doesn't overflow (and because of the comparison to length(a))
4445
if len != length(a)
4546
throw_dmrsa(dims, length(a))
4647
end
47-
isbitsunion(T) && return ReshapedArray(a, dims, ())
48-
if N == M && dims == size(a)
49-
return a
50-
end
5148
ref = a.ref
52-
if M == 1 && N !== 1
53-
mem = ref.mem::Memory{T}
54-
if !(ref === memoryref(mem) && len === mem.length)
55-
mem = ccall(:jl_genericmemory_slice, Memory{T}, (Any, Ptr{Cvoid}, Int), mem, ref.ptr_or_offset, len)
56-
ref = memoryref(mem)::typeof(ref)
57-
end
58-
end
59-
# or we could use `a = Array{T,N}(undef, ntuple(0, Val(N))); a.ref = ref; a.size = dims; return a` here
49+
# or we could use `a = Array{T,N}(undef, ntuple(i->0, Val(N))); a.ref = ref; a.size = dims; return a` here to avoid the eval
6050
return $(Expr(:new, :(Array{T,N}), :ref, :dims))
6151
end
6252

53+
## reshape!(::Array, ::Dims) returns the original array, but must have the same dimensions and length as the original
54+
# see also resize! for a similar operation that can change the length
55+
function reshape!(a::Array{T,N}, dims::NTuple{N,Int}) where {T,N}
56+
len = Core.checked_dims(dims...) # make sure prod(dims) doesn't overflow (and because of the comparison to length(a))
57+
if len != length(a)
58+
throw_dmrsa(dims, length(a))
59+
end
60+
setfield!(a, :dims, dims)
61+
return a
62+
end
63+
64+
6365

6466
"""
6567
reshape(A, dims...) -> AbstractArray

src/codegen.cpp

Lines changed: 1 addition & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1435,18 +1435,6 @@ static const auto jl_allocgenericmemory = new JuliaFunction<TypeFnContextAndSize
14351435
AttributeSet::get(C, RetAttrs),
14361436
None); },
14371437
};
1438-
static const auto jlarray_data_owner_func = new JuliaFunction<>{
1439-
XSTR(jl_array_data_owner),
1440-
[](LLVMContext &C) {
1441-
auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C);
1442-
return FunctionType::get(T_prjlvalue,
1443-
{T_prjlvalue}, false);
1444-
},
1445-
[](LLVMContext &C) { return AttributeList::get(C,
1446-
Attributes(C, {Attribute::ReadOnly, Attribute::NoUnwind}),
1447-
Attributes(C, {Attribute::NonNull}),
1448-
None); },
1449-
};
14501438
#define BOX_FUNC(ct,at,attrs,nbytes) \
14511439
static const auto box_##ct##_func = new JuliaFunction<>{ \
14521440
XSTR(jl_box_##ct), \
@@ -4341,8 +4329,7 @@ static bool emit_f_opmemory(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
43414329
}
43424330
Value *data_owner = NULL; // owner object against which the write barrier must check
43434331
if (isboxed || layout->first_ptr >= 0) { // if elements are just bits, don't need a write barrier
4344-
Value *V = emit_memoryref_FCA(ctx, ref, layout);
4345-
data_owner = emit_genericmemoryowner(ctx, CreateSimplifiedExtractValue(ctx, V, 1));
4332+
data_owner = emit_memoryref_mem(ctx, ref, layout);
43464333
}
43474334
*ret = typed_store(ctx,
43484335
ptr,

src/genericmemory.c

Lines changed: 1 addition & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,7 @@ JL_DLLEXPORT jl_value_t *jl_genericmemory_to_string(jl_genericmemory_t *m, size_
197197
if (how != 0) {
198198
jl_value_t *o = jl_genericmemory_data_owner_field(m);
199199
jl_genericmemory_data_owner_field(m) = NULL;
200-
if (how == 3 && jl_is_string(o) &&
200+
if (how == 3 && // implies jl_is_string(o)
201201
((mlength + sizeof(void*) + 1 <= GC_MAX_SZCLASS) == (len + sizeof(void*) + 1 <= GC_MAX_SZCLASS))) {
202202
if (jl_string_data(o)[len] != '\0')
203203
jl_string_data(o)[len] = '\0';
@@ -221,39 +221,6 @@ JL_DLLEXPORT jl_genericmemory_t *jl_alloc_memory_any(size_t n)
221221
return jl_alloc_genericmemory(jl_memory_any_type, n);
222222
}
223223

224-
JL_DLLEXPORT jl_genericmemory_t *jl_genericmemory_slice(jl_genericmemory_t *mem, void *data, size_t len)
225-
{
226-
// Given a GenericMemoryRef represented as `jl_genericmemory_ref ref = {data, mem}`,
227-
// return a new GenericMemory that only accesses the slice from the given GenericMemoryRef to
228-
// the given length if this is possible to return. This allows us to make
229-
// `length(Array)==length(Array.ref.mem)`, for simplification of this.
230-
jl_datatype_t *dt = (jl_datatype_t*)jl_typetagof(mem);
231-
const jl_datatype_layout_t *layout = dt->layout;
232-
// repeated checks here ensure the values cannot overflow, since we know mem->length is a reasonable value
233-
if (len > mem->length)
234-
jl_exceptionf(jl_argumenterror_type, "invalid GenericMemory slice"); // TODO: make a BoundsError
235-
if (layout->flags.arrayelem_isunion) {
236-
if (!((size_t)data == 0 && mem->length == len))
237-
jl_exceptionf(jl_argumenterror_type, "invalid GenericMemory slice"); // only exact slices are supported
238-
data = mem->ptr;
239-
}
240-
else if (layout->size == 0) {
241-
if ((size_t)data > mem->length || (size_t)data + len > mem->length)
242-
jl_exceptionf(jl_argumenterror_type, "invalid GenericMemory slice"); // TODO: make a BoundsError
243-
data = mem->ptr;
244-
}
245-
else {
246-
if (data < mem->ptr || (char*)data > (char*)mem->ptr + mem->length * layout->size || (char*)data + len * layout->size > (char*)mem->ptr + mem->length * layout->size)
247-
jl_exceptionf(jl_argumenterror_type, "invalid GenericMemory slice"); // TODO: make a BoundsError
248-
}
249-
jl_task_t *ct = jl_current_task;
250-
jl_genericmemory_t *newmem = (jl_genericmemory_t*)jl_gc_alloc(ct->ptls, sizeof(jl_genericmemory_t) + sizeof(void*), dt);
251-
newmem->length = len;
252-
newmem->ptr = data;
253-
jl_genericmemory_data_owner_field(newmem) = jl_genericmemory_owner(mem);
254-
return newmem;
255-
}
256-
257224
JL_DLLEXPORT void jl_genericmemory_copyto(jl_genericmemory_t *dest, char* destdata,
258225
jl_genericmemory_t *src, char* srcdata,
259226
size_t n) JL_NOTSAFEPOINT

src/julia.h

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1233,7 +1233,7 @@ STATIC_INLINE jl_value_t *jl_svecset(
12331233
0 = data is inlined
12341234
1 = owns the gc-managed data, exclusively (will free it)
12351235
2 = malloc-allocated pointer (does not own it)
1236-
3 = has a pointer to the object that owns the data pointer
1236+
3 = has a pointer to the String object that owns the data pointer (m must be isbits)
12371237
*/
12381238
STATIC_INLINE int jl_genericmemory_how(jl_genericmemory_t *m) JL_NOTSAFEPOINT
12391239
{
@@ -1249,8 +1249,6 @@ STATIC_INLINE int jl_genericmemory_how(jl_genericmemory_t *m) JL_NOTSAFEPOINT
12491249

12501250
STATIC_INLINE jl_value_t *jl_genericmemory_owner(jl_genericmemory_t *m JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT
12511251
{
1252-
if (jl_genericmemory_how(m) == 3)
1253-
return jl_genericmemory_data_owner_field(m);
12541252
return (jl_value_t*)m;
12551253
}
12561254

@@ -1280,8 +1278,6 @@ STATIC_INLINE jl_value_t *jl_genericmemory_ptr_set(
12801278
assert(i < m_->length);
12811279
jl_atomic_store_release(((_Atomic(jl_value_t*)*)(m_->ptr)) + i, (jl_value_t*)x);
12821280
if (x) {
1283-
if (jl_genericmemory_how(m_) == 3)
1284-
m = (void*)jl_genericmemory_data_owner_field(m_);
12851281
jl_gc_wb(m, x);
12861282
}
12871283
return (jl_value_t*)x;

src/staticdata.c

Lines changed: 9 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -932,7 +932,7 @@ static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_
932932
jl_genericmemory_t *m = (jl_genericmemory_t*)v;
933933
const char *data = (const char*)m->ptr;
934934
if (jl_genericmemory_how(m) == 3) {
935-
jl_queue_for_serialization_(s, jl_genericmemory_data_owner_field(v), 1, immediate);
935+
assert(jl_is_string(jl_genericmemory_data_owner_field(m)));
936936
}
937937
else if (layout->flags.arrayelem_isboxed) {
938938
size_t i, l = m->length;
@@ -1472,17 +1472,7 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
14721472
jl_genericmemory_t *m = (jl_genericmemory_t*)v;
14731473
const jl_datatype_layout_t *layout = t->layout;
14741474
size_t len = m->length;
1475-
if (jl_genericmemory_how(m) == 3 && jl_is_genericmemory(jl_genericmemory_data_owner_field(m))) {
1476-
jl_genericmemory_t *owner = (jl_genericmemory_t*)jl_genericmemory_data_owner_field(m);
1477-
size_t data = ((char*)m->ptr - (char*)owner->ptr); // relocation offset (bytes)
1478-
write_uint(f, len);
1479-
write_uint(f, data);
1480-
write_pointerfield(s, (jl_value_t*)owner);
1481-
// similar to record_memoryref, but the field is always an (offset) pointer
1482-
arraylist_push(&s->memowner_list, (void*)(reloc_offset + offsetof(jl_genericmemory_t, ptr))); // relocation location
1483-
arraylist_push(&s->memowner_list, NULL); // relocation target (ignored)
1484-
}
1485-
// else if (jl_genericmemory_how(m) == 3) {
1475+
// if (jl_genericmemory_how(m) == 3) {
14861476
// jl_value_t *owner = jl_genericmemory_data_owner_field(m);
14871477
// write_uint(f, len);
14881478
// write_pointerfield(s, owner);
@@ -1491,7 +1481,8 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
14911481
// assert(new_mem->ptr == NULL);
14921482
// new_mem->ptr = (void*)((char*)m->ptr - (char*)owner); // relocation offset
14931483
// }
1494-
else {
1484+
// else
1485+
{
14951486
size_t datasize = len * layout->size;
14961487
size_t tot = datasize;
14971488
int isbitsunion = layout->flags.arrayelem_isunion;
@@ -1538,10 +1529,13 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
15381529
ios_write(s->const_data, (char*)m->ptr, tot);
15391530
}
15401531
}
1541-
if (len == 0) // TODO: should we have a zero-page, instead of writing each type's fragment separately?
1532+
if (len == 0) { // TODO: should we have a zero-page, instead of writing each type's fragment separately?
15421533
write_padding(s->const_data, layout->size ? layout->size : isbitsunion);
1543-
else if (jl_genericmemory_how(m) == 3 && jl_is_string(jl_genericmemory_data_owner_field(m)))
1534+
}
1535+
else if (jl_genericmemory_how(m) == 3) {
1536+
assert(jl_is_string(jl_genericmemory_data_owner_field(m)));
15441537
write_padding(s->const_data, 1);
1538+
}
15451539
}
15461540
else {
15471541
// Pointer eltypes are encoded in the mutable data section

0 commit comments

Comments
 (0)