Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions flang-rt/include/flang-rt/runtime/descriptor.h
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,43 @@ class Descriptor {
};
static_assert(sizeof(Descriptor) == sizeof(ISO::CFI_cdesc_t));

// Lightweight iterator-like helper for stepping through the elements of a
// Descriptor. It exists to make it easy to specialise Descriptor indexing in
// cases where that can improve application performance.
//
// RANK1=false: the position is kept as a subscript vector; Get() resolves it
// through Descriptor::Element() and Advance() defers to
// Descriptor::IncrementSubscripts().
// RANK1=true: only dimension 0 is consulted; the position is a single byte
// offset that Get() passes to Descriptor::OffsetElement() and Advance() bumps
// by that dimension's byte stride.
//
// Because the purpose of this API is performance optimisation, nothing is
// checked here: it is up to the user to make sure the RANK1=true variant can
// be used safely and that Advance() is not called more times than the number
// of elements in the Descriptor allows for.
//
// NOTE(review): the functions that use this class are declared with
// RT_API_ATTRS; the members below presumably need the same attributes for
// builds where that macro is non-empty -- confirm.
template <bool RANK1 = false> class DescriptorIterator {
private:
  // Descriptor being traversed; held by reference, so it must outlive the
  // iterator.
  const Descriptor &descriptor;
  // Current position as subscripts. Filled in by the constructor; only
  // stepped by Advance() when RANK1 is false.
  SubscriptValue subscripts[maxRank];
  // Current position as a byte offset; only maintained when RANK1 is true.
  std::size_t elementOffset{0};

public:
  // Starts the iteration at the subscripts reported by GetLowerBounds().
  // Marked explicit so that a Descriptor never converts to an iterator
  // implicitly.
  explicit DescriptorIterator(const Descriptor &desc) : descriptor(desc) {
    descriptor.GetLowerBounds(subscripts);
    if constexpr (RANK1) {
      elementOffset = descriptor.SubscriptByteOffset(0, subscripts[0]);
    }
  }

  // Returns a pointer to the current element, typed as A.
  template <typename A> A *Get() {
    if constexpr (RANK1) {
      return descriptor.OffsetElement<A>(elementOffset);
    } else {
      return descriptor.Element<A>(subscripts);
    }
  }

  // Moves to the next element. No bounds check is performed.
  void Advance() {
    if constexpr (RANK1) {
      elementOffset += descriptor.GetDimension(0).ByteStride();
    } else {
      descriptor.IncrementSubscripts(subscripts);
    }
  }
};

// Properly configured instances of StaticDescriptor will occupy the
// exact amount of storage required for the descriptor, its dimensional
// information, and possible addendum. To build such a static descriptor,
Expand Down
3 changes: 3 additions & 0 deletions flang-rt/include/flang-rt/runtime/tools.h
Original file line number Diff line number Diff line change
Expand Up @@ -511,10 +511,13 @@ inline RT_API_ATTRS const char *FindCharacter(
// Copy payload data from one allocated descriptor to another.
// Assumes element counts and element sizes match, and that both
// descriptors are allocated.
// Each variant takes a RANK1 template argument that defaults to false, so
// calls that do not specify it keep compiling.
// NOTE(review): the callers visible in this change pass RANK1=true only when
// both `to` and `from` have rank 1 -- presumably that is a precondition of
// the RANK1=true instantiations; confirm against the definitions.
template <bool RANK1 = false>
RT_API_ATTRS void ShallowCopyDiscontiguousToDiscontiguous(
const Descriptor &to, const Descriptor &from);
template <bool RANK1 = false>
RT_API_ATTRS void ShallowCopyDiscontiguousToContiguous(
const Descriptor &to, const Descriptor &from);
template <bool RANK1 = false>
RT_API_ATTRS void ShallowCopyContiguousToDiscontiguous(
const Descriptor &to, const Descriptor &from);
RT_API_ATTRS void ShallowCopy(const Descriptor &to, const Descriptor &from,
Expand Down
20 changes: 15 additions & 5 deletions flang-rt/lib/runtime/assign.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -492,11 +492,21 @@ RT_API_ATTRS void Assign(Descriptor &to, const Descriptor &from,
terminator.Crash("unexpected type code %d in blank padded Assign()",
to.type().raw());
}
} else { // elemental copies, possibly with character truncation
for (std::size_t n{toElements}; n-- > 0;
to.IncrementSubscripts(toAt), from.IncrementSubscripts(fromAt)) {
memmoveFct(to.Element<char>(toAt), from.Element<const char>(fromAt),
toElementBytes);
} else {
// We can't simply call ShallowCopy due to edge cases such as character
// truncation or assignments where the RHS is a scalar.
if (toElementBytes == fromElementBytes && to.IsContiguous()) {
if (to.rank() == 1 && from.rank() == 1) {
ShallowCopyDiscontiguousToContiguous<true>(to, from);
} else {
ShallowCopyDiscontiguousToContiguous<false>(to, from);
}
} else {
if (to.rank() == 1 && from.rank() == 1) {
ShallowCopyDiscontiguousToDiscontiguous<true>(to, from);
} else {
ShallowCopyDiscontiguousToDiscontiguous<false>(to, from);
}
}
}
}
Expand Down
84 changes: 67 additions & 17 deletions flang-rt/lib/runtime/tools.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,40 +114,78 @@ RT_API_ATTRS void CheckIntegerKind(
}
}

// Copies to.Elements() elements of to.ElementBytes() bytes each from `from`
// into `to`, stepping through both descriptors one element at a time.
// RANK1 is forwarded to DescriptorIterator for both descriptors.
// NOTE(review): this diff view shows the pre-change lines (the toAt/fromAt
// subscript arrays, the IncrementSubscripts() loop step and the single
// Element<char>() memcpy) interleaved with the post-change lines (the
// DescriptorIterator objects, the Advance() loop step and the size-dispatched
// memcpy calls); only one set belongs to each version of the function.
template <bool RANK1>
RT_API_ATTRS void ShallowCopyDiscontiguousToDiscontiguous(
const Descriptor &to, const Descriptor &from) {
SubscriptValue toAt[maxRank], fromAt[maxRank];
to.GetLowerBounds(toAt);
from.GetLowerBounds(fromAt);
DescriptorIterator<RANK1> toIt{to};
DescriptorIterator<RANK1> fromIt{from};
std::size_t elementBytes{to.ElementBytes()};
for (std::size_t n{to.Elements()}; n-- > 0;
to.IncrementSubscripts(toAt), from.IncrementSubscripts(fromAt)) {
std::memcpy(
to.Element<char>(toAt), from.Element<char>(fromAt), elementBytes);
toIt.Advance(), fromIt.Advance()) {
// Checking the size at runtime and making sure the pointer passed to memcpy
// has a type that matches the element size makes it possible for the
// compiler to optimise out the memcpy calls altogether and can
// substantially improve performance for some applications.
// Sizes 16, 8, 4 and 2 get a same-sized integer pointer type; every other
// size falls back to char pointers.
if (elementBytes == 16) {
std::memcpy(toIt.template Get<__int128_t>(),
fromIt.template Get<__int128_t>(), elementBytes);
} else if (elementBytes == 8) {
std::memcpy(toIt.template Get<int64_t>(), fromIt.template Get<int64_t>(),
elementBytes);
} else if (elementBytes == 4) {
std::memcpy(toIt.template Get<int32_t>(), fromIt.template Get<int32_t>(),
elementBytes);
} else if (elementBytes == 2) {
std::memcpy(toIt.template Get<int16_t>(), fromIt.template Get<int16_t>(),
elementBytes);
} else {
std::memcpy(
toIt.template Get<char>(), fromIt.template Get<char>(), elementBytes);
}
}
}

// Copies to.Elements() elements of to.ElementBytes() bytes each from `from`
// into `to`. The destination is written as a dense run of bytes: the write
// pointer starts at to.OffsetElement() and moves forward by elementBytes per
// element, while the source is stepped element by element.
// RANK1 is forwarded to the DescriptorIterator used for `from`.
// NOTE(review): this diff view shows the pre-change lines (the fromAt
// subscript array, the IncrementSubscripts() loop step and the single
// Element<char>() memcpy) interleaved with the post-change lines (fromIt,
// the Advance() loop step and the size-dispatched memcpy calls); only one
// set belongs to each version of the function.
template <bool RANK1>
RT_API_ATTRS void ShallowCopyDiscontiguousToContiguous(
const Descriptor &to, const Descriptor &from) {
char *toAt{to.OffsetElement()};
SubscriptValue fromAt[maxRank];
from.GetLowerBounds(fromAt);
std::size_t elementBytes{to.ElementBytes()};
DescriptorIterator<RANK1> fromIt{from};
for (std::size_t n{to.Elements()}; n-- > 0;
toAt += elementBytes, from.IncrementSubscripts(fromAt)) {
std::memcpy(toAt, from.Element<char>(fromAt), elementBytes);
toAt += elementBytes, fromIt.Advance()) {
// The source pointer type is chosen to match the element size (16, 8, 4 or
// 2 bytes); every other size falls back to a char pointer.
if (elementBytes == 16) {
std::memcpy(toAt, fromIt.template Get<__int128_t>(), elementBytes);
} else if (elementBytes == 8) {
std::memcpy(toAt, fromIt.template Get<int64_t>(), elementBytes);
} else if (elementBytes == 4) {
std::memcpy(toAt, fromIt.template Get<int32_t>(), elementBytes);
} else if (elementBytes == 2) {
std::memcpy(toAt, fromIt.template Get<int16_t>(), elementBytes);
} else {
std::memcpy(toAt, fromIt.template Get<char>(), elementBytes);
}
}
}

// Copies to.Elements() elements of to.ElementBytes() bytes each from `from`
// into `to`. The source is read as a dense run of bytes: the read pointer
// starts at from.OffsetElement() and moves forward by elementBytes per
// element, while the destination is stepped element by element.
// RANK1 is forwarded to the DescriptorIterator used for `to`.
// NOTE(review): this diff view shows the pre-change lines (the toAt
// subscript array, the IncrementSubscripts() loop step and the single
// Element<char>() memcpy) interleaved with the post-change lines (toIt, the
// Advance() loop step and the size-dispatched memcpy calls); only one set
// belongs to each version of the function.
template <bool RANK1>
RT_API_ATTRS void ShallowCopyContiguousToDiscontiguous(
const Descriptor &to, const Descriptor &from) {
SubscriptValue toAt[maxRank];
to.GetLowerBounds(toAt);
char *fromAt{from.OffsetElement()};
DescriptorIterator<RANK1> toIt{to};
std::size_t elementBytes{to.ElementBytes()};
for (std::size_t n{to.Elements()}; n-- > 0;
to.IncrementSubscripts(toAt), fromAt += elementBytes) {
std::memcpy(to.Element<char>(toAt), fromAt, elementBytes);
toIt.Advance(), fromAt += elementBytes) {
// The destination pointer type is chosen to match the element size (16, 8,
// 4 or 2 bytes); every other size falls back to a char pointer.
if (elementBytes == 16) {
std::memcpy(toIt.template Get<__int128_t>(), fromAt, elementBytes);
} else if (elementBytes == 8) {
std::memcpy(toIt.template Get<int64_t>(), fromAt, elementBytes);
} else if (elementBytes == 4) {
std::memcpy(toIt.template Get<int32_t>(), fromAt, elementBytes);
} else if (elementBytes == 2) {
std::memcpy(toIt.template Get<int16_t>(), fromAt, elementBytes);
} else {
std::memcpy(toIt.template Get<char>(), fromAt, elementBytes);
}
}
}

Expand All @@ -158,13 +196,25 @@ RT_API_ATTRS void ShallowCopy(const Descriptor &to, const Descriptor &from,
std::memcpy(to.OffsetElement(), from.OffsetElement(),
to.Elements() * to.ElementBytes());
} else {
ShallowCopyDiscontiguousToContiguous(to, from);
if (to.rank() == 1 && from.rank() == 1) {
ShallowCopyDiscontiguousToContiguous<true>(to, from);
} else {
ShallowCopyDiscontiguousToContiguous<false>(to, from);
}
}
} else {
if (fromIsContiguous) {
ShallowCopyContiguousToDiscontiguous(to, from);
if (to.rank() == 1 && from.rank() == 1) {
ShallowCopyContiguousToDiscontiguous<true>(to, from);
} else {
ShallowCopyContiguousToDiscontiguous<false>(to, from);
}
} else {
ShallowCopyDiscontiguousToDiscontiguous(to, from);
if (to.rank() == 1 && from.rank() == 1) {
ShallowCopyDiscontiguousToDiscontiguous<true>(to, from);
} else {
ShallowCopyDiscontiguousToDiscontiguous<false>(to, from);
}
}
}
}
Expand Down
Loading