Skip to content

Commit e3991f8

Browse files
committed
Review v2 - Recursive template and missing constexprs
1 parent 79a353e commit e3991f8

File tree

2 files changed

+31
-26
lines changed

2 files changed

+31
-26
lines changed

flang-rt/include/flang-rt/runtime/descriptor.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -453,9 +453,9 @@ template <int RANK = -1> class DescriptorIterator {
453453
public:
454454
RT_API_ATTRS DescriptorIterator(const Descriptor &descriptor)
455455
: descriptor(descriptor) {
456-
descriptor.GetLowerBounds(subscripts);
457-
if constexpr (RANK == 1) {
458-
elementOffset = descriptor.SubscriptByteOffset(0, subscripts[0]);
456+
// We do not need the subscripts to iterate over a rank-1 array
457+
if constexpr (RANK != 1) {
458+
descriptor.GetLowerBounds(subscripts);
459459
}
460460
};
461461

@@ -466,7 +466,7 @@ template <int RANK = -1> class DescriptorIterator {
466466
offset = elementOffset;
467467
// The compiler might be able to optimise this better if we know the rank
468468
// at compile time
469-
} else if (RANK != -1) {
469+
} else if constexpr (RANK != -1) {
470470
for (int j{0}; j < RANK; ++j) {
471471
offset += descriptor.SubscriptByteOffset(j, subscripts[j]);
472472
}
@@ -481,7 +481,7 @@ template <int RANK = -1> class DescriptorIterator {
481481
RT_API_ATTRS void Advance() {
482482
if constexpr (RANK == 1) {
483483
elementOffset += descriptor.GetDimension(0).ByteStride();
484-
} else if (RANK != -1) {
484+
} else if constexpr (RANK != -1) {
485485
for (int j{0}; j < RANK; ++j) {
486486
const Dimension &dim{descriptor.GetDimension(j)};
487487
if (subscripts[j]++ < dim.UpperBound()) {

flang-rt/lib/runtime/tools.cpp

Lines changed: 26 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -191,31 +191,36 @@ RT_API_ATTRS void ShallowCopyInner(const Descriptor &to, const Descriptor &from,
191191
}
192192
}
193193

194+
// Most arrays are much closer to rank-1 than to maxRank.
195+
// Doing the recursion upwards instead of downwards puts the more common
196+
// cases earlier in the if-chain and has a tangible impact on performance.
197+
template <typename P, int RANK> struct ShallowCopyRankSpecialize {
198+
static bool execute(const Descriptor &to, const Descriptor &from,
199+
bool toIsContiguous, bool fromIsContiguous) {
200+
if (to.rank() == RANK && from.rank() == RANK) {
201+
ShallowCopyInner<P, RANK>(to, from, toIsContiguous, fromIsContiguous);
202+
return true;
203+
}
204+
return ShallowCopyRankSpecialize<P, RANK + 1>::execute(
205+
to, from, toIsContiguous, fromIsContiguous);
206+
}
207+
};
208+
209+
template <typename P> struct ShallowCopyRankSpecialize<P, maxRank + 1> {
210+
static bool execute(const Descriptor &to, const Descriptor &from,
211+
bool toIsContiguous, bool fromIsContiguous) {
212+
return false;
213+
}
214+
};
215+
194216
// ShallowCopy helper for specialising the variants based on array rank
195217
template <typename P>
196218
RT_API_ATTRS void ShallowCopyRank(const Descriptor &to, const Descriptor &from,
197219
bool toIsContiguous, bool fromIsContiguous) {
198-
if (to.rank() == 1 && from.rank() == 1) {
199-
ShallowCopyInner<P, 1>(to, from, toIsContiguous, fromIsContiguous);
200-
} else if (to.rank() == 2 && from.rank() == 2) {
201-
ShallowCopyInner<P, 2>(to, from, toIsContiguous, fromIsContiguous);
202-
} else if (to.rank() == 3 && from.rank() == 3) {
203-
ShallowCopyInner<P, 3>(to, from, toIsContiguous, fromIsContiguous);
204-
} else if (to.rank() == 4 && from.rank() == 4) {
205-
ShallowCopyInner<P, 4>(to, from, toIsContiguous, fromIsContiguous);
206-
} else if (to.rank() == 5 && from.rank() == 5) {
207-
ShallowCopyInner<P, 5>(to, from, toIsContiguous, fromIsContiguous);
208-
} else if (to.rank() == 6 && from.rank() == 6) {
209-
ShallowCopyInner<P, 6>(to, from, toIsContiguous, fromIsContiguous);
210-
} else if (to.rank() == 7 && from.rank() == 7) {
211-
ShallowCopyInner<P, 7>(to, from, toIsContiguous, fromIsContiguous);
212-
} else if (to.rank() == 8 && from.rank() == 8) {
213-
ShallowCopyInner<P, 8>(to, from, toIsContiguous, fromIsContiguous);
214-
} else if (to.rank() == 9 && from.rank() == 9) {
215-
ShallowCopyInner<P, 9>(to, from, toIsContiguous, fromIsContiguous);
216-
} else if (to.rank() == 10 && from.rank() == 10) {
217-
ShallowCopyInner<P, 10>(to, from, toIsContiguous, fromIsContiguous);
218-
} else {
220+
// Try to call a specialised ShallowCopy variant from rank-1 up to maxRank
221+
bool specialized = ShallowCopyRankSpecialize<P, 1>::execute(
222+
to, from, toIsContiguous, fromIsContiguous);
223+
if (!specialized) {
219224
ShallowCopyInner<P>(to, from, toIsContiguous, fromIsContiguous);
220225
}
221226
}

0 commit comments

Comments
 (0)