@@ -114,111 +114,146 @@ RT_API_ATTRS void CheckIntegerKind(
114114 }
115115}
116116
117- template <bool RANK1 >
117+ template <typename P, int RANK >
118118RT_API_ATTRS void ShallowCopyDiscontiguousToDiscontiguous (
119119 const Descriptor &to, const Descriptor &from) {
120- DescriptorIterator<RANK1> toIt{to};
121- DescriptorIterator<RANK1> fromIt{from};
120+ DescriptorIterator<RANK> toIt{to};
121+ DescriptorIterator<RANK> fromIt{from};
122+ // Knowing the size at compile time can enable memcpy inlining optimisations
123+ constexpr std::size_t typeElementBytes{sizeof (P)};
124+ // We might still need to check the actual size as a fallback
122125 std::size_t elementBytes{to.ElementBytes ()};
123126 for (std::size_t n{to.Elements ()}; n-- > 0 ;
124127 toIt.Advance (), fromIt.Advance ()) {
125- // Checking the size at runtime and making sure the pointer passed to memcpy
126- // has a type that matches the element size makes it possible for the
127- // compiler to optimise out the memcpy calls altogether and can
128- // substantially improve performance for some applications.
129- if (elementBytes == 16 ) {
130- std::memcpy (toIt.template Get <__int128_t >(),
131- fromIt.template Get <__int128_t >(), elementBytes);
132- } else if (elementBytes == 8 ) {
133- std::memcpy (toIt.template Get <int64_t >(), fromIt.template Get <int64_t >(),
134- elementBytes);
135- } else if (elementBytes == 4 ) {
136- std::memcpy (toIt.template Get <int32_t >(), fromIt.template Get <int32_t >(),
137- elementBytes);
138- } else if (elementBytes == 2 ) {
139- std::memcpy (toIt.template Get <int16_t >(), fromIt.template Get <int16_t >(),
140- elementBytes);
128+ // typeElementBytes == 1 when P is a char - the non-specialised case
129+ if constexpr (typeElementBytes != 1 ) {
130+ std::memcpy (
131+ toIt.template Get <P>(), fromIt.template Get <P>(), typeElementBytes);
141132 } else {
142133 std::memcpy (
143- toIt.template Get <char >(), fromIt.template Get <char >(), elementBytes);
134+ toIt.template Get <P >(), fromIt.template Get <P >(), elementBytes);
144135 }
145136 }
146137}
147138
148- template <bool RANK1 >
139+ template <typename P, int RANK >
149140RT_API_ATTRS void ShallowCopyDiscontiguousToContiguous (
150141 const Descriptor &to, const Descriptor &from) {
151142 char *toAt{to.OffsetElement ()};
143+ constexpr std::size_t typeElementBytes{sizeof (P)};
152144 std::size_t elementBytes{to.ElementBytes ()};
153- DescriptorIterator<RANK1 > fromIt{from};
145+ DescriptorIterator<RANK > fromIt{from};
154146 for (std::size_t n{to.Elements ()}; n-- > 0 ;
155147 toAt += elementBytes, fromIt.Advance ()) {
156- if (elementBytes == 16 ) {
157- std::memcpy (toAt, fromIt.template Get <__int128_t >(), elementBytes);
158- } else if (elementBytes == 8 ) {
159- std::memcpy (toAt, fromIt.template Get <int64_t >(), elementBytes);
160- } else if (elementBytes == 4 ) {
161- std::memcpy (toAt, fromIt.template Get <int32_t >(), elementBytes);
162- } else if (elementBytes == 2 ) {
163- std::memcpy (toAt, fromIt.template Get <int16_t >(), elementBytes);
148+ if constexpr (typeElementBytes != 1 ) {
149+ std::memcpy (toAt, fromIt.template Get <P>(), typeElementBytes);
164150 } else {
165- std::memcpy (toAt, fromIt.template Get <char >(), elementBytes);
151+ std::memcpy (toAt, fromIt.template Get <P >(), elementBytes);
166152 }
167153 }
168154}
169155
170- template <bool RANK1 >
156+ template <typename P, int RANK >
171157RT_API_ATTRS void ShallowCopyContiguousToDiscontiguous (
172158 const Descriptor &to, const Descriptor &from) {
173159 char *fromAt{from.OffsetElement ()};
174- DescriptorIterator<RANK1> toIt{to};
160+ DescriptorIterator<RANK> toIt{to};
161+ constexpr std::size_t typeElementBytes{sizeof (P)};
175162 std::size_t elementBytes{to.ElementBytes ()};
176163 for (std::size_t n{to.Elements ()}; n-- > 0 ;
177164 toIt.Advance (), fromAt += elementBytes) {
178- if (elementBytes == 16 ) {
179- std::memcpy (toIt.template Get <__int128_t >(), fromAt, elementBytes);
180- } else if (elementBytes == 8 ) {
181- std::memcpy (toIt.template Get <int64_t >(), fromAt, elementBytes);
182- } else if (elementBytes == 4 ) {
183- std::memcpy (toIt.template Get <int32_t >(), fromAt, elementBytes);
184- } else if (elementBytes == 2 ) {
185- std::memcpy (toIt.template Get <int16_t >(), fromAt, elementBytes);
165+ if constexpr (typeElementBytes != 1 ) {
166+ std::memcpy (toIt.template Get <P>(), fromAt, typeElementBytes);
186167 } else {
187- std::memcpy (toIt.template Get <char >(), fromAt, elementBytes);
168+ std::memcpy (toIt.template Get <P >(), fromAt, elementBytes);
188169 }
189170 }
190171}
191172
192- RT_API_ATTRS void ShallowCopy (const Descriptor &to, const Descriptor &from,
173+ // ShallowCopy helper for calling the correct specialised variant based on
174+ // scenario
175+ template <typename P, int RANK = -1 >
176+ RT_API_ATTRS void ShallowCopyInner (const Descriptor &to, const Descriptor &from,
193177 bool toIsContiguous, bool fromIsContiguous) {
194178 if (toIsContiguous) {
195179 if (fromIsContiguous) {
196180 std::memcpy (to.OffsetElement (), from.OffsetElement (),
197181 to.Elements () * to.ElementBytes ());
198182 } else {
199- if (to.rank () == 1 && from.rank () == 1 ) {
200- ShallowCopyDiscontiguousToContiguous<true >(to, from);
201- } else {
202- ShallowCopyDiscontiguousToContiguous<false >(to, from);
203- }
183+ ShallowCopyDiscontiguousToContiguous<P, RANK>(to, from);
204184 }
205185 } else {
206186 if (fromIsContiguous) {
207- if (to.rank () == 1 && from.rank () == 1 ) {
208- ShallowCopyContiguousToDiscontiguous<true >(to, from);
209- } else {
210- ShallowCopyContiguousToDiscontiguous<false >(to, from);
211- }
187+ ShallowCopyContiguousToDiscontiguous<P, RANK>(to, from);
212188 } else {
213- if (to.rank () == 1 && from.rank () == 1 ) {
214- ShallowCopyDiscontiguousToDiscontiguous<true >(to, from);
215- } else {
216- ShallowCopyDiscontiguousToDiscontiguous<false >(to, from);
217- }
189+ ShallowCopyDiscontiguousToDiscontiguous<P, RANK>(to, from);
218190 }
219191 }
220192}
221193
194+ // ShallowCopy helper for specialising the variants based on array rank
195+ template <typename P>
196+ RT_API_ATTRS void ShallowCopyRank (const Descriptor &to, const Descriptor &from,
197+ bool toIsContiguous, bool fromIsContiguous) {
198+ if (to.rank () == 1 && from.rank () == 1 ) {
199+ ShallowCopyInner<P, 1 >(to, from, toIsContiguous, fromIsContiguous);
200+ } else if (to.rank () == 2 && from.rank () == 2 ) {
201+ ShallowCopyInner<P, 2 >(to, from, toIsContiguous, fromIsContiguous);
202+ } else if (to.rank () == 3 && from.rank () == 3 ) {
203+ ShallowCopyInner<P, 3 >(to, from, toIsContiguous, fromIsContiguous);
204+ } else if (to.rank () == 4 && from.rank () == 4 ) {
205+ ShallowCopyInner<P, 4 >(to, from, toIsContiguous, fromIsContiguous);
206+ } else if (to.rank () == 5 && from.rank () == 5 ) {
207+ ShallowCopyInner<P, 5 >(to, from, toIsContiguous, fromIsContiguous);
208+ } else if (to.rank () == 6 && from.rank () == 6 ) {
209+ ShallowCopyInner<P, 6 >(to, from, toIsContiguous, fromIsContiguous);
210+ } else if (to.rank () == 7 && from.rank () == 7 ) {
211+ ShallowCopyInner<P, 7 >(to, from, toIsContiguous, fromIsContiguous);
212+ } else if (to.rank () == 8 && from.rank () == 8 ) {
213+ ShallowCopyInner<P, 8 >(to, from, toIsContiguous, fromIsContiguous);
214+ } else if (to.rank () == 9 && from.rank () == 9 ) {
215+ ShallowCopyInner<P, 9 >(to, from, toIsContiguous, fromIsContiguous);
216+ } else if (to.rank () == 10 && from.rank () == 10 ) {
217+ ShallowCopyInner<P, 10 >(to, from, toIsContiguous, fromIsContiguous);
218+ } else {
219+ ShallowCopyInner<P>(to, from, toIsContiguous, fromIsContiguous);
220+ }
221+ }
222+
223+ RT_API_ATTRS void ShallowCopy (const Descriptor &to, const Descriptor &from,
224+ bool toIsContiguous, bool fromIsContiguous) {
225+ std::size_t elementBytes{to.ElementBytes ()};
226+ // Checking the type at runtime and making sure the pointer passed to memcpy
227+ // has a type that matches the element type makes it possible for the compiler
228+ // to optimise out the memcpy calls altogether and can substantially improve
229+ // performance for some applications.
230+ if (to.type ().IsInteger ()) {
231+ if (elementBytes == sizeof (int64_t )) {
232+ ShallowCopyRank<int64_t >(to, from, toIsContiguous, fromIsContiguous);
233+ } else if (elementBytes == sizeof (int32_t )) {
234+ ShallowCopyRank<int32_t >(to, from, toIsContiguous, fromIsContiguous);
235+ } else if (elementBytes == sizeof (int16_t )) {
236+ ShallowCopyRank<int16_t >(to, from, toIsContiguous, fromIsContiguous);
237+ #if defined USING_NATIVE_INT128_T
238+ } else if (elementBytes == sizeof (__int128_t )) {
239+ ShallowCopyRank<__int128_t >(to, from, toIsContiguous, fromIsContiguous);
240+ #endif
241+ } else {
242+ ShallowCopyRank<char >(to, from, toIsContiguous, fromIsContiguous);
243+ }
244+ } else if (to.type ().IsReal ()) {
245+ if (elementBytes == sizeof (double )) {
246+ ShallowCopyRank<double >(to, from, toIsContiguous, fromIsContiguous);
247+ } else if (elementBytes == sizeof (float )) {
248+ ShallowCopyRank<float >(to, from, toIsContiguous, fromIsContiguous);
249+ } else {
250+ ShallowCopyRank<char >(to, from, toIsContiguous, fromIsContiguous);
251+ }
252+ } else {
253+ ShallowCopyRank<char >(to, from, toIsContiguous, fromIsContiguous);
254+ }
255+ }
256+
222257RT_API_ATTRS void ShallowCopy (const Descriptor &to, const Descriptor &from) {
223258 ShallowCopy (to, from, to.IsContiguous (), from.IsContiguous ());
224259}
0 commit comments