20
20
#include < folly/algorithm/simd/Ignore.h>
21
21
#include < folly/algorithm/simd/Movemask.h>
22
22
#include < folly/algorithm/simd/detail/SimdPlatform.h>
23
- #include < folly/lang/Bits .h>
23
+ #include < folly/lang/SafeAssert .h>
24
24
25
25
#include < array>
26
26
@@ -96,6 +96,9 @@ struct SimdPlatformCommon {
96
96
template <typename Ignore>
97
97
static bool any (logical_t logical, Ignore ignore);
98
98
99
+ template <typename Ignore>
100
+ static bool all (logical_t logical, Ignore ignore);
101
+
99
102
/* *
100
103
* logical operations
101
104
**/
@@ -110,7 +113,7 @@ struct SimdPlatformCommon {
110
113
template <typename Platform>
111
114
template <typename Ignore>
112
115
FOLLY_ERASE auto SimdPlatformCommon<Platform>::loada(
113
- const scalar_t * ptr, Ignore) -> reg_t {
116
+ const scalar_t * ptr, [[maybe_unused]] Ignore ignore ) -> reg_t {
114
117
if constexpr (std::is_same_v<ignore_none, Ignore>) {
115
118
// There is not point to aligned load instructions
116
119
// on modern cpus. Arm doesn't even have any.
@@ -122,7 +125,25 @@ FOLLY_ERASE auto SimdPlatformCommon<Platform>::loada(
122
125
//
123
126
// Here is an explanation from Stephen Canon:
124
127
// https://stackoverflow.com/questions/25566302/vectorized-strlen-getting-away-with-reading-unallocated-memory
125
- return unsafeLoadu (ptr, ignore_none{});
128
+ if constexpr (!kIsSanitizeAddress ) {
129
+ return unsafeLoadu (ptr, ignore_none{});
130
+ } else {
131
+ // If the sanitizers are enabled, we want to trigger the issues.
132
+ // We also want to match the garbage values with/without asan,
133
+ // so that testing works on the same values as prod.
134
+ scalar_t buf[kCardinal ];
135
+ std::memcpy (
136
+ buf + ignore.first ,
137
+ ptr + ignore.first ,
138
+ (kCardinal - ignore.first - ignore.last ) * sizeof (scalar_t ));
139
+
140
+ auto testAgainst = loadu (buf, ignore_none{});
141
+ auto res = unsafeLoadu (ptr, ignore_none{});
142
+
143
+ // Extra sanity check.
144
+ FOLLY_SAFE_CHECK (all (Platform::equal (res, testAgainst), ignore));
145
+ return res;
146
+ }
126
147
}
127
148
}
128
149
@@ -162,6 +183,24 @@ FOLLY_ERASE bool SimdPlatformCommon<Platform>::any(
162
183
}
163
184
}
164
185
186
+ template <typename Platform>
187
+ template <typename Ignore>
188
+ FOLLY_ERASE bool SimdPlatformCommon<Platform>::all(
189
+ logical_t logical, Ignore ignore) {
190
+ if constexpr (std::is_same_v<Ignore, ignore_none>) {
191
+ return Platform::all (logical);
192
+ } else {
193
+ auto [bits, bitsPerElement] = movemask<scalar_t >(logical, ignore_none{});
194
+
195
+ auto expected = n_least_significant_bits<decltype (bits)>(
196
+ bitsPerElement * (kCardinal - ignore.last ));
197
+ expected =
198
+ clear_n_least_significant_bits (expected, ignore.first * bitsPerElement);
199
+
200
+ return (bits & expected) == expected;
201
+ }
202
+ }
203
+
165
204
template <typename Platform>
166
205
FOLLY_ERASE auto SimdPlatformCommon<Platform>::logical_or(
167
206
logical_t x, logical_t y) -> logical_t {
@@ -185,6 +224,8 @@ struct SimdSse42PlatformSpecific {
185
224
using reg_t = __m128i;
186
225
using logical_t = reg_t ;
187
226
227
+ static constexpr std::size_t kCardinal = sizeof (reg_t ) / sizeof (scalar_t );
228
+
188
229
FOLLY_ERASE
189
230
static reg_t loadu (const scalar_t * p) {
190
231
return _mm_loadu_si128 (reinterpret_cast <const reg_t *>(p));
@@ -238,7 +279,16 @@ struct SimdSse42PlatformSpecific {
238
279
}
239
280
240
281
FOLLY_ERASE
241
- static bool any (logical_t log) { return movemask<std::uint8_t >(log).first ; }
282
+ static bool any (logical_t log) { return movemask<scalar_t >(log).first ; }
283
+
284
+ #if 0 // disabled untill we have a test where this is relevant
285
+ FOLLY_ERASE
286
+ static bool all(logical_t log) {
287
+ auto [bits, bitsPerElement] = movemask<scalar_t>(log);
288
+ return movemask<scalar_t>(log) ==
289
+ n_least_significant_bits<decltype(bits)>(kCardinal * bitsPerElement);
290
+ }
291
+ #endif
242
292
};
243
293
244
294
#define FOLLY_DETAIL_HAS_SIMD_PLATFORM 1
@@ -254,6 +304,8 @@ struct SimdAvx2PlatformSpecific {
254
304
using reg_t = __m256i;
255
305
using logical_t = reg_t ;
256
306
307
+ static constexpr std::size_t kCardinal = sizeof (reg_t ) / sizeof (scalar_t );
308
+
257
309
FOLLY_ERASE
258
310
static reg_t loadu (const scalar_t * p) {
259
311
return _mm256_loadu_si256 (reinterpret_cast <const reg_t *>(p));
@@ -306,9 +358,16 @@ struct SimdAvx2PlatformSpecific {
306
358
}
307
359
308
360
FOLLY_ERASE
309
- static bool any (logical_t log) {
310
- return simd::movemask<std::uint8_t >(log).first ;
361
+ static bool any (logical_t log) { return simd::movemask<scalar_t >(log).first ; }
362
+
363
+ #if 0 // disabled untill we have a test where this is relevant
364
+ FOLLY_ERASE
365
+ static bool all(logical_t log) {
366
+ auto [bits, bitsPerElement] = movemask<scalar_t>(log);
367
+ return movemask<scalar_t>(log) ==
368
+ n_least_significant_bits<decltype(bits)>(kCardinal * bitsPerElement);
311
369
}
370
+ #endif
312
371
};
313
372
314
373
template <typename T>
@@ -420,6 +479,19 @@ struct SimdAarch64PlatformSpecific {
420
479
auto u64 = bit_cast<uint64x2_t >(u32 );
421
480
return vgetq_lane_u64 (u64 , 0 );
422
481
}
482
+
483
+ #if 0 // disabled untill we have a test where this is relevant
484
+ FOLLY_ERASE
485
+ static bool all(logical_t log) {
486
+ // Not quite what they did in .Net runtime, but
487
+ // should be close.
488
+ // https://github.com/dotnet/runtime/pull/75864
489
+ auto u32 = bit_cast<uint32x4_t>(log);
490
+ u32 = vpminq_u32(u32, u32);
491
+ auto u64 = bit_cast<uint64x2_t>(u32);
492
+ return u64 == n_least_significant_bits<std::uint64_t>(64);
493
+ }
494
+ #endif
423
495
};
424
496
425
497
#define FOLLY_DETAIL_HAS_SIMD_PLATFORM 1
0 commit comments