@@ -259,6 +259,8 @@ unsafe fn test_simd() {
259
259
test_mm_insert_epi16 ( ) ;
260
260
test_mm_shuffle_epi8 ( ) ;
261
261
262
+ test_mm_cmpestri ( ) ;
263
+
262
264
test_mm256_shuffle_epi8 ( ) ;
263
265
test_mm256_permute2x128_si256 ( ) ;
264
266
test_mm256_permutevar8x32_epi32 ( ) ;
@@ -430,6 +432,29 @@ unsafe fn test_mm_shuffle_epi8() {
430
432
assert_eq_m128i ( r, expected) ;
431
433
}
432
434
435
// Builds an `__m128i` from a byte string of at most 16 bytes.
//
// `_mm_loadu_si128` always reads a full 16 bytes, so a `&[u8]` shorter than
// that cannot be loaded directly. Rather than loading and then mutating the
// `__m128i`, the input is copied into a zero-initialized 16-byte local buffer
// and that buffer is loaded; bytes past `s.len()` are therefore zero.
//
// Panics if `s` is longer than 16 bytes.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse4.2")]
unsafe fn str_to_m128i(s: &[u8]) -> __m128i {
    assert!(s.len() <= 16);
    let mut buf = [0u8; 16];
    // Safe, bounds-checked copy; the tail of `buf` stays zero-filled.
    buf[..s.len()].copy_from_slice(s);
    // SAFETY: `buf` is a live, fully initialized 16-byte array, and
    // `_mm_loadu_si128` has no alignment requirement on its pointer.
    _mm_loadu_si128(buf.as_ptr() as *const _)
}
448
+
449
+ #[ cfg( target_arch = "x86_64" ) ]
450
+ #[ simd_test( enable = "sse4.2" ) ]
451
+ unsafe fn test_mm_cmpestri ( ) {
452
+ let a = str_to_m128i ( b"bar - garbage" ) ;
453
+ let b = str_to_m128i ( b"foobar" ) ;
454
+ let i = _mm_cmpestri :: < _SIDD_CMP_EQUAL_ORDERED > ( a, 3 , b, 6 ) ;
455
+ assert_eq ! ( 3 , i) ;
456
+ }
457
+
433
458
#[ cfg( target_arch = "x86_64" ) ]
434
459
#[ target_feature( enable = "avx2" ) ]
435
460
unsafe fn test_mm256_shuffle_epi8 ( ) {
0 commit comments