@@ -46,71 +46,65 @@ __m128 test_mm_cvtph_ps(__m128i a) {
4646 return _mm_cvtph_ps (a );
4747}
4848
49- __m256 test_mm256_cvtph_ps (__m128i a ) {
50- // CHECK-LABEL: test_mm256_cvtph_ps
51- // CHECK: fpext <8 x half> %{{.*}} to <8 x float>
52- return _mm256_cvtph_ps (a );
53- }
54- TEST_CONSTEXPR (match_m256 (
55- _mm256_cvtph_ps (_mm_setr_epi16 (0x3C00 , 0x4000 , 0x4200 , 0x4400 , 0x4500 , 0x3800 , 0xC000 , 0x0000 )),
56- 1.0f , 2.0f , 3.0f , 4.0f , 5.0f , 0.5f , -2.0f , 0.0f
57- ));
58-
5949__m128i test_mm_cvtps_ph (__m128 a ) {
6050 // CHECK-LABEL: test_mm_cvtps_ph
6151 // CHECK: call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %{{.*}}, i32 0)
6252 return _mm_cvtps_ph (a , 0 );
6353}
6454
65- __m128i test_mm256_cvtps_ph (__m256 a ) {
66- // CHECK-LABEL: test_mm256_cvtps_ph
67- // CHECK: call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %{{.*}}, i32 0)
68- return _mm256_cvtps_ph (a , 0 );
69- }
70-
7155// A value exactly halfway between 1.0 and the next representable FP16 number.
7256// In binary, its significand ends in ...000, followed by a tie-bit 1.
7357#define POS_HALFWAY (1.0f + 0.00048828125f) // 1.0 + 2^-11, a tie-breaking case
7458
7559//
76- // __builtin_ia32_vcvtps2ph (128-bit, 4 floats -> 8 shorts, 4 are zero-padded)
60+ // _mm_cvtps_ph (128-bit, 4 floats -> 8 shorts, 4 are zero-padded)
7761//
7862// Test values: -2.5f, 1.123f, POS_HALFWAY
7963TEST_CONSTEXPR (match_v8hi (
80- __builtin_ia32_vcvtps2ph (_mm_setr_ps (-2.5f , 1.123f , POS_HALFWAY , 0.0f ), _MM_FROUND_TO_NEAREST_INT ),
64+ _mm_cvtps_ph (_mm_setr_ps (-2.5f , 1.123f , POS_HALFWAY , 0.0f ), _MM_FROUND_TO_NEAREST_INT ),
8165 0xC100 , 0x3C7E , 0x3C00 , 0x0000 , 0 , 0 , 0 , 0
8266));
8367TEST_CONSTEXPR (match_v8hi (
84- __builtin_ia32_vcvtps2ph (_mm_setr_ps (-2.5f , 1.123f , POS_HALFWAY , 0.0f ), _MM_FROUND_TO_NEG_INF ),
68+ _mm_cvtps_ph (_mm_setr_ps (-2.5f , 1.123f , POS_HALFWAY , 0.0f ), _MM_FROUND_TO_NEG_INF ),
8569 0xC100 , 0x3C7D , 0x3C00 , 0x0000 , 0 , 0 , 0 , 0
8670));
8771TEST_CONSTEXPR (match_v8hi (
88- __builtin_ia32_vcvtps2ph (_mm_setr_ps (-2.5f , 1.123f , POS_HALFWAY , 0.0f ), _MM_FROUND_TO_POS_INF ),
72+ _mm_cvtps_ph (_mm_setr_ps (-2.5f , 1.123f , POS_HALFWAY , 0.0f ), _MM_FROUND_TO_POS_INF ),
8973 0xC100 , 0x3C7E , 0x3C01 , 0x0000 , 0 , 0 , 0 , 0
9074));
9175TEST_CONSTEXPR (match_v8hi (
92- __builtin_ia32_vcvtps2ph (_mm_setr_ps (-2.5f , 1.123f , POS_HALFWAY , 0.0f ), _MM_FROUND_TO_ZERO ),
76+ _mm_cvtps_ph (_mm_setr_ps (-2.5f , 1.123f , POS_HALFWAY , 0.0f ), _MM_FROUND_TO_ZERO ),
9377 0xC100 , 0x3C7D , 0x3C00 , 0x0000 , 0 , 0 , 0 , 0
9478));
9579
80+ __m256 test_mm256_cvtph_ps (__m128i a ) {
81+ // CHECK-LABEL: test_mm256_cvtph_ps
82+ // CHECK: fpext <8 x half> %{{.*}} to <8 x float>
83+ return _mm256_cvtph_ps (a );
84+ }
85+ TEST_CONSTEXPR (match_m256 (
86+ _mm256_cvtph_ps (_mm_setr_epi16 (0x3C00 , 0x4000 , 0x4200 , 0x4400 , 0x4500 , 0x3800 , 0xC000 , 0x0000 )),
87+ 1.0f , 2.0f , 3.0f , 4.0f , 5.0f , 0.5f , -2.0f , 0.0f
88+ ));
89+
9690//
97- // __builtin_ia32_vcvtps2ph256 (256-bit, 8 floats -> 8 shorts)
91+ // _mm256_cvtps_ph (256-bit, 8 floats -> 8 shorts)
9892//
9993// Test values: -2.5f, 1.123f, POS_HALFWAY
10094TEST_CONSTEXPR (match_v8hi (
101- __builtin_ia32_vcvtps2ph256 (_mm256_setr_ps (-2.5f , 1.123f , POS_HALFWAY , 0.0f , -2.5f , 1.123f , POS_HALFWAY , 0.0f ), _MM_FROUND_TO_NEAREST_INT ),
95+ _mm256_cvtps_ph (_mm256_setr_ps (-2.5f , 1.123f , POS_HALFWAY , 0.0f , -2.5f , 1.123f , POS_HALFWAY , 0.0f ), _MM_FROUND_TO_NEAREST_INT ),
10296 0xC100 , 0x3C7E , 0x3C00 , 0x0000 , 0xC100 , 0x3C7E , 0x3C00 , 0x0000
10397));
10498TEST_CONSTEXPR (match_v8hi (
105- __builtin_ia32_vcvtps2ph256 (_mm256_setr_ps (-2.5f , 1.123f , POS_HALFWAY , 0.0f , -2.5f , 1.123f , POS_HALFWAY , 0.0f ), _MM_FROUND_TO_NEG_INF ),
99+ _mm256_cvtps_ph (_mm256_setr_ps (-2.5f , 1.123f , POS_HALFWAY , 0.0f , -2.5f , 1.123f , POS_HALFWAY , 0.0f ), _MM_FROUND_TO_NEG_INF ),
106100 0xC100 , 0x3C7D , 0x3C00 , 0x0000 , 0xC100 , 0x3C7D , 0x3C00 , 0x0000
107101));
108102TEST_CONSTEXPR (match_v8hi (
109- __builtin_ia32_vcvtps2ph256 (_mm256_setr_ps (-2.5f , 1.123f , POS_HALFWAY , 0.0f , -2.5f , 1.123f , POS_HALFWAY , 0.0f ), _MM_FROUND_TO_POS_INF ),
103+ _mm256_cvtps_ph (_mm256_setr_ps (-2.5f , 1.123f , POS_HALFWAY , 0.0f , -2.5f , 1.123f , POS_HALFWAY , 0.0f ), _MM_FROUND_TO_POS_INF ),
110104 0xC100 , 0x3C7E , 0x3C01 , 0x0000 , 0xC100 , 0x3C7E , 0x3C01 , 0x0000
111105));
112106TEST_CONSTEXPR (match_v8hi (
113- __builtin_ia32_vcvtps2ph256 (_mm256_setr_ps (-2.5f , 1.123f , POS_HALFWAY , 0.0f , -2.5f , 1.123f , POS_HALFWAY , 0.0f ), _MM_FROUND_TO_ZERO ),
107+ _mm256_cvtps_ph (_mm256_setr_ps (-2.5f , 1.123f , POS_HALFWAY , 0.0f , -2.5f , 1.123f , POS_HALFWAY , 0.0f ), _MM_FROUND_TO_ZERO ),
114108 0xC100 , 0x3C7D , 0x3C00 , 0x0000 , 0xC100 , 0x3C7D , 0x3C00 , 0x0000
115109));
116110
@@ -123,4 +117,10 @@ TEST_CONSTEXPR(match_v8hi(
123117TEST_CONSTEXPR (match_v8hi (
124118 __builtin_ia32_vcvtps2ph256 (_mm256_setr_ps (-2.5f , 0.125f , -16.0f , 0.0f , -2.5f , 0.125f , -16.0f , 0.0f ), _MM_FROUND_CUR_DIRECTION ),
125119 0xC100 , 0x3000 , 0xCC00 , 0x0000 , 0xC100 , 0x3000 , 0xCC00 , 0x0000
126- ));
120+ ));
121+
122+ __m128i test_mm256_cvtps_ph (__m256 a ) {
123+ // CHECK-LABEL: test_mm256_cvtps_ph
124+ // CHECK: call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %{{.*}}, i32 0)
125+ return _mm256_cvtps_ph (a , 0 );
126+ }
0 commit comments