Skip to content

Commit 1141c40

Browse files
committed
Finished addressing review comments
1 parent c1b4300 commit 1141c40

File tree

3 files changed: +31 additions, -33 deletions

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3526,9 +3526,8 @@ static bool interp__builtin_ia32_vcvtps2ph(InterpState &S, CodePtr OpPC,
35263526

35273527
QualType DstElemQT = Dst.getFieldDesc()->getElemQualType();
35283528
PrimType DstElemT = *S.getContext().classify(DstElemQT);
3529-
bool DstIsUnsigned = DstElemQT->isUnsignedIntegerOrEnumerationType();
35303529

3531-
for (unsigned I = 0; I < SrcNumElems; ++I) {
3530+
for (unsigned I = 0; I != SrcNumElems; ++I) {
35323531
Floating SrcVal = Src.elem<Floating>(I);
35333532
APFloat DstVal = SrcVal.getAPFloat();
35343533

@@ -3542,7 +3541,8 @@ static bool interp__builtin_ia32_vcvtps2ph(InterpState &S, CodePtr OpPC,
35423541
}
35433542

35443543
INT_TYPE_SWITCH_NO_BOOL(DstElemT, {
3545-
// FIX: Extract the integer value before calling 'from'.
3544+
// Convert the destination value's bit pattern to an unsigned integer,
3545+
// then reconstruct the element using the target type's 'from' method.
35463546
uint64_t RawBits = DstVal.bitcastToAPInt().getZExtValue();
35473547
Dst.elem<T>(I) = T::from(RawBits);
35483548
});
@@ -3551,7 +3551,7 @@ static bool interp__builtin_ia32_vcvtps2ph(InterpState &S, CodePtr OpPC,
35513551
// Zero out remaining elements if the destination has more elements
35523552
// (e.g., vcvtps2ph converting 4 floats to 8 shorts).
35533553
if (DstNumElems > SrcNumElems) {
3554-
for (unsigned I = SrcNumElems; I < DstNumElems; ++I) {
3554+
for (unsigned I = SrcNumElems; I != DstNumElems; ++I) {
35553555
INT_TYPE_SWITCH_NO_BOOL(DstElemT, { Dst.elem<T>(I) = T::from(0); });
35563556
}
35573557
}

clang/lib/AST/ExprConstant.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13186,8 +13186,6 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1318613186
if (!EvaluateInteger(E->getArg(1), Imm, Info))
1318713187
return false;
1318813188

13189-
assert(SrcVec.isVector());
13190-
1319113189
const auto *SrcVTy = E->getArg(0)->getType()->castAs<VectorType>();
1319213190
unsigned SrcNumElems = SrcVTy->getNumElements();
1319313191
const auto *DstVTy = E->getType()->castAs<VectorType>();

clang/test/CodeGen/X86/f16c-builtins.c

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -46,71 +46,65 @@ __m128 test_mm_cvtph_ps(__m128i a) {
4646
return _mm_cvtph_ps(a);
4747
}
4848

49-
__m256 test_mm256_cvtph_ps(__m128i a) {
50-
// CHECK-LABEL: test_mm256_cvtph_ps
51-
// CHECK: fpext <8 x half> %{{.*}} to <8 x float>
52-
return _mm256_cvtph_ps(a);
53-
}
54-
TEST_CONSTEXPR(match_m256(
55-
_mm256_cvtph_ps(_mm_setr_epi16(0x3C00, 0x4000, 0x4200, 0x4400, 0x4500, 0x3800, 0xC000, 0x0000)),
56-
1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 0.5f, -2.0f, 0.0f
57-
));
58-
5949
__m128i test_mm_cvtps_ph(__m128 a) {
6050
// CHECK-LABEL: test_mm_cvtps_ph
6151
// CHECK: call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %{{.*}}, i32 0)
6252
return _mm_cvtps_ph(a, 0);
6353
}
6454

65-
__m128i test_mm256_cvtps_ph(__m256 a) {
66-
// CHECK-LABEL: test_mm256_cvtps_ph
67-
// CHECK: call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %{{.*}}, i32 0)
68-
return _mm256_cvtps_ph(a, 0);
69-
}
70-
7155
// A value exactly halfway between 1.0 and the next representable FP16 number.
7256
// In binary, its float significand has ten zero fraction bits (all that FP16 keeps), followed by a single tie bit of 1 and nothing after it.
7357
#define POS_HALFWAY (1.0f + 0.00048828125f) // 1.0 + 2^-11, a tie-breaking case
7458

7559
//
76-
// __builtin_ia32_vcvtps2ph (128-bit, 4 floats -> 8 shorts, 4 are zero-padded)
60+
// _mm_cvtps_ph (128-bit, 4 floats -> 8 shorts, 4 are zero-padded)
7761
//
7862
// Test values: -2.5f, 1.123f, POS_HALFWAY, 0.0f
7963
TEST_CONSTEXPR(match_v8hi(
80-
__builtin_ia32_vcvtps2ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEAREST_INT),
64+
_mm_cvtps_ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEAREST_INT),
8165
0xC100, 0x3C7E, 0x3C00, 0x0000, 0, 0, 0, 0
8266
));
8367
TEST_CONSTEXPR(match_v8hi(
84-
__builtin_ia32_vcvtps2ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEG_INF),
68+
_mm_cvtps_ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEG_INF),
8569
0xC100, 0x3C7D, 0x3C00, 0x0000, 0, 0, 0, 0
8670
));
8771
TEST_CONSTEXPR(match_v8hi(
88-
__builtin_ia32_vcvtps2ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_POS_INF),
72+
_mm_cvtps_ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_POS_INF),
8973
0xC100, 0x3C7E, 0x3C01, 0x0000, 0, 0, 0, 0
9074
));
9175
TEST_CONSTEXPR(match_v8hi(
92-
__builtin_ia32_vcvtps2ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_ZERO),
76+
_mm_cvtps_ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_ZERO),
9377
0xC100, 0x3C7D, 0x3C00, 0x0000, 0, 0, 0, 0
9478
));
9579

80+
__m256 test_mm256_cvtph_ps(__m128i a) {
81+
// CHECK-LABEL: test_mm256_cvtph_ps
82+
// CHECK: fpext <8 x half> %{{.*}} to <8 x float>
83+
return _mm256_cvtph_ps(a);
84+
}
85+
TEST_CONSTEXPR(match_m256(
86+
_mm256_cvtph_ps(_mm_setr_epi16(0x3C00, 0x4000, 0x4200, 0x4400, 0x4500, 0x3800, 0xC000, 0x0000)),
87+
1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 0.5f, -2.0f, 0.0f
88+
));
89+
9690
//
97-
// __builtin_ia32_vcvtps2ph256 (256-bit, 8 floats -> 8 shorts)
91+
// _mm256_cvtps_ph (256-bit, 8 floats -> 8 shorts)
9892
//
9993
// Test values: -2.5f, 1.123f, POS_HALFWAY, 0.0f (repeated in both 128-bit halves)
10094
TEST_CONSTEXPR(match_v8hi(
101-
__builtin_ia32_vcvtps2ph256(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEAREST_INT),
95+
_mm256_cvtps_ph(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEAREST_INT),
10296
0xC100, 0x3C7E, 0x3C00, 0x0000, 0xC100, 0x3C7E, 0x3C00, 0x0000
10397
));
10498
TEST_CONSTEXPR(match_v8hi(
105-
__builtin_ia32_vcvtps2ph256(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEG_INF),
99+
_mm256_cvtps_ph(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEG_INF),
106100
0xC100, 0x3C7D, 0x3C00, 0x0000, 0xC100, 0x3C7D, 0x3C00, 0x0000
107101
));
108102
TEST_CONSTEXPR(match_v8hi(
109-
__builtin_ia32_vcvtps2ph256(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_POS_INF),
103+
_mm256_cvtps_ph(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_POS_INF),
110104
0xC100, 0x3C7E, 0x3C01, 0x0000, 0xC100, 0x3C7E, 0x3C01, 0x0000
111105
));
112106
TEST_CONSTEXPR(match_v8hi(
113-
__builtin_ia32_vcvtps2ph256(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_ZERO),
107+
_mm256_cvtps_ph(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_ZERO),
114108
0xC100, 0x3C7D, 0x3C00, 0x0000, 0xC100, 0x3C7D, 0x3C00, 0x0000
115109
));
116110

@@ -123,4 +117,10 @@ TEST_CONSTEXPR(match_v8hi(
123117
TEST_CONSTEXPR(match_v8hi(
124118
__builtin_ia32_vcvtps2ph256(_mm256_setr_ps(-2.5f, 0.125f, -16.0f, 0.0f, -2.5f, 0.125f, -16.0f, 0.0f), _MM_FROUND_CUR_DIRECTION),
125119
0xC100, 0x3000, 0xCC00, 0x0000, 0xC100, 0x3000, 0xCC00, 0x0000
126-
));
120+
));
121+
122+
__m128i test_mm256_cvtps_ph(__m256 a) {
123+
// CHECK-LABEL: test_mm256_cvtps_ph
124+
// CHECK: call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %{{.*}}, i32 0)
125+
return _mm256_cvtps_ph(a, 0);
126+
}

0 commit comments

Comments
 (0)