Skip to content

Commit bad6e80

Browse files
committed
Finished addressing review comments
1 parent 5417390 commit bad6e80

File tree

3 files changed

+31
-33
lines changed

3 files changed

+31
-33
lines changed

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3053,9 +3053,8 @@ static bool interp__builtin_ia32_vcvtps2ph(InterpState &S, CodePtr OpPC,
30533053

30543054
QualType DstElemQT = Dst.getFieldDesc()->getElemQualType();
30553055
PrimType DstElemT = *S.getContext().classify(DstElemQT);
3056-
bool DstIsUnsigned = DstElemQT->isUnsignedIntegerOrEnumerationType();
30573056

3058-
for (unsigned I = 0; I < SrcNumElems; ++I) {
3057+
for (unsigned I = 0; I != SrcNumElems; ++I) {
30593058
Floating SrcVal = Src.elem<Floating>(I);
30603059
APFloat DstVal = SrcVal.getAPFloat();
30613060

@@ -3069,7 +3068,8 @@ static bool interp__builtin_ia32_vcvtps2ph(InterpState &S, CodePtr OpPC,
30693068
}
30703069

30713070
INT_TYPE_SWITCH_NO_BOOL(DstElemT, {
3072-
// FIX: Extract the integer value before calling 'from'.
3071+
// Convert the destination value's bit pattern to an unsigned integer,
3072+
// then reconstruct the element using the target type's 'from' method.
30733073
uint64_t RawBits = DstVal.bitcastToAPInt().getZExtValue();
30743074
Dst.elem<T>(I) = T::from(RawBits);
30753075
});
@@ -3078,7 +3078,7 @@ static bool interp__builtin_ia32_vcvtps2ph(InterpState &S, CodePtr OpPC,
30783078
// Zero out remaining elements if the destination has more elements
30793079
// (e.g., vcvtps2ph converting 4 floats to 8 shorts).
30803080
if (DstNumElems > SrcNumElems) {
3081-
for (unsigned I = SrcNumElems; I < DstNumElems; ++I) {
3081+
for (unsigned I = SrcNumElems; I != DstNumElems; ++I) {
30823082
INT_TYPE_SWITCH_NO_BOOL(DstElemT, { Dst.elem<T>(I) = T::from(0); });
30833083
}
30843084
}

clang/lib/AST/ExprConstant.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12453,8 +12453,6 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1245312453
if (!EvaluateInteger(E->getArg(1), Imm, Info))
1245412454
return false;
1245512455

12456-
assert(SrcVec.isVector());
12457-
1245812456
const auto *SrcVTy = E->getArg(0)->getType()->castAs<VectorType>();
1245912457
unsigned SrcNumElems = SrcVTy->getNumElements();
1246012458
const auto *DstVTy = E->getType()->castAs<VectorType>();

clang/test/CodeGen/X86/f16c-builtins.c

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -46,71 +46,65 @@ __m128 test_mm_cvtph_ps(__m128i a) {
4646
return _mm_cvtph_ps(a);
4747
}
4848

49-
__m256 test_mm256_cvtph_ps(__m128i a) {
50-
// CHECK-LABEL: test_mm256_cvtph_ps
51-
// CHECK: fpext <8 x half> %{{.*}} to <8 x float>
52-
return _mm256_cvtph_ps(a);
53-
}
54-
TEST_CONSTEXPR(match_m256(
55-
_mm256_cvtph_ps(_mm_setr_epi16(0x3C00, 0x4000, 0x4200, 0x4400, 0x4500, 0x3800, 0xC000, 0x0000)),
56-
1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 0.5f, -2.0f, 0.0f
57-
));
58-
5949
__m128i test_mm_cvtps_ph(__m128 a) {
6050
// CHECK-LABEL: test_mm_cvtps_ph
6151
// CHECK: call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %{{.*}}, i32 0)
6252
return _mm_cvtps_ph(a, 0);
6353
}
6454

65-
__m128i test_mm256_cvtps_ph(__m256 a) {
66-
// CHECK-LABEL: test_mm256_cvtps_ph
67-
// CHECK: call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %{{.*}}, i32 0)
68-
return _mm256_cvtps_ph(a, 0);
69-
}
70-
7155
// A value exactly halfway between 1.0 and the next representable FP16 number.
7256
// In binary, the significand bits that FP16 keeps end in ...000 and the first dropped bit is 1 (an exact tie).
7357
#define POS_HALFWAY (1.0f + 0.00048828125f) // 1.0 + 2^-11, a tie-breaking case
7458

7559
//
76-
// __builtin_ia32_vcvtps2ph (128-bit, 4 floats -> 8 shorts, 4 are zero-padded)
60+
// _mm_cvtps_ph (128-bit, 4 floats -> 8 shorts, 4 are zero-padded)
7761
//
7862
// Test values: -2.5f, 1.123f, POS_HALFWAY, 0.0f
7963
TEST_CONSTEXPR(match_v8hi(
80-
__builtin_ia32_vcvtps2ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEAREST_INT),
64+
_mm_cvtps_ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEAREST_INT),
8165
0xC100, 0x3C7E, 0x3C00, 0x0000, 0, 0, 0, 0
8266
));
8367
TEST_CONSTEXPR(match_v8hi(
84-
__builtin_ia32_vcvtps2ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEG_INF),
68+
_mm_cvtps_ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEG_INF),
8569
0xC100, 0x3C7D, 0x3C00, 0x0000, 0, 0, 0, 0
8670
));
8771
TEST_CONSTEXPR(match_v8hi(
88-
__builtin_ia32_vcvtps2ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_POS_INF),
72+
_mm_cvtps_ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_POS_INF),
8973
0xC100, 0x3C7E, 0x3C01, 0x0000, 0, 0, 0, 0
9074
));
9175
TEST_CONSTEXPR(match_v8hi(
92-
__builtin_ia32_vcvtps2ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_ZERO),
76+
_mm_cvtps_ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_ZERO),
9377
0xC100, 0x3C7D, 0x3C00, 0x0000, 0, 0, 0, 0
9478
));
9579

80+
__m256 test_mm256_cvtph_ps(__m128i a) {
81+
// CHECK-LABEL: test_mm256_cvtph_ps
82+
// CHECK: fpext <8 x half> %{{.*}} to <8 x float>
83+
return _mm256_cvtph_ps(a);
84+
}
85+
TEST_CONSTEXPR(match_m256(
86+
_mm256_cvtph_ps(_mm_setr_epi16(0x3C00, 0x4000, 0x4200, 0x4400, 0x4500, 0x3800, 0xC000, 0x0000)),
87+
1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 0.5f, -2.0f, 0.0f
88+
));
89+
9690
//
97-
// __builtin_ia32_vcvtps2ph256 (256-bit, 8 floats -> 8 shorts)
91+
// _mm256_cvtps_ph (256-bit, 8 floats -> 8 shorts)
9892
//
9993
// Test values: -2.5f, 1.123f, POS_HALFWAY, 0.0f (the same four values repeated twice)
10094
TEST_CONSTEXPR(match_v8hi(
101-
__builtin_ia32_vcvtps2ph256(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEAREST_INT),
95+
_mm256_cvtps_ph(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEAREST_INT),
10296
0xC100, 0x3C7E, 0x3C00, 0x0000, 0xC100, 0x3C7E, 0x3C00, 0x0000
10397
));
10498
TEST_CONSTEXPR(match_v8hi(
105-
__builtin_ia32_vcvtps2ph256(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEG_INF),
99+
_mm256_cvtps_ph(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEG_INF),
106100
0xC100, 0x3C7D, 0x3C00, 0x0000, 0xC100, 0x3C7D, 0x3C00, 0x0000
107101
));
108102
TEST_CONSTEXPR(match_v8hi(
109-
__builtin_ia32_vcvtps2ph256(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_POS_INF),
103+
_mm256_cvtps_ph(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_POS_INF),
110104
0xC100, 0x3C7E, 0x3C01, 0x0000, 0xC100, 0x3C7E, 0x3C01, 0x0000
111105
));
112106
TEST_CONSTEXPR(match_v8hi(
113-
__builtin_ia32_vcvtps2ph256(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_ZERO),
107+
_mm256_cvtps_ph(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_ZERO),
114108
0xC100, 0x3C7D, 0x3C00, 0x0000, 0xC100, 0x3C7D, 0x3C00, 0x0000
115109
));
116110

@@ -123,4 +117,10 @@ TEST_CONSTEXPR(match_v8hi(
123117
TEST_CONSTEXPR(match_v8hi(
124118
__builtin_ia32_vcvtps2ph256(_mm256_setr_ps(-2.5f, 0.125f, -16.0f, 0.0f, -2.5f, 0.125f, -16.0f, 0.0f), _MM_FROUND_CUR_DIRECTION),
125119
0xC100, 0x3000, 0xCC00, 0x0000, 0xC100, 0x3000, 0xCC00, 0x0000
126-
));
120+
));
121+
122+
__m128i test_mm256_cvtps_ph(__m256 a) {
123+
// CHECK-LABEL: test_mm256_cvtps_ph
124+
// CHECK: call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %{{.*}}, i32 0)
125+
return _mm256_cvtps_ph(a, 0);
126+
}

0 commit comments

Comments
 (0)