Skip to content

Commit b96fa9f

Browse files
authored
[clang][AArch64] Use .i16.f16 intrinsic formats for vcvth*_[s|u]16_f16 (#156029)
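Use the .i16.f16 intrinsic formats for the scalar half-precision-to-16-bit-integer conversion intrinsics such as vcvth_s16_f16. Previously these were emitted as .i32.f16 conversions whose i32 result was then truncated to i16, which gave incorrect saturation behaviour for these conversions; calling the .i16.f16 form directly avoids the problem. Fixes #154343. Signed-off-by: Kajetan Puchalski <[email protected]>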
1 parent 0fa7733 commit b96fa9f

File tree

3 files changed: 26 additions and 39 deletions

clang/lib/CodeGen/TargetBuiltins/ARM.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5847,7 +5847,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
58475847
case NEON::BI__builtin_neon_vcvtph_s16_f16:
58485848
case NEON::BI__builtin_neon_vcvth_s16_f16: {
58495849
unsigned Int;
5850-
llvm::Type* InTy = Int32Ty;
5850+
llvm::Type *InTy = Int16Ty;
58515851
llvm::Type* FTy = HalfTy;
58525852
llvm::Type *Tys[2] = {InTy, FTy};
58535853
Ops.push_back(EmitScalarExpr(E->getArg(0)));
@@ -5874,8 +5874,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
58745874
case NEON::BI__builtin_neon_vcvth_s16_f16:
58755875
Int = Intrinsic::aarch64_neon_fcvtzs; break;
58765876
}
5877-
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
5878-
return Builder.CreateTrunc(Ops[0], Int16Ty);
5877+
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
58795878
}
58805879
case NEON::BI__builtin_neon_vcaleh_f16:
58815880
case NEON::BI__builtin_neon_vcalth_f16:

clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics-constrained.c

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -105,9 +105,8 @@ float16_t test_vcvth_f16_u64 (uint64_t a) {
105105
}
106106

107107
// COMMON-LABEL: test_vcvth_s16_f16
108-
// COMMONIR: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtzs.i32.f16(half %a)
109-
// COMMONIR: [[TRUNC:%.*]] = trunc i32 [[VCVT]] to i16
110-
// COMMONIR: ret i16 [[TRUNC]]
108+
// COMMONIR: [[VCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtzs.i16.f16(half %a)
109+
// COMMONIR: ret i16 [[VCVT]]
111110
int16_t test_vcvth_s16_f16 (float16_t a) {
112111
return vcvth_s16_f16(a);
113112
}
@@ -127,9 +126,8 @@ int64_t test_vcvth_s64_f16 (float16_t a) {
127126
}
128127

129128
// COMMON-LABEL: test_vcvth_u16_f16
130-
// COMMONIR: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtzu.i32.f16(half %a)
131-
// COMMONIR: [[TRUNC:%.*]] = trunc i32 [[VCVT]] to i16
132-
// COMMONIR: ret i16 [[TRUNC]]
129+
// COMMONIR: [[VCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtzu.i16.f16(half %a)
130+
// COMMONIR: ret i16 [[VCVT]]
133131
uint16_t test_vcvth_u16_f16 (float16_t a) {
134132
return vcvth_u16_f16(a);
135133
}

clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c

Lines changed: 20 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -97,9 +97,8 @@ float16_t test_vcvth_f16_u64 (uint64_t a) {
9797
}
9898

9999
// CHECK-LABEL: test_vcvth_s16_f16
100-
// CHECK: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtzs.i32.f16(half %a)
101-
// CHECK: [[TRUNC:%.*]] = trunc i32 [[VCVT]] to i16
102-
// CHECK: ret i16 [[TRUNC]]
100+
// CHECK: [[VCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtzs.i16.f16(half %a)
101+
// CHECK: ret i16 [[VCVT]]
103102
int16_t test_vcvth_s16_f16 (float16_t a) {
104103
return vcvth_s16_f16(a);
105104
}
@@ -119,9 +118,8 @@ int64_t test_vcvth_s64_f16 (float16_t a) {
119118
}
120119

121120
// CHECK-LABEL: test_vcvth_u16_f16
122-
// CHECK: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtzu.i32.f16(half %a)
123-
// CHECK: [[TRUNC:%.*]] = trunc i32 [[VCVT]] to i16
124-
// CHECK: ret i16 [[TRUNC]]
121+
// CHECK: [[VCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtzu.i16.f16(half %a)
122+
// CHECK: ret i16 [[VCVT]]
125123
uint16_t test_vcvth_u16_f16 (float16_t a) {
126124
return vcvth_u16_f16(a);
127125
}
@@ -141,9 +139,8 @@ uint64_t test_vcvth_u64_f16 (float16_t a) {
141139
}
142140

143141
// CHECK-LABEL: test_vcvtah_s16_f16
144-
// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtas.i32.f16(half %a)
145-
// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
146-
// CHECK: ret i16 [[RET]]
142+
// CHECK: [[FCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtas.i16.f16(half %a)
143+
// CHECK: ret i16 [[FCVT]]
147144
int16_t test_vcvtah_s16_f16 (float16_t a) {
148145
return vcvtah_s16_f16(a);
149146
}
@@ -163,9 +160,8 @@ int64_t test_vcvtah_s64_f16 (float16_t a) {
163160
}
164161

165162
// CHECK-LABEL: test_vcvtah_u16_f16
166-
// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtau.i32.f16(half %a)
167-
// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
168-
// CHECK: ret i16 [[RET]]
163+
// CHECK: [[FCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtau.i16.f16(half %a)
164+
// CHECK: ret i16 [[FCVT]]
169165
uint16_t test_vcvtah_u16_f16 (float16_t a) {
170166
return vcvtah_u16_f16(a);
171167
}
@@ -185,9 +181,8 @@ uint64_t test_vcvtah_u64_f16 (float16_t a) {
185181
}
186182

187183
// CHECK-LABEL: test_vcvtmh_s16_f16
188-
// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtms.i32.f16(half %a)
189-
// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
190-
// CHECK: ret i16 [[RET]]
184+
// CHECK: [[FCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtms.i16.f16(half %a)
185+
// CHECK: ret i16 [[FCVT]]
191186
int16_t test_vcvtmh_s16_f16 (float16_t a) {
192187
return vcvtmh_s16_f16(a);
193188
}
@@ -207,9 +202,8 @@ int64_t test_vcvtmh_s64_f16 (float16_t a) {
207202
}
208203

209204
// CHECK-LABEL: test_vcvtmh_u16_f16
210-
// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtmu.i32.f16(half %a)
211-
// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
212-
// CHECK: ret i16 [[RET]]
205+
// CHECK: [[FCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtmu.i16.f16(half %a)
206+
// CHECK: ret i16 [[FCVT]]
213207
uint16_t test_vcvtmh_u16_f16 (float16_t a) {
214208
return vcvtmh_u16_f16(a);
215209
}
@@ -229,9 +223,8 @@ uint64_t test_vcvtmh_u64_f16 (float16_t a) {
229223
}
230224

231225
// CHECK-LABEL: test_vcvtnh_s16_f16
232-
// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtns.i32.f16(half %a)
233-
// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
234-
// CHECK: ret i16 [[RET]]
226+
// CHECK: [[FCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtns.i16.f16(half %a)
227+
// CHECK: ret i16 [[FCVT]]
235228
int16_t test_vcvtnh_s16_f16 (float16_t a) {
236229
return vcvtnh_s16_f16(a);
237230
}
@@ -251,9 +244,8 @@ int64_t test_vcvtnh_s64_f16 (float16_t a) {
251244
}
252245

253246
// CHECK-LABEL: test_vcvtnh_u16_f16
254-
// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtnu.i32.f16(half %a)
255-
// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
256-
// CHECK: ret i16 [[RET]]
247+
// CHECK: [[FCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtnu.i16.f16(half %a)
248+
// CHECK: ret i16 [[FCVT]]
257249
uint16_t test_vcvtnh_u16_f16 (float16_t a) {
258250
return vcvtnh_u16_f16(a);
259251
}
@@ -273,9 +265,8 @@ uint64_t test_vcvtnh_u64_f16 (float16_t a) {
273265
}
274266

275267
// CHECK-LABEL: test_vcvtph_s16_f16
276-
// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtps.i32.f16(half %a)
277-
// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
278-
// CHECK: ret i16 [[RET]]
268+
// CHECK: [[FCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtps.i16.f16(half %a)
269+
// CHECK: ret i16 [[FCVT]]
279270
int16_t test_vcvtph_s16_f16 (float16_t a) {
280271
return vcvtph_s16_f16(a);
281272
}
@@ -295,9 +286,8 @@ int64_t test_vcvtph_s64_f16 (float16_t a) {
295286
}
296287

297288
// CHECK-LABEL: test_vcvtph_u16_f16
298-
// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtpu.i32.f16(half %a)
299-
// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
300-
// CHECK: ret i16 [[RET]]
289+
// CHECK: [[FCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtpu.i16.f16(half %a)
290+
// CHECK: ret i16 [[FCVT]]
301291
uint16_t test_vcvtph_u16_f16 (float16_t a) {
302292
return vcvtph_u16_f16(a);
303293
}

0 commit comments

Comments
 (0)