Skip to content

Commit 5ce1c04

Browse files
Sergei LarinSergeiYLarin
authored andcommitted
[Hexagon] Add an option to use fast FP to int convert for some HVX cases
Lowering several flavors of fptosi for HVX can be done faster, but the faster lowering violates the C/C++ conversion rule (truncation toward zero) on some architecture versions. Nevertheless, customers are using the direct intrinsics with the "incorrect" rounding mode and want the compiler to do the same. The default behavior is not changed. Patch By: Fateme Hosseini Co-authored-by: Sergei Larin <[email protected]>
1 parent 3694798 commit 5ce1c04

File tree

2 files changed

+61
-0
lines changed

2 files changed

+61
-0
lines changed

llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,10 @@ static cl::opt<unsigned> HvxWidenThreshold("hexagon-hvx-widen",
3131
cl::Hidden, cl::init(16),
3232
cl::desc("Lower threshold (in bytes) for widening to HVX vectors"));
3333

34+
static cl::opt<bool>
35+
EnableFpFastConvert("hexagon-fp-fast-convert", cl::Hidden, cl::init(false),
36+
cl::desc("Enable FP fast conversion routine."));
37+
3438
static const MVT LegalV64[] = { MVT::v64i8, MVT::v32i16, MVT::v16i32 };
3539
static const MVT LegalW64[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
3640
static const MVT LegalV128[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
@@ -2970,6 +2974,32 @@ HexagonTargetLowering::ExpandHvxFpToInt(SDValue Op, SelectionDAG &DAG) const {
29702974
MVT ResTy = ty(Op);
29712975
assert(InpTy.changeTypeToInteger() == ResTy);
29722976

2977+
// At this point this is an experiment under a flag.
2978+
// On architectures before V81, the conversion rounds to the nearest value.
2979+
// The C/C++ standard requires rounding towards zero:
2980+
// C (C99 and later): ISO/IEC 9899:2018 (C18), section 6.3.1.4 — "When a
2981+
// finite value of real floating type is converted to an integer type, the
2982+
// fractional part is discarded (i.e., the value is truncated toward zero)."
2983+
// C++: ISO/IEC 14882:2020 (C++20), [conv.fpint] — "A prvalue of a
2984+
// floating-point type can be converted to a prvalue of an integer type. The
2985+
// conversion truncates; that is, the fractional part is discarded."
2986+
if (InpTy == MVT::v64f16) {
2987+
if (Subtarget.useHVXV81Ops()) {
2988+
// This is c/c++ compliant
2989+
SDValue ConvVec =
2990+
getInstr(Hexagon::V6_vconv_h_hf_rnd, dl, ResTy, {Op0}, DAG);
2991+
return ConvVec;
2992+
} else if (EnableFpFastConvert) {
2993+
// Vd32.h=Vu32.hf same as Q6_Vh_equals_Vhf
2994+
SDValue ConvVec = getInstr(Hexagon::V6_vconv_h_hf, dl, ResTy, {Op0}, DAG);
2995+
return ConvVec;
2996+
}
2997+
} else if (EnableFpFastConvert && InpTy == MVT::v32f32) {
2998+
// Vd32.w=Vu32.sf same as Q6_Vw_equals_Vsf
2999+
SDValue ConvVec = getInstr(Hexagon::V6_vconv_w_sf, dl, ResTy, {Op0}, DAG);
3000+
return ConvVec;
3001+
}
3002+
29733003
// int32_t conv_f32_to_i32(uint32_t inp) {
29743004
// // s | exp8 | frac23
29753005
//
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=hexagon -mv73 -mhvx -mattr=+hvx-length128b -hexagon-hvx-widen=32 -hexagon-fp-fast-convert=true < %s | FileCheck %s
3+
4+
target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
5+
target triple = "hexagon"
6+
7+
; f16 -> s16
8+
; No widening
9+
define void @f16s16_0(ptr %a0, ptr %a1) {
10+
; CHECK-LABEL: f16s16_0:
11+
; CHECK: {
12+
; CHECK-NEXT: v1.h = v0.hf
13+
; CHECK-NEXT: jumpr r31
14+
; CHECK-NEXT: v0.cur = vmem(r0+#0)
15+
; CHECK-NEXT: vmem(r1+#0) = v1.new
16+
; CHECK-NEXT: }
17+
%v0 = load <64 x half>, ptr %a0, align 128
18+
%v1 = fptosi <64 x half> %v0 to <64 x i16>
19+
store <64 x i16> %v1, ptr %a1, align 128
20+
ret void
21+
}
22+
23+
; Widen result #2
24+
define void @f32s8_2(ptr %a0, ptr %a1) #0 {
25+
; CHECK-LABEL: f32s8_2:
26+
; CHECK: v{{.*}}.w = v{{.*}}.sf
27+
%v0 = load <32 x float>, ptr %a0, align 128
28+
%v1 = fptosi <32 x float> %v0 to <32 x i8>
29+
store <32 x i8> %v1, ptr %a1, align 128
30+
ret void
31+
}

0 commit comments

Comments
 (0)