Skip to content

Commit 74b49be

Browse files
fhossein-quic, jverma-quic, yandalur, Muntasir Mallick, Tatiana Larina
committed
Passes for widening vector operations and shuffle opt
Introduce Hexagon-specific passes to generate widening vector instructions for integer and floating-point operations using generic LLVM intrinsics. This enables widening operations for short vectors and improves type legalization by allowing operands to be widened to appropriate types. The patch also includes a shuffle optimization pass to relocate and validate shufflevector instructions during widening legalization. Co-authored-by: Jyotsna Verma <[email protected]> Co-authored-by: Yashas Andaluri <[email protected]> Co-authored-by: Fateme Hosseini <[email protected]> Co-authored-by: Muntasir Mallick <[email protected]> Co-authored-by: Tatiana Larina <[email protected]> Co-authored-by: Kaushik Kulkarni <[email protected]>
1 parent 3694798 commit 74b49be

37 files changed

+4378
-58
lines changed

llvm/include/llvm/IR/IntrinsicsHexagon.td

Lines changed: 79 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
//
1515
// All Hexagon intrinsics start with "llvm.hexagon.".
1616
let TargetPrefix = "hexagon" in {
17-
/// Hexagon_Intrinsic - Base class for the majority of Hexagon intrinsics.
17+
/// Hexagon_Intrinsic - Base class for majority of Hexagon intrinsics.
1818
class Hexagon_Intrinsic<string GCCIntSuffix, list<LLVMType> ret_types,
1919
list<LLVMType> param_types,
2020
list<IntrinsicProperty> properties>
@@ -435,6 +435,84 @@ def int_hexagon_V6_vmaskedstorenq_128B: Hexagon_custom_vms_Intrinsic_128B;
435435
def int_hexagon_V6_vmaskedstorentq_128B: Hexagon_custom_vms_Intrinsic_128B;
436436
def int_hexagon_V6_vmaskedstorentnq_128B: Hexagon_custom_vms_Intrinsic_128B;
437437

438+
// Carryo
439+
// The script can't autogenerate clang builtins for vaddcarryo/vsubcarryo,
440+
// and they are marked in HexagonIset.py as not having intrinsics at all.
441+
// The script could generate intrinsics, but instead of doing intrinsics
442+
// without builtins, just put the intrinsics here.
443+
444+
// tag : V6_vaddcarryo
445+
class Hexagon_custom_v16i32v64i1_v16i32v16i32_Intrinsic<
446+
list<IntrinsicProperty> intr_properties = [IntrNoMem]>
447+
: Hexagon_NonGCC_Intrinsic<
448+
[llvm_v16i32_ty,llvm_v64i1_ty], [llvm_v16i32_ty,llvm_v16i32_ty],
449+
intr_properties>;
450+
451+
// tag : V6_vaddcarryo
452+
class Hexagon_custom_v32i32v128i1_v32i32v32i32_Intrinsic_128B<
453+
list<IntrinsicProperty> intr_properties = [IntrNoMem]>
454+
: Hexagon_NonGCC_Intrinsic<
455+
[llvm_v32i32_ty,llvm_v128i1_ty], [llvm_v32i32_ty,llvm_v32i32_ty],
456+
intr_properties>;
457+
458+
// Pseudo intrinsics for widening vector instructions that
459+
// get replaced with the real Hexagon instructions during
460+
// instruction lowering.
461+
class Hexagon_widenvec_Intrinsic
462+
: Hexagon_NonGCC_Intrinsic<
463+
[llvm_anyvector_ty],
464+
[LLVMTruncatedType<0>, LLVMTruncatedType<0>],
465+
[IntrNoMem]>;
466+
467+
class Hexagon_non_widenvec_Intrinsic
468+
: Hexagon_NonGCC_Intrinsic<
469+
[llvm_anyvector_ty],
470+
[LLVMMatchType<0>, LLVMMatchType<0>],
471+
[IntrNoMem]>;
472+
473+
// Widening vector add
474+
def int_hexagon_vadd_su: Hexagon_widenvec_Intrinsic;
475+
def int_hexagon_vadd_uu: Hexagon_widenvec_Intrinsic;
476+
def int_hexagon_vadd_ss: Hexagon_widenvec_Intrinsic;
477+
def int_hexagon_vadd_us: Hexagon_widenvec_Intrinsic;
478+
479+
480+
// Widening vector subtract
481+
def int_hexagon_vsub_su: Hexagon_widenvec_Intrinsic;
482+
def int_hexagon_vsub_uu: Hexagon_widenvec_Intrinsic;
483+
def int_hexagon_vsub_ss: Hexagon_widenvec_Intrinsic;
484+
def int_hexagon_vsub_us: Hexagon_widenvec_Intrinsic;
485+
486+
// Widening vector multiply
487+
def int_hexagon_vmpy_su: Hexagon_widenvec_Intrinsic;
488+
def int_hexagon_vmpy_uu: Hexagon_widenvec_Intrinsic;
489+
def int_hexagon_vmpy_ss: Hexagon_widenvec_Intrinsic;
490+
def int_hexagon_vmpy_us: Hexagon_widenvec_Intrinsic;
491+
492+
def int_hexagon_vavgu: Hexagon_non_widenvec_Intrinsic;
493+
def int_hexagon_vavgs: Hexagon_non_widenvec_Intrinsic;
494+
495+
class Hexagon_vasr_Intrinsic
496+
: Hexagon_NonGCC_Intrinsic<
497+
[LLVMSubdivide2VectorType<0>],
498+
[llvm_anyvector_ty, LLVMMatchType<0>, llvm_i32_ty],
499+
[IntrNoMem]>;
500+
501+
def int_hexagon_vasrsat_su: Hexagon_vasr_Intrinsic;
502+
def int_hexagon_vasrsat_uu: Hexagon_vasr_Intrinsic;
503+
def int_hexagon_vasrsat_ss: Hexagon_vasr_Intrinsic;
504+
505+
class Hexagon_widen_vec_scalar_Intrinsic
506+
: Hexagon_NonGCC_Intrinsic<
507+
[llvm_anyvector_ty],
508+
[LLVMTruncatedType<0>, llvm_i32_ty],
509+
[IntrNoMem]>;
510+
511+
// Widening vector scalar multiply
512+
def int_hexagon_vmpy_ub_b: Hexagon_widen_vec_scalar_Intrinsic;
513+
def int_hexagon_vmpy_ub_ub: Hexagon_widen_vec_scalar_Intrinsic;
514+
def int_hexagon_vmpy_uh_uh: Hexagon_widen_vec_scalar_Intrinsic;
515+
def int_hexagon_vmpy_h_h: Hexagon_widen_vec_scalar_Intrinsic;
438516

439517
// Intrinsic for instrumentation based profiling using a custom handler. The
440518
// name of the handler is passed as the first operand to the intrinsic. The

llvm/include/llvm/IR/IntrinsicsHexagonDep.td

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -491,20 +491,6 @@ class Hexagon_custom_v32i32v128i1_v32i32v32i32v128i1_Intrinsic_128B<
491491
[llvm_v32i32_ty,llvm_v128i1_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_v128i1_ty],
492492
intr_properties>;
493493

494-
// tag : V6_vaddcarryo
495-
class Hexagon_custom_v16i32v64i1_v16i32v16i32_Intrinsic<
496-
list<IntrinsicProperty> intr_properties = [IntrNoMem]>
497-
: Hexagon_NonGCC_Intrinsic<
498-
[llvm_v16i32_ty,llvm_v64i1_ty], [llvm_v16i32_ty,llvm_v16i32_ty],
499-
intr_properties>;
500-
501-
// tag : V6_vaddcarryo
502-
class Hexagon_custom_v32i32v128i1_v32i32v32i32_Intrinsic_128B<
503-
list<IntrinsicProperty> intr_properties = [IntrNoMem]>
504-
: Hexagon_NonGCC_Intrinsic<
505-
[llvm_v32i32_ty,llvm_v128i1_ty], [llvm_v32i32_ty,llvm_v32i32_ty],
506-
intr_properties>;
507-
508494
// tag : V6_vaddcarrysat
509495
class Hexagon_v16i32_v16i32v16i32v64i1_Intrinsic<string GCCIntSuffix,
510496
list<IntrinsicProperty> intr_properties = [IntrNoMem]>

llvm/lib/Target/Hexagon/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ add_llvm_target(HexagonCodeGen
3737
HexagonGenMemAbsolute.cpp
3838
HexagonGenMux.cpp
3939
HexagonGenPredicate.cpp
40+
HexagonGenWideningVecFloatInstr.cpp
41+
HexagonGenWideningVecInstr.cpp
4042
HexagonHardwareLoops.cpp
4143
HexagonHazardRecognizer.cpp
4244
HexagonInstrInfo.cpp
@@ -53,6 +55,7 @@ add_llvm_target(HexagonCodeGen
5355
HexagonNewValueJump.cpp
5456
HexagonOptAddrMode.cpp
5557
HexagonOptimizeSZextends.cpp
58+
HexagonOptShuffleVector.cpp
5659
HexagonPeephole.cpp
5760
HexagonQFPOptimizer.cpp
5861
HexagonRDFOpt.cpp

llvm/lib/Target/Hexagon/Hexagon.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,9 @@ FunctionPass *createHexagonGenInsert();
9292
FunctionPass *createHexagonGenMemAbsolute();
9393
FunctionPass *createHexagonGenMux();
9494
FunctionPass *createHexagonGenPredicate();
95+
FunctionPass *
96+
createHexagonGenWideningVecFloatInstr(const HexagonTargetMachine &);
97+
FunctionPass *createHexagonGenWideningVecInstr(const HexagonTargetMachine &);
9598
FunctionPass *createHexagonHardwareLoops();
9699
FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM,
97100
CodeGenOptLevel OptLevel);
@@ -102,6 +105,7 @@ FunctionPass *createHexagonMergeActivateWeight();
102105
FunctionPass *createHexagonNewValueJump();
103106
FunctionPass *createHexagonOptAddrMode();
104107
FunctionPass *createHexagonOptimizeSZextends();
108+
FunctionPass *createHexagonOptShuffleVector(const HexagonTargetMachine &);
105109
FunctionPass *createHexagonPacketizer(bool Minimal);
106110
FunctionPass *createHexagonPeephole();
107111
FunctionPass *createHexagonRDFOpt();

llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,7 @@ struct PrintRegister {
5151
};
5252

5353
[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS,
54-
const PrintRegister &PR);
55-
raw_ostream &operator<<(raw_ostream &OS, const PrintRegister &PR) {
54+
const PrintRegister &PR) {
5655
return OS << printReg(PR.Reg.Reg, &PR.TRI, PR.Reg.SubReg);
5756
}
5857

0 commit comments

Comments
 (0)