Skip to content

Commit 21871bb

Browse files
authored
[RISCV] Add fractional LMUL register classes for inline assembly. (#171278)
Inline assembly uses the first type from the register class to connect to the rest of SelectionDAG. By adding fractional LMUL register classes, we can ensure that this type is the size of the types we use for fractional LMUL in the rest of SelectionDAG. This allows us to remove some of the handling we had in splitValueIntoRegisterParts/joinRegisterPartsIntoValue. That removed code was incorrectly handling v16i4 arguments/returns, which should be any_extended to v16i8 to match type legalization. Instead, we widened v16i4 -> v32i4 and then bitcast to v16i8. This merged pairs of i4 elements into a single i8 element instead of keeping them as separate elements that have each been extended to i8. This is an alternative to #171243. Fixes #171141.
1 parent 0bac8f1 commit 21871bb

File tree

4 files changed

+59
-27
lines changed

4 files changed

+59
-27
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 27 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -24323,9 +24323,11 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
2432324323
break;
2432424324
}
2432524325
} else if (Constraint == "vr") {
24326-
// Check VM first so that mask types will use that instead of VR.
24326+
// Check VM and fractional LMUL first so that those types will use that
24327+
// class instead of VR.
2432724328
for (const auto *RC :
24328-
{&RISCV::VMRegClass, &RISCV::VRRegClass, &RISCV::VRM2RegClass,
24329+
{&RISCV::VMRegClass, &RISCV::VRMF8RegClass, &RISCV::VRMF4RegClass,
24330+
&RISCV::VRMF2RegClass, &RISCV::VRRegClass, &RISCV::VRM2RegClass,
2432924331
&RISCV::VRM4RegClass, &RISCV::VRM8RegClass, &RISCV::VRN2M1RegClass,
2433024332
&RISCV::VRN3M1RegClass, &RISCV::VRN4M1RegClass,
2433124333
&RISCV::VRN5M1RegClass, &RISCV::VRN6M1RegClass,
@@ -24342,16 +24344,19 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
2434224344
}
2434324345
}
2434424346
} else if (Constraint == "vd") {
24345-
// Check VMNoV0 first so that mask types will use that instead of VRNoV0.
24347+
// Check VMNoV0 and fractional LMUL first so that those types will use that
24348+
// class instead of VRNoV0.
2434624349
for (const auto *RC :
24347-
{&RISCV::VMNoV0RegClass, &RISCV::VRNoV0RegClass,
24348-
&RISCV::VRM2NoV0RegClass, &RISCV::VRM4NoV0RegClass,
24349-
&RISCV::VRM8NoV0RegClass, &RISCV::VRN2M1NoV0RegClass,
24350-
&RISCV::VRN3M1NoV0RegClass, &RISCV::VRN4M1NoV0RegClass,
24351-
&RISCV::VRN5M1NoV0RegClass, &RISCV::VRN6M1NoV0RegClass,
24352-
&RISCV::VRN7M1NoV0RegClass, &RISCV::VRN8M1NoV0RegClass,
24353-
&RISCV::VRN2M2NoV0RegClass, &RISCV::VRN3M2NoV0RegClass,
24354-
&RISCV::VRN4M2NoV0RegClass, &RISCV::VRN2M4NoV0RegClass}) {
24350+
{&RISCV::VMNoV0RegClass, &RISCV::VRMF8NoV0RegClass,
24351+
&RISCV::VRMF4NoV0RegClass, &RISCV::VRMF2NoV0RegClass,
24352+
&RISCV::VRNoV0RegClass, &RISCV::VRM2NoV0RegClass,
24353+
&RISCV::VRM4NoV0RegClass, &RISCV::VRM8NoV0RegClass,
24354+
&RISCV::VRN2M1NoV0RegClass, &RISCV::VRN3M1NoV0RegClass,
24355+
&RISCV::VRN4M1NoV0RegClass, &RISCV::VRN5M1NoV0RegClass,
24356+
&RISCV::VRN6M1NoV0RegClass, &RISCV::VRN7M1NoV0RegClass,
24357+
&RISCV::VRN8M1NoV0RegClass, &RISCV::VRN2M2NoV0RegClass,
24358+
&RISCV::VRN3M2NoV0RegClass, &RISCV::VRN4M2NoV0RegClass,
24359+
&RISCV::VRN2M4NoV0RegClass}) {
2435524360
if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
2435624361
return std::make_pair(0U, RC);
2435724362

@@ -25229,12 +25234,10 @@ bool RISCVTargetLowering::splitValueIntoRegisterParts(
2522925234
return true;
2523025235
}
2523125236

25232-
if ((ValueVT.isScalableVector() || ValueVT.isFixedLengthVector()) &&
25233-
PartVT.isScalableVector()) {
25234-
if (ValueVT.isFixedLengthVector()) {
25235-
ValueVT = getContainerForFixedLengthVector(ValueVT.getSimpleVT());
25236-
Val = convertToScalableVector(ValueVT, Val, DAG, Subtarget);
25237-
}
25237+
if (ValueVT.isFixedLengthVector() && PartVT.isScalableVector()) {
25238+
ValueVT = getContainerForFixedLengthVector(ValueVT.getSimpleVT());
25239+
Val = convertToScalableVector(ValueVT, Val, DAG, Subtarget);
25240+
2523825241
LLVMContext &Context = *DAG.getContext();
2523925242
EVT ValueEltVT = ValueVT.getVectorElementType();
2524025243
EVT PartEltVT = PartVT.getVectorElementType();
@@ -25304,17 +25307,17 @@ SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
2530425307
return Val;
2530525308
}
2530625309

25307-
if ((ValueVT.isScalableVector() || ValueVT.isFixedLengthVector()) &&
25308-
PartVT.isScalableVector()) {
25310+
if (ValueVT.isFixedLengthVector() && PartVT.isScalableVector()) {
2530925311
LLVMContext &Context = *DAG.getContext();
2531025312
SDValue Val = Parts[0];
2531125313
EVT ValueEltVT = ValueVT.getVectorElementType();
2531225314
EVT PartEltVT = PartVT.getVectorElementType();
25313-
unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
25314-
if (ValueVT.isFixedLengthVector())
25315-
ValueVTBitSize = getContainerForFixedLengthVector(ValueVT.getSimpleVT())
25316-
.getSizeInBits()
25317-
.getKnownMinValue();
25315+
25316+
unsigned ValueVTBitSize =
25317+
getContainerForFixedLengthVector(ValueVT.getSimpleVT())
25318+
.getSizeInBits()
25319+
.getKnownMinValue();
25320+
2531825321
unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
2531925322
if (PartVTBitSize % ValueVTBitSize == 0) {
2532025323
assert(PartVTBitSize >= ValueVTBitSize);

llvm/lib/Target/RISCV/RISCVRegisterInfo.td

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -766,6 +766,13 @@ class VReg<list<ValueType> regTypes, dag regList, int Vlmul, int nf = 1>
766766
defvar VMaskVTs = [vbool1_t, vbool2_t, vbool4_t, vbool8_t, vbool16_t,
767767
vbool32_t, vbool64_t];
768768

769+
defvar VMF8VTs = [vint8mf8_t];
770+
771+
defvar VMF4VTs = [vint8mf4_t, vint16mf4_t, vfloat16mf4_t, vbfloat16mf4_t];
772+
773+
defvar VMF2VTs = [vint8mf2_t, vint16mf2_t, vint32mf2_t,
774+
vfloat16mf2_t, vbfloat16mf2_t, vfloat32mf2_t];
775+
769776
defvar VM1VTs = [vint8m1_t, vint16m1_t, vint32m1_t, vint64m1_t,
770777
vbfloat16m1_t, vfloat16m1_t, vfloat32m1_t,
771778
vfloat64m1_t, vint8mf2_t, vint8mf4_t, vint8mf8_t,
@@ -809,11 +816,21 @@ def VRM8 : VReg<VM8VTs, (add V8M8, V16M8, V24M8, V0M8), 8>;
809816

810817
def VRM8NoV0 : VReg<VM8VTs, (sub VRM8, V0M8), 8>;
811818

819+
// Fractional LMUL register classes for inline assembly.
820+
def VRMF8 : VReg<VMF8VTs, (add VR), 1>;
821+
def VRMF8NoV0 : VReg<VMF8VTs, (add VRNoV0), 1>;
822+
823+
def VRMF4 : VReg<VMF4VTs, (add VR), 1>;
824+
def VRMF4NoV0 : VReg<VMF4VTs, (add VRNoV0), 1>;
825+
826+
def VRMF2 : VReg<VMF2VTs, (add VR), 1>;
827+
def VRMF2NoV0 : VReg<VMF2VTs, (add VRNoV0), 1>;
828+
812829
def VMV0 : VReg<VMaskVTs, (add V0), 1>;
813830

814831
// The register class is added for inline assembly for vector mask types.
815832
def VM : VReg<VMaskVTs, (add VR), 1>;
816-
def VMNoV0 : VReg<VMaskVTs, (sub VR, V0), 1>;
833+
def VMNoV0 : VReg<VMaskVTs, (add VRNoV0), 1>;
817834

818835
defvar VTupM1N2VTs = [riscv_nxv8i8x2, riscv_nxv4i8x2, riscv_nxv2i8x2, riscv_nxv1i8x2];
819836
defvar VTupM1N3VTs = [riscv_nxv8i8x3, riscv_nxv4i8x3, riscv_nxv2i8x3, riscv_nxv1i8x3];
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
; RUN: llc < %s -mtriple=riscv64 -mattr=+v | FileCheck %s
3+
4+
define <vscale x 16 x i4> @foo() {
5+
; CHECK-LABEL: foo:
6+
; CHECK: # %bb.0: # %entry
7+
; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
8+
; CHECK-NEXT: vmv.v.i v8, 0
9+
; CHECK-NEXT: ret
10+
entry:
11+
ret <vscale x 16 x i4> zeroinitializer
12+
}

llvm/test/CodeGen/RISCV/rvv/subregister-undef-early-clobber.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ body: |
5555
; CHECK-NEXT: %pt2:vrm4 = IMPLICIT_DEF
5656
; CHECK-NEXT: [[INIT_UNDEF:%[0-9]+]]:vrm2nov0 = INIT_UNDEF
5757
; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm4 = INSERT_SUBREG [[INSERT_SUBREG]], [[INIT_UNDEF]], %subreg.sub_vrm2_1
58-
; CHECK-NEXT: [[INIT_UNDEF1:%[0-9]+]]:vr = INIT_UNDEF
58+
; CHECK-NEXT: [[INIT_UNDEF1:%[0-9]+]]:vrmf8 = INIT_UNDEF
5959
; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm4 = INSERT_SUBREG [[INSERT_SUBREG1]], [[INIT_UNDEF1]], %subreg.sub_vrm1_0
6060
; CHECK-NEXT: early-clobber %6:vrm4 = PseudoVRGATHER_VI_M4 %pt2, killed [[INSERT_SUBREG2]], 0, 0, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
6161
; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0
@@ -284,7 +284,7 @@ body: |
284284
; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG]], [[INIT_UNDEF]], %subreg.sub_vrm4_1
285285
; CHECK-NEXT: [[INIT_UNDEF1:%[0-9]+]]:vrm2nov0 = INIT_UNDEF
286286
; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG1]], [[INIT_UNDEF1]], %subreg.sub_vrm2_1
287-
; CHECK-NEXT: [[INIT_UNDEF2:%[0-9]+]]:vr = INIT_UNDEF
287+
; CHECK-NEXT: [[INIT_UNDEF2:%[0-9]+]]:vrmf8 = INIT_UNDEF
288288
; CHECK-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG2]], [[INIT_UNDEF2]], %subreg.sub_vrm1_0
289289
; CHECK-NEXT: early-clobber %6:vrm8 = PseudoVRGATHER_VI_M8 %pt2, killed [[INSERT_SUBREG3]], 0, 0, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
290290
; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0

0 commit comments

Comments
 (0)