Skip to content

Commit 36b320c

Browse files
committed
add new predicate (Predicate_my_any_fadd) to the v2f32 fadd -> V_PK_ADD_F32 selection pattern
1 parent b1783fe commit 36b320c

File tree

4 files changed: +49 -48 lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3363,9 +3363,9 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
33633363
// Already computed the OpcodeOffset table, just index into it.
33643364
if (N.getOpcode() < OpcodeOffset.size())
33653365
MatcherIndex = OpcodeOffset[N.getOpcode()];
3366-
// if (N->getOpcode() == ISD::FADD) {
3367-
// MatcherIndex = 0;
3368-
// }
3366+
if (N->getOpcode() == ISD::FADD) {
3367+
MatcherIndex = 0;
3368+
}
33693369
LLVM_DEBUG(dbgs() << " Initial Opcode index to " << MatcherIndex << "\n");
33703370
} else if (MatcherTable[0] == OPC_SwitchOpcode) {
33713371
// Otherwise, the table isn't computed, but the state machine does start

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -793,7 +793,7 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
793793
}
794794
}
795795

796-
if (N->getOpcode() == ISD::FADD && false) {
796+
if (N->getOpcode() == ISD::FADD) {
797797
llvm::dbgs() << "N->dump()\n";
798798
N->dump();
799799
MySelectCode(N);

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h

Lines changed: 31 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -290,7 +290,7 @@ void MySelectCode(SDNode *N) {
290290
#define TARGET_VAL(X) X & 255, unsigned(X) >> 8
291291
#define COVERAGE_IDX_VAL(X) X & 255, (unsigned(X) >> 8) & 255, (unsigned(X) >> 16) & 255, (unsigned(X) >> 24) & 255
292292
static const unsigned char MatcherTable[] = {
293-
// /*528829*/ /*SwitchOpcode*/ 118|128,4/*630*/, TARGET_VAL(ISD::FADD),// ->529463
293+
// /*528829*/ /*SwitchOpcode*/ 120|128,4/*632*/, TARGET_VAL(ISD::FADD),// ->529465
294294
/*528833*/ OPC_Scope, 76, /*->528911*/ // 4 children in Scope
295295
/*528835*/ OPC_MoveChild0,
296296
/*528836*/ OPC_CheckOpcode, TARGET_VAL(ISD::INTRINSIC_WO_CHAIN),
@@ -393,7 +393,7 @@ void MySelectCode(SDNode *N) {
393393
// Src: (fadd:{ *:[f32] } (AMDGPUfmul_legacy_impl:{ *:[f32] } (VOP3Mods:{ *:[f32] } f32:{ *:[f32] }:$src0, i32:{ *:[i32] }:$src0_mod), (VOP3Mods:{ *:[f32] } f32:{ *:[f32] }:$src1, i32:{ *:[i32] }:$src1_mod)), (VOP3Mods:{ *:[f32] } f32:{ *:[f32] }:$src2, i32:{ *:[i32] }:$src2_mod)) - Complexity = 33
394394
// Dst: (V_MAD_LEGACY_F32_e64:{ *:[f32] } ?:{ *:[i32] }:$src0_mod, ?:{ *:[f32] }:$src0, ?:{ *:[i32] }:$src1_mod, ?:{ *:[f32] }:$src1, ?:{ *:[i32] }:$src2_mod, ?:{ *:[f32] }:$src2, 0:{ *:[i1] }, 0:{ *:[i32] })
395395
/*529063*/ 0, /*End of Scope*/
396-
/*529064*/ /*Scope*/ 12|128,3/*396*/, /*->529462*/
396+
/*529064*/ /*Scope*/ 14|128,3/*398*/, /*->529464*/
397397
/*529066*/ OPC_RecordChild0, // #0 = $VOP3NoMods:src2
398398
/*529067*/ OPC_Scope, 74, /*->529143*/ // 2 children in Scope
399399
/*529069*/ OPC_MoveChild1,
@@ -428,7 +428,7 @@ void MySelectCode(SDNode *N) {
428428
// Src: (fadd:{ *:[f32] } (VOP3Mods:{ *:[f32] } f32:{ *:[f32] }:$src2, i32:{ *:[i32] }:$src2_mod), (AMDGPUfmul_legacy_impl:{ *:[f32] } (VOP3Mods:{ *:[f32] } f32:{ *:[f32] }:$src0, i32:{ *:[i32] }:$src0_mod), (VOP3Mods:{ *:[f32] } f32:{ *:[f32] }:$src1, i32:{ *:[i32] }:$src1_mod))) - Complexity = 33
429429
// Dst: (V_MAD_LEGACY_F32_e64:{ *:[f32] } ?:{ *:[i32] }:$src0_mod, ?:{ *:[f32] }:$src0, ?:{ *:[i32] }:$src1_mod, ?:{ *:[f32] }:$src1, ?:{ *:[i32] }:$src2_mod, ?:{ *:[f32] }:$src2, 0:{ *:[i1] }, 0:{ *:[i32] })
430430
/*529142*/ 0, /*End of Scope*/
431-
/*529143*/ /*Scope*/ 60|128,2/*316*/, /*->529461*/
431+
/*529143*/ /*Scope*/ 62|128,2/*318*/, /*->529463*/
432432
/*529145*/ OPC_RecordChild1, // #1 = $src1
433433
/*529146*/ OPC_Scope, 25, /*->529173*/ // 6 children in Scope
434434
/*529148*/ OPC_CheckPredicate3, // Predicate_anonymous_13768
@@ -543,36 +543,37 @@ void MySelectCode(SDNode *N) {
543543
// Src: (fadd:{ *:[f64] } (VOP3Mods:{ *:[f64] } f64:{ *:[f64] }:$src1, i32:{ *:[i32] }:$src1_modifiers), (VOP3Mods0:{ *:[f64] } f64:{ *:[f64] }:$src0, i32:{ *:[i32] }:$src0_modifiers, i1:{ *:[i1] }:$clamp, i32:{ *:[i32] }:$omod)) - Complexity = -973
544544
// Dst: (V_ADD_F64_e64:{ *:[f64] } i32:{ *:[i32] }:$src0_modifiers, f64:{ *:[f64] }:$src0, i32:{ *:[i32] }:$src1_modifiers, f64:{ *:[f64] }:$src1, i1:{ *:[i1] }:$clamp, i32:{ *:[i32] }:$omod)
545545
/*529389*/ 0, /*End of Scope*/
546-
/*529390*/ /*Scope*/ 33, /*->529424*/
547-
/*529391*/ OPC_CheckType, /*MVT::v2f16*/89,
548-
/*529393*/ OPC_CheckComplexPat, /*CP*/13, /*#*/0, // SelectVOP3PMods:$ #2 #3
549-
/*529396*/ OPC_CheckComplexPat, /*CP*/13, /*#*/1, // SelectVOP3PMods:$ #4 #5
550-
/*529399*/ OPC_EmitInteger, /*MVT::i1*/2, 0, // 0 #6
551-
/*529402*/ OPC_EmitInteger32, 0, // 0 #7
552-
/*529404*/ OPC_EmitInteger32, 0, // 0 #8
553-
/*529406*/ OPC_EmitInteger32, 0, // 0 #9
554-
/*529408*/ OPC_EmitInteger32, 0, // 0 #10
555-
/*529410*/ OPC_MorphNodeTo1None, TARGET_VAL(AMDGPU::V_PK_ADD_F16),
546+
/*529390*/ /*Scope*/ 37, /*->529428*/
547+
/*529391*/ OPC_CheckType, /*MVT::v2f32*/109,
548+
/*529393*/ OPC_CheckPredicate, 108, // Predicate_my_any_fadd
549+
/*529395*/ OPC_CheckPatternPredicate, 104, // (Subtarget->hasPackedFP32Ops())
550+
/*529397*/ OPC_CheckComplexPat, /*CP*/13, /*#*/0, // SelectVOP3PMods:$ #2 #3
551+
/*529400*/ OPC_CheckComplexPat, /*CP*/13, /*#*/1, // SelectVOP3PMods:$ #4 #5
552+
/*529403*/ OPC_EmitInteger, /*MVT::i1*/2, 0, // 0 #6
553+
/*529406*/ OPC_EmitInteger32, 0, // 0 #7
554+
/*529408*/ OPC_EmitInteger32, 0, // 0 #8
555+
/*529410*/ OPC_EmitInteger32, 0, // 0 #9
556+
/*529412*/ OPC_EmitInteger32, 0, // 0 #10
557+
/*529414*/ OPC_MorphNodeTo1None, TARGET_VAL(AMDGPU::V_PK_ADD_F32),
558+
/*MVT::v2f32*/109, 9/*#Ops*/, 3, 2, 5, 4, 6, 7, 8, 9, 10,
559+
// Src: (fadd:{ *:[v2f32] } (VOP3PMods:{ *:[v2f32] } v2f32:{ *:[v2f32] }:$src0, i32:{ *:[i32] }:$src0_modifiers), (VOP3PMods:{ *:[v2f32] } v2f32:{ *:[v2f32] }:$src1, i32:{ *:[i32] }:$src1_modifiers))<<P:Predicate_my_any_fadd>> - Complexity = -978
560+
// Dst: (V_PK_ADD_F32:{ *:[v2f32] } i32:{ *:[i32] }:$src0_modifiers, v2f32:{ *:[v2f32] }:$src0, i32:{ *:[i32] }:$src1_modifiers, v2f32:{ *:[v2f32] }:$src1)
561+
/*529428*/ /*Scope*/ 33, /*->529462*/
562+
/*529429*/ OPC_CheckType, /*MVT::v2f16*/89,
563+
/*529431*/ OPC_CheckComplexPat, /*CP*/13, /*#*/0, // SelectVOP3PMods:$ #2 #3
564+
/*529434*/ OPC_CheckComplexPat, /*CP*/13, /*#*/1, // SelectVOP3PMods:$ #4 #5
565+
/*529437*/ OPC_EmitInteger, /*MVT::i1*/2, 0, // 0 #6
566+
/*529440*/ OPC_EmitInteger32, 0, // 0 #7
567+
/*529442*/ OPC_EmitInteger32, 0, // 0 #8
568+
/*529444*/ OPC_EmitInteger32, 0, // 0 #9
569+
/*529446*/ OPC_EmitInteger32, 0, // 0 #10
570+
/*529448*/ OPC_MorphNodeTo1None, TARGET_VAL(AMDGPU::V_PK_ADD_F16),
556571
/*MVT::v2f16*/89, 9/*#Ops*/, 3, 2, 5, 4, 6, 7, 8, 9, 10,
557572
// Src: (fadd:{ *:[v2f16] } (VOP3PMods:{ *:[v2f16] } v2f16:{ *:[v2f16] }:$src0, i32:{ *:[i32] }:$src0_modifiers), (VOP3PMods:{ *:[v2f16] } v2f16:{ *:[v2f16] }:$src1, i32:{ *:[i32] }:$src1_modifiers)) - Complexity = -979
558573
// Dst: (V_PK_ADD_F16:{ *:[v2f16] } i32:{ *:[i32] }:$src0_modifiers, v2f16:{ *:[v2f16] }:$src0, i32:{ *:[i32] }:$src1_modifiers, v2f16:{ *:[v2f16] }:$src1)
559-
/*529424*/ /*Scope*/ 35, /*->529460*/
560-
/*529425*/ OPC_CheckType, /*MVT::v2f32*/109,
561-
/*529427*/ OPC_CheckPatternPredicate, 104, // (Subtarget->hasPackedFP32Ops())
562-
/*529429*/ OPC_CheckComplexPat, /*CP*/13, /*#*/0, // SelectVOP3PMods:$ #2 #3
563-
/*529432*/ OPC_CheckComplexPat, /*CP*/13, /*#*/1, // SelectVOP3PMods:$ #4 #5
564-
/*529435*/ OPC_EmitInteger, /*MVT::i1*/2, 0, // 0 #6
565-
/*529438*/ OPC_EmitInteger32, 0, // 0 #7
566-
/*529440*/ OPC_EmitInteger32, 0, // 0 #8
567-
/*529442*/ OPC_EmitInteger32, 0, // 0 #9
568-
/*529444*/ OPC_EmitInteger32, 0, // 0 #10
569-
/*529446*/ OPC_MorphNodeTo1None, TARGET_VAL(AMDGPU::V_PK_ADD_F32),
570-
/*MVT::v2f32*/109, 9/*#Ops*/, 3, 2, 5, 4, 6, 7, 8, 9, 10,
571-
// Src: (fadd:{ *:[v2f32] } (VOP3PMods:{ *:[v2f32] } v2f32:{ *:[v2f32] }:$src0, i32:{ *:[i32] }:$src0_modifiers), (VOP3PMods:{ *:[v2f32] } v2f32:{ *:[v2f32] }:$src1, i32:{ *:[i32] }:$src1_modifiers)) - Complexity = -979
572-
// Dst: (V_PK_ADD_F32:{ *:[v2f32] } i32:{ *:[i32] }:$src0_modifiers, v2f32:{ *:[v2f32] }:$src0, i32:{ *:[i32] }:$src1_modifiers, v2f32:{ *:[v2f32] }:$src1)
573-
/*529460*/ 0, /*End of Scope*/
574-
/*529461*/ 0, /*End of Scope*/
575-
/*529462*/ 0, /*End of Scope*/
574+
/*529462*/ 0, /*End of Scope*/
575+
/*529463*/ 0, /*End of Scope*/
576+
/*529464*/ 0, /*End of Scope*/
576577
}; // Total Array size is 563301 bytes
577578

578579
SelectCodeCommon(N, MatcherTable, sizeof(MatcherTable));

llvm/lib/Target/AMDGPU/test_v_pk.ll

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -22,18 +22,18 @@ define amdgpu_kernel void @add_kernel(ptr addrspace(1) nocapture readonly %0, pt
2222
%18 = load float, ptr %17, align 16
2323
%19 = getelementptr inbounds i8, ptr %17, i64 4
2424
%20 = load float, ptr %19, align 4
25-
25+
2626
%v_100 = insertelement <2 x float> undef, float %18, i32 0
2727
%v_102 = insertelement <2 x float> %v_100, float %20, i32 1
2828

2929
%21 = getelementptr inbounds i8, ptr %17, i64 8
3030
%22 = load float, ptr %21, align 8
3131
%23 = getelementptr inbounds i8, ptr %17, i64 12
3232
%24 = load float, ptr %23, align 4
33-
33+
3434
%v_200 = insertelement <2 x float> undef, float %22, i32 0
35-
%v_202 = insertelement <2 x float> %v_200, float %24, i32 1
36-
35+
%v_202 = insertelement <2 x float> %v_200, float %24, i32 1
36+
3737
%25 = getelementptr float, ptr addrspace(1) %1, i64 %15
3838
%26 = addrspacecast ptr addrspace(1) %25 to ptr
3939
%27 = sext i32 %12 to i64
@@ -47,33 +47,33 @@ define amdgpu_kernel void @add_kernel(ptr addrspace(1) nocapture readonly %0, pt
4747

4848
%36 = getelementptr inbounds i8, ptr %26, i64 8
4949
%37 = load float, ptr %36, align 8
50-
50+
5151
%v_300 = insertelement <2 x float> undef, float %34, i32 0
52-
%v_302 = insertelement <2 x float> %v_300, float %37, i32 1
53-
52+
%v_302 = insertelement <2 x float> %v_300, float %37, i32 1
53+
5454
%39 = getelementptr inbounds i8, ptr %26, i64 4
5555
%40 = load float, ptr %39, align 4
5656
%42 = load float, ptr %26, align 16
5757

5858
%v_400 = insertelement <2 x float> undef, float %40, i32 0
59-
%v_402 = insertelement <2 x float> %v_400, float %42, i32 1
59+
%v_402 = insertelement <2 x float> %v_400, float %42, i32 1
6060

6161
%v_500 = fadd <2 x float> %v_102, %v_402
62-
%v_501 = fadd <2 x float> %v_202, %v_302
62+
; %v_501 = fadd <2 x float> %v_202, %v_302
6363
; tail call void @llvm.amdgcn.iglp.opt(i32 4)
6464

65-
%v_45 = extractelement <2 x float> %v_501, i32 1
66-
%v_32 = extractelement <2 x float> %v_501, i32 0
65+
; %v_45 = extractelement <2 x float> %v_501, i32 1
66+
; %v_32 = extractelement <2 x float> %v_501, i32 0
6767
%v_30 = extractelement <2 x float> %v_500, i32 1
6868
%v_28 = extractelement <2 x float> %v_500, i32 0
6969

7070
%i_44 = sext i32 %10 to i64
7171
%p_45 = getelementptr float, ptr addrspace(1) %2, i64 %i_44
7272
store float %v_28, ptr addrspace(1) %p_45, align 4
7373

74-
%i_31 = sext i32 %14 to i64
75-
%p_32 = getelementptr float, ptr addrspace(1) %2, i64 %i_31
76-
store float %v_32, ptr addrspace(1) %p_32, align 4
74+
; %i_31 = sext i32 %14 to i64
75+
; %p_32 = getelementptr float, ptr addrspace(1) %2, i64 %i_31
76+
; store float %v_32, ptr addrspace(1) %p_32, align 4
7777

7878
%i_29 = sext i32 %13 to i64
7979
%p_30 = getelementptr float, ptr addrspace(1) %2, i64 %i_29

0 commit comments

Comments
 (0)