Skip to content

Commit 9a0448e

Browse files
Prince781 authored and github-actions[bot] committed
Automerge: [NVPTX] expand extractelt(v2f32) with dynamic index (#153078)
Addresses llvm/llvm-project#126337 (comment)
2 parents 359afe7 + 0036923 commit 9a0448e

File tree

2 files changed

+20
-7
lines changed

2 files changed

+20
-7
lines changed

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -680,6 +680,8 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
680 680
// No support for these operations with v2f32.
681 681
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Expand);
682 682
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f32, Expand);
683+
// Need custom lowering in case the index is dynamic.
684+
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
683 685

684 686
// Custom conversions to/from v2i8.
685 687
setOperationAction(ISD::BITCAST, MVT::v2i8, Custom);

llvm/test/CodeGen/NVPTX/f32x2-instructions.ll

Lines changed: 18 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -79,13 +79,24 @@ define float @test_extract_1(<2 x float> %a) #0 {
79 79
ret float %e
80 80
}
81 81

82-
; NOTE: disabled as -O3 miscompiles this into pointer arithmetic on
83-
; test_extract_i_param_0 where the symbol's address is not taken first (that
84-
; is, moved to a temporary)
85-
; define float @test_extract_i(<2 x float> %a, i64 %idx) #0 {
86-
; %e = extractelement <2 x float> %a, i64 %idx
87-
; ret float %e
88-
; }
82+
define float @test_extract_i(<2 x float> %a, i64 %idx) #0 {
83+
; CHECK-LABEL: test_extract_i(
84+
; CHECK: {
85+
; CHECK-NEXT: .reg .pred %p<2>;
86+
; CHECK-NEXT: .reg .b32 %r<4>;
87+
; CHECK-NEXT: .reg .b64 %rd<3>;
88+
; CHECK-EMPTY:
89+
; CHECK-NEXT: // %bb.0:
90+
; CHECK-NEXT: ld.param.b64 %rd2, [test_extract_i_param_1];
91+
; CHECK-NEXT: ld.param.b64 %rd1, [test_extract_i_param_0];
92+
; CHECK-NEXT: setp.eq.b64 %p1, %rd2, 0;
93+
; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd1;
94+
; CHECK-NEXT: selp.f32 %r3, %r1, %r2, %p1;
95+
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
96+
; CHECK-NEXT: ret;
97+
%e = extractelement <2 x float> %a, i64 %idx
98+
ret float %e
99+
}
89 100

90 101
define <2 x float> @test_fadd(<2 x float> %a, <2 x float> %b) #0 {
91 102
; CHECK-NOF32X2-LABEL: test_fadd(

0 commit comments

Comments
 (0)