diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index d4f0cc94ebcf9..3daf25d551520 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -680,6 +680,8 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, // No support for these operations with v2f32. setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Expand); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f32, Expand); + // Need custom lowering in case the index is dynamic (select between lanes). + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom); // Custom conversions to/from v2i8. setOperationAction(ISD::BITCAST, MVT::v2i8, Custom); diff --git a/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll index b84a0ec7155e2..47b7c9a09be4a 100644 --- a/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll @@ -79,13 +79,24 @@ define float @test_extract_1(<2 x float> %a) #0 { ret float %e } -; NOTE: disabled as -O3 miscompiles this into pointer arithmetic on -; test_extract_i_param_0 where the symbol's address is not taken first (that -; is, moved to a temporary) -; define float @test_extract_i(<2 x float> %a, i64 %idx) #0 { -; %e = extractelement <2 x float> %a, i64 %idx -; ret float %e -; } +define float @test_extract_i(<2 x float> %a, i64 %idx) #0 { +; CHECK-LABEL: test_extract_i( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b32 %r<4>; +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd2, [test_extract_i_param_1]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_extract_i_param_0]; +; CHECK-NEXT: setp.eq.b64 %p1, %rd2, 0; +; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd1; +; CHECK-NEXT: selp.f32 %r3, %r1, %r2, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r3; +; CHECK-NEXT: ret; + %e = extractelement <2 x float> %a, i64 %idx + ret float %e +} define <2 
x float> @test_fadd(<2 x float> %a, <2 x float> %b) #0 { ; CHECK-NOF32X2-LABEL: test_fadd(