|
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| 2 | +; RUN: llc < %s -mcpu=sm_100 -mattr=+ptx88 -O3 -disable-post-ra \ |
| 3 | +; RUN: -frame-pointer=all -verify-machineinstrs \ |
| 4 | +; RUN: | FileCheck %s --check-prefixes=CHECK-F32X2 |
| 5 | +; RUN: %if ptxas-12.7 %{ \ |
| 6 | +; RUN: llc < %s -mcpu=sm_100 -mattr=+ptx88 -O3 -disable-post-ra \ |
| 7 | +; RUN: -frame-pointer=all -verify-machineinstrs | %ptxas-verify -arch=sm_100 \ |
| 8 | +; RUN: %} |
| 9 | +target triple = "nvptx64-nvidia-cuda" |
| 10 | + |
| 11 | +; f32x2 has no packed fdiv, so the division is scalarized and the results are |
| 12 | +; rebuilt with BUILD_VECTORs; those should fold into one st.global.v8.b32 store. |
| 13 | +define void @writevec(<8 x float> %v1, <8 x float> %v2, ptr addrspace(1) %p) { |
| 14 | +; CHECK-F32X2-LABEL: writevec( |
| 15 | +; CHECK-F32X2: { |
| 16 | +; CHECK-F32X2-NEXT: .reg .b32 %r<25>; |
| 17 | +; CHECK-F32X2-NEXT: .reg .b64 %rd<2>; |
| 18 | +; CHECK-F32X2-EMPTY: |
| 19 | +; CHECK-F32X2-NEXT: // %bb.0: |
| 20 | +; CHECK-F32X2-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [writevec_param_0]; |
| 21 | +; CHECK-F32X2-NEXT: ld.param.v4.b32 {%r5, %r6, %r7, %r8}, [writevec_param_0+16]; |
| 22 | +; CHECK-F32X2-NEXT: ld.param.v4.b32 {%r9, %r10, %r11, %r12}, [writevec_param_1+16]; |
| 23 | +; CHECK-F32X2-NEXT: div.rn.f32 %r13, %r8, %r12; |
| 24 | +; CHECK-F32X2-NEXT: div.rn.f32 %r14, %r7, %r11; |
| 25 | +; CHECK-F32X2-NEXT: div.rn.f32 %r15, %r6, %r10; |
| 26 | +; CHECK-F32X2-NEXT: div.rn.f32 %r16, %r5, %r9; |
| 27 | +; CHECK-F32X2-NEXT: ld.param.v4.b32 {%r17, %r18, %r19, %r20}, [writevec_param_1]; |
| 28 | +; CHECK-F32X2-NEXT: div.rn.f32 %r21, %r4, %r20; |
| 29 | +; CHECK-F32X2-NEXT: div.rn.f32 %r22, %r3, %r19; |
| 30 | +; CHECK-F32X2-NEXT: div.rn.f32 %r23, %r2, %r18; |
| 31 | +; CHECK-F32X2-NEXT: div.rn.f32 %r24, %r1, %r17; |
| 32 | +; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [writevec_param_2]; |
| 33 | +; CHECK-F32X2-NEXT: st.global.v8.b32 [%rd1], {%r24, %r23, %r22, %r21, %r16, %r15, %r14, %r13}; |
| 34 | +; CHECK-F32X2-NEXT: ret; |
| 35 | + %v = fdiv <8 x float> %v1, %v2 ; expected to lower to eight scalar div.rn.f32 ops (see CHECK lines above) |
| 36 | + store <8 x float> %v, ptr addrspace(1) %p, align 32 ; expected to be emitted as one st.global.v8.b32 of the eight quotients |
| 37 | + ret void |
| 38 | +} |
0 commit comments