|
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 |
| 2 | +; RUN: llc < %s -march=nvptx64 -mcpu=sm_50 -mattr=+ptx82 | FileCheck %s -check-prefixes=NOMASK |
| 3 | +; RUN: %if ptxas-sm_50 && ptxas-isa-8.2 %{ llc < %s -march=nvptx64 -mcpu=sm_50 -mattr=+ptx82 | %ptxas-verify -arch=sm_50 %} |
| 4 | +; RUN: llc < %s -march=nvptx64 -mcpu=sm_50 -mattr=+ptx83 | FileCheck %s -check-prefixes=MASK |
| 5 | +; RUN: %if ptxas-sm_50 && ptxas-isa-8.3 %{ llc < %s -march=nvptx64 -mcpu=sm_50 -mattr=+ptx83 | %ptxas-verify -arch=sm_50 %} |
| 6 | + |
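| 7 | +; With PTX ISA 8.2 (NOMASK prefix) no used_bytes_mask pragma should be emitted; with PTX ISA 8.3 (MASK prefix) the pragma is expected right before the masked vector load. Both runs target sm_50. |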
| 8 | + |
| 9 | +define i32 @global_8xi32(ptr %a, ptr %b) { ; NOTE(review): name says 8xi32 but the body loads <4 x i32>, and %b is never used -- confirm intent
| 10 | +; NOMASK-LABEL: global_8xi32( |
| 11 | +; NOMASK: { |
| 12 | +; NOMASK-NEXT: .reg .b32 %r<5>; |
| 13 | +; NOMASK-NEXT: .reg .b64 %rd<2>; |
| 14 | +; NOMASK-EMPTY: |
| 15 | +; NOMASK-NEXT: // %bb.0: |
| 16 | +; NOMASK-NEXT: ld.param.b64 %rd1, [global_8xi32_param_0]; |
| 17 | +; NOMASK-NEXT: ld.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1]; |
| 18 | +; NOMASK-NEXT: st.param.b32 [func_retval0], %r1; |
| 19 | +; NOMASK-NEXT: ret; |
| 20 | +; |
| 21 | +; MASK-LABEL: global_8xi32( |
| 22 | +; MASK: { |
| 23 | +; MASK-NEXT: .reg .b32 %r<5>; |
| 24 | +; MASK-NEXT: .reg .b64 %rd<2>; |
| 25 | +; MASK-EMPTY: |
| 26 | +; MASK-NEXT: // %bb.0: |
| 27 | +; MASK-NEXT: ld.param.b64 %rd1, [global_8xi32_param_0]; |
| 28 | +; MASK-NEXT: .pragma "used_bytes_mask 0xfff"; |
| 29 | +; MASK-NEXT: ld.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1]; |
| 30 | +; MASK-NEXT: st.param.b32 [func_retval0], %r1; |
| 31 | +; MASK-NEXT: ret; |
| 32 | + %a.load = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 16 %a, <4 x i1> <i1 true, i1 true, i1 true, i1 false>, <4 x i32> poison) ; lanes 0-2 enabled, lane 3 masked off: 3 x i32 = 12 bytes, matching the 12 set bits of 0xfff in the MASK check
| 33 | + %first = extractelement <4 x i32> %a.load, i32 0 ; only lane 0 of the loaded vector is returned
| 34 | + ret i32 %first |
| 35 | +} |
| 36 | +declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr , <4 x i1>, <4 x i32>) |
0 commit comments