Skip to content

Commit 8cd79d7

Browse files
committed
Add new test to confirm no pragma on older ptx version
1 parent e180119 commit 8cd79d7

File tree

1 file changed

+36
-0
lines changed

1 file changed

+36
-0
lines changed
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc < %s -march=nvptx64 -mcpu=sm_50 -mattr=+ptx82 | FileCheck %s -check-prefixes=NOMASK
; RUN: %if ptxas-sm_50 && ptxas-isa-8.2 %{ llc < %s -march=nvptx64 -mcpu=sm_50 -mattr=+ptx82 | %ptxas-verify -arch=sm_50 %}
; RUN: llc < %s -march=nvptx64 -mcpu=sm_50 -mattr=+ptx83 | FileCheck %s -check-prefixes=MASK
; RUN: %if ptxas-sm_50 && ptxas-isa-8.3 %{ llc < %s -march=nvptx64 -mcpu=sm_50 -mattr=+ptx83 | %ptxas-verify -arch=sm_50 %}

; Both configurations target sm_50 and differ only in PTX version. With the
; older version (ptx82) we shouldn't be seeing a used bytes mask pragma; with
; ptx83 the pragma is expected in front of the vector load.

; Performs a masked <4 x i32> load from %a (16-byte aligned) with lanes 0-2
; enabled and lane 3 disabled, then returns lane 0 of the result.
;
; Expected output per configuration (see the check lines in the body):
;   * ptx82 run: a plain ld.v4.b32 with no pragma in front of it.
;   * ptx83 run: the same ld.v4.b32, immediately preceded by
;     .pragma "used_bytes_mask 0xfff". 0xfff has twelve bits set, which lines
;     up with the 12 bytes covered by the three enabled i32 lanes (presumably
;     one mask bit per byte -- confirm against the PTX ISA documentation).
;
; NOTE(review): %b is never used in the body, and the name says "8xi32" while
; the loaded type is <4 x i32>; both look like leftovers -- confirm before
; renaming, since the label and parameter names appear in the check lines.
define i32 @global_8xi32(ptr %a, ptr %b) {
; NOMASK-LABEL: global_8xi32(
; NOMASK: {
; NOMASK-NEXT: .reg .b32 %r<5>;
; NOMASK-NEXT: .reg .b64 %rd<2>;
; NOMASK-EMPTY:
; NOMASK-NEXT: // %bb.0:
; NOMASK-NEXT: ld.param.b64 %rd1, [global_8xi32_param_0];
; NOMASK-NEXT: ld.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
; NOMASK-NEXT: st.param.b32 [func_retval0], %r1;
; NOMASK-NEXT: ret;
;
; MASK-LABEL: global_8xi32(
; MASK: {
; MASK-NEXT: .reg .b32 %r<5>;
; MASK-NEXT: .reg .b64 %rd<2>;
; MASK-EMPTY:
; MASK-NEXT: // %bb.0:
; MASK-NEXT: ld.param.b64 %rd1, [global_8xi32_param_0];
; MASK-NEXT: .pragma "used_bytes_mask 0xfff";
; MASK-NEXT: ld.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
; MASK-NEXT: st.param.b32 [func_retval0], %r1;
; MASK-NEXT: ret;
  %a.load = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 16 %a, <4 x i1> <i1 true, i1 true, i1 true, i1 false>, <4 x i32> poison)
  %first = extractelement <4 x i32> %a.load, i32 0
  ret i32 %first
}
; Masked vector load intrinsic used by the test above: (pointer, lane mask,
; passthru vector) -> loaded vector.
declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, <4 x i1>, <4 x i32>)

0 commit comments

Comments
 (0)