|
1 | 1 | # RUN: %python %s --target=cuda --tests=suld,sust,tex,tld4 --gen-list=%t.list > %t-cuda.ll |
2 | | -# RUN: llc -mcpu=sm_20 %t-cuda.ll -verify-machineinstrs -o - | FileCheck %t-cuda.ll |
3 | | -# RUN: %if ptxas %{ llc -mcpu=sm_20 %t-cuda.ll -verify-machineinstrs -o - | %ptxas-verify %} |
| 2 | +# RUN: llc -mcpu=sm_60 -mattr=+ptx43 %t-cuda.ll -verify-machineinstrs -o - | FileCheck %t-cuda.ll --check-prefixes=CHECK,CHECK-CUDA |
| 3 | +# RUN: %if ptxas %{ llc -mcpu=sm_60 -mattr=+ptx43 %t-cuda.ll -verify-machineinstrs -o - | %ptxas-verify %} |
4 | 4 |
|
5 | 5 | # We only need to run this a second time for texture tests, because |
6 | 6 | # there is a difference between unified and non-unified intrinsics. |
7 | 7 | # |
8 | 8 | # RUN: %python %s --target=nvcl --tests=suld,sust,tex,tld4 --gen-list-append --gen-list=%t.list > %t-nvcl.ll |
9 | | -# RUN: llc %t-nvcl.ll -verify-machineinstrs -o - | FileCheck %t-nvcl.ll |
| 9 | +# RUN: llc %t-nvcl.ll -verify-machineinstrs -o - | FileCheck %t-nvcl.ll --check-prefixes=CHECK,CHECK-NVCL |
10 | 10 | # RUN: %if ptxas %{ llc %t-nvcl.ll -verify-machineinstrs -o - | %ptxas-verify %} |
11 | 11 |
|
12 | 12 | # Verify that all instructions and intrinsics defined in TableGen |
@@ -115,6 +115,15 @@ def get_llvm_value_type(vec, ty_ptx): |
115 | 115 | return value[vec].format(ty=ty) |
116 | 116 |
|
117 | 117 |
|
# Number of identifiers handed out so far; 0 means none have been issued.
id_counter = 0


def get_table_gen_id():
    """Return the next identifier in the sequence 1, 2, 3, ...

    Every call advances the module-level ``id_counter`` by one and returns
    the updated value, so repeated calls within one run never yield the
    same number. The test generators in this file pass the result as the
    ``reg_id`` template parameter.
    """
    global id_counter
    next_id = id_counter + 1
    id_counter = next_id
    return next_id
| 125 | + |
| 126 | + |
118 | 127 | def gen_triple(target): |
119 | 128 | if target == "cuda": |
120 | 129 | print('target triple = "nvptx64-unknown-cuda"\n') |
@@ -260,8 +269,9 @@ def gen_suld_tests(target, global_surf): |
260 | 269 | ret void |
261 | 270 | } |
262 | 271 | ; CHECK-LABEL: .entry ${test_name}_global |
263 | | - ; CHECK: ${instruction} ${reg_ret}, [${global_surf}, ${reg_access}] |
264 | | - ; |
| 272 | + ; CHECK-CUDA: mov.u64 [[REG${reg_id}:%.*]], ${global_surf} |
| 273 | + ; CHECK-CUDA: ${instruction} ${reg_ret}, [[[REG${reg_id}]], ${reg_access}] |
| 274 | + ; CHECK-NVCL: ${instruction} ${reg_ret}, [${global_surf}, ${reg_access}] |
265 | 275 | define void @${test_name}_global(${retty}* %ret, ${access}) { |
266 | 276 | %gs = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_surf}) |
267 | 277 | %val = tail call ${retty} @${intrinsic}(i64 %gs, ${access}) |
@@ -304,6 +314,7 @@ def gen_suld_tests(target, global_surf): |
304 | 314 | "reg_ret": get_ptx_vec_reg(vec, dtype), |
305 | 315 | "reg_surf": get_ptx_surface(target), |
306 | 316 | "reg_access": get_ptx_surface_access(geom), |
| 317 | + "reg_id": get_table_gen_id(), |
307 | 318 | } |
308 | 319 | gen_test(template, params) |
309 | 320 | generated_items.append((params["intrinsic"], params["instruction"])) |
@@ -353,8 +364,9 @@ def gen_sust_tests(target, global_surf): |
353 | 364 | ret void |
354 | 365 | } |
355 | 366 | ; CHECK-LABEL: .entry ${test_name}_global |
356 | | - ; CHECK: ${instruction} [${global_surf}, ${reg_access}], ${reg_value} |
357 | | - ; |
| 367 | + ; CHECK-CUDA: mov.u64 [[REG${reg_id}:%.*]], ${global_surf} |
| 368 | + ; CHECK-CUDA: ${instruction} [[[REG${reg_id}]], ${reg_access}], ${reg_value} |
| 369 | + ; CHECK-NVCL: ${instruction} [${global_surf}, ${reg_access}], ${reg_value} |
358 | 370 | define void @${test_name}_global(${value}, ${access}) { |
359 | 371 | %gs = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_surf}) |
360 | 372 | tail call void @${intrinsic}(i64 %gs, ${access}, ${value}) |
@@ -408,6 +420,7 @@ def gen_sust_tests(target, global_surf): |
408 | 420 | "reg_value": get_ptx_vec_reg(vec, ctype), |
409 | 421 | "reg_surf": get_ptx_surface(target), |
410 | 422 | "reg_access": get_ptx_surface_access(geom), |
| 423 | + "reg_id": get_table_gen_id(), |
411 | 424 | } |
412 | 425 | gen_test(template, params) |
413 | 426 | generated_items.append((params["intrinsic"], params["instruction"])) |
@@ -614,7 +627,9 @@ def gen_tex_tests(target, global_tex, global_sampler): |
614 | 627 | ret void |
615 | 628 | } |
616 | 629 | ; CHECK-LABEL: .entry ${test_name}_global |
617 | | - ; CHECK: ${instruction} ${ptx_ret}, [${global_tex}, ${ptx_global_sampler} ${ptx_access}] |
| 630 | + ; CHECK-CUDA: mov.u64 [[REG${reg_id}:%.*]], ${global_tex} |
| 631 | + ; CHECK-CUDA: ${instruction} ${ptx_ret}, [[[REG${reg_id}]], ${ptx_global_sampler} ${ptx_access}] |
| 632 | + ; CHECK-NVCL: ${instruction} ${ptx_ret}, [${global_tex}, ${ptx_global_sampler} ${ptx_access}] |
618 | 633 | define void @${test_name}_global(${retty}* %ret, ${access}) { |
619 | 634 | %gt = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_tex}) |
620 | 635 | ${get_sampler_handle} |
@@ -656,8 +671,8 @@ def gen_tex_tests(target, global_tex, global_sampler): |
656 | 671 |
|
657 | 672 | # FIXME: missing intrinsics. |
658 | 673 | # Support for tex.grad.{cube, acube} introduced in PTX ISA version |
659 | | - # 4.3. |
660 | | - if mipmap == "grad" and geom in ("cube", "acube"): |
| 674 | + # 4.3, currently supported only in unified mode. |
| 675 | + if not is_unified(target) and mipmap == "grad" and geom in ("cube", "acube"): |
661 | 676 | continue |
662 | 677 |
|
663 | 678 | # The instruction returns a two-element vector for destination |
@@ -698,6 +713,7 @@ def gen_tex_tests(target, global_tex, global_sampler): |
698 | 713 | "ptx_tex": get_ptx_texture(target), |
699 | 714 | "ptx_access": get_ptx_texture_access(geom, ctype), |
700 | 715 | "ptx_global_sampler": get_ptx_global_sampler(target, global_sampler), |
| 716 | + "reg_id": get_table_gen_id(), |
701 | 717 | } |
702 | 718 | gen_test(template, params) |
703 | 719 | generated_items.append((params["intrinsic"], params["instruction"])) |
@@ -798,7 +814,9 @@ def gen_tld4_tests(target, global_tex, global_sampler): |
798 | 814 | ret void |
799 | 815 | } |
800 | 816 | ; CHECK-LABEL: .entry ${test_name}_global |
801 | | - ; CHECK: ${instruction} ${ptx_ret}, [${global_tex}, ${ptx_global_sampler} ${ptx_access}] |
| 817 | + ; CHECK-CUDA: mov.u64 [[REG${reg_id}:%.*]], ${global_tex} |
| 818 | + ; CHECK-CUDA: ${instruction} ${ptx_ret}, [[[REG${reg_id}]], ${ptx_global_sampler} ${ptx_access}] |
| 819 | + ; CHECK-NVCL: ${instruction} ${ptx_ret}, [${global_tex}, ${ptx_global_sampler} ${ptx_access}] |
802 | 820 | define void @${test_name}_global(${retty}* %ret, ${access}) { |
803 | 821 | %gt = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_tex}) |
804 | 822 | ${get_sampler_handle} |
@@ -844,6 +862,7 @@ def gen_tld4_tests(target, global_tex, global_sampler): |
844 | 862 | "ptx_tex": get_ptx_texture(target), |
845 | 863 | "ptx_access": get_ptx_tld4_access(geom), |
846 | 864 | "ptx_global_sampler": get_ptx_global_sampler(target, global_sampler), |
| 865 | + "reg_id": get_table_gen_id(), |
847 | 866 | } |
848 | 867 | gen_test(template, params) |
849 | 868 | generated_items.append((params["intrinsic"], params["instruction"])) |
|
0 commit comments