|
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| 2 | +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12-SDAG %s |
| 3 | +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12-GISEL %s |
| 4 | + |
; Intrinsic under test. As declared here it takes (i64, float, i8, <3 x float>,
; <3 x float>, i32, <4 x i32>) and returns a struct of one <10 x i32> and two
; <3 x float> values.
| 5 | +declare {<10 x i32>, <3 x float>, <3 x float>} @llvm.amdgcn.image.bvh8.intersect.ray(i64, float, i8, <3 x float>, <3 x float>, i32, <4 x i32>) |
| 6 | + |
; Test: select llvm.amdgcn.image.bvh8.intersect.ray with the i8 operand set to
; the constant 0. The scalar origin/direction arguments are packed into
; <3 x float> vectors for the call; field 0 of the result is returned as
; <10 x float>, and fields 1 and 2 are stored through %origin and %dir.
; In the expected output for both selectors the constant is materialized with
; "v_mov_b32_e32 v3, 0", and the two stores read the same VGPR ranges that were
; passed as the origin/direction operands of image_bvh8_intersect_ray.
; NOTE(review): the i8 operand is presumably the instance mask -- confirm
; against the intrinsic definition.
| 7 | +define amdgpu_ps <10 x float> @image_bvh8_intersect_ray(i64 %node_ptr, float %ray_extent, float %ray_origin_x, float %ray_origin_y, float %ray_origin_z, float %ray_dir_x, float %ray_dir_y, float %ray_dir_z, i32 %offset, <4 x i32> inreg %tdescr, ptr addrspace(1) %origin, ptr addrspace(1) %dir) { |
| 8 | +; GFX12-SDAG-LABEL: image_bvh8_intersect_ray: |
| 9 | +; GFX12-SDAG: ; %bb.0: ; %main_body |
| 10 | +; GFX12-SDAG-NEXT: v_dual_mov_b32 v21, v8 :: v_dual_mov_b32 v20, v7 |
| 11 | +; GFX12-SDAG-NEXT: v_dual_mov_b32 v19, v6 :: v_dual_mov_b32 v18, v5 |
| 12 | +; GFX12-SDAG-NEXT: v_dual_mov_b32 v17, v4 :: v_dual_mov_b32 v16, v3 |
| 13 | +; GFX12-SDAG-NEXT: v_mov_b32_e32 v3, 0 |
| 14 | +; GFX12-SDAG-NEXT: image_bvh8_intersect_ray v[0:9], [v[0:1], v[2:3], v[16:18], v[19:21], v9], s[0:3] |
| 15 | +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 |
| 16 | +; GFX12-SDAG-NEXT: global_store_b96 v[10:11], v[16:18], off |
| 17 | +; GFX12-SDAG-NEXT: global_store_b96 v[12:13], v[19:21], off |
| 18 | +; GFX12-SDAG-NEXT: ; return to shader part epilog |
| 19 | +; |
| 20 | +; GFX12-GISEL-LABEL: image_bvh8_intersect_ray: |
| 21 | +; GFX12-GISEL: ; %bb.0: ; %main_body |
| 22 | +; GFX12-GISEL-NEXT: v_dual_mov_b32 v14, v3 :: v_dual_mov_b32 v15, v4 |
| 23 | +; GFX12-GISEL-NEXT: v_dual_mov_b32 v16, v5 :: v_dual_mov_b32 v17, v6 |
| 24 | +; GFX12-GISEL-NEXT: v_dual_mov_b32 v18, v7 :: v_dual_mov_b32 v19, v8 |
| 25 | +; GFX12-GISEL-NEXT: v_mov_b32_e32 v3, 0 |
| 26 | +; GFX12-GISEL-NEXT: image_bvh8_intersect_ray v[0:9], [v[0:1], v[2:3], v[14:16], v[17:19], v9], s[0:3] |
| 27 | +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| 28 | +; GFX12-GISEL-NEXT: global_store_b96 v[10:11], v[14:16], off |
| 29 | +; GFX12-GISEL-NEXT: global_store_b96 v[12:13], v[17:19], off |
| 30 | +; GFX12-GISEL-NEXT: ; return to shader part epilog |
| 31 | +main_body: |
; Assemble the <3 x float> origin and direction vectors from the scalar
; arguments, one element at a time.
| 32 | + %ray_origin0 = insertelement <3 x float> poison, float %ray_origin_x, i32 0 |
| 33 | + %ray_origin1 = insertelement <3 x float> %ray_origin0, float %ray_origin_y, i32 1 |
| 34 | + %ray_origin = insertelement <3 x float> %ray_origin1, float %ray_origin_z, i32 2 |
| 35 | + %ray_dir0 = insertelement <3 x float> poison, float %ray_dir_x, i32 0 |
| 36 | + %ray_dir1 = insertelement <3 x float> %ray_dir0, float %ray_dir_y, i32 1 |
| 37 | + %ray_dir = insertelement <3 x float> %ray_dir1, float %ray_dir_z, i32 2 |
; Call under test; the third operand is the immediate i8 0.
| 38 | + %v = call {<10 x i32>, <3 x float>, <3 x float>} @llvm.amdgcn.image.bvh8.intersect.ray(i64 %node_ptr, float %ray_extent, i8 0, <3 x float> %ray_origin, <3 x float> %ray_dir, i32 %offset, <4 x i32> %tdescr) |
; Field 0 becomes the return value (bitcast to float lanes); fields 1 and 2
; are written to the %origin and %dir pointers so all three results are used.
| 39 | + %a = extractvalue {<10 x i32>, <3 x float>, <3 x float>} %v, 0 |
| 40 | + %r = bitcast <10 x i32> %a to <10 x float> |
| 41 | + %o = extractvalue {<10 x i32>, <3 x float>, <3 x float>} %v, 1 |
| 42 | + store <3 x float> %o, ptr addrspace(1) %origin |
| 43 | + %d = extractvalue {<10 x i32>, <3 x float>, <3 x float>} %v, 2 |
| 44 | + store <3 x float> %d, ptr addrspace(1) %dir |
| 45 | + ret <10 x float> %r |
| 46 | +} |
| 47 | + |
; Test: same IR as @image_bvh8_intersect_ray except that the i8 operand of
; llvm.amdgcn.image.bvh8.intersect.ray is the constant 1 instead of 0. The
; expected output for both selectors differs only in the materialized constant
; ("v_mov_b32_e32 v3, 1"). Field 0 of the result is returned as <10 x float>,
; and fields 1 and 2 are stored through %origin and %dir.
; NOTE(review): the i8 operand is presumably the instance mask -- confirm
; against the intrinsic definition.
| 48 | +define amdgpu_ps <10 x float> @image_bvh8_intersect_ray_1(i64 %node_ptr, float %ray_extent, float %ray_origin_x, float %ray_origin_y, float %ray_origin_z, float %ray_dir_x, float %ray_dir_y, float %ray_dir_z, i32 %offset, <4 x i32> inreg %tdescr, ptr addrspace(1) %origin, ptr addrspace(1) %dir) { |
| 49 | +; GFX12-SDAG-LABEL: image_bvh8_intersect_ray_1: |
| 50 | +; GFX12-SDAG: ; %bb.0: ; %main_body |
| 51 | +; GFX12-SDAG-NEXT: v_dual_mov_b32 v21, v8 :: v_dual_mov_b32 v20, v7 |
| 52 | +; GFX12-SDAG-NEXT: v_dual_mov_b32 v19, v6 :: v_dual_mov_b32 v18, v5 |
| 53 | +; GFX12-SDAG-NEXT: v_dual_mov_b32 v17, v4 :: v_dual_mov_b32 v16, v3 |
| 54 | +; GFX12-SDAG-NEXT: v_mov_b32_e32 v3, 1 |
| 55 | +; GFX12-SDAG-NEXT: image_bvh8_intersect_ray v[0:9], [v[0:1], v[2:3], v[16:18], v[19:21], v9], s[0:3] |
| 56 | +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 |
| 57 | +; GFX12-SDAG-NEXT: global_store_b96 v[10:11], v[16:18], off |
| 58 | +; GFX12-SDAG-NEXT: global_store_b96 v[12:13], v[19:21], off |
| 59 | +; GFX12-SDAG-NEXT: ; return to shader part epilog |
| 60 | +; |
| 61 | +; GFX12-GISEL-LABEL: image_bvh8_intersect_ray_1: |
| 62 | +; GFX12-GISEL: ; %bb.0: ; %main_body |
| 63 | +; GFX12-GISEL-NEXT: v_dual_mov_b32 v14, v3 :: v_dual_mov_b32 v15, v4 |
| 64 | +; GFX12-GISEL-NEXT: v_dual_mov_b32 v16, v5 :: v_dual_mov_b32 v17, v6 |
| 65 | +; GFX12-GISEL-NEXT: v_dual_mov_b32 v18, v7 :: v_dual_mov_b32 v19, v8 |
| 66 | +; GFX12-GISEL-NEXT: v_mov_b32_e32 v3, 1 |
| 67 | +; GFX12-GISEL-NEXT: image_bvh8_intersect_ray v[0:9], [v[0:1], v[2:3], v[14:16], v[17:19], v9], s[0:3] |
| 68 | +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| 69 | +; GFX12-GISEL-NEXT: global_store_b96 v[10:11], v[14:16], off |
| 70 | +; GFX12-GISEL-NEXT: global_store_b96 v[12:13], v[17:19], off |
| 71 | +; GFX12-GISEL-NEXT: ; return to shader part epilog |
| 72 | +main_body: |
; Assemble the <3 x float> origin and direction vectors from the scalar
; arguments, one element at a time.
| 73 | + %ray_origin0 = insertelement <3 x float> poison, float %ray_origin_x, i32 0 |
| 74 | + %ray_origin1 = insertelement <3 x float> %ray_origin0, float %ray_origin_y, i32 1 |
| 75 | + %ray_origin = insertelement <3 x float> %ray_origin1, float %ray_origin_z, i32 2 |
| 76 | + %ray_dir0 = insertelement <3 x float> poison, float %ray_dir_x, i32 0 |
| 77 | + %ray_dir1 = insertelement <3 x float> %ray_dir0, float %ray_dir_y, i32 1 |
| 78 | + %ray_dir = insertelement <3 x float> %ray_dir1, float %ray_dir_z, i32 2 |
; Call under test; the third operand is the immediate i8 1.
| 79 | + %v = call {<10 x i32>, <3 x float>, <3 x float>} @llvm.amdgcn.image.bvh8.intersect.ray(i64 %node_ptr, float %ray_extent, i8 1, <3 x float> %ray_origin, <3 x float> %ray_dir, i32 %offset, <4 x i32> %tdescr) |
; Field 0 becomes the return value (bitcast to float lanes); fields 1 and 2
; are written to the %origin and %dir pointers so all three results are used.
| 80 | + %a = extractvalue {<10 x i32>, <3 x float>, <3 x float>} %v, 0 |
| 81 | + %r = bitcast <10 x i32> %a to <10 x float> |
| 82 | + %o = extractvalue {<10 x i32>, <3 x float>, <3 x float>} %v, 1 |
| 83 | + store <3 x float> %o, ptr addrspace(1) %origin |
| 84 | + %d = extractvalue {<10 x i32>, <3 x float>, <3 x float>} %v, 2 |
| 85 | + store <3 x float> %d, ptr addrspace(1) %dir |
| 86 | + ret <10 x float> %r |
| 87 | +} |
0 commit comments