|
33 | 33 | end |
34 | 34 | Base.retry_load_extensions() # Potentially needed to load the extensions after the packages have been filtered. |
35 | 35 |
|
36 | | - |
37 | 36 | @static for package in TEST_PACKAGES |
38 | 37 | FloatDefault = (package == PKG_METAL) ? Float32 : Float64 # Metal does not support Float64 |
39 | 38 |
|
@@ -655,14 +654,14 @@ eval(:( |
655 | 654 | return |
656 | 655 | end |
657 | 656 | @static if $package == $PKG_CUDA |
658 | | - @test occursin("loopoffset = ((CUDA.blockIdx()).z - 1) * 3", kernel) |
| 657 | + @test occursin("loopoffset = (((CUDA.blockIdx()).z - 1) * 3 + (ranges[3])[1]) - 1", kernel) # Alternative: @test occursin("loopoffset = ((CUDA.blockIdx()).z - 1) * 3", kernel) |
659 | 658 | elseif $package == $PKG_AMDGPU |
660 | | - @test occursin("loopoffset = ((AMDGPU.workgroupIdx()).z - 1) * 3", kernel) |
| 659 | + @test occursin("loopoffset = (((AMDGPU.workgroupIdx()).z - 1) * 3 + (ranges[3])[1]) - 1", kernel) # Alternative: @test occursin("loopoffset = ((AMDGPU.workgroupIdx()).z - 1) * 3", kernel) |
661 | 660 | elseif $package == $PKG_METAL |
662 | | - @test occursin("loopoffset = ((Metal.threadgroup_position_in_grid_3d()).z - 1) * 3", kernel) |
| 661 | + @test occursin("loopoffset = (((Metal.threadgroup_position_in_grid_3d()).z - 1) * 3 + (ranges[3])[1]) - 1", kernel) # Alternative: @test occursin("loopoffset = ((Metal.threadgroup_position_in_grid_3d()).z - 1) * 3", kernel) |
663 | 662 | end |
664 | 663 | @test occursin("for i = -4:3", kernel) |
665 | | - @test occursin("tz = i + loopoffset", kernel) |
| 664 | + @test occursin("iz = i + loopoffset", kernel) # Alternative: @test occursin("tz = i + loopoffset", kernel) |
666 | 665 | @test occursin("A2[ix - 1, iy + 2, iz] = (A_ixm1_iyp2_izp3 - 2A_ixm3_iyp2_iz) + A_ixm4_iyp2_izm2", kernel) |
667 | 666 | @test occursin("B2[ix + 1, iy + 2, iz + 1] = (B[ix + 1, iy + 2, iz + 2] - 2 * B[ix - 3, iy + 2, iz + 1]) + B[ix - 4, iy + 2, iz + 1]", kernel) |
668 | 667 | @test occursin("C2[ix - 1, iy + 2, iz - 1] = (C_ixm1_iyp2_iz - 2C_ixm1_iyp2_izm1) + C_ixm1_iyp2_izm1", kernel) |
@@ -712,14 +711,14 @@ eval(:( |
712 | 711 | return |
713 | 712 | end |
714 | 713 | @static if $package == $PKG_CUDA |
715 | | - @test occursin("loopoffset = ((CUDA.blockIdx()).z - 1) * 3", kernel) |
| 714 | + @test occursin("loopoffset = (((CUDA.blockIdx()).z - 1) * 3 + (ranges[3])[1]) - 1", kernel) # Alternative: @test occursin("loopoffset = ((CUDA.blockIdx()).z - 1) * 3", kernel) |
716 | 715 | elseif $package == $PKG_AMDGPU |
717 | | - @test occursin("loopoffset = ((AMDGPU.workgroupIdx()).z - 1) * 3", kernel) |
| 716 | + @test occursin("loopoffset = (((AMDGPU.workgroupIdx()).z - 1) * 3 + (ranges[3])[1]) - 1", kernel) # Alternative: @test occursin("loopoffset = ((AMDGPU.workgroupIdx()).z - 1) * 3", kernel) |
718 | 717 | elseif $package == $PKG_METAL |
719 | | - @test occursin("loopoffset = ((Metal.threadgroup_position_in_grid_3d()).z - 1) * 3", kernel) |
| 718 | + @test occursin("loopoffset = (((Metal.threadgroup_position_in_grid_3d()).z - 1) * 3 + (ranges[3])[1]) - 1", kernel) # Alternative: @test occursin("loopoffset = ((Metal.threadgroup_position_in_grid_3d()).z - 1) * 3", kernel) |
720 | 719 | end |
721 | 720 | @test occursin("for i = -4:3", kernel) |
722 | | - @test occursin("tz = i + loopoffset", kernel) |
| 721 | + @test occursin("iz = i + loopoffset", kernel) # Alternative: @test occursin("tz = i + loopoffset", kernel) |
723 | 722 | @test occursin("A2[ix - 1, iy + 2, iz] = (A_ixm1_iyp2_izp3 - 2A_ixm3_iyp2_iz) + A_ixm4_iyp2_izm2", kernel) |
724 | 723 | @test occursin("B2[ix + 1, iy + 2, iz + 1] = (B[ix + 1, iy + 2, iz + 2] - 2 * B[ix - 3, iy + 2, iz + 1]) + B[ix - 4, iy + 2, iz + 1]", kernel) |
725 | 724 | @test occursin("C2[ix - 1, iy + 2, iz - 1] = (C_ixm1_iyp2_iz - 2C_ixm1_iyp2_izm1) + C_ixm1_iyp2_izm1", kernel) |
|
0 commit comments