|
135 | 135 | @test sum2_10turbo(A) ≈ sum(A) |
136 | 136 | end |
137 | 137 | end |
| 138 | + |
| 139 | +# Test for Issue #543: W=1 nested VecUnroll store on ARM |
| 140 | +# This tests the case where vector width is 1 (scalar) with nested unrolling |
# Plain-loop reference kernel for the Issue #543 regression test.
#
# For every index `(lane, row, col)` of `data_out`, stores
# `sum(matrix[col, k] * data_in[lane, row, k] for k in axes(matrix, 2))`,
# accumulated left to right starting from `zero(eltype(data_out))`.
# Mutates `data_out` in place and returns `nothing`.
function issue543_noavx!(data_out, matrix, data_in)
    T = eltype(data_out)
    for col in axes(data_out, 3)
        for row in axes(data_out, 2)
            # The first dimension is the innermost loop.
            for lane in axes(data_out, 1)
                acc = zero(T)
                for k in axes(matrix, 2)
                    acc += matrix[col, k] * data_in[lane, row, k]
                end
                data_out[lane, row, col] = acc
            end
        end
    end
    return nothing
end
| 151 | + |
# `@turbo` variant of the Issue #543 kernel, compared against `issue543_noavx!`
# in the accompanying testset.
#
# The loop nest and body mirror the plain-loop version: for every index
# `(lane, row, col)` of `data_out` it accumulates
# `matrix[col, k] * data_in[lane, row, k]` over `k in axes(matrix, 2)` and stores
# the result. The whole nest is handed to `@turbo` as a single `for` header.
# Mutates `data_out` in place and returns `nothing`.
function issue543_turbo!(data_out, matrix, data_in)
    @turbo for col in axes(data_out, 3), row in axes(data_out, 2), lane in axes(data_out, 1)
        acc = zero(eltype(data_out))
        for k in axes(matrix, 2)
            acc += matrix[col, k] * data_in[lane, row, k]
        end
        data_out[lane, row, col] = acc
    end
    return nothing
end
| 162 | + |
@testset "Issue #543: W=1 Nested VecUnroll" begin
    # Shared body of both sweeps: allocate fresh operands, run the plain-loop and
    # `@turbo` kernels on the same inputs, and compare the outputs.
    # `lead` is the first extent passed to the output `StrideArray`s (either a
    # `StaticInt` or a plain integer); `v` and `n` are the matching runtime sizes
    # used to build `data_in`.
    check_case = function (lead, v, n)
        sn = StaticInt(n)
        data_out_ref = StrideArray(undef, lead, sn, sn)
        data_out_turbo = StrideArray(undef, lead, sn, sn)
        matrix = StrideArray(undef, sn, sn)
        data_in = rand(v, n, n)

        # Initialize with random data
        matrix .= rand.()

        # Outputs are allocated with `undef`; zero them before the kernels run.
        fill!(data_out_ref, 0.0)
        fill!(data_out_turbo, 0.0)

        issue543_noavx!(data_out_ref, matrix, data_in)
        issue543_turbo!(data_out_turbo, matrix, data_in)

        @test data_out_turbo ≈ data_out_ref
    end

    # Test the specific case that was failing: v=1 (first dim size 1) with n=5
    # This triggers W=1 code paths where VecUnroll stores T instead of Vec{1,T}
    for v in (1, 2), n in 4:8
        check_case(StaticInt(v), v, n)
    end

    # Also test with non-static first dimension but static others
    for v in (1, 2), n in 4:6
        check_case(v, v, n)
    end
end
0 commit comments