|
8 | 8 | #include "../aot_emit_exception.h" |
9 | 9 | #include "../../aot/aot_runtime.h" |
10 | 10 |
|
| 11 | +static bool |
| 12 | +is_target_x86(AOTCompContext *comp_ctx) |
| 13 | +{ |
| 14 | + return !strncmp(comp_ctx->target_arch, "x86_64", 6) || |
| 15 | + !strncmp(comp_ctx->target_arch, "i386", 4); |
| 16 | +} |
| 17 | + |
11 | 18 | static LLVMValueRef |
12 | 19 | build_intx16_vector(const AOTCompContext *comp_ctx, |
13 | 20 | const LLVMTypeRef element_type, |
@@ -86,7 +93,7 @@ aot_compile_simd_shuffle(AOTCompContext *comp_ctx, |
86 | 93 | /* TODO: instructions for other CPUs */ |
87 | 94 | /* shufflevector is not an option, since it requires *mask as a const */ |
88 | 95 | bool |
89 | | -aot_compile_simd_swizzle(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) |
| 96 | +aot_compile_simd_swizzle_x86(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) |
90 | 97 | { |
91 | 98 | LLVMValueRef vector, mask, max_lanes, condition, mask_lanes, result; |
92 | 99 | LLVMTypeRef param_types[2]; |
@@ -151,6 +158,109 @@ aot_compile_simd_swizzle(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) |
151 | 158 | return false; |
152 | 159 | } |
153 | 160 |
|
| 161 | +bool |
| 162 | +aot_compile_simd_swizzle(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) |
| 163 | +{ |
| 164 | + LLVMValueRef vector, mask, default_lane_value, condition, max_lane_id, |
| 165 | + result, idx, id, replace_with_zero, elem, elem_or_zero, undef; |
| 166 | + uint8 i; |
| 167 | + |
| 168 | + if (is_target_x86(comp_ctx)) { |
| 169 | + return aot_compile_simd_swizzle_x86(comp_ctx, func_ctx); |
| 170 | + } |
| 171 | + |
| 172 | + int const_lane_ids[16] = { 16, 16, 16, 16, 16, 16, 16, 16, |
| 173 | + 16, 16, 16, 16, 16, 16, 16, 16 }, |
| 174 | + const_zeors[16] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, |
| 175 | + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; |
| 176 | + |
| 177 | + if (!(mask = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, V128_i8x16_TYPE, |
| 178 | + "mask"))) { |
| 179 | + goto fail; |
| 180 | + } |
| 181 | + |
| 182 | + if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, |
| 183 | + V128_i8x16_TYPE, "vec"))) { |
| 184 | + goto fail; |
| 185 | + } |
| 186 | + |
| 187 | + if (!(undef = LLVMGetUndef(V128_i8x16_TYPE))) { |
| 188 | + HANDLE_FAILURE("LLVMGetUndef"); |
| 189 | + goto fail; |
| 190 | + } |
| 191 | + |
| 192 | + /* icmp uge <16 x i8> mask, <16, 16, 16, 16, ...> */ |
| 193 | + if (!(max_lane_id = |
| 194 | + build_intx16_vector(comp_ctx, INT8_TYPE, const_lane_ids))) { |
| 195 | + goto fail; |
| 196 | + } |
| 197 | + |
| 198 | + if (!(condition = LLVMBuildICmp(comp_ctx->builder, LLVMIntUGE, mask, |
| 199 | + max_lane_id, "out_of_range"))) { |
| 200 | + HANDLE_FAILURE("LLVMBuldICmp"); |
| 201 | + goto fail; |
| 202 | + } |
| 203 | + |
| 204 | + /* if the id is out of range (>=16), set the id as 0 */ |
| 205 | + if (!(default_lane_value = |
| 206 | + build_intx16_vector(comp_ctx, INT8_TYPE, const_zeors))) { |
| 207 | + goto fail; |
| 208 | + } |
| 209 | + |
| 210 | + if (!(idx = LLVMBuildSelect(comp_ctx->builder, condition, |
| 211 | + default_lane_value, mask, "mask"))) { |
| 212 | + HANDLE_FAILURE("LLVMBuildSelect"); |
| 213 | + goto fail; |
| 214 | + } |
| 215 | + |
| 216 | + for (i = 0; i < 16; i++) { |
| 217 | + if (!(id = LLVMBuildExtractElement(comp_ctx->builder, idx, I8_CONST(i), |
| 218 | + "id"))) { |
| 219 | + HANDLE_FAILURE("LLVMBuildExtractElement"); |
| 220 | + goto fail; |
| 221 | + } |
| 222 | + |
| 223 | + if (!(replace_with_zero = |
| 224 | + LLVMBuildExtractElement(comp_ctx->builder, condition, |
| 225 | + I8_CONST(i), "replace_with_zero"))) { |
| 226 | + HANDLE_FAILURE("LLVMBuildExtractElement"); |
| 227 | + goto fail; |
| 228 | + } |
| 229 | + |
| 230 | + if (!(elem = LLVMBuildExtractElement(comp_ctx->builder, vector, id, |
| 231 | + "vector[mask[i]]"))) { |
| 232 | + HANDLE_FAILURE("LLVMBuildExtractElement"); |
| 233 | + goto fail; |
| 234 | + } |
| 235 | + |
| 236 | + if (!(elem_or_zero = |
| 237 | + LLVMBuildSelect(comp_ctx->builder, replace_with_zero, |
| 238 | + I8_CONST(0), elem, "elem_or_zero"))) { |
| 239 | + HANDLE_FAILURE("LLVMBuildSelect"); |
| 240 | + goto fail; |
| 241 | + } |
| 242 | + |
| 243 | + if (!(undef = |
| 244 | + LLVMBuildInsertElement(comp_ctx->builder, undef, elem_or_zero, |
| 245 | + I8_CONST(i), "new_vector"))) { |
| 246 | + HANDLE_FAILURE("LLVMBuildInsertElement"); |
| 247 | + goto fail; |
| 248 | + } |
| 249 | + } |
| 250 | + |
| 251 | + if (!(result = LLVMBuildBitCast(comp_ctx->builder, undef, V128_i64x2_TYPE, |
| 252 | + "ret"))) { |
| 253 | + HANDLE_FAILURE("LLVMBuildBitCast"); |
| 254 | + goto fail; |
| 255 | + } |
| 256 | + |
| 257 | + PUSH_V128(result); |
| 258 | + |
| 259 | + return true; |
| 260 | +fail: |
| 261 | + return false; |
| 262 | +} |
| 263 | + |
154 | 264 | static bool |
155 | 265 | aot_compile_simd_extract(AOTCompContext *comp_ctx, |
156 | 266 | AOTFuncContext *func_ctx, |
|
0 commit comments