Skip to content

Commit 2895e5e

Browse files
committed
[AArch64][NEON] Fix poly lane intrinsics under -fno-lax-vector-conversions. Issue originally raised in #71362 (comment). Certain NEON intrinsics that operate on poly types (e.g. poly8x8_t) failed to compile with the -fno-lax-vector-conversions flag. This patch updates NeonEmitter.cpp to insert an explicit __builtin_bit_cast from poly types to the required signed integer vector types when generating lane-based intrinsics. A test neon-bitcast-poly is included.
1 parent 61a0653 commit 2895e5e

File tree

2 files changed

+60
-1
lines changed

2 files changed

+60
-1
lines changed

clang/utils/TableGen/NeonEmitter.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1401,12 +1401,20 @@ void Intrinsic::emitBodyAsBuiltinCall() {
14011401
if (LocalCK == ClassB || (T.isHalf() && !T.isScalarForMangling())) {
14021402
CastToType.makeInteger(8, true);
14031403
Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
1404+
}
1405+
else if ((T.isPoly() ||
1406+
(T.isInteger() && !T.isSigned() &&
1407+
StringRef(Name).contains("_p8")) ||
1408+
StringRef(Name).contains("_p16") ||
1409+
StringRef(Name).contains("_p64"))) {
1410+
CastToType.makeSigned();
1411+
Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
1412+
}
14041413
} else if (LocalCK == ClassI) {
14051414
if (CastToType.isInteger()) {
14061415
CastToType.makeSigned();
14071416
Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
14081417
}
1409-
}
14101418
}
14111419

14121420
S += Arg + ", ";
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s
3+
4+
; This test verifies that NEON intrinsics using polynomial types (poly8/16/64) emit correct AArch64 instructions
5+
; after bitcasting to signed integer vectors. These intrinsics previously failed to compile under -fno-lax-vector-conversions.
6+
7+
; Copy-lane case on <8 x i8>: the shuffle mask takes element 0 from the first
; shufflevector operand (%b, mask index 0) and elements 1-7 from the second
; operand (%a, mask indices 9-15), i.e. %a with lane 0 replaced by %b[0].
; The CHECK lines expect a single element-to-element mov for this.
define <8 x i8> @_Z18test_vcopy_lane_p811__Poly8x8_tS_(<8 x i8> %a, <8 x i8> %b) {
8+
; CHECK-LABEL: _Z18test_vcopy_lane_p811__Poly8x8_tS_:
9+
; CHECK: // %bb.0: // %entry
10+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
11+
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
12+
; CHECK-NEXT: mov v0.b[0], v1.b[0]
13+
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
14+
; CHECK-NEXT: ret
15+
entry:
16+
%vset_lane = shufflevector <8 x i8> %b, <8 x i8> %a, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
17+
ret <8 x i8> %vset_lane
18+
}
19+
20+
; Set-lane case on <4 x i16>: inserts the scalar %val into lane 0 of %vec and
; returns the updated vector. The CHECK lines expect a general-register to
; vector-element mov (w0 -> v0.h[0]).
define <4 x i16> @_Z18test_vset_lane_p16t12__Poly16x4_t(i16 %val, <4 x i16> %vec) {
21+
; CHECK-LABEL: _Z18test_vset_lane_p16t12__Poly16x4_t:
22+
; CHECK: // %bb.0: // %entry
23+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
24+
; CHECK-NEXT: mov v0.h[0], w0
25+
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
26+
; CHECK-NEXT: ret
27+
entry:
28+
%vset_lane = insertelement <4 x i16> %vec, i16 %val, i64 0
29+
ret <4 x i16> %vset_lane
30+
}
31+
32+
; Get-lane case on <1 x i64>: extracts the only element (index 0) of %vec and
; returns it as a scalar i64. The CHECK lines expect an fmov from d0 to x0.
define i64 @_Z18test_vget_lane_p6412__Poly64x1_t(<1 x i64> %vec){
33+
; CHECK-LABEL: _Z18test_vget_lane_p6412__Poly64x1_t:
34+
; CHECK: // %bb.0: // %entry
35+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
36+
; CHECK-NEXT: fmov x0, d0
37+
; CHECK-NEXT: ret
38+
entry:
39+
%vget_lane = extractelement <1 x i64> %vec, i64 0
40+
ret i64 %vget_lane
41+
}
42+
43+
; Set-lane case on the 128-bit <16 x i8> vector: inserts the scalar %val into
; lane 0 of %vec and returns the updated vector. The CHECK lines expect a
; general-register to vector-element mov (w0 -> v0.b[0]).
define <16 x i8> @_Z18test_vsetq_lane_p8h12__Poly8x16_t(i8 %val, <16 x i8> %vec){
44+
; CHECK-LABEL: _Z18test_vsetq_lane_p8h12__Poly8x16_t:
45+
; CHECK: // %bb.0: // %entry
46+
; CHECK-NEXT: mov v0.b[0], w0
47+
; CHECK-NEXT: ret
48+
entry:
49+
%vset_lane = insertelement <16 x i8> %vec, i8 %val, i64 0
50+
ret <16 x i8> %vset_lane
51+
}

0 commit comments

Comments
 (0)