Skip to content

Commit 30b67c6

Browse files
[AArch64] Make ACLE intrinsics always available part1
A given arch feature might be enabled by a pragma or a function attribute, so in these cases it would be nice to be able to use the intrinsics. Today GCC offers the intrinsics without the -march flag [1]. There is a PR [2] for ACLE to clarify the intention and remove the need for the -march flag for a given intrinsic. This is going to be more useful when D127812 lands. [1] https://godbolt.org/z/bxcMhav3z [2] ARM-software/acle#214 Reviewed By: dmgreen Differential Revision: https://reviews.llvm.org/D133359
1 parent 22e4203 commit 30b67c6

File tree

4 files changed

+56
-52
lines changed

4 files changed

+56
-52
lines changed

clang/include/clang/Basic/BuiltinsAArch64.def

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ BUILTIN(__builtin_arm_dmb, "vUi", "nc")
7474
BUILTIN(__builtin_arm_dsb, "vUi", "nc")
7575
BUILTIN(__builtin_arm_isb, "vUi", "nc")
7676

77-
BUILTIN(__builtin_arm_jcvt, "Zid", "nc")
77+
TARGET_BUILTIN(__builtin_arm_jcvt, "Zid", "nc", "v8.3a")
7878

7979
// Prefetch
8080
BUILTIN(__builtin_arm_prefetch, "vvC*UiUiUiUi", "nc")
@@ -107,24 +107,24 @@ BUILTIN(__builtin_arm_tcancel, "vWUIi", "n")
107107
BUILTIN(__builtin_arm_ttest, "WUi", "nc")
108108

109109
// Armv8.5-A FP rounding intrinsics
110-
BUILTIN(__builtin_arm_rint32zf, "ff", "")
111-
BUILTIN(__builtin_arm_rint32z, "dd", "")
112-
BUILTIN(__builtin_arm_rint64zf, "ff", "")
113-
BUILTIN(__builtin_arm_rint64z, "dd", "")
114-
BUILTIN(__builtin_arm_rint32xf, "ff", "")
115-
BUILTIN(__builtin_arm_rint32x, "dd", "")
116-
BUILTIN(__builtin_arm_rint64xf, "ff", "")
117-
BUILTIN(__builtin_arm_rint64x, "dd", "")
110+
TARGET_BUILTIN(__builtin_arm_rint32zf, "ff", "", "v8.5a")
111+
TARGET_BUILTIN(__builtin_arm_rint32z, "dd", "", "v8.5a")
112+
TARGET_BUILTIN(__builtin_arm_rint64zf, "ff", "", "v8.5a")
113+
TARGET_BUILTIN(__builtin_arm_rint64z, "dd", "", "v8.5a")
114+
TARGET_BUILTIN(__builtin_arm_rint32xf, "ff", "", "v8.5a")
115+
TARGET_BUILTIN(__builtin_arm_rint32x, "dd", "", "v8.5a")
116+
TARGET_BUILTIN(__builtin_arm_rint64xf, "ff", "", "v8.5a")
117+
TARGET_BUILTIN(__builtin_arm_rint64x, "dd", "", "v8.5a")
118118

119119
// Armv8.5-A Random number generation intrinsics
120-
BUILTIN(__builtin_arm_rndr, "iWUi*", "n")
121-
BUILTIN(__builtin_arm_rndrrs, "iWUi*", "n")
120+
TARGET_BUILTIN(__builtin_arm_rndr, "iWUi*", "n", "rand")
121+
TARGET_BUILTIN(__builtin_arm_rndrrs, "iWUi*", "n", "rand")
122122

123123
// Armv8.7-A load/store 64-byte intrinsics
124-
BUILTIN(__builtin_arm_ld64b, "vvC*WUi*", "n")
125-
BUILTIN(__builtin_arm_st64b, "vv*WUiC*", "n")
126-
BUILTIN(__builtin_arm_st64bv, "WUiv*WUiC*", "n")
127-
BUILTIN(__builtin_arm_st64bv0, "WUiv*WUiC*", "n")
124+
TARGET_BUILTIN(__builtin_arm_ld64b, "vvC*WUi*", "n", "ls64")
125+
TARGET_BUILTIN(__builtin_arm_st64b, "vv*WUiC*", "n", "ls64")
126+
TARGET_BUILTIN(__builtin_arm_st64bv, "WUiv*WUiC*", "n", "ls64")
127+
TARGET_BUILTIN(__builtin_arm_st64bv0, "WUiv*WUiC*", "n", "ls64")
128128

129129
TARGET_HEADER_BUILTIN(_BitScanForward, "UcUNi*UNi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
130130
TARGET_HEADER_BUILTIN(_BitScanReverse, "UcUNi*UNi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")

clang/lib/Headers/arm_acle.h

Lines changed: 35 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -589,122 +589,123 @@ __smusdx(int16x2_t __a, int16x2_t __b) {
589589
#endif
590590

591591
/* 9.7 CRC32 intrinsics */
592-
#if defined(__ARM_FEATURE_CRC32) && __ARM_FEATURE_CRC32
593-
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
592+
#if (defined(__ARM_FEATURE_CRC32) && __ARM_FEATURE_CRC32) || \
593+
(defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE)
594+
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
594595
__crc32b(uint32_t __a, uint8_t __b) {
595596
return __builtin_arm_crc32b(__a, __b);
596597
}
597598

598-
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
599+
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
599600
__crc32h(uint32_t __a, uint16_t __b) {
600601
return __builtin_arm_crc32h(__a, __b);
601602
}
602603

603-
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
604+
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
604605
__crc32w(uint32_t __a, uint32_t __b) {
605606
return __builtin_arm_crc32w(__a, __b);
606607
}
607608

608-
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
609+
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
609610
__crc32d(uint32_t __a, uint64_t __b) {
610611
return __builtin_arm_crc32d(__a, __b);
611612
}
612613

613-
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
614+
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
614615
__crc32cb(uint32_t __a, uint8_t __b) {
615616
return __builtin_arm_crc32cb(__a, __b);
616617
}
617618

618-
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
619+
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
619620
__crc32ch(uint32_t __a, uint16_t __b) {
620621
return __builtin_arm_crc32ch(__a, __b);
621622
}
622623

623-
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
624+
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
624625
__crc32cw(uint32_t __a, uint32_t __b) {
625626
return __builtin_arm_crc32cw(__a, __b);
626627
}
627628

628-
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
629+
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
629630
__crc32cd(uint32_t __a, uint64_t __b) {
630631
return __builtin_arm_crc32cd(__a, __b);
631632
}
632633
#endif
633634

634635
/* Armv8.3-A Javascript conversion intrinsic */
635-
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE && defined(__ARM_FEATURE_JCVT)
636-
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
636+
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
637+
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("v8.3a")))
637638
__jcvt(double __a) {
638639
return __builtin_arm_jcvt(__a);
639640
}
640641
#endif
641642

642643
/* Armv8.5-A FP rounding intrinsics */
643-
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE && defined(__ARM_FEATURE_FRINT)
644-
static __inline__ float __attribute__((__always_inline__, __nodebug__))
644+
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
645+
static __inline__ float __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
645646
__rint32zf(float __a) {
646647
return __builtin_arm_rint32zf(__a);
647648
}
648649

649-
static __inline__ double __attribute__((__always_inline__, __nodebug__))
650+
static __inline__ double __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
650651
__rint32z(double __a) {
651652
return __builtin_arm_rint32z(__a);
652653
}
653654

654-
static __inline__ float __attribute__((__always_inline__, __nodebug__))
655+
static __inline__ float __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
655656
__rint64zf(float __a) {
656657
return __builtin_arm_rint64zf(__a);
657658
}
658659

659-
static __inline__ double __attribute__((__always_inline__, __nodebug__))
660+
static __inline__ double __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
660661
__rint64z(double __a) {
661662
return __builtin_arm_rint64z(__a);
662663
}
663664

664-
static __inline__ float __attribute__((__always_inline__, __nodebug__))
665+
static __inline__ float __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
665666
__rint32xf(float __a) {
666667
return __builtin_arm_rint32xf(__a);
667668
}
668669

669-
static __inline__ double __attribute__((__always_inline__, __nodebug__))
670+
static __inline__ double __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
670671
__rint32x(double __a) {
671672
return __builtin_arm_rint32x(__a);
672673
}
673674

674-
static __inline__ float __attribute__((__always_inline__, __nodebug__))
675+
static __inline__ float __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
675676
__rint64xf(float __a) {
676677
return __builtin_arm_rint64xf(__a);
677678
}
678679

679-
static __inline__ double __attribute__((__always_inline__, __nodebug__))
680+
static __inline__ double __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
680681
__rint64x(double __a) {
681682
return __builtin_arm_rint64x(__a);
682683
}
683684
#endif
684685

685686
/* Armv8.7-A load/store 64-byte intrinsics */
686-
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE && defined(__ARM_FEATURE_LS64)
687+
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
687688
typedef struct {
688689
uint64_t val[8];
689690
} data512_t;
690691

691-
static __inline__ data512_t __attribute__((__always_inline__, __nodebug__))
692+
static __inline__ data512_t __attribute__((__always_inline__, __nodebug__, target("ls64")))
692693
__arm_ld64b(const void *__addr) {
693-
data512_t __value;
694-
__builtin_arm_ld64b(__addr, __value.val);
695-
return __value;
694+
data512_t __value;
695+
__builtin_arm_ld64b(__addr, __value.val);
696+
return __value;
696697
}
697-
static __inline__ void __attribute__((__always_inline__, __nodebug__))
698+
static __inline__ void __attribute__((__always_inline__, __nodebug__, target("ls64")))
698699
__arm_st64b(void *__addr, data512_t __value) {
699-
__builtin_arm_st64b(__addr, __value.val);
700+
__builtin_arm_st64b(__addr, __value.val);
700701
}
701-
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
702+
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__, target("ls64")))
702703
__arm_st64bv(void *__addr, data512_t __value) {
703-
return __builtin_arm_st64bv(__addr, __value.val);
704+
return __builtin_arm_st64bv(__addr, __value.val);
704705
}
705-
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
706+
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__, target("ls64")))
706707
__arm_st64bv0(void *__addr, data512_t __value) {
707-
return __builtin_arm_st64bv0(__addr, __value.val);
708+
return __builtin_arm_st64bv0(__addr, __value.val);
708709
}
709710
#endif
710711

@@ -759,12 +760,12 @@ __arm_st64bv0(void *__addr, data512_t __value) {
759760
#endif /* __ARM_FEATURE_TME */
760761

761762
/* Armv8.5-A Random number generation intrinsics */
762-
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE && defined(__ARM_FEATURE_RNG)
763-
static __inline__ int __attribute__((__always_inline__, __nodebug__))
763+
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
764+
static __inline__ int __attribute__((__always_inline__, __nodebug__, target("rand")))
764765
__rndr(uint64_t *__p) {
765766
return __builtin_arm_rndr(__p);
766767
}
767-
static __inline__ int __attribute__((__always_inline__, __nodebug__))
768+
static __inline__ int __attribute__((__always_inline__, __nodebug__, target("rand")))
768769
__rndrrs(uint64_t *__p) {
769770
return __builtin_arm_rndrrs(__p);
770771
}

clang/test/CodeGen/arm_acle.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ void test_sevl(void) {
124124
__sevl();
125125
}
126126

127-
#if __ARM_32BIT_STATE
127+
#ifdef __ARM_32BIT_STATE
128128
// AArch32-LABEL: @test_dbg(
129129
// AArch32-NEXT: entry:
130130
// AArch32-NEXT: call void @llvm.arm.dbg(i32 0)
@@ -1646,7 +1646,7 @@ void test_wsrf64(double v) {
16461646
#endif
16471647
}
16481648

1649-
#ifdef __ARM_64BIT_STATE
1649+
#if defined(__ARM_64BIT_STATE) && defined(__ARM_FEATURE_JCVT)
16501650
// AArch6483-LABEL: @test_jcvt(
16511651
// AArch6483-NEXT: entry:
16521652
// AArch6483-NEXT: [[TMP0:%.*]] = call i32 @llvm.aarch64.fjcvtzs(double [[V:%.*]])
@@ -1658,7 +1658,7 @@ int32_t test_jcvt(double v) {
16581658
#endif
16591659

16601660

1661-
#if __ARM_64BIT_STATE && defined(__ARM_FEATURE_RNG)
1661+
#if defined(__ARM_64BIT_STATE) && defined(__ARM_FEATURE_RNG)
16621662

16631663
// AArch6485-LABEL: @test_rndr(
16641664
// AArch6485-NEXT: entry:

clang/test/CodeGen/builtins-arm64.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ void prefetch(void) {
5959
// CHECK: call {{.*}} @llvm.prefetch.p0(ptr null, i32 0, i32 3, i32 0)
6060
}
6161

62+
__attribute__((target("v8.5a")))
6263
int32_t jcvt(double v) {
6364
//CHECK-LABEL: @jcvt(
6465
//CHECK: call i32 @llvm.aarch64.fjcvtzs
@@ -133,6 +134,7 @@ unsigned int clsll(uint64_t v) {
133134
// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
134135
// CHECK-NEXT: ret i32 [[TMP3]]
135136
//
137+
__attribute__((target("rand")))
136138
int rndr(uint64_t *__addr) {
137139
return __builtin_arm_rndr(__addr);
138140
}
@@ -146,6 +148,7 @@ int rndr(uint64_t *__addr) {
146148
// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
147149
// CHECK-NEXT: ret i32 [[TMP3]]
148150
//
151+
__attribute__((target("rand")))
149152
int rndrrs(uint64_t *__addr) {
150153
return __builtin_arm_rndrrs(__addr);
151154
}

0 commit comments

Comments
 (0)