Skip to content

Commit 4b51e8a

Browse files
MagicalTux and claude committed
enable ARM64 NEON optimizations
Use PRESUME mode (OPUS_ARM_PRESUME_NEON_INTR) to call NEON functions directly, without RTCD overhead, since all ARM64 CPUs have NEON.

Includes NEON-optimized implementations for:
- Pitch detection (pitch_neon_intr.c)
- CELT band operations (celt_neon_intr.c)
- LPC inverse prediction gain
- Noise shaping quantization (NSQ)
- Biquad filtering

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent c22ec51 commit 4b51e8a

9 files changed

+84
-45
lines changed

config.h

Lines changed: 17 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -103,54 +103,33 @@
103103
/* Define to the sub-directory where libtool stores uninstalled libraries. */
104104
#define LT_OBJDIR ".libs/"
105105

106-
/* Make use of ARM asm optimization */
107-
/* #undef OPUS_ARM_ASM */
106+
/* ARM64 (AArch64) NEON optimizations
107+
* We use PRESUME mode (no RTCD) since all ARM64 CPUs have NEON.
108+
* RTCD is disabled to avoid needing the *_map.c files.
109+
*/
110+
#if defined(__aarch64__) || defined(_M_ARM64)
108111

109-
/* Use generic ARMv4 inline asm optimizations */
110-
/* #undef OPUS_ARM_INLINE_ASM */
112+
/* Compiler supports ARMv7/Aarch64 Neon Intrinsics */
113+
#define OPUS_ARM_MAY_HAVE_NEON_INTR 1
111114

112-
/* Use ARMv5E inline asm optimizations */
113-
/* #undef OPUS_ARM_INLINE_EDSP */
115+
/* Presume NEON is always available - directly call NEON functions */
116+
#define OPUS_ARM_PRESUME_NEON_INTR 1
114117

115-
/* Use ARMv6 inline asm optimizations */
116-
/* #undef OPUS_ARM_INLINE_MEDIA */
118+
/* Note: OPUS_HAVE_RTCD is NOT defined for ARM64, so we skip RTCD tables */
117119

118-
/* Use ARM NEON inline asm optimizations */
119-
/* #undef OPUS_ARM_INLINE_NEON */
120+
#endif /* ARM64/AArch64 */
120121

121-
/* Compiler supports Aarch64 DOTPROD Intrinsics */
122+
/* Unused ARM options */
123+
/* #undef OPUS_ARM_ASM */
124+
/* #undef OPUS_ARM_INLINE_ASM */
125+
/* #undef OPUS_ARM_INLINE_EDSP */
126+
/* #undef OPUS_ARM_INLINE_MEDIA */
127+
/* #undef OPUS_ARM_INLINE_NEON */
122128
/* #undef OPUS_ARM_MAY_HAVE_DOTPROD */
123-
124-
/* Define if assembler supports EDSP instructions */
125129
/* #undef OPUS_ARM_MAY_HAVE_EDSP */
126-
127-
/* Define if assembler supports ARMv6 media instructions */
128130
/* #undef OPUS_ARM_MAY_HAVE_MEDIA */
129-
130-
/* Define if compiler supports NEON instructions */
131131
/* #undef OPUS_ARM_MAY_HAVE_NEON */
132132

133-
/* Compiler supports ARMv7/Aarch64 Neon Intrinsics */
134-
/* #undef OPUS_ARM_MAY_HAVE_NEON_INTR */
135-
136-
/* Define if binary requires Aarch64 Neon Intrinsics */
137-
/* #undef OPUS_ARM_PRESUME_AARCH64_NEON_INTR */
138-
139-
/* Define if binary requires Aarch64 dotprod Intrinsics */
140-
/* #undef OPUS_ARM_PRESUME_DOTPROD */
141-
142-
/* Define if binary requires EDSP instruction support */
143-
/* #undef OPUS_ARM_PRESUME_EDSP */
144-
145-
/* Define if binary requires ARMv6 media instruction support */
146-
/* #undef OPUS_ARM_PRESUME_MEDIA */
147-
148-
/* Define if binary requires NEON instruction support */
149-
/* #undef OPUS_ARM_PRESUME_NEON */
150-
151-
/* Define if binary requires NEON intrinsics support */
152-
/* #undef OPUS_ARM_PRESUME_NEON_INTR */
153-
154133
/* This is a build of OPUS */
155134
#define OPUS_BUILD /**/
156135

generate.sh

Lines changed: 11 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -37,16 +37,17 @@ import "C"
3737
EOF
3838
}
3939

40-
# Note: ARM-specific directories (celt/arm, silk/arm) are excluded because:
41-
# - silk/arm requires fixed-point mode (FIXED_POINT) but we use float
42-
# - celt/arm has RTCD conflicts without full ARM optimization support
43-
# ARM builds will use the generic C implementation instead.
44-
for dir in src celt silk celt/x86 silk/float silk/x86; do
40+
# ARM NEON optimizations are included for arm64 with PRESUME mode (no RTCD).
41+
# The *_map.c files are excluded as they are only needed for RTCD.
42+
for dir in src celt silk celt/x86 silk/float silk/x86 celt/arm silk/arm; do
4543
COND=""
46-
case `basename "$dir"` in
47-
x86)
44+
case "$dir" in
45+
celt/x86|silk/x86)
4846
COND="x86 amd64"
4947
;;
48+
celt/arm|silk/arm)
49+
COND="arm64"
50+
;;
5051
esac
5152

5253
for file in $dir/*.c; do
@@ -69,6 +70,9 @@ for dir in src celt silk celt/x86 silk/float silk/x86; do
6970
*ne10*)
7071
# NE10 is an optional ARM library not typically available
7172
;;
73+
*/arm/*_map.c)
74+
# ARM RTCD map files are not needed when using PRESUME mode
75+
;;
7276
src/opus_compare.c)
7377
;;
7478
*)

inc-celt-arm-armcpu.go

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,8 @@
1+
// +build arm64
2+
3+
package opus
4+
5+
/*
6+
#include <opus-1.5.2/celt/arm/armcpu.c>
7+
*/
8+
import "C"

inc-celt-arm-celt-neon-intr.go

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,8 @@
1+
// +build arm64
2+
3+
package opus
4+
5+
/*
6+
#include <opus-1.5.2/celt/arm/celt_neon_intr.c>
7+
*/
8+
import "C"

inc-celt-arm-pitch-neon-intr.go

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,8 @@
1+
// +build arm64
2+
3+
package opus
4+
5+
/*
6+
#include <opus-1.5.2/celt/arm/pitch_neon_intr.c>
7+
*/
8+
import "C"
inc-silk-arm-LPC-inv-pred-gain-neon-intr.go

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,8 @@
1+
// +build arm64
2+
3+
package opus
4+
5+
/*
6+
#include <opus-1.5.2/silk/arm/LPC_inv_pred_gain_neon_intr.c>
7+
*/
8+
import "C"
inc-silk-arm-NSQ-del-dec-neon-intr.go

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,8 @@
1+
// +build arm64
2+
3+
package opus
4+
5+
/*
6+
#include <opus-1.5.2/silk/arm/NSQ_del_dec_neon_intr.c>
7+
*/
8+
import "C"

inc-silk-arm-NSQ-neon.go

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,8 @@
1+
// +build arm64
2+
3+
package opus
4+
5+
/*
6+
#include <opus-1.5.2/silk/arm/NSQ_neon.c>
7+
*/
8+
import "C"
inc-silk-arm-biquad-alt-neon-intr.go

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,8 @@
1+
// +build arm64
2+
3+
package opus
4+
5+
/*
6+
#include <opus-1.5.2/silk/arm/biquad_alt_neon_intr.c>
7+
*/
8+
import "C"

0 commit comments

Comments
 (0)