Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
0e0f365
Add missing optimizations for xtensa/ and hexagon/ to forntend ops un…
shlmregev Dec 9, 2025
e9ac9bc
Revert change to copyright year
shlmregev Dec 9, 2025
79cb6be
Fix wrong include paths
shlmregev Dec 10, 2025
0ce1740
Reorder include paths
shlmregev Dec 10, 2025
766cfea
Move static inline functions out of the namespace block.
shlmregev Dec 10, 2025
fbb25a3
Fix path to KissFFT
shlmregev Dec 10, 2025
8558a4b
Remove file that was added by mistake
shlmregev Dec 10, 2025
c641963
Realized a lot of these optimizations were already added under src/
shlmregev Dec 10, 2025
bff6c9b
Reverted a few more files
shlmregev Dec 10, 2025
9698a5b
Fix compilation errors
shlmregev Dec 10, 2025
d881049
Merge branch 'main' into missing-signal-opts
shlmregev Dec 11, 2025
34163e0
Porting Reduce_All reference operator porting from TFLite to TFLM (#3…
rameshkunasi Jan 8, 2026
cd624d5
Provide default values for uninitialized variable (#3282)
shlmregev Jan 9, 2026
4eab169
Add Ingenic MIPS port to README.md (#3255)
yinzara Jan 9, 2026
bbeb38f
Solve compiler errors in decode op (#3284)
shlmregev Jan 12, 2026
cc70a1a
Fix coding style
shlmregev Dec 10, 2025
5299fbf
Merge branch 'tensorflow:main' into missing-signal-opts
shlmregev Jan 12, 2026
14b1b80
Match FFT int16 test's tolerance with audiofrontend's
shlmregev Jan 12, 2026
cddcffc
Merge branch 'main' into missing-signal-opts
shlmregev Jan 12, 2026
26f3ab8
Fix failure in micro_speech example
shlmregev Jan 13, 2026
d17ba45
Increase tolerance of feature extraction
shlmregev Jan 13, 2026
f2fe804
Incerased error interval for micro speech detection
shlmregev Jan 13, 2026
061ab9b
Incerased error interval for micro speech detection
shlmregev Jan 13, 2026
b1111a5
Incerased error interval for micro speech detection
shlmregev Jan 13, 2026
753badb
Incerased error interval for micro speech detection
shlmregev Jan 13, 2026
24991ce
Comment on looser tolerance in micro speech test and limit it to Xtensa
shlmregev Jan 13, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion signal/micro/kernels/fft_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ TF_LITE_MICRO_TEST(RfftTestSize512Int16) {
tflite::tflm_signal::Register_RFFT_INT16();
// See (b/287518815) for why this is needed.
#if defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
int tolerance = 9;
int tolerance = 16;
#else // defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
int tolerance = 3;
#endif // defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
Expand Down
110 changes: 110 additions & 0 deletions signal/micro/kernels/hexagon/hexagon_square_root.S
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

.section .note.GNU-stack,"",@progbits

// SignalHexagonSqrt32
// input: R0 unsigned 32-bit
// output: R0 unsigned 32-bit
//
// The assembly routine below implements the following:
//
// uint16_t Sqrt32(uint32_t num) {
// uint32_t res = 0;
// uint32_t bit = ((int32_t)1) << 30U;
// while (bit > num)
// bit >>= 2;
// while (bit != 0) {
// if (num >= res + bit) {
// num -= res + bit;
// res = (res >> 1U) + bit;
// } else {
// res >>= 1U;
// }
// bit >>= 2U;
// }
// // Do rounding
// if (num > res && num != 0xFFFF)
// ++res;
// return res;
// }

.text
.p2align 2
.p2align 4,,15
.globl SignalHexagonSqrt32
.type SignalHexagonSqrt32, @function

// Register mnemonics
#define num R0 // input - as in loop above
#define res R1 // as in loop aboe
#define bit R2 // as in loop above
#define temp R3 // the quantity bit + res
#define zcount R4 // leading zeroes
#define res_shift R5 // the quantity res >> 1
#define bit_shift R6 // the quantity bit >> 2

SignalHexagonSqrt32:
// Set bit to the largest even-power of two
// that is less than or equal to the input
{
res = #0 // return value
bit = ##1073741824 // 2^30
zcount = cl0(num) // count leading zeroes
}
zcount = clrbit(zcount, #0) // even power of 2
{
bit = lsr(bit, zcount) // 2^30 right shifted
if (cmp.eq(bit.new, #0)) jump:nt .done // return if bit == 0
}
.falign
.loop:
{
// Calculate quantities to be used in the conditional below
temp = add(bit, res)
res_shift = lsr(res, #1)
bit_shift = lsr(bit, #2)
}
{
// Conditionally assign to num and res
p0 = cmp.ltu(temp, num)
if (p0.new) num = sub(num, temp)
if (p0.new) res = add(res_shift, bit)
if (!p0.new) res = res_shift
}
{
// Advance bit >> 2 and exit loop if done
bit = bit_shift
if (cmp.gt(bit.new, #0)) jump:t .loop
}
.falign
.done:
// if (num > res && res != 0xffff) {
// ++res
// }
// return res in num (R0)
{
temp = ##65535
}
{
p0 = cmp.gt(num, res)
p0 = !cmp.eq(res, temp)
if (p0.new) num = add(res, #1)
if (!p0.new) num = res
}
{
jumpr r31
}
.size SignalHexagonSqrt32, .-SignalHexagonSqrt32
187 changes: 187 additions & 0 deletions signal/micro/kernels/hexagon/rfft_int16.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <stddef.h>
#include <stdint.h>
#include <string.h>

#include "signal/src/msb.h"
#include "signal/src/rfft.h"

// Do not reorder these headers. "typedef.h" must appear before rfft.h
#include "typedef.h"
extern "C" {
#include "rfft.h"
}

namespace tflm_signal {

// TODO(b/467010877) The twiddle tables should come from the tflite file
// This array includes the first 3*512/4=384 elements in the twiddle table in:
// HEXAGON_Tools/<TOOL_VERSION>/Examples/libcore/SigProc/cxFFT_IFFT/include
const uint16_t twiddles[] __attribute__((aligned(8))) = {
0x7fff, 0x0000, 0x0000, 0x8000, 0x0000, 0x8000, 0x0000, 0x8000, 0xa57e,
0xa57e, 0xa57e, 0x5a82, 0x5a82, 0xa57e, 0xcf05, 0x89bf, 0x89bf, 0xcf05,
0xa57e, 0xa57e, 0x89bf, 0xcf05, 0x30fc, 0x7642, 0x7642, 0xcf05, 0xe708,
0x8276, 0xb8e4, 0x9593, 0xcf05, 0x89bf, 0x9593, 0xb8e4, 0xe708, 0x7d8a,
0x30fc, 0x89bf, 0xb8e4, 0x9593, 0x8276, 0x18f9, 0x89bf, 0xcf05, 0x8276,
0xe708, 0x6a6e, 0x471d, 0x7d8a, 0xe708, 0xf375, 0x809e, 0xdad8, 0x8583,
0xe708, 0x8276, 0x9d0e, 0xaecd, 0xc3aa, 0x70e3, 0x471d, 0x9593, 0xc3aa,
0x8f1e, 0x809e, 0xf375, 0x9593, 0xb8e4, 0x8583, 0xdad8, 0x5134, 0x62f2,
0x6a6e, 0xb8e4, 0xdad8, 0x8583, 0x9d0e, 0xaecd, 0xb8e4, 0x9593, 0x8f1e,
0xc3aa, 0x0c8c, 0x7f62, 0x18f9, 0x8276, 0xaecd, 0x9d0e, 0x8f1e, 0x3c57,
0x8276, 0xe708, 0x809e, 0xf375, 0x7a7d, 0x2528, 0x7f62, 0xf375, 0xf9b9,
0x8028, 0xed38, 0x8163, 0xf375, 0x809e, 0xa129, 0xaa0b, 0xb3c1, 0x66d0,
0x5134, 0x9d0e, 0xc946, 0x8c4b, 0x83d7, 0xe0e7, 0x9d0e, 0xaecd, 0x877c,
0xd4e1, 0x41ce, 0x6dca, 0x70e3, 0xc3aa, 0xe0e7, 0x83d7, 0xaa0b, 0xa129,
0xc3aa, 0x8f1e, 0x9236, 0xbe32, 0xf9b9, 0x7fd9, 0x2528, 0x8583, 0xb3c1,
0x9931, 0x877c, 0x2b1f, 0x8583, 0xdad8, 0x8163, 0xed38, 0x73b6, 0x36ba,
0x7a7d, 0xdad8, 0xed38, 0x8163, 0xc946, 0x8c4b, 0xdad8, 0x8583, 0x9931,
0xb3c1, 0xd4e1, 0x7885, 0x3c57, 0x8f1e, 0xbe32, 0x9236, 0x8028, 0x0648,
0x8f1e, 0xc3aa, 0x83d7, 0xe0e7, 0x5ed7, 0x55f6, 0x62f2, 0xaecd, 0xd4e1,
0x877c, 0x9236, 0xbe32, 0xaecd, 0x9d0e, 0x8c4b, 0xc946, 0x1f1a, 0x7c2a,
0x0c8c, 0x809e, 0xaa0b, 0xa129, 0x9931, 0x4c40, 0x809e, 0xf375, 0x8028,
0xf9b9, 0x7e9d, 0x12c8, 0x7fd9, 0xf9b9, 0xfcdc, 0x800a, 0xf696, 0x8059,
0xf9b9, 0x8028, 0xa34c, 0xa7be, 0xac65, 0x60ec, 0x55f6, 0xa129, 0xcc22,
0x8afc, 0x8676, 0xd7da, 0xa129, 0xaa0b, 0x8894, 0xd1ef, 0x398d, 0x7255,
0x73b6, 0xc946, 0xe3f5, 0x831d, 0xb141, 0x9b18, 0xc946, 0x8c4b, 0x93dc,
0xbb86, 0xf055, 0x7f0a, 0x2b1f, 0x877c, 0xb64c, 0x975a, 0x84a3, 0x2224,
0x877c, 0xd4e1, 0x81e3, 0xea1e, 0x6f5f, 0x3f17, 0x7c2a, 0xe0e7, 0xf055,
0x80f7, 0xd1ef, 0x8894, 0xe0e7, 0x83d7, 0x9b18, 0xb141, 0xcc22, 0x7505,
0x41ce, 0x9236, 0xc0e9, 0x90a1, 0x800a, 0xfcdc, 0x9236, 0xbe32, 0x84a3,
0xdddd, 0x5843, 0x5cb4, 0x66d0, 0xb3c1, 0xd7da, 0x8676, 0x975a, 0xb64c,
0xb3c1, 0x9931, 0x8dab, 0xc674, 0x15e2, 0x7e1e, 0x12c8, 0x8163, 0xac65,
0x9f14, 0x93dc, 0x447b, 0x8163, 0xed38, 0x8059, 0xf696, 0x7ce4, 0x1c0c,
0x7e9d, 0xed38, 0xf696, 0x8059, 0xe3f5, 0x831d, 0xed38, 0x8163, 0x9f14,
0xac65, 0xbb86, 0x6c24, 0x4c40, 0x9931, 0xc674, 0x8dab, 0x81e3, 0xea1e,
0x9931, 0xb3c1, 0x8676, 0xd7da, 0x49b4, 0x68a7, 0x6dca, 0xbe32, 0xdddd,
0x84a3, 0xa34c, 0xa7be, 0xbe32, 0x9236, 0x90a1, 0xc0e9, 0x0324, 0x7ff6,
0x1f1a, 0x83d7, 0xb141, 0x9b18, 0x8afc, 0x33df, 0x83d7, 0xe0e7, 0x80f7,
0xf055, 0x776c, 0x2e11, 0x7885, 0xd4e1, 0xea1e, 0x81e3, 0xc0e9, 0x90a1,
0xd4e1, 0x877c, 0x975a, 0xb64c, 0xdddd, 0x7b5d, 0x36ba, 0x8c4b, 0xbb86,
0x93dc, 0x80f7, 0x0fab, 0x8c4b, 0xc946, 0x831d, 0xe3f5, 0x64e9, 0x4ec0,
0x5ed7, 0xaa0b, 0xd1ef, 0x8894, 0x8dab, 0xc674, 0xaa0b, 0xa129, 0x8afc,
0xcc22, 0x2827, 0x798a, 0x0648, 0x8028, 0xa7be, 0xa34c, 0x9f14, 0x539b,
0x8028, 0xf9b9, 0x800a, 0xfcdc, 0x7fa7, 0x096b, 0x7ff6, 0xfcdc, 0xfe6e,
0x8003, 0xfb4a, 0x8017, 0xfcdc, 0x800a, 0xa463, 0xa69c, 0xa8e3, 0x5dc8,
0x5843, 0xa34c, 0xcd92, 0x8a5b, 0x8806, 0xd368, 0xa34c, 0xa7be, 0x8927,
0xd079, 0x354e, 0x7460, 0x7505, 0xcc22, 0xe57e, 0x82c7, 0xb505, 0x9843,
0xcc22, 0x8afc, 0x94b6, 0xba33, 0xebab, 0x7e60, 0x2e11, 0x8894, 0xb797,
0x9674, 0x8377, 0x1d93, 0x8894, 0xd1ef, 0x822a, 0xe893, 0x6cf9, 0x4326,
0x7ce4, 0xe3f5, 0xf1e5, 0x80c8, 0xd65d, 0x86f7, 0xe3f5, 0x831d, 0x9c11,
0xb005, 0xc7dc, 0x7308, 0x447b, 0x93dc, 0xc248, 0x8fdd, 0x803e, 0xf827,
0x93dc, 0xbb86, 0x8511, 0xdc5a, 0x54ca, 0x5fe4, 0x68a7, 0xb64c, 0xd958,
0x85fb, 0x9a23, 0xb27f, 0xb64c, 0x975a, 0x8e62, 0xc50e, 0x113a, 0x7ed6,
0x15e2, 0x81e3, 0xad97, 0x9e0f, 0x916a, 0x4074, 0x81e3, 0xea1e, 0x8079,
0xf505, 0x7bc6, 0x209f, 0x7f0a, 0xf055, 0xf827, 0x803e, 0xe893, 0x822a,
0xf055, 0x80f7, 0xa01d, 0xab36, 0xb797, 0x698c, 0x4ec0, 0x9b18, 0xc7dc,
0x8cf9, 0x82c7, 0xe57e, 0x9b18, 0xb141, 0x86f7, 0xd65d, 0x45cd, 0x6b4b,
0x6f5f, 0xc0e9, 0xdf61, 0x843b, 0xa69c, 0xa463, 0xc0e9, 0x90a1, 0x916a,
0xbf8d, 0xfe6e, 0x7ffe, 0x2224, 0x84a3, 0xb27f, 0x9a23, 0x8927, 0x2f87,
0x84a3, 0xdddd, 0x812b, 0xeec7, 0x75a6, 0x326e, 0x798a, 0xd7da, 0xebab,
0x81a1, 0xc50e, 0x8e62, 0xd7da, 0x8676, 0x9843, 0xb505, 0xd958, 0x7a06,
0x398d, 0x8dab, 0xbcdb, 0x9307, 0x8079, 0x0afb, 0x8dab, 0xc674, 0x8377,
0xe26d, 0x61f1, 0x5269, 0x60ec, 0xac65, 0xd368, 0x8806, 0x8fdd, 0xc248,
0xac65, 0x9f14, 0x8ba1, 0xcab3, 0x23a7, 0x7aef, 0x096b, 0x8059, 0xa8e3,
0xa239, 0x9c11, 0x4ffb, 0x8059, 0xf696, 0x8017, 0xfb4a, 0x7f38, 0x0e1c,
0x7fa7, 0xf696, 0xfb4a, 0x8017, 0xf1e5, 0x80c8, 0xf696, 0x8059, 0xa239,
0xa8e3, 0xb005, 0x63ef, 0x539b, 0x9f14, 0xcab3, 0x8ba1, 0x8511, 0xdc5a,
0x9f14, 0xac65, 0x8806, 0xd368, 0x3db8, 0x7023, 0x7255, 0xc674, 0xe26d,
0x8377, 0xad97, 0x9e0f, 0xc674, 0x8dab, 0x9307, 0xbcdb, 0xf505, 0x7f87,
0x2827, 0x8676, 0xb505, 0x9843, 0x85fb, 0x26a8, 0x8676, 0xd7da, 0x81a1,
0xebab, 0x719e, 0x3af3, 0x7b5d, 0xdddd, 0xeec7, 0x812b, 0xcd92, 0x8a5b,
0xdddd, 0x84a3, 0x9a23, 0xb27f, 0xd079, 0x76d9, 0x3f17, 0x90a1, 0xbf8d,
0x916a, 0x8003, 0x0192, 0x90a1, 0xc0e9, 0x843b, 0xdf61, 0x5b9d, 0x5964,
0x64e9, 0xb141, 0xd65d, 0x86f7, 0x94b6, 0xba33, 0xb141, 0x9b18, 0x8cf9,
0xc7dc, 0x1a83, 0x7d3a, 0x0fab, 0x80f7, 0xab36, 0xa01d, 0x9674, 0x486a,
0x80f7, 0xf055, 0x803e, 0xf827, 0x7dd6, 0x176e, 0x7e1e, 0xea1e, 0xf505,
0x8079, 0xdf61, 0x843b, 0xea1e, 0x81e3, 0x9e0f, 0xad97, 0xbf8d, 0x6e97,
0x49b4, 0x975a, 0xc50e, 0x8e62, 0x812b, 0xeec7, 0x975a, 0xb64c, 0x85fb,
0xd958, 0x4d81, 0x65de, 0x6c24, 0xbb86, 0xdc5a, 0x8511, 0xa01d, 0xab36,
0xbb86, 0x93dc, 0x8fdd, 0xc248, 0x07d9, 0x7fc2, 0x1c0c, 0x831d, 0xb005,
0x9c11, 0x8cf9, 0x3825, 0x831d, 0xe3f5, 0x80c8, 0xf1e5, 0x790a, 0x29a4,
0x776c, 0xd1ef, 0xe893, 0x822a, 0xbcdb, 0x9307, 0xd1ef, 0x8894, 0x9674,
0xb797, 0xe26d, 0x7c89, 0x33df, 0x8afc, 0xba33, 0x94b6, 0x81a1, 0x1455,
0x8afc, 0xcc22, 0x82c7, 0xe57e, 0x67bd, 0x4afb, 0x5cb4, 0xa7be, 0xd079,
0x8927, 0x8ba1, 0xcab3, 0xa7be, 0xa34c, 0x8a5b, 0xcd92, 0x2c99, 0x77fb,
0x0324, 0x800a, 0xa69c, 0xa463, 0xa239, 0x571e, 0x800a, 0xfcdc, 0x8003,
0xfe6e, 0x7fea, 0x04b6};

// Twiddle factors used for the last stage of N-point real FFT
// generated as j*W^k, k=1, 2, ... N/4
// That's 128 complex int16_t elements
// Or 256 real int16_t elements
const uint16_t rtwiddles[] __attribute__((aligned(8))) = {
0x0192, 0x7ffe, 0x0324, 0x7ff6, 0x04b6, 0x7fea, 0x0648, 0x7fd9, 0x07d9,
0x7fc2, 0x096b, 0x7fa7, 0x0afb, 0x7f87, 0x0c8c, 0x7f62, 0x0e1c, 0x7f38,
0x0fab, 0x7f0a, 0x113a, 0x7ed6, 0x12c8, 0x7e9d, 0x1455, 0x7e60, 0x15e2,
0x7e1e, 0x176e, 0x7dd6, 0x18f9, 0x7d8a, 0x1a83, 0x7d3a, 0x1c0c, 0x7ce4,
0x1d93, 0x7c89, 0x1f1a, 0x7c2a, 0x209f, 0x7bc6, 0x2224, 0x7b5d, 0x23a7,
0x7aef, 0x2528, 0x7a7d, 0x26a8, 0x7a06, 0x2827, 0x798a, 0x29a4, 0x790a,
0x2b1f, 0x7885, 0x2c99, 0x77fb, 0x2e11, 0x776c, 0x2f87, 0x76d9, 0x30fc,
0x7642, 0x326e, 0x75a6, 0x33df, 0x7505, 0x354e, 0x7460, 0x36ba, 0x73b6,
0x3825, 0x7308, 0x398d, 0x7255, 0x3af3, 0x719e, 0x3c57, 0x70e3, 0x3db8,
0x7023, 0x3f17, 0x6f5f, 0x4074, 0x6e97, 0x41ce, 0x6dca, 0x4326, 0x6cf9,
0x447b, 0x6c24, 0x45cd, 0x6b4b, 0x471d, 0x6a6e, 0x486a, 0x698c, 0x49b4,
0x68a7, 0x4afb, 0x67bd, 0x4c40, 0x66d0, 0x4d81, 0x65de, 0x4ec0, 0x64e9,
0x4ffb, 0x63ef, 0x5134, 0x62f2, 0x5269, 0x61f1, 0x539b, 0x60ec, 0x54ca,
0x5fe4, 0x55f6, 0x5ed7, 0x571e, 0x5dc8, 0x5843, 0x5cb4, 0x5964, 0x5b9d,
0x5a82, 0x5a82, 0x5b9d, 0x5964, 0x5cb4, 0x5843, 0x5dc8, 0x571e, 0x5ed7,
0x55f6, 0x5fe4, 0x54ca, 0x60ec, 0x539b, 0x61f1, 0x5269, 0x62f2, 0x5134,
0x63ef, 0x4ffb, 0x64e9, 0x4ec0, 0x65de, 0x4d81, 0x66d0, 0x4c40, 0x67bd,
0x4afb, 0x68a7, 0x49b4, 0x698c, 0x486a, 0x6a6e, 0x471d, 0x6b4b, 0x45cd,
0x6c24, 0x447b, 0x6cf9, 0x4326, 0x6dca, 0x41ce, 0x6e97, 0x4074, 0x6f5f,
0x3f17, 0x7023, 0x3db8, 0x70e3, 0x3c57, 0x719e, 0x3af3, 0x7255, 0x398d,
0x7308, 0x3825, 0x73b6, 0x36ba, 0x7460, 0x354e, 0x7505, 0x33df, 0x75a6,
0x326e, 0x7642, 0x30fc, 0x76d9, 0x2f87, 0x776c, 0x2e11, 0x77fb, 0x2c99,
0x7885, 0x2b1f, 0x790a, 0x29a4, 0x798a, 0x2827, 0x7a06, 0x26a8, 0x7a7d,
0x2528, 0x7aef, 0x23a7, 0x7b5d, 0x2224, 0x7bc6, 0x209f, 0x7c2a, 0x1f1a,
0x7c89, 0x1d93, 0x7ce4, 0x1c0c, 0x7d3a, 0x1a83, 0x7d8a, 0x18f9, 0x7dd6,
0x176e, 0x7e1e, 0x15e2, 0x7e60, 0x1455, 0x7e9d, 0x12c8, 0x7ed6, 0x113a,
0x7f0a, 0x0fab, 0x7f38, 0x0e1c, 0x7f62, 0x0c8c, 0x7f87, 0x0afb, 0x7fa7,
0x096b, 0x7fc2, 0x07d9, 0x7fd9, 0x0648, 0x7fea, 0x04b6, 0x7ff6, 0x0324,
0x7ffe, 0x0192, 0x7fff, 0x0000};

struct RfftState {
int16_t* aligned_input;
int32_t fft_length;
};

size_t RfftInt16GetNeededMemory(int32_t fft_length) {
return sizeof(RfftState) + 2 * sizeof(int16_t) * fft_length;
}

void* RfftInt16Init(int32_t fft_length, void* state, size_t state_size) {
RfftState* rfft_state = (RfftState*)state;
int16_t* unaligned_buffer = (int16_t*)(rfft_state + 1);
rfft_state->aligned_input =
(int16_t*)((uint32_t)(unaligned_buffer + fft_length) &
~((1 << tflite::tflm_signal::MostSignificantBit32(
fft_length)) -
1));
rfft_state->fft_length = fft_length;
return state;
}

void RfftInt16Apply(void* state, const int16_t* input,
Complex<int16_t>* output) {
RfftState* rfft_state = (RfftState*)state;
memcpy(rfft_state->aligned_input, input,
rfft_state->fft_length * sizeof(int16_t));
rfft(rfft_state->aligned_input, rfft_state->fft_length, (CWord2x16*)twiddles,
(CWord2x16*)rtwiddles, (CWord2x16*)output);
return;
}

} // namespace tflm_signal
28 changes: 28 additions & 0 deletions signal/micro/kernels/hexagon/square_root_32.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "signal/src/square_root.h"

extern "C" uint16_t SignalHexagonSqrt32(uint32_t num);

namespace tflite {
namespace tflm_signal {

// SignalHexagonSqrt32() is defined in assembly. This C wrapper is only
// necessary to force TFLM's source specialization to pick up the optimized
// Hexagon implementation instead of the portable one.
uint16_t Sqrt32(uint32_t num) { return SignalHexagonSqrt32(num); }

} // namespace tflm_signal
} // namespace tflite
Loading