Skip to content

Commit c78e515

Browse files
Performance improvements (75% faster) in data processing.
The accelerometer data has to be processed before sending it to the model. For the ML Trainer filters, before: ~5843 microsec (~373072 cycles); after: ~1405 microsec (~80872 cycles), i.e. about 1/4 of the time.
1 parent 2a93b24 commit c78e515

File tree

2 files changed

+49
-9
lines changed

2 files changed

+49
-9
lines changed

mlrunner/mldataprocessor.c

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -69,11 +69,13 @@ MldpReturn_t filterStdDev(const float *data_in, const int in_size, float *data_o
6969
}
7070

7171
float std = 0;
72+
float f = 0;
7273
for (int i = 0; i < in_size; i++) {
73-
std += (data_in[i] - mean) * (data_in[i] - mean);
74+
f = data_in[i] - mean;
75+
std += f * f;
7476
}
7577
std /= in_size;
76-
*data_out = sqrt(std);
78+
*data_out = sqrtf(std);
7779

7880
return MLDP_SUCCESS;
7981
}
@@ -112,8 +114,8 @@ MldpReturn_t filterPeaks(const float *data_in, const int in_size, float *data_ou
112114
stdFilter[lag - 1] = stdDev_lag;
113115

114116
for (int i = lag; i < in_size; i++) {
115-
if (fabs(data_in[i] - avgFilter[i - 1]) > 0.1 &&
116-
fabs(data_in[i] - avgFilter[i - 1]) > threshold * stdFilter[i - 1]
117+
if (fabsf(data_in[i] - avgFilter[i - 1]) > 0.1f &&
118+
fabsf(data_in[i] - avgFilter[i - 1]) > threshold * stdFilter[i - 1]
117119
) {
118120
if (data_in[i] > avgFilter[i - 1]) {
119121
signals[i] = +1; // positive signal
@@ -124,7 +126,7 @@ MldpReturn_t filterPeaks(const float *data_in, const int in_size, float *data_ou
124126
signals[i] = -1; // negative signal
125127
}
126128
// make influence lower
127-
filteredY[i] = influence * data_in[i] + (1 - influence) * filteredY[i - 1];
129+
filteredY[i] = influence * data_in[i] + (1.0f - influence) * filteredY[i - 1];
128130
} else {
129131
signals[i] = 0; // no signal
130132
filteredY[i] = data_in[i];
@@ -151,7 +153,7 @@ MldpReturn_t filterTotalAcc(const float *data_in, const int in_size, float *data
151153

152154
float total = 0;
153155
for (int i = 0; i < in_size; i++) {
154-
total += fabs(data_in[i]);
156+
total += fabsf(data_in[i]);
155157
}
156158
*data_out = total;
157159

@@ -186,7 +188,7 @@ MldpReturn_t filterRms(const float *data_in, const int in_size, float *data_out,
186188
for (int i = 0; i < in_size; i++) {
187189
rms += data_in[i] * data_in[i];
188190
}
189-
*data_out = sqrt(rms / in_size);
191+
*data_out = sqrtf(rms / in_size);
190192

191193
return MLDP_SUCCESS;
192194
}

testextension.cpp

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,40 @@
1818
#define DEBUG_PRINT(...)
1919
#endif
2020

21+
22+
/**
 * Reset and start the DWT cycle counter.
 *
 * Sets the TRCENA bit in CoreDebug->DEMCR, clears DWT->CYCCNT to zero and
 * then sets the CYCCNTENA bit in DWT->CTRL. The three register accesses are
 * kept in this order so the counter is zeroed before it is switched on.
 */
static inline void start_ticks_cpu() {
    CoreDebug->DEMCR = CoreDebug->DEMCR | CoreDebug_DEMCR_TRCENA_Msk;
    DWT->CYCCNT = 0;
    DWT->CTRL = DWT->CTRL | DWT_CTRL_CYCCNTENA_Msk;
}
27+
28+
static inline uint32_t ticks_cpu() {
29+
return DWT->CYCCNT;
30+
}
31+
32+
// Number of most-recent measurements kept for the rolling average.
#define TICKS_WINDOW_SIZE 10u

// Ring buffer of the latest tick deltas, the next slot to write, and a flag
// that becomes true once every slot holds a real measurement.
static uint32_t ticks[TICKS_WINDOW_SIZE];
static uint32_t ticks_index = 0;
static bool ticks_start_average = false;

/**
 * Record one tick measurement and return the rolling average.
 *
 * The elapsed count (ticks_end - ticks_start, computed with unsigned
 * wrap-around) is stored in a ring buffer of TICKS_WINDOW_SIZE entries.
 *
 * @param ticks_start Counter value sampled before the measured section.
 * @param ticks_end   Counter value sampled after the measured section.
 * @return 0 until TICKS_WINDOW_SIZE measurements have been recorded; after
 *         that, the mean of the last TICKS_WINDOW_SIZE measurements.
 */
static inline uint32_t calcTicks(uint32_t ticks_start, uint32_t ticks_end) {
    ticks[ticks_index] = ticks_end - ticks_start;
    ticks_index++;

    // Wrap only after the final slot has been written, so every entry that
    // is summed below contains a real sample.
    if (ticks_index >= TICKS_WINDOW_SIZE) {
        ticks_start_average = true;
        ticks_index = 0;
    }

    if (!ticks_start_average) {
        return 0;
    }

    // 64-bit accumulator: the sum of several 32-bit samples can exceed
    // UINT32_MAX even though their mean always fits.
    uint64_t ticksSum = 0;
    for (size_t i = 0; i < TICKS_WINDOW_SIZE; i++) {
        ticksSum += ticks[i];
    }
    return (uint32_t)(ticksSum / TICKS_WINDOW_SIZE);
}
53+
54+
2155
namespace testrunner {
2256
static ml_actions_t *actions = NULL;
2357
static ml_predictions_t *predictions = NULL;
@@ -43,7 +77,9 @@ namespace testrunner {
4377

4478
unsigned int time_start = system_timer_current_time_us();
4579

80+
int32_t ticks_start = ticks_cpu() & 0x7FFFFFFF;
4681
float *modelData = mlDataProcessor.getProcessedData();
82+
int32_t ticks_end = ticks_cpu() & 0x7FFFFFFF;
4783
if (modelData == NULL) {
4884
DEBUG_PRINT("Failed to processed data for the model\n");
4985
uBit.panic(TEST_RUNNER_ERROR + 21);
@@ -60,8 +96,8 @@ namespace testrunner {
6096

6197
unsigned int time_end = system_timer_current_time_us();
6298

63-
DEBUG_PRINT("Prediction (%d micros + %d micros): ",
64-
time_mid - time_start, time_end - time_mid);
99+
DEBUG_PRINT("Prediction (%d micros + %d micros, %d ticks): ",
100+
time_mid - time_start, time_end - time_mid, calcTicks(ticks_start, ticks_end));
65101
if (predictions->index >= 0) {
66102
DEBUG_PRINT("%d %s\n",
67103
predictions->index,
@@ -218,6 +254,8 @@ namespace testrunner {
218254
uBit.messageBus.listen(TEST_RUNNER_ID_TIMER, ML_CODAL_TIMER_VALUE, &recordAccData, MESSAGE_BUS_LISTENER_DROP_IF_BUSY);
219255
uBit.timer.eventEvery(samplesPeriodMillisec, TEST_RUNNER_ID_TIMER, ML_CODAL_TIMER_VALUE);
220256

257+
start_ticks_cpu();
258+
221259
initialised = true;
222260

223261
DEBUG_PRINT("\tModel loaded\n");

0 commit comments

Comments
 (0)