Skip to content

Commit dda9ea0

Browse files
authored
Merge pull request #2363 from jepler/samd-neopixel
samd: neopixel: Fix neopixels after #2297
2 parents 8497ea8 + 1905d07 commit dda9ea0

File tree

1 file changed

+63
-94
lines changed
  • ports/atmel-samd/common-hal/neopixel_write

1 file changed

+63
-94
lines changed

ports/atmel-samd/common-hal/neopixel_write/__init__.c

Lines changed: 63 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -34,32 +34,70 @@
3434
#ifdef SAMD51
3535
#include "hri/hri_cmcc_d51.h"
3636
#include "hri/hri_nvmctrl_d51.h"
37-
38-
// This magical macro makes sure the delay isn't optimized out and is the
39-
// minimal three instructions.
40-
#define delay_cycles(cycles) \
41-
{ \
42-
uint32_t t; \
43-
asm volatile ( \
44-
"movs %[t], %[c]\n\t" \
45-
"loop%=:\n\t" \
46-
"subs %[t], #1\n\t" \
47-
"bne.n loop%=" : [t] "=r"(t) : [c] "I" (cycles)); \
48-
}
4937
#endif
5038

51-
// Ensure this code is compiled with -Os. Any other optimization level may change the timing of it
52-
// and break neopixels.
53-
#pragma GCC push_options
54-
#pragma GCC optimize ("Os")
39+
// Bit-bangs the bytes at `ptr` onto one pin, most-significant bit first
// (the bit mask starts at 0x80 and is shifted right once per bit).
// Every bit starts with a store of pinMask to [clraddr + 4] ("set"). For a
// 0 bit the pin is cleared early by a store to [clraddr]; for every bit it is
// cleared (possibly a second time) after the longer delay loop.
// The function is declared `naked`, so the asm body itself pushes r4-r6/lr
// and returns by popping the saved lr into pc. The asm uses r0-r3 directly as
// clraddr, pinMask, ptr and numBytes (assumes the standard ARM argument
// register assignment -- TODO confirm).
// NOTE(review): the labels (loopLoad, d0, d1, d2, ...) are plain assembler
// symbols, so this body presumably must be emitted exactly once; `noinline`
// looks intended to guarantee that -- confirm.
// NOTE(review): the end-of-buffer test happens only after a byte's eight bits
// have been sent, so one byte is read and transmitted even when numBytes is
// 0 -- confirm callers never pass an empty buffer.
__attribute__((naked,noinline,aligned(16)))
40+
static void neopixel_send_buffer_core(volatile uint32_t *clraddr, uint32_t pinMask,
41+
const uint8_t *ptr, int numBytes);
42+
43+
static void neopixel_send_buffer_core(volatile uint32_t *clraddr, uint32_t pinMask,
44+
const uint8_t *ptr, int numBytes) {
45+
asm volatile(" push {r4, r5, r6, lr};" // save the scratch registers used below and the return address
46+
" add r3, r2, r3;" // r3 := ptr + numBytes (one-past-the-end pointer)
47+
"loopLoad:" // entered once per byte
48+
" ldrb r5, [r2, #0];" // r5 := *ptr
49+
" add r2, #1;" // ptr++
50+
" movs r4, #128;" // r4 := bit mask, starting at 0x80 (MSB)
51+
"loopBit:" // entered once per bit
52+
" str r1, [r0, #4];" // set: write pinMask to the register 4 bytes above clraddr
53+
#ifdef SAMD21
54+
" movs r6, #3; d2: sub r6, #1; bne d2;" // delay: 3 iterations of a count-down loop
55+
#endif
56+
#ifdef SAMD51
57+
" movs r6, #3; d2: subs r6, #1; bne d2;" // delay: 3 iterations of a count-down loop
58+
#endif
59+
" tst r4, r5;" // test mask & r5 (is the current bit a 1?)
60+
" bne skipclr;" // 1 bit: keep the pin set through the next delay
61+
" str r1, [r0, #0];" // clr: 0 bit, so clear the pin early
62+
"skipclr:"
63+
#ifdef SAMD21
64+
" movs r6, #6; d0: sub r6, #1; bne d0;" // delay: 6 iterations of a count-down loop
65+
#endif
66+
#ifdef SAMD51
67+
" movs r6, #6; d0: subs r6, #1; bne d0;" // delay: 6 iterations of a count-down loop
68+
#endif
69+
" str r1, [r0, #0];" // clr (possibly again, doesn't matter)
70+
#ifdef SAMD21
71+
" asr r4, r4, #1;" // mask >>= 1
72+
#endif
73+
#ifdef SAMD51
74+
" asrs r4, r4, #1;" // mask >>= 1
75+
#endif
76+
" beq nextbyte;" // mask reached zero: all 8 bits of this byte are done
77+
" uxtb r4, r4;" // keep only the low 8 bits of the mask
78+
#ifdef SAMD21
79+
" movs r6, #2; d1: sub r6, #1; bne d1;" // delay: 2 iterations of a count-down loop
80+
#endif
81+
#ifdef SAMD51
82+
" movs r6, #2; d1: subs r6, #1; bne d1;" // delay: 2 iterations of a count-down loop
83+
#endif
84+
" b loopBit;" // send the next bit of the same byte
85+
"nextbyte:"
86+
" cmp r2, r3;" // compare ptr with the end pointer
87+
" bcs neopixel_stop;" // ptr >= end (unsigned): every byte has been sent
88+
" b loopLoad;" // otherwise fetch the next byte
89+
"neopixel_stop:"
90+
" pop {r4, r5, r6, pc};" // restore r4-r6 and return (saved lr is popped into pc)
91+
"");
92+
}
5593

5694
uint64_t next_start_tick_ms = 0;
5795
uint32_t next_start_tick_us = 1000;
5896

5997
void common_hal_neopixel_write(const digitalio_digitalinout_obj_t* digitalinout, uint8_t *pixels, uint32_t numBytes) {
6098
// This is adapted directly from the Adafruit NeoPixel library SAMD21G18A code:
6199
// https://github.com/adafruit/Adafruit_NeoPixel/blob/master/Adafruit_NeoPixel.cpp
62-
uint8_t *ptr, *end, p, bitMask;
100+
// and the asm version from https://github.com/microsoft/uf2-samdx1/blob/master/inc/neopixel.h
63101
uint32_t pinMask;
64102
PortGroup* port;
65103

@@ -71,100 +109,32 @@ void common_hal_neopixel_write(const digitalio_digitalinout_obj_t* digitalinout,
71109
mp_hal_disable_all_interrupts();
72110

73111

74-
#ifdef SAMD21
75-
// Make sure the NVM cache is consistently timed.
76-
NVMCTRL->CTRLB.bit.READMODE = NVMCTRL_CTRLB_READMODE_DETERMINISTIC_Val;
77-
#endif
78-
79112
#ifdef SAMD51
80113
// When this routine is positioned at certain addresses, the timing logic
81114
// below can be too fast by about 2.5x. This is some kind of (un)fortunate code
82-
// positiong with respect to a cache line.
115+
// positioning with respect to a cache line.
83116
// Theoretically we should turn off the CMCC caches and the
84117
// NVM caches to ensure consistent timing. Testing shows that the NVMCTRL
85118
// cache disabling seems to make the difference. But turn both off to make sure.
86119
// It's difficult to test because additions to the code before the timing loop
87-
// below change instruction placement. Testing was done by adding cache changes
88-
// below the loop (so only the first time through is wrong).
120+
// below change instruction placement. (though this should be less true now that
121+
// the main code is in the cache-aligned function neopixel_send_buffer_core)
122+
// Testing was done by adding cache changes below the loop (so only the
123+
// first time through is wrong).
89124
//
90125
// Turn off instruction, data, and NVM caches to force consistent timing.
91126
// Invalidate existing cache entries.
92127
hri_cmcc_set_CFG_reg(CMCC, CMCC_CFG_DCDIS | CMCC_CFG_ICDIS);
93128
hri_cmcc_write_MAINT0_reg(CMCC, CMCC_MAINT0_INVALL);
94129
hri_nvmctrl_set_CTRLA_CACHEDIS0_bit(NVMCTRL);
95130
hri_nvmctrl_set_CTRLA_CACHEDIS1_bit(NVMCTRL);
96-
#endif
131+
#endif
97132

98133
uint32_t pin = digitalinout->pin->number;
99134
port = &PORT->Group[GPIO_PORT(pin)]; // Convert GPIO # to port register
100135
pinMask = (1UL << (pin % 32)); // From port_pin_set_output_level ASF code.
101-
ptr = pixels;
102-
end = ptr + numBytes;
103-
p = *ptr++;
104-
bitMask = 0x80;
105-
106-
volatile uint32_t *set = &(port->OUTSET.reg),
107-
*clr = &(port->OUTCLR.reg);
108-
109-
for(;;) {
110-
*set = pinMask;
111-
// This is the time where the line is always high regardless of the bit.
112-
// For the SK6812 its 0.3us +- 0.15us
113-
#ifdef SAMD21
114-
asm("nop; nop;");
115-
#endif
116-
#ifdef SAMD51
117-
delay_cycles(2);
118-
#endif
119-
if((p & bitMask) != 0) {
120-
// This is the high delay unique to a one bit.
121-
// For the SK6812 its 0.3us
122-
#ifdef SAMD21
123-
asm("nop; nop; nop; nop; nop; nop; nop;");
124-
#endif
125-
#ifdef SAMD51
126-
delay_cycles(3);
127-
#endif
128-
*clr = pinMask;
129-
} else {
130-
*clr = pinMask;
131-
// This is the low delay unique to a zero bit.
132-
// For the SK6812 its 0.3us
133-
#ifdef SAMD21
134-
asm("nop; nop;");
135-
#endif
136-
#ifdef SAMD51
137-
delay_cycles(2);
138-
#endif
139-
}
140-
if((bitMask >>= 1) != 0) {
141-
// This is the delay between bits in a byte and is the 1 code low
142-
// level time from the datasheet.
143-
// For the SK6812 its 0.6us +- 0.15us
144-
#ifdef SAMD21
145-
asm("nop; nop; nop; nop; nop;");
146-
#endif
147-
#ifdef SAMD51
148-
delay_cycles(4);
149-
#endif
150-
} else {
151-
if(ptr >= end) break;
152-
p = *ptr++;
153-
bitMask = 0x80;
154-
// This is the delay between bytes. It's similar to the other branch
155-
// in the if statement except its tuned to account for the time the
156-
// above operations take.
157-
// For the SK6812 its 0.6us +- 0.15us
158-
#ifdef SAMD51
159-
delay_cycles(3);
160-
#endif
161-
}
162-
}
163-
164-
#ifdef SAMD21
165-
// Speed up! (But inconsistent timing.)
166-
NVMCTRL->CTRLB.bit.READMODE = NVMCTRL_CTRLB_READMODE_NO_MISS_PENALTY_Val;
167-
#endif
136+
volatile uint32_t *clr = &(port->OUTCLR.reg);
137+
neopixel_send_buffer_core(clr, pinMask, pixels, numBytes);
168138

169139
#ifdef SAMD51
170140
// Turn instruction, data, and NVM caches back on.
@@ -189,4 +159,3 @@ void common_hal_neopixel_write(const digitalio_digitalinout_obj_t* digitalinout,
189159

190160
}
191161

192-
#pragma GCC pop_options

0 commit comments

Comments
 (0)