Skip to content

Commit 2d07251

Browse files
authored
Fix #17, optional loop unroll optimization (#18)
- fix #17, loop unroll option, improving performance, kudos to nt314p - added flag to select LOOP UNROLL (is optional as it gives larger code size) - update readme.md - minor edits
1 parent 60d5ebd commit 2d07251

File tree

8 files changed

+285
-69
lines changed

8 files changed

+285
-69
lines changed

CHANGELOG.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,14 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/)
66
and this project adheres to [Semantic Versioning](http://semver.org/).
77

88

9+
## [0.4.0] - 2024-09-10
10+
- fix #17, loop unroll option, improving performance, kudos to nt314p
11+
- added flag FASTSHIFTIN_AVR_LOOP_UNROLLED to select loop unrolling (optional, as it gives larger code size)
12+
- update readme.md
13+
- minor edits
14+
15+
----
16+
917
## [0.3.4] - 2024-07-22
1018
- add **void read(uint8_t \*array, uint8_t size)** (experimental)
1119
- update readme.md

FastShiftIn.cpp

Lines changed: 152 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
//
22
// FILE: FastShiftIn.cpp
33
// AUTHOR: Rob Tillaart
4-
// VERSION: 0.3.4
4+
// VERSION: 0.4.0
55
// PURPOSE: Fast ShiftIn for 74HC165 register, AVR optimized
66
// DATE: 2013-09-29
77
// URL: https://github.com/RobTillaart/FastShiftIn
@@ -148,74 +148,196 @@ uint8_t FastShiftIn::readLSBFIRST()
148148
{
149149
#if defined(ARDUINO_ARCH_AVR) || defined(ARDUINO_ARCH_MEGAAVR)
150150

151+
#if defined(FASTSHIFTIN_AVR_LOOP_UNROLLED) // AVR SPEED OPTIMIZED #17
152+
153+
uint8_t rv = 0;
154+
uint8_t cbmask1 = _clockBit;
155+
uint8_t inmask1 = _dataInBit;
156+
157+
volatile uint8_t* localDataInRegister = _dataInRegister;
158+
volatile uint8_t* localClockRegister = _clockRegister;
159+
160+
// disable interrupts (for all bits)
161+
uint8_t oldSREG = SREG;
162+
noInterrupts();
163+
164+
uint8_t r = *localClockRegister;
165+
*localClockRegister |= cbmask1; // clock pulse HIGH
166+
if ((*localDataInRegister & inmask1) > 0) rv |= 0x01; // read one bit
167+
*localClockRegister = r; // clock pulse LOW
168+
169+
*localClockRegister |= cbmask1; // clock pulse HIGH
170+
if ((*localDataInRegister & inmask1) > 0) rv |= 0x02; // read one bit
171+
*localClockRegister = r; // clock pulse LOW
172+
173+
*localClockRegister |= cbmask1; // clock pulse HIGH
174+
if ((*localDataInRegister & inmask1) > 0) rv |= 0x04; // read one bit
175+
*localClockRegister = r; // clock pulse LOW
176+
177+
*localClockRegister |= cbmask1; // clock pulse HIGH
178+
if ((*localDataInRegister & inmask1) > 0) rv |= 0x08; // read one bit
179+
*localClockRegister = r; // clock pulse LOW
180+
181+
*localClockRegister |= cbmask1; // clock pulse HIGH
182+
if ((*localDataInRegister & inmask1) > 0) rv |= 0x10; // read one bit
183+
*localClockRegister = r; // clock pulse LOW
184+
185+
*localClockRegister |= cbmask1; // clock pulse HIGH
186+
if ((*localDataInRegister & inmask1) > 0) rv |= 0x20; // read one bit
187+
*localClockRegister = r; // clock pulse LOW
188+
189+
*localClockRegister |= cbmask1; // clock pulse HIGH
190+
if ((*localDataInRegister & inmask1) > 0) rv |= 0x40; // read one bit
191+
*localClockRegister = r; // clock pulse LOW
192+
193+
*localClockRegister |= cbmask1; // clock pulse HIGH
194+
if ((*localDataInRegister & inmask1) > 0) rv |= 0x80; // read one bit
195+
*localClockRegister = r; // clock pulse LOW
196+
197+
// restore interrupt state
198+
SREG = oldSREG;
199+
200+
_lastValue = rv;
201+
202+
#else // AVR SIZE OPTIMIZED
203+
151204
uint8_t rv = 0;
152205
uint8_t cbmask1 = _clockBit;
153-
uint8_t cbmask2 = ~_clockBit;
154206
uint8_t inmask1 = _dataInBit;
155207

208+
volatile uint8_t* localDataInRegister = _dataInRegister;
209+
volatile uint8_t* localClockRegister = _clockRegister;
210+
211+
// disable interrupts (for all bits)
212+
uint8_t oldSREG = SREG;
213+
noInterrupts();
214+
215+
uint8_t r = *localClockRegister;
216+
156217
for (uint8_t m = 0x01; m > 0; m <<= 1)
157218
{
158-
// remember state register
159-
uint8_t oldSREG = SREG;
160-
// disable interrupts
161-
noInterrupts();
162219
// clock pulse HIGH
163-
*_clockRegister |= cbmask1;
220+
*localClockRegister |= cbmask1;
164221
// read one bit
165-
if ((*_dataInRegister & inmask1) > 0) rv |= m;
222+
if ((*localDataInRegister & inmask1) > 0) rv |= m;
166223
// clock pulse LOW
167-
*_clockRegister &= cbmask2;
168-
// reset interrupts flag to previous state
169-
SREG = oldSREG;
224+
*localClockRegister = r;
170225
}
226+
227+
// reset interrupts flag to previous state
228+
SREG = oldSREG;
229+
171230
_lastValue = rv;
172-
return rv;
173231

174-
#else
232+
#endif // FASTSHIFTIN_AVR_LOOP_UNROLLED
233+
234+
#else // other platforms use the reference shiftIn()
175235

176236
// reference implementation
177237
_lastValue = shiftIn(_dataPinIn, _clockPin, LSBFIRST);
178-
return _lastValue;
179238

180239
#endif
240+
241+
// all paths will return _lastValue.
242+
return _lastValue;
181243
}
182244

183245

184246
uint8_t FastShiftIn::readMSBFIRST()
185247
{
186248
#if defined(ARDUINO_ARCH_AVR) || defined(ARDUINO_ARCH_MEGAAVR)
187249

188-
uint8_t rv = 0;
189-
uint8_t cbmask1 = _clockBit;
190-
uint8_t cbmask2 = ~_clockBit;
191-
uint8_t inmask1 = _dataInBit;
250+
#if defined(FASTSHIFTIN_AVR_LOOP_UNROLLED) // AVR SPEED OPTIMIZED
251+
252+
uint8_t rv = 0;
253+
uint8_t cbmask1 = _clockBit;
254+
uint8_t inmask1 = _dataInBit;
255+
256+
volatile uint8_t* localDataInRegister = _dataInRegister;
257+
volatile uint8_t* localClockRegister = _clockRegister;
258+
259+
// disable interrupts (for all bits)
260+
uint8_t oldSREG = SREG;
261+
noInterrupts();
262+
263+
uint8_t r = *localClockRegister;
264+
*localClockRegister |= cbmask1; // clock pulse HIGH
265+
if ((*localDataInRegister & inmask1) > 0) rv |= 0x80; // read one bit
266+
*localClockRegister = r; // clock pulse LOW
267+
268+
*localClockRegister |= cbmask1; // clock pulse HIGH
269+
if ((*localDataInRegister & inmask1) > 0) rv |= 0x40; // read one bit
270+
*localClockRegister = r; // clock pulse LOW
271+
272+
*localClockRegister |= cbmask1; // clock pulse HIGH
273+
if ((*localDataInRegister & inmask1) > 0) rv |= 0x20; // read one bit
274+
*localClockRegister = r; // clock pulse LOW
275+
276+
*localClockRegister |= cbmask1; // clock pulse HIGH
277+
if ((*localDataInRegister & inmask1) > 0) rv |= 0x10; // read one bit
278+
*localClockRegister = r; // clock pulse LOW
279+
280+
*localClockRegister |= cbmask1; // clock pulse HIGH
281+
if ((*localDataInRegister & inmask1) > 0) rv |= 0x08; // read one bit
282+
*localClockRegister = r; // clock pulse LOW
283+
284+
*localClockRegister |= cbmask1; // clock pulse HIGH
285+
if ((*localDataInRegister & inmask1) > 0) rv |= 0x04; // read one bit
286+
*localClockRegister = r; // clock pulse LOW
287+
288+
*localClockRegister |= cbmask1; // clock pulse HIGH
289+
if ((*localDataInRegister & inmask1) > 0) rv |= 0x02; // read one bit
290+
*localClockRegister = r; // clock pulse LOW
291+
292+
*localClockRegister |= cbmask1; // clock pulse HIGH
293+
if ((*localDataInRegister & inmask1) > 0) rv |= 0x01; // read one bit
294+
*localClockRegister = r; // clock pulse LOW
295+
296+
// restore interrupt state
297+
SREG = oldSREG;
192298

299+
_lastValue = rv;
300+
301+
#else // AVR SIZE OPTIMIZED
302+
303+
uint8_t rv = 0;
304+
uint8_t cbmask1 = _clockBit;
305+
uint8_t inmask1 = _dataInBit;
306+
307+
volatile uint8_t* localDataInRegister = _dataInRegister;
308+
volatile uint8_t* localClockRegister = _clockRegister;
309+
310+
// disable interrupts (for all bits)
311+
uint8_t oldSREG = SREG;
312+
noInterrupts();
313+
314+
uint8_t r = *localClockRegister;
193315
for (uint8_t m = 0x80; m > 0; m >>= 1)
194316
{
195-
// remember state register
196-
uint8_t oldSREG = SREG;
197-
// disable interrupts
198-
noInterrupts();
199317
// clock pulse HIGH
200-
*_clockRegister |= cbmask1;
318+
*localClockRegister |= cbmask1;
201319
// read one bit
202-
if ((*_dataInRegister & inmask1) > 0) rv |= m;
320+
if ((*localDataInRegister & inmask1) > 0) rv |= m;
203321
// clock pulse LOW
204-
*_clockRegister &= cbmask2;
205-
// reset interrupts flag to previous state
206-
SREG = oldSREG;
322+
*localClockRegister = r;
207323
}
324+
325+
// reset interrupts flag to previous state
326+
SREG = oldSREG;
327+
208328
_lastValue = rv;
209-
return rv;
210329

211-
#else
330+
#endif // FASTSHIFTIN_AVR_LOOP_UNROLLED
331+
332+
#else // other platforms use the reference shiftIn()
212333

213334
// reference implementation
214335
_lastValue = shiftIn(_dataPinIn, _clockPin, MSBFIRST);
215-
return _lastValue;
216336

217337
#endif
218338

339+
// all paths will return _lastValue.
340+
return _lastValue;
219341
}
220342

221343

FastShiftIn.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
//
33
// FILE: FastShiftIn.h
44
// AUTHOR: Rob Tillaart
5-
// VERSION: 0.3.4
5+
// VERSION: 0.4.0
66
// PURPOSE: Fast ShiftIn for 74HC165 register, AVR optimized
77
// DATE: 2013-09-29
88
// URL: https://github.com/RobTillaart/FastShiftIn
@@ -11,8 +11,10 @@
1111
#include "Arduino.h"
1212

1313

14-
#define FASTSHIFTIN_LIB_VERSION (F("0.3.4"))
14+
#define FASTSHIFTIN_LIB_VERSION (F("0.4.0"))
1515

16+
// uncomment next line to get SPEED OPTIMIZED (loop unrolled) AVR code, at the cost of larger code size
17+
// #define FASTSHIFTIN_AVR_LOOP_UNROLLED 1
1618

1719
class FastShiftIn
1820
{

0 commit comments

Comments
 (0)