diff --git a/Arduino_Code/duco_hash.cpp b/Arduino_Code/duco_hash.cpp
index 48113816..7af618a7 100644
--- a/Arduino_Code/duco_hash.cpp
+++ b/Arduino_Code/duco_hash.cpp
@@ -23,10 +23,10 @@ void duco_hash_block(duco_hash_state_t * hasher) {
 
 	for (uint8_t i = 10; i < 80; i++) {
 		if (i >= 16) {
-			w[i & 15] = sha1_rotl(1, w[(i-3) & 15] ^ w[(i-8) & 15] ^ w[(i-14) & 15] ^ w[(i-16) & 15]);
+			w[i & 15] = sha1_rotl(1,w[(i-3) & 15] ^ w[(i-8) & 15] ^ w[(i-14) & 15] ^ w[(i-16) & 15]);
 		}
 
-		uint32_t temp = sha1_rotl(5, a) + e + w[i & 15];
+		uint32_t temp = sha1_rotl5(a) + e + w[i & 15];
 		if (i < 20) {
 			temp += (b & c) | ((~b) & d);
 			temp += 0x5a827999;
@@ -43,7 +43,7 @@ void duco_hash_block(duco_hash_state_t * hasher) {
 
 		e = d;
 		d = c;
-		c = sha1_rotl(30, b);
+		c = sha1_rotl30(b);
 		b = a;
 		a = temp;
 	}
@@ -105,13 +105,13 @@ void duco_hash_init(duco_hash_state_t * hasher, char const * prevHash) {
 	}
 
 	for (uint8_t i = 0; i < 10; i++) {
-		uint32_t temp = sha1_rotl(5, a) + e + w[i & 15];
+		uint32_t temp = sha1_rotl5(a) + e + w[i & 15];
 		temp += (b & c) | ((~b) & d);
 		temp += 0x5a827999;
 
 		e = d;
 		d = c;
-		c = sha1_rotl(30, b);
+		c = sha1_rotl30(b);
 		b = a;
 		a = temp;
 	}
diff --git a/Arduino_Code/duco_hash.h b/Arduino_Code/duco_hash.h
index 462b75a9..055aa780 100644
--- a/Arduino_Code/duco_hash.h
+++ b/Arduino_Code/duco_hash.h
@@ -14,6 +14,16 @@ struct duco_hash_state_t {
 	uint8_t total_bytes;
 };
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+// Rotate-left helpers implemented in duco_hash_asm.S: sha1_rotl5 rotates val left by 5 bits, sha1_rotl30 by 30 bits
+extern uint32_t sha1_rotl5(uint32_t val);
+extern uint32_t sha1_rotl30(uint32_t val);
+#ifdef __cplusplus
+}
+#endif
+
 void duco_hash_init(duco_hash_state_t * hasher, char const * prevHash);
 
 uint8_t const * duco_hash_try_nonce(duco_hash_state_t * hasher, char const * nonce);
diff --git a/Arduino_Code/duco_hash_asm.S b/Arduino_Code/duco_hash_asm.S
new file mode 100644
index 00000000..791e905d
--- /dev/null
+++ b/Arduino_Code/duco_hash_asm.S
@@ -0,0 +1,75 @@
+.global sha1_rotl5
+.global sha1_rotl30
+
+; sha1_rotl5: rotate a 32-bit value left by 5 bits. Input:
+;   - a value in r25:r22 (little-endian: r22 is LSB, r25 is MSB)
+; Output (r20 is used as scratch and is overwritten):
+;   - rotated result in r25:r22
+sha1_rotl5:
+    clr r20         ; Will accumulate the 5 carry-out bits
+
+    lsl r22         ; Shift #1: 32-bit shift left by one across r22..r25
+    rol r23
+    rol r24
+    rol r25
+    rol r20         ; bit shifted out of r25 is collected in r20
+
+    lsl r22         ; Shift #2
+    rol r23
+    rol r24
+    rol r25
+    rol r20
+
+    lsl r22         ; Shift #3
+    rol r23
+    rol r24
+    rol r25
+    rol r20
+
+    lsl r22         ; Shift #4
+    rol r23
+    rol r24
+    rol r25
+    rol r20
+
+    lsl r22         ; Shift #5
+    rol r23
+    rol r24
+    rol r25
+    rol r20         ; Now r20 contains the 5 bits that wrapped around
+
+    ; r20 now contains bits 31..27 (original MSBs) in its lowest 5 bits
+    ; OR them into r22 (LSB), whose low 5 bits are zero after the five lsl
+    or r22, r20
+
+    ret
+
+; sha1_rotl30: rotate a 32-bit value left by 30 bits (same as rotating right by 2); r20 is scratch.
+; Input: 32-bit value in r25:r22 (r25=MSB, r22=LSB). Output: rotated result in r25:r22
+sha1_rotl30:
+    ; Save original bits 0-1 from the LSB (r22) and shift them to positions 6-7.
+    mov r20, r22    ; r20 = original r22
+    andi r20, 0x03  ; r20 = (r22 & 0x03)
+    ; Multiply r20 by 64 (i.e. shift left by 6)
+    lsl r20
+    lsl r20
+    lsl r20
+    lsl r20
+    lsl r20
+    lsl r20         ; now r20 = (r22 & 0x03) << 6
+
+    ; Shift the entire 32-bit value right by 2 bits; with the saved bits OR-ed back in below this is a rotate left by 30
+    lsr r25         ; shift MSB (r25) right by 1; its LSB goes into carry
+    ror r24         ; rotate r24 right through carry
+    ror r23         ; rotate r23 right through carry
+    ror r22         ; rotate r22 right through carry -- first shift
+
+    lsr r25         ; second shift: shift r25 right by 1
+    ror r24
+    ror r23
+    ror r22         ; second shift
+
+    ; OR the saved bits into the MSB (r25), whose top two bits are zero after the two lsr
+    or r25, r20
+
+    ret