@@ -40,7 +40,7 @@ scratch RAM and the stack pointer is overwritten.
4040#define CTAG8 0x33
4141#define CTAG9 0x34
4242#define CTAG10 0x35 @ not used
43- #define CTAG11 0x36
43+ #define CTAG11 0x36 @ not used
4444#define CTAG12 0x37
4545#define CTAG13 0x38
4646#define CTAG14 0x39
@@ -93,6 +93,8 @@ scratch RAM and the stack pointer is overwritten.
9393.endif
9494.endm
9595
96+ @ Clear internal stripe load registers , and r0 - r3
97+ @ 0 <= offset <= 32
9698.macro clear03 offset= 0
9799 getchaffaddress r0 , \offset
98100 ldmia r0 , {r0 - r3}
@@ -158,6 +160,10 @@ RKshareC: @ Round key common share C; see comment at init_key
158160.space 4
159161RKshareCchange: @ Temporary used by ref_roundkey_share_s
160162.space 4
163+ IV0: @ 2 - way share of IV for block 0
164+ .space 36 @ Considering IV0 as a word pointer , the format is IV = IV0 [ 0 , 1 , 2 , 3 ] ^ (IV0 [ 5 , 6 , 7 , 8 ], ror # 16 )
165+ @ The gap at IV0 [ 4 ] is to defeat unsharing by internal striped memory registers
166+ @ I.e. , there are implicit XORs IV0 [ 0 ] ^IV0 [ 4 ], IV0 [ 1 ] ^IV0 [ 5 ], ... , that the 1 word offset renders useless
161167
162168@ Regardless of configuration , the code uses a single 256 - entry LUT ,
163169@ which is a simple S - box table.
@@ -323,11 +329,11 @@ gen_rand_sha:
323329 ldr r2 , =rstate_sha
324330 ldr r0 ,[ r2 , #jstate - rstate_sha ]
325331 movs r1 , # 1
326- movs r3 , r0 , lsl # 2
327- ands r3 , r3 , # 31
328- movs r3 , r1 , lsl r3 @ 1 <<( 4 * (r0& 7 ))
329- udiv r3 , r3 , r1 @ Takes constant + (r0& 7 ) cycles
330- lsrs r0 , r0 , # 1
332+ ands r3 , r0 , # 3
333+ movs r3 , r3 , lsl # 2
334+ movs r3 , r1 , lsl r3 @ 1 <<( 4 * (r0& 3 ))
335+ udiv r3 , r3 , r1 @ Takes constant + (r0& 3 ) cycles
336+ lsrs r0 , r0 , # 2
331337 bne 1f
332338 bl gen_rand_sha_nonpres
333339 ldr r2 , =rstate_sha
@@ -352,6 +358,7 @@ gen_rand_sha_nonpres:
352358 strb r3 ,[ r2 ] @ save updated SUM register offset in bottom byte of rstate_sha []
353359 bx r14
3543601 :
361+ @ [ CK_JITTER code was here ]
355362 movs r3 , #SHA256_SUM6_OFFSET + 1
356363 strb r3 ,[ r2 ] @ reset word counter: the + 1 is compensated for later
357364 movw r1 , #( 1 <<SHA256_CSR_BSWAP_LSB) + ( 1 <<SHA256_CSR_START_LSB)
@@ -437,10 +444,13 @@ gen_rand_lfsr_nonpres:
437444.balign 4
438445.thumb_func
439446decrypt:
447+ @ r0= 4 - way key , r1=IV_shareA , r2=IV_shareB , r3=message buffer , [ r13 ] =number of blocks
448+ ldr r12 ,[ r13 ] @ Load 5th argument (first stack word) into r12 without adjusting sp (r12 may be treated as scratch according to AAPCS)
440449 push { r14 }
441450 GET_CANARY r14 , CTAG3 , 6
442451 SET_COUNT 23 , 6
443- push {r0 - r12 , r14 }
452+ push {r4 - r11 , r14 }
453+ push {r0 - r3 , r12 } @ Save the five arguments
444454 bl reset_sha_trng
445455 bl init_rstate
446456@ randomly re - share the LUT contents
@@ -463,11 +473,11 @@ decrypt:
463473 bl init_key_4way
464474 CHK_COUNT 31 , 6
465475 bl lock_key
466- pop {r0 - r2}
476+ pop {r0 - r3} @ r0=IV_shareA , r1=IV_shareB , r2=message , r3=num blocks
467477 bl ctr_crypt_s
468478 bl randomisechaff
469479 clear03
470- pop {r4 - r12 , r14 }
480+ pop {r4 - r11 , r14 }
471481 CHK_CANARY r14 , CTAG3 , 6
472482 pop { r15 }
473483
@@ -859,7 +869,7 @@ ref_roundkey_hvperms_s_exit: @ label exit point to be to able to specify to ana
859869.if ST_VPERM
860870.balign 4
861871.thumb_func
862- @ Rotate share registers r4 - r7 , r8 - r11 (r4 - >r5 - r6 - >r7 - >r4 etc.) by an addtional amount
872+ @ Cycle share registers r4 - r7 , r8 - r11 (r4 - >r5 - >r6 - >r7 - >r4 etc.) by an additional amount
863873@ given in the bottom two bits of R0 and update the rotation recorded at statevperm.
864874@ On entry R1 must point to statevperm.
865875@ Trashes r0 - r3 , r12
@@ -901,46 +911,7 @@ addstatevperm_exit: @ label exit point to be to able to specify to ana
901911 bx r14
902912.endif
903913
904- @ Switch from non - shared to shared state
905- @ Trashes r0 - r3 , r12
906- .balign 4
907- ns_to_s:
908- GET_CANARY r12 , CTAG11 , 6
909- push { r12 , r14 }
910- .if ST_SHAREC
911- bl gen_rand_sha_nonpres @ Create state share C ; all bytes the same
912- ands r0 , r0 , # 255
913- orrs r0 , r0 , r0 , lsl # 8
914- orrs r12 , r0 , r0 , lsl # 16
915- ldr r1 , =shareC
916- str r12 ,[ r1 ]
917- .else
918- movs r12 , # 0
919- .endif
920- bl gen_rand_sha_nonpres
921- eors r4 , r4 , r0
922- eor r8 , r12 , r0 , ror # 16
923- bl gen_rand_sha_nonpres
924- eors r5 , r5 , r0
925- eor r9 , r12 , r0 , ror # 16
926- bl gen_rand_sha_nonpres
927- eors r6 , r6 , r0
928- eor r10 , r12 , r0 , ror # 16
929- bl gen_rand_sha_nonpres
930- eors r7 , r7 , r0
931- eor r11 , r12 , r0 , ror # 16
932- .if ST_VPERM
933- bl gen_rand_sha_nonpres
934- ldr r1 , =statevperm
935- movs r2 , # 0
936- str r2 ,[ r1 ]
937- bl addstatevperm @ Initialise state vperm with SHA RNG , refresh with LFSR RNG
938- .endif
939- pop { r12 , r14 }
940- CHK_CANARY r12 , CTAG11 , 6
941- bx r14
942-
943- @ Conjugate lut_a , lut_b with shareC
914+ @ Conjugate lut_a , lut_b with (state) shareC
944915@ I.e. , EOR the input and output with shareC.
945916@ We need to pick one input for each share A and B , and one output for ONE of the shares A and B
946917@ Arbitrarily choosing a0 , b1 and d0
@@ -1653,44 +1624,65 @@ addrkey_s:
16531624.endif
16541625
16551626ctr_crypt_s:
1656- @ r0=IV , r1=cipher/plaintext buffer , r2 =number of blocks
1627+ @ r0=IV_shareA , r1=IV_shareB , r2= cipher/plaintext buffer, r3 =number of blocks
16571628 GET_CANARY r12 , CTAG0 , 6
16591630 push {r0 - r12 , r14 } @ save all registers so that when we restore we overwrite any secrets
16591630
1660- push {r0 - r2}
1631+ push {r0 - r3}
1632+
16611633 SET_COUNT 93 , 6
16621634
16631635.if CT_BPERM
16641636@ Initialise 32 random numbers (which fit in half - words)
1637+ @ r3=number of blocks
16651638 ldr r4 , =bperm_rand
16661639 movs r5 , # 32
166716401 :
16681641 bl gen_rand_sha
1669- umull r0 , r3 , r0 , r2 @ Random number between 0 and n - 1 (n=#blocks)
1670- strh r3 ,[ r4 ], # 2
1642+ umull r0 , r2 , r0 , r3 @ Random number between 0 and n - 1 (n=#blocks)
1643+ strh r2 ,[ r4 ], # 2
16711644 subs r5 , r5 , # 1
16721645 bne 1b
16731646.endif
16741647
16751648 bl randomisechaff
1676- pop {r0 - r2}
1649+
1650+ @ Refresh IVshareA and IVshareB , convert to ror # 16 format and store the result at IV0
1651+ @ Not doing shareC or state vperm at this point
1652+ pop {r0}
1653+ ldmia r0 , {r4 - r7} @ r4 - r7 = IVshareA
1654+ clear03 16
1655+ pop {r1}
1656+ ldmia r1 , { r8 - r11 } @ r8 - r11 = IVshareB
1657+ clear03 32
1658+ bl gen_rand_sha_nonpres ; eors r4,r4,r0; mov r8, r8, ror#16; eor r8, r8, r0,ror#16
1659+ bl gen_rand_sha_nonpres ; eors r5,r5,r0; mov r9, r9, ror#16; eor r9, r9, r0,ror#16
1660+ bl gen_rand_sha_nonpres ; eors r6,r6,r0; mov r10,r10,ror#16; eor r10,r10,r0,ror#16
1661+ bl gen_rand_sha_nonpres ; eors r7,r7,r0; mov r11,r11,ror#16; eor r11,r11,r0,ror#16
1662+ ldr r0 , =IV0
1663+ stmia r0 , {r4 - r7}
1664+ adds r0 , r0 , # 20
1665+ stmia r0 , { r8 - r11 }
1666+ pop {r1 , r2}
1667+ @ r1=cipher/plaintext buffer , r2=number of blocks
1668+
16771669 movs r3 , # 0
16781670 CHK_COUNT 93 , 6
16791671
16801672ctr_crypt_mainloop:
16811673 SET_COUNT 80 , 6
1682- @ here r0=IV , r1=cipher/plaintext buffer, r2=number of blocks , r3=block counter
1674+ @ r1=cipher/plaintext buffer , r2=number of blocks , r3=block counter
16831675
16841676@ Do as much preparatory stuff as possible that doesn't involve the IV (to reduce interaction with it)
1685- push {r0 - r3}
1677+ push {r1 - r3}
16861678@ It's OK for execution time to depend on the block counter r3 ("public") , but not the block number (secret)
16871679
16881680 tst r3 , #(REFCHAFF_PERIOD - 1 )
16891681 bne 1f
16901682 bl refreshchaff_and_lfsr
169116831 :
16921684
1693- ldr r3 ,[ r13 , # 12 ] @ get block count off the stack
1685+ ldr r3 ,[ r13 , # 8 ] @ get block count off the stack
16941686 tst r3 , #(REMAP_PERIOD - 1 )
16951687 bne 1f
16961688 bl remap @ shuffle the LUTs ; this preserves R3
@@ -1702,21 +1694,21 @@ ctr_crypt_mainloop:
17021694 bl ref_roundkey_shares_s @ refresh the round key shares
170316951 :
17041696
1705- ldr r3 ,[ r13 , # 12 ] @ get block count off the stack
1697+ ldr r3 ,[ r13 , # 8 ] @ get block count off the stack
17061698 tst r3 , #(REFROUNDKEYHVPERMS_PERIOD - 1 )
17071699 bne 1f
17081700 bl ref_roundkey_hvperms_s @ refresh the round key vperms
170917011 :
17101702
17111703 CHK_COUNT 81 , 6
17121704
1713- pop {r0 - r3}
1714- @ r0=IV , r1=cipher/plaintext buffer, r2=number of blocks , r3=block counter
1705+ pop {r1 - r3}
1706+ @ r1=cipher/plaintext buffer , r2=number of blocks , r3=block counter
17151707
17161708@ Now calculate r12 = block number - to - be - deciphered from r3 = block counter
17171709.if CT_BPERM
17181710@ Use a "swap-or-not" method to generate an "oblivious" permutation ; see makeperm.py version 7
1719- push {r0 , r1}
1711+ push {r1}
17201712 ldr r0 , =murmur3_constants
17211713 ldmia r0 , { r9 - r12 , r14 } @ load five murmur3_32 hash constants
17221714 ldr r0 , =bperm_rand
@@ -1752,57 +1744,53 @@ ctr_crypt_mainloop:
17521744 adds r4 , r4 , r7 @ r4=j if top bit of r6 , else i
17531745 subs r1 , r1 , # 1
17541746 bpl 1b
1755- pop {r0 , r1}
1747+ pop {r1}
17561748 mov r12 , r4
17571749.else
17581750 mov r12 , r3
17591751.endif
17601752 CHK_COUNT 82 , 6
17611753
1762- @ r0=IV , r1=cipher/plaintext buffer , r2=number of blocks , r3=block counter (monotonic) , r12 =block number (block to be deciphered)
1763- push {r0 - r3 , r12 }
1754+ @ r1=cipher/plaintext buffer , r2=number of blocks , r3=block counter (monotonic) , r12 =block number (block to be deciphered)
1755+ push {r1 - r3 , r12 }
1756+ @ r4 - r11 = IV0 , r12 =block number
17641757
17651758processIV: @ non - target label to assist power analysis
1766-
1767- @ It is not clear if the following addition of the block number in r12 to the IV can usefully
1768- @ be done in terms of shares. Instead we do an addition and subtraction whose overall effect
1769- @ is the same , and which provides a small degree of masking. The IV is not traditionally a secret ,
1770- @ though it will make it harder for the attacker if it is obscured.
1771- bl gen_rand_sha
1772- movs r8 , r0 , lsr# 16 @ only use 16 low bits so we don't get any overflows in the following , and so th at a carry from the first word is rare
1773- add r9 , r8 , r12 @ "masked" block number
1774- @ r8 =random , r9 =(block number) + r8 , stack=IV , ...
1775-
1776- ldr r0 ,[ r13 ] @ peek at stack to restore r0=IV ptr
1777- ldmia r0 , {r4 - r7} @ load IV
1778- clear03 @ barrier to remove traces of IV from internal CPU load registers
1779-
1780- @ Add in r9 in byte - big - endian , bit - little - endian (!) fashion , while trying to avoid rev operations
1781- @ as far as possible as these tend to expose (via power fluctuations) byte - level hamming weights.
1782- @ First do 128 - bit addition of r9 to byte - reversed IV
1783- rev r7 , r7
1784- cmn r7 , #MAX_NUM_BLOCKS @ Compare against maximum number of blocks
1785- bcs 1f
1786- add r7 , r7 , r9 @ This can temporarily overflow but it doesn't matter as we know th at r7 + r12 does not overflow
1787- sub r7 , r7 , r8
1788- b 2f
1789- 1 :
1790- adds r7 , r7 , r9
1791- rev r6 , r6 ; adcs r6,r6,#0
1792- rev r5 , r5 ; adcs r5,r5,#0
1793- rev r4 , r4 ; adcs r4,r4,#0
1794- @ Now do 128 - bit subtraction of r8 from byte - reversed IV
1795- subs r7 , r7 , r8
1796- sbcs r6 , r6 , # 0 ; rev r6,r6
1797- sbcs r5 , r5 , # 0 ; rev r5,r5
1798- sbcs r4 , r4 , # 0 ; rev r4,r4
1799- 2 :
1800- rev r7 , r7
1801- clear01 16
1759+ ldr r8 , =IV0
1760+ ldmia r8 , {r4 - r7} @ load IV0_A
1761+ clear03 16
1762+ add r8 , r8 , # 20
1763+ ldmia r8 , { r8 - r11 } @ load IV0_B
1764+ clear03 32
1765+ rev r0 , r12
1766+ eor r7 , r7 , r0 @ XOR in block number to IV0. IV(block n) = IV0 ^ n , cf standard CTR mode IV0 + n.
1767+ @ XOR (vs addition) is compatible with XOR - shares , so stealthier/simpler because don't have to unshare to work out IV(block n)
1768+ @ r4 - r11 = IV for the current block
18021769 CHK_COUNT 83 , 6
1770+ .if ST_SHAREC
1771+ bl gen_rand_sha_nonpres @ Create state share C ; all bytes the same
1772+ ands r0 , r0 , # 255
1773+ orrs r0 , r0 , r0 , lsl # 8
1774+ orrs r12 , r0 , r0 , lsl # 16
1775+ ldr r1 , =shareC
1776+ str r12 ,[ r1 ]
1777+ .else
1778+ movs r12 , # 0
1779+ .endif
1780+ @ r4 - r11 = IV for the current block w/o shareC , r12 =shareC
1781+ @ refresh state shares and mix in shareC
1782+ bl gen_rand_sha_nonpres ; eors r4,r4,r0; eor r4,r4,r12; movs r1,#0; eor r8, r8, r0,ror#16 @ Barriers between shares to prevent implicit r4^r8 etc
1783+ bl gen_rand_sha_nonpres ; eors r5,r5,r0; eor r5,r5,r12; movs r1,#0; eor r9, r9, r0,ror#16
1784+ bl gen_rand_sha_nonpres ; eors r6,r6,r0; eor r6,r6,r12; movs r1,#0; eor r10,r10,r0,ror#16
1785+ bl gen_rand_sha_nonpres ; eors r7,r7,r0; eor r7,r7,r12; movs r1,#0; eor r11,r11,r0,ror#16
1786+ .if ST_VPERM
1787+ bl gen_rand_sha_nonpres
1788+ ldr r1 , =statevperm
1789+ movs r2 , # 0
1790+ str r2 ,[ r1 ]
1791+ bl addstatevperm @ Initialise state vperm (use SHA RNG to start with , later refreshes are with LFSR RNG)
1792+ .endif
18031793
1804- @ r4 - r7 = IV for the current block
1805- bl ns_to_s @ convert IV + x to shares , which includes choosing and incorporating a random shareC
18061794 CHK_COUNT 84 , 6
18071795 bl conjshareC @ Add the effect of shareC to lut_a , lut_b
18081796 CHK_COUNT 85 , 6
@@ -1849,9 +1837,9 @@ rounds_s_mainloop:
18491837 bl addstatevperm
18501838.endif
18511839
1852- pop {r0 - r3 , r12 }
1853- push {r0 , r3}
1854- @ r0=IV , r1=cipher/plaintext buffer, r2=number of blocks , r3=block counter , r12 =block to be deciphered
1840+ pop {r1 - r3 , r12 }
1841+ push {r3}
1842+ @ r1=cipher/plaintext buffer , r2=number of blocks , r3=block counter , r12 =block to be deciphered
18551843
18561844decryption_start:
18571845@ Decrypt ciphertext using AES output in shares: r4 - r11
@@ -1893,8 +1881,8 @@ decryption_start:
18931881 sub r1 , r1 , r12 , lsl # 4 @ Restore r1 to point to start of buffer
18941882 CHK_COUNT 90 , 6
18951883
1896- pop {r0 , r3} @ Restore IV and block counter
1897- @ r0=IV , r1=cipher/plaintext buffer, r2=number of blocks , r3=block counter
1884+ pop {r3} @ Restore block counter
1885+ @ r1=cipher/plaintext buffer , r2=number of blocks , r3=block counter
18981886decryption_end:
18991887
19001888 adds r3 , r3 , # 1
0 commit comments