@@ -109,53 +109,23 @@ clearge32:
109109// For size >= 4KB, the XC loop is unrolled 16 times (4KB = 256B * 16)
110110clearge4KB:
111111 XC $256 , 0 (R4), 0 (R4)
112- ADD $256 , R4
113- ADD $-256 , R5
114- XC $256 , 0 (R4), 0 (R4)
115- ADD $256 , R4
116- ADD $-256 , R5
117- XC $256 , 0 (R4), 0 (R4)
118- ADD $256 , R4
119- ADD $-256 , R5
120- XC $256 , 0 (R4), 0 (R4)
121- ADD $256 , R4
122- ADD $-256 , R5
123- XC $256 , 0 (R4), 0 (R4)
124- ADD $256 , R4
125- ADD $-256 , R5
126- XC $256 , 0 (R4), 0 (R4)
127- ADD $256 , R4
128- ADD $-256 , R5
129- XC $256 , 0 (R4), 0 (R4)
130- ADD $256 , R4
131- ADD $-256 , R5
132- XC $256 , 0 (R4), 0 (R4)
133- ADD $256 , R4
134- ADD $-256 , R5
135- XC $256 , 0 (R4), 0 (R4)
136- ADD $256 , R4
137- ADD $-256 , R5
138- XC $256 , 0 (R4), 0 (R4)
139- ADD $256 , R4
140- ADD $-256 , R5
141- XC $256 , 0 (R4), 0 (R4)
142- ADD $256 , R4
143- ADD $-256 , R5
144- XC $256 , 0 (R4), 0 (R4)
145- ADD $256 , R4
146- ADD $-256 , R5
147- XC $256 , 0 (R4), 0 (R4)
148- ADD $256 , R4
149- ADD $-256 , R5
150- XC $256 , 0 (R4), 0 (R4)
151- ADD $256 , R4
152- ADD $-256 , R5
153- XC $256 , 0 (R4), 0 (R4)
154- ADD $256 , R4
155- ADD $-256 , R5
156- XC $256 , 0 (R4), 0 (R4)
157- ADD $256 , R4
158- ADD $-256 , R5
112+ XC $256 , 256 (R4), 256 (R4)
113+ XC $256 , 512 (R4), 512 (R4)
114+ XC $256 , 768 (R4), 768 (R4)
115+ XC $256 , 1024 (R4), 1024 (R4)
116+ XC $256 , 1280 (R4), 1280 (R4)
117+ XC $256 , 1536 (R4), 1536 (R4)
118+ XC $256 , 1792 (R4), 1792 (R4)
119+ XC $256 , 2048 (R4), 2048 (R4)
120+ XC $256 , 2304 (R4), 2304 (R4)
121+ XC $256 , 2560 (R4), 2560 (R4)
122+ XC $256 , 2816 (R4), 2816 (R4)
123+ XC $256 , 3072 (R4), 3072 (R4)
124+ XC $256 , 3328 (R4), 3328 (R4)
125+ XC $256 , 3584 (R4), 3584 (R4)
126+ XC $256 , 3840 (R4), 3840 (R4)
127+ ADD $4096 , R4
128+ ADD $-4096 , R5
159129 CMP R5, $4096
160130 BGE clearge4KB
161131
@@ -180,7 +150,7 @@ clear32to255:
180150clear32:
181151 VZERO V1
182152 VST V1, 0 (R4)
183- VST V1, 16 (R4)
153+ VST V1, 16 (R4)
184154 RET
185155
186156clear33to64:
0 commit comments