@@ -109,53 +109,23 @@ clearge32:
109
109
// For size >= 4KB, the XC loop is unrolled 16 times, clearing 4KB (16 * 256B) per iteration
110
110
// clearge4KB: zero 4096 bytes per loop iteration, addressed through R4.
// Every XC names the same 256-byte region as both of its memory operands, so
// the region is XORed with itself and ends up zero. Sixteen such XCs are
// issued per iteration; R4 is advanced and R5 reduced by the number of bytes
// cleared, and the loop repeats while R5 >= 4096.
// NOTE(review): R5 looks like the remaining byte count and R4 the current
// destination pointer; confirm against the function prologue, which is not
// part of this hunk.
clearge4KB:
111
111
XC $256 , 0 (R4), 0 (R4)
112
- ADD $256 , R4
113
- ADD $-256 , R5
114
- XC $256 , 0 (R4), 0 (R4)
115
- ADD $256 , R4
116
- ADD $-256 , R5
117
- XC $256 , 0 (R4), 0 (R4)
118
- ADD $256 , R4
119
- ADD $-256 , R5
120
- XC $256 , 0 (R4), 0 (R4)
121
- ADD $256 , R4
122
- ADD $-256 , R5
123
- XC $256 , 0 (R4), 0 (R4)
124
- ADD $256 , R4
125
- ADD $-256 , R5
126
- XC $256 , 0 (R4), 0 (R4)
127
- ADD $256 , R4
128
- ADD $-256 , R5
129
- XC $256 , 0 (R4), 0 (R4)
130
- ADD $256 , R4
131
- ADD $-256 , R5
132
- XC $256 , 0 (R4), 0 (R4)
133
- ADD $256 , R4
134
- ADD $-256 , R5
135
- XC $256 , 0 (R4), 0 (R4)
136
- ADD $256 , R4
137
- ADD $-256 , R5
138
- XC $256 , 0 (R4), 0 (R4)
139
- ADD $256 , R4
140
- ADD $-256 , R5
141
- XC $256 , 0 (R4), 0 (R4)
142
- ADD $256 , R4
143
- ADD $-256 , R5
144
- XC $256 , 0 (R4), 0 (R4)
145
- ADD $256 , R4
146
- ADD $-256 , R5
147
- XC $256 , 0 (R4), 0 (R4)
148
- ADD $256 , R4
149
- ADD $-256 , R5
150
- XC $256 , 0 (R4), 0 (R4)
151
- ADD $256 , R4
152
- ADD $-256 , R5
153
- XC $256 , 0 (R4), 0 (R4)
154
- ADD $256 , R4
155
- ADD $-256 , R5
156
- XC $256 , 0 (R4), 0 (R4)
157
- ADD $256 , R4
158
- ADD $-256 , R5
112
+ XC $256 , 256 (R4), 256 (R4)
113
+ XC $256 , 512 (R4), 512 (R4)
114
+ XC $256 , 768 (R4), 768 (R4)
115
+ XC $256 , 1024 (R4), 1024 (R4)
116
+ XC $256 , 1280 (R4), 1280 (R4)
117
+ XC $256 , 1536 (R4), 1536 (R4)
118
+ XC $256 , 1792 (R4), 1792 (R4)
119
+ XC $256 , 2048 (R4), 2048 (R4)
120
+ XC $256 , 2304 (R4), 2304 (R4)
121
+ XC $256 , 2560 (R4), 2560 (R4)
122
+ XC $256 , 2816 (R4), 2816 (R4)
123
+ XC $256 , 3072 (R4), 3072 (R4)
124
+ XC $256 , 3328 (R4), 3328 (R4)
125
+ XC $256 , 3584 (R4), 3584 (R4)
126
+ XC $256 , 3840 (R4), 3840 (R4)
127
+ ADD $4096 , R4
128
+ ADD $-4096 , R5
159
129
CMP R5, $4096
160
130
BGE clearge4KB
161
131
@@ -180,7 +150,7 @@ clear32to255:
180
150
// clear32: zero V1, store it at offsets 0 and 16 from R4, then return.
// NOTE(review): with 16-byte vector stores this clears the 32 bytes at R4;
// the only change in this hunk is whitespace on the second VST.
clear32:
181
151
VZERO V1
182
152
VST V1, 0 (R4)
183
- VST V1, 16 (R4)
153
+ VST V1, 16 (R4)
184
154
RET
185
155
186
156
clear33to64:
0 commit comments