File tree Expand file tree Collapse file tree 1 file changed +76
-12
lines changed Expand file tree Collapse file tree 1 file changed +76
-12
lines changed Original file line number Diff line number Diff line change @@ -71,29 +71,93 @@ msa_large_loop:
71
71
no_msa:
72
72
// if less than 8 bytes remain, skip the 8-byte word stores and clear the short tail directly
73
73
SGTU $8 , R2, R3
74
- BNE R3 , out
74
+ BNE R3, check4
75
75
76
- // do one byte at a time until 8 - aligned
76
+ // Check whether R1 is already 8-byte aligned (low three bits clear); if so, skip the byte-wise alignment loop
77
77
AND $7 , R1, R3
78
- BEQ R3 , words
78
+ BEQ R3, aligned
79
+
80
+ // Zero one byte at a time until we reach 8 byte alignment.
81
+ MOVV $8 , R5
82
+ SUBV R3, R5, R3
83
+ SUBV R3, R2, R2
84
+ align:
85
+ SUBV $1 , R3
79
86
MOVB R0, (R1)
80
87
ADDV $1 , R1
81
- JMP - 4 (PC)
88
+ BNE R3, align
82
89
83
- words:
84
- // do 8 bytes at a time if there is room
85
- ADDV $ - 7 , R4 , R2
90
+ aligned:
91
+ SGTU $8 , R2, R3
92
+ BNE R3, check4
93
+ SGTU $16 , R2, R3
94
+ BNE R3, zero8
95
+ SGTU $32 , R2, R3
96
+ BNE R3, zero16
97
+ SGTU $64 , R2, R3
98
+ BNE R3, zero32
99
+ loop64:
100
+ MOVV R0, (R1)
101
+ MOVV R0, 8 (R1)
102
+ MOVV R0, 16 (R1)
103
+ MOVV R0, 24 (R1)
104
+ MOVV R0, 32 (R1)
105
+ MOVV R0, 40 (R1)
106
+ MOVV R0, 48 (R1)
107
+ MOVV R0, 56 (R1)
108
+ ADDV $64 , R1
109
+ SUBV $64 , R2
110
+ SGTU $64 , R2, R3
111
+ BEQ R0, R3, loop64
112
+ BEQ R2, done
113
+
114
+ check32:
115
+ SGTU $32 , R2, R3
116
+ BNE R3, check16
117
+ zero32:
118
+ MOVV R0, (R1)
119
+ MOVV R0, 8 (R1)
120
+ MOVV R0, 16 (R1)
121
+ MOVV R0, 24 (R1)
122
+ ADDV $32 , R1
123
+ SUBV $32 , R2
124
+ BEQ R2, done
125
+
126
+ check16:
127
+ SGTU $16 , R2, R3
128
+ BNE R3, check8
129
+ zero16:
130
+ MOVV R0, (R1)
131
+ MOVV R0, 8 (R1)
132
+ ADDV $16 , R1
133
+ SUBV $16 , R2
134
+ BEQ R2, done
86
135
87
- SGTU R2 , R1 , R3
88
- BEQ R3 , out
136
+ check8:
137
+ SGTU $8 , R2, R3
138
+ BNE R3, check4
139
+ zero8:
89
140
MOVV R0, (R1)
90
141
ADDV $8 , R1
91
- JMP - 4 (PC)
142
+ SUBV $8 , R2
143
+ BEQ R2, done
92
144
93
- out :
145
+ check4:
146
+ SGTU $4 , R2, R3
147
+ BNE R3, loop1
148
+ zero4:
149
+ MOVB R0, (R1)
150
+ MOVB R0, 1 (R1)
151
+ MOVB R0, 2 (R1)
152
+ MOVB R0, 3 (R1)
153
+ ADDV $4 , R1
154
+ SUBV $4 , R2
155
+
156
+ loop1:
94
157
BEQ R1, R4, done
95
158
MOVB R0, (R1)
96
159
ADDV $1 , R1
97
- JMP - 3 (PC)
160
+ JMP loop1
98
161
done:
99
162
RET
163
+
You can’t perform that action at this time.
0 commit comments