@@ -132,8 +132,108 @@ cfg_select! {
132
132
}
133
133
}
134
134
}
135
+ target_arch = "loongarch64" => {
136
+ fn analyze_source_file_dispatch(
137
+ src: & str ,
138
+ lines: & mut Vec <RelativeBytePos >,
139
+ multi_byte_chars: & mut Vec <MultiByteChar >,
140
+ ) {
141
+ use std:: arch:: is_loongarch_feature_detected;
142
+
143
+ if is_loongarch_feature_detected!( "lsx" ) {
144
+ unsafe {
145
+ analyze_source_file_lsx( src, lines, multi_byte_chars) ;
146
+ }
147
+ } else {
148
+ analyze_source_file_generic(
149
+ src,
150
+ src. len( ) ,
151
+ RelativeBytePos :: from_u32( 0 ) ,
152
+ lines,
153
+ multi_byte_chars,
154
+ ) ;
155
+ }
156
+ }
157
+
158
+ /// Checks 16 byte chunks of text at a time. If the chunk contains
159
+ /// something other than printable ASCII characters and newlines, the
160
+ /// function falls back to the generic implementation. Otherwise it uses
161
+ /// LSX intrinsics to quickly find all newlines.
162
+ #[ target_feature( enable = "lsx" ) ]
163
+ unsafe fn analyze_source_file_lsx(
164
+ src: & str ,
165
+ lines: & mut Vec <RelativeBytePos >,
166
+ multi_byte_chars: & mut Vec <MultiByteChar >,
167
+ ) {
168
+ use std:: arch:: loongarch64:: * ;
169
+
170
+ const CHUNK_SIZE : usize = 16 ;
171
+
172
+ let ( chunks, tail) = src. as_bytes( ) . as_chunks:: <CHUNK_SIZE >( ) ;
173
+
174
+ // This variable keeps track of where we should start decoding a
175
+ // chunk. If a multi-byte character spans across chunk boundaries,
176
+ // we need to skip that part in the next chunk because we already
177
+ // handled it.
178
+ let mut intra_chunk_offset = 0 ;
179
+
180
+ for ( chunk_index, chunk) in chunks. iter( ) . enumerate( ) {
181
+ let chunk = unsafe { lsx_vld:: <0 >( chunk. as_ptr( ) as * const i8 ) } ;
182
+
183
+ // For character in the chunk, see if its byte value is < 0, which
184
+ // indicates that it's part of a UTF-8 char.
185
+ let multibyte_mask = lsx_vmskltz_b( chunk) ;
186
+ // Create a bit mask from the comparison results.
187
+ let multibyte_mask = lsx_vpickve2gr_w:: <0 >( multibyte_mask) ;
188
+
189
+ // If the bit mask is all zero, we only have ASCII chars here:
190
+ if multibyte_mask == 0 {
191
+ assert!( intra_chunk_offset == 0 ) ;
192
+
193
+ // Check for newlines in the chunk
194
+ let newlines_test = lsx_vseqi_b:: <{ b'\n' as i32 } >( chunk) ;
195
+ let newlines_mask = lsx_vmskltz_b( newlines_test) ;
196
+ let mut newlines_mask = lsx_vpickve2gr_w:: <0 >( newlines_mask) ;
197
+
198
+ let output_offset = RelativeBytePos :: from_usize( chunk_index * CHUNK_SIZE + 1 ) ;
199
+
200
+ while newlines_mask != 0 {
201
+ let index = newlines_mask. trailing_zeros( ) ;
202
+
203
+ lines. push( RelativeBytePos ( index) + output_offset) ;
204
+
205
+ // Clear the bit, so we can find the next one.
206
+ newlines_mask &= newlines_mask - 1 ;
207
+ }
208
+ } else {
209
+ // The slow path.
210
+ // There are multibyte chars in here, fallback to generic decoding.
211
+ let scan_start = chunk_index * CHUNK_SIZE + intra_chunk_offset;
212
+ intra_chunk_offset = analyze_source_file_generic(
213
+ & src[ scan_start..] ,
214
+ CHUNK_SIZE - intra_chunk_offset,
215
+ RelativeBytePos :: from_usize( scan_start) ,
216
+ lines,
217
+ multi_byte_chars,
218
+ ) ;
219
+ }
220
+ }
221
+
222
+ // There might still be a tail left to analyze
223
+ let tail_start = src. len( ) - tail. len( ) + intra_chunk_offset;
224
+ if tail_start < src. len( ) {
225
+ analyze_source_file_generic(
226
+ & src[ tail_start..] ,
227
+ src. len( ) - tail_start,
228
+ RelativeBytePos :: from_usize( tail_start) ,
229
+ lines,
230
+ multi_byte_chars,
231
+ ) ;
232
+ }
233
+ }
234
+ }
135
235
_ => {
136
- // The target (or compiler version) does not support SSE2 ...
236
+ // The target (or compiler version) does not support SSE2/LSX ...
137
237
fn analyze_source_file_dispatch(
138
238
src: & str ,
139
239
lines: & mut Vec <RelativeBytePos >,
0 commit comments