Skip to content

Commit 2952b99

Browse files
authored
Rollup merge of #145963 - heiher:src-analysis-lsx, r=lqd
Add LSX accelerated implementation for source file analysis

This patch introduces an LSX-optimized version of `analyze_source_file` for the `loongarch64` target. Similar to the existing SSE2 implementation for x86, this version:

- Processes 16-byte chunks at a time using LSX vector intrinsics.
- Quickly identifies newlines in ASCII-only chunks.
- Falls back to the generic implementation when multi-byte UTF-8 characters are detected or in the tail portion.
2 parents 6c77c4c + 5b43244 commit 2952b99

File tree

2 files changed

+107
-3
lines changed

2 files changed

+107
-3
lines changed

compiler/rustc_span/src/analyze_source_file.rs

Lines changed: 106 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,8 @@ cfg_select! {
8181
// use `loadu`, which supports unaligned loading.
8282
let chunk = unsafe { _mm_loadu_si128(chunk.as_ptr() as *const __m128i) };
8383

84-
// For character in the chunk, see if its byte value is < 0, which
85-
// indicates that it's part of a UTF-8 char.
84+
// For each character in the chunk, see if its byte value is < 0,
85+
// which indicates that it's part of a UTF-8 char.
8686
let multibyte_test = _mm_cmplt_epi8(chunk, _mm_set1_epi8(0));
8787
// Create a bit mask from the comparison results.
8888
let multibyte_mask = _mm_movemask_epi8(multibyte_test);
@@ -132,8 +132,111 @@ cfg_select! {
132132
}
133133
}
134134
}
135+
target_arch = "loongarch64" => {
136+
/// Picks the implementation at runtime: when the CPU reports `lsx`
/// support, the LSX-accelerated scanner is used; otherwise the whole of
/// `src` is handed to the generic implementation. Results are appended to
/// `lines` and `multi_byte_chars`.
fn analyze_source_file_dispatch(
137+
src: &str,
138+
lines: &mut Vec<RelativeBytePos>,
139+
multi_byte_chars: &mut Vec<MultiByteChar>,
140+
) {
141+
use std::arch::is_loongarch_feature_detected;
142+
143+
if is_loongarch_feature_detected!("lsx") {
144+
// SAFETY: `analyze_source_file_lsx` requires the `lsx` target
// feature, whose presence was just checked at runtime.
unsafe {
145+
analyze_source_file_lsx(src, lines, multi_byte_chars);
146+
}
147+
} else {
148+
// No LSX available: scan all `src.len()` bytes generically,
// with positions reported relative to offset 0.
analyze_source_file_generic(
149+
src,
150+
src.len(),
151+
RelativeBytePos::from_u32(0),
152+
lines,
153+
multi_byte_chars,
154+
);
155+
}
156+
}
157+
158+
/// Checks 16 byte chunks of text at a time. If the chunk contains
159+
/// any non-ASCII byte (i.e. part of a multi-byte UTF-8 character), the
160+
/// function falls back to the generic implementation. Otherwise it uses
161+
/// LSX intrinsics to quickly find all newlines.
///
/// The bytes left over after the last full chunk are always handled by
/// the generic implementation.
///
/// # Safety
///
/// The caller must ensure that the `lsx` target feature is available on
/// the CPU executing this function.
162+
#[target_feature(enable = "lsx")]
163+
unsafe fn analyze_source_file_lsx(
164+
src: &str,
165+
lines: &mut Vec<RelativeBytePos>,
166+
multi_byte_chars: &mut Vec<MultiByteChar>,
167+
) {
168+
use std::arch::loongarch64::*;
169+
170+
const CHUNK_SIZE: usize = 16;
171+
172+
let (chunks, tail) = src.as_bytes().as_chunks::<CHUNK_SIZE>();
173+
174+
// This variable keeps track of where we should start decoding a
175+
// chunk. If a multi-byte character spans across chunk boundaries,
176+
// we need to skip that part in the next chunk because we already
177+
// handled it.
178+
let mut intra_chunk_offset = 0;
179+
180+
for (chunk_index, chunk) in chunks.iter().enumerate() {
181+
// All LSX memory instructions support unaligned access, so using
182+
// vld is fine.
// SAFETY: `chunk` is a `&[u8; CHUNK_SIZE]`, so the 16 bytes starting
// at `chunk.as_ptr()` are valid to read.
183+
let chunk = unsafe { lsx_vld::<0>(chunk.as_ptr() as *const i8) };
184+
185+
// For each byte in the chunk, see if its value is < 0 (sign bit set),
186+
// which indicates that it's part of a multi-byte UTF-8 char.
187+
let multibyte_mask = lsx_vmskltz_b(chunk);
188+
// Move the resulting bit mask out of the vector into a scalar.
189+
let multibyte_mask = lsx_vpickve2gr_w::<0>(multibyte_mask);
190+
191+
// If the bit mask is all zero, we only have ASCII chars here:
192+
if multibyte_mask == 0 {
193+
assert!(intra_chunk_offset == 0);
194+
195+
// Check for newlines in the chunk
196+
let newlines_test = lsx_vseqi_b::<{b'\n' as i32}>(chunk);
197+
let newlines_mask = lsx_vmskltz_b(newlines_test);
198+
let mut newlines_mask = lsx_vpickve2gr_w::<0>(newlines_mask);
199+
200+
// The `+ 1` makes each recorded position point at the byte right
// after the newline rather than at the newline itself.
let output_offset = RelativeBytePos::from_usize(chunk_index * CHUNK_SIZE + 1);
201+
202+
while newlines_mask != 0 {
203+
let index = newlines_mask.trailing_zeros();
204+
205+
lines.push(RelativeBytePos(index) + output_offset);
206+
207+
// Clear the bit, so we can find the next one.
208+
newlines_mask &= newlines_mask - 1;
209+
}
210+
} else {
211+
// The slow path.
212+
// There are multibyte chars in here, fallback to generic decoding.
213+
let scan_start = chunk_index * CHUNK_SIZE + intra_chunk_offset;
214+
intra_chunk_offset = analyze_source_file_generic(
215+
&src[scan_start..],
216+
CHUNK_SIZE - intra_chunk_offset,
217+
RelativeBytePos::from_usize(scan_start),
218+
lines,
219+
multi_byte_chars,
220+
);
221+
}
222+
}
223+
224+
// There might still be a tail left to analyze
225+
let tail_start = src.len() - tail.len() + intra_chunk_offset;
226+
if tail_start < src.len() {
227+
analyze_source_file_generic(
228+
&src[tail_start..],
229+
src.len() - tail_start,
230+
RelativeBytePos::from_usize(tail_start),
231+
lines,
232+
multi_byte_chars,
233+
);
234+
}
235+
}
236+
}
135237
_ => {
136-
// The target (or compiler version) does not support SSE2 ...
238+
// The target (or compiler version) does not support vector instructions
239+
// our specialized implementations need (x86 SSE2, loongarch64 LSX)...
137240
fn analyze_source_file_dispatch(
138241
src: &str,
139242
lines: &mut Vec<RelativeBytePos>,

compiler/rustc_span/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
// tidy-alphabetical-start
1919
#![allow(internal_features)]
2020
#![cfg_attr(bootstrap, feature(round_char_boundary))]
21+
#![cfg_attr(target_arch = "loongarch64", feature(stdarch_loongarch))]
2122
#![doc(html_root_url = "https://doc.rust-lang.org/nightly/nightly-rustc/")]
2223
#![doc(rust_logo)]
2324
#![feature(array_windows)]

0 commit comments

Comments
 (0)