Skip to content

Commit 299225a

Browse files
authored
Merge pull request #119 from kas-gui/push-upmonwkyrtvx
Replace usage of swash::text with icu4x
2 parents 2565e7a + 4ff32e3 commit 299225a

File tree

5 files changed

+54
-89
lines changed

5 files changed

+54
-89
lines changed

Cargo.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,9 @@ log = "0.4"
5151
serde = { version = "1.0.123", features = ["derive"], optional = true }
5252
ab_glyph = { version = "0.2.10", optional = true }
5353
swash = "0.2.4"
54-
fontique = "0.7.0"
54+
fontique = { version = "0.7.0", features = ["icu_properties"] }
55+
icu_properties = "2.1.1"
56+
icu_segmenter = "2.1.2"
5557

5658
[dependencies.rustybuzz]
5759
version = "0.20.1"

src/display/text_runs.rs

Lines changed: 23 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,10 @@ use super::TextDisplay;
1111
use crate::conv::{to_u32, to_usize};
1212
use crate::fonts::{self, FontSelector, NoFontMatch};
1313
use crate::format::FormattableText;
14-
use crate::{Direction, Range, script_to_fontique, shaper};
15-
use swash::text::LineBreak as LB;
16-
use swash::text::cluster::Boundary;
14+
use crate::util::ends_with_hard_break;
15+
use crate::{Direction, Range, shaper};
16+
use icu_properties::{CodePointMapData, props::Script};
17+
use icu_segmenter::LineSegmenter;
1718
use unicode_bidi::{BidiInfo, LTR_LEVEL, RTL_LEVEL};
1819

1920
#[derive(Clone, Copy, Debug, PartialEq)]
@@ -195,14 +196,17 @@ impl TextDisplay {
195196
text,
196197
dpem,
197198
level: levels.first().cloned().unwrap_or(LTR_LEVEL),
198-
script: UNKNOWN_SCRIPT,
199+
script: Script::Unknown.into(),
199200
};
200201

201202
let mut start = 0;
202203
let mut breaks = Default::default();
203204

204-
let mut analyzer = swash::text::analyze(text.chars());
205-
let mut last_props = None;
205+
// TODO: allow segmenter configuration
206+
let segmenter = LineSegmenter::new_auto(Default::default());
207+
let mut break_iter = segmenter.segment_str(text);
208+
let mut next_break = break_iter.next();
209+
206210
let mut first_real = None;
207211

208212
let mut last_is_control = false;
@@ -218,13 +222,15 @@ impl TextDisplay {
218222
let is_htab = c == '\t';
219223
let control_break = is_htab || (last_is_control && !is_control);
220224

221-
let (props, boundary) = analyzer.next().unwrap();
222-
last_props = Some(props);
225+
let script = CodePointMapData::<Script>::new().get(c);
223226

224-
// Forcibly end the line?
225-
let hard_break = boundary == Boundary::Mandatory;
226227
// Is wrapping allowed at this position?
227-
let is_break = hard_break || boundary == Boundary::Line;
228+
let is_break = next_break == Some(index);
229+
// Forcibly end the line?
230+
let hard_break = is_break && ends_with_hard_break(&text[..index]);
231+
if is_break {
232+
next_break = break_iter.next();
233+
}
228234

229235
// Force end of current run?
230236
let bidi_break = levels[index] != input.level;
@@ -238,14 +244,12 @@ impl TextDisplay {
238244
}
239245

240246
let mut new_script = None;
241-
if props.script().is_real() {
247+
if !matches!(script, Script::Common | Script::Unknown | Script::Inherited) {
242248
if first_real.is_none() && !c.is_control() {
243249
first_real = Some(c);
244250
}
245-
let script = script_to_fontique(props.script());
246-
if input.script == UNKNOWN_SCRIPT {
247-
input.script = script;
248-
} else if script != UNKNOWN_SCRIPT && script != input.script {
251+
let script = script.into();
252+
if script != input.script {
249253
new_script = Some(script);
250254
}
251255
}
@@ -265,7 +269,7 @@ impl TextDisplay {
265269
start = index;
266270
non_control_end = index;
267271
input.level = levels[index];
268-
input.script = UNKNOWN_SCRIPT;
272+
input.script = Script::Unknown.into();
269273
breaks = Default::default();
270274
} else if is_break && !is_control {
271275
// We do break runs when hitting control chars, but only when
@@ -281,10 +285,8 @@ impl TextDisplay {
281285
}
282286
}
283287

284-
debug_assert!(analyzer.next().is_none());
285-
let hard_break = last_props
286-
.map(|props| matches!(props.line_break(), LB::BK | LB::CR | LB::LF | LB::NL))
287-
.unwrap_or(false);
288+
let is_break = next_break == Some(text.len());
289+
let hard_break = is_break && ends_with_hard_break(&text);
288290

289291
// Conclude: add last run. This may be empty, but we want it anyway.
290292
if !last_is_control {
@@ -340,16 +342,3 @@ impl TextDisplay {
340342
Ok(())
341343
}
342344
}
343-
344-
trait ScriptExt {
345-
#[allow(clippy::wrong_self_convention)]
346-
fn is_real(self) -> bool;
347-
}
348-
impl ScriptExt for swash::text::Script {
349-
fn is_real(self) -> bool {
350-
use swash::text::Script::*;
351-
!matches!(self, Common | Unknown | Inherited)
352-
}
353-
}
354-
355-
pub(crate) const UNKNOWN_SCRIPT: fontique::Script = fontique::Script(*b"Zzzz");

src/lib.rs

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,6 @@ pub use display::*;
2929
pub mod fonts;
3030
pub mod format;
3131

32-
mod swash_convert;
33-
pub(crate) use swash_convert::script_to_fontique;
34-
3532
mod text;
3633
pub use text::*;
3734

src/swash_convert.rs

Lines changed: 0 additions & 29 deletions
This file was deleted.

src/util.rs

Lines changed: 28 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@
55

66
//! Utility types and traits
77
8+
use icu_properties::{CodePointMapData, props::LineBreak};
9+
use icu_segmenter::{LineSegmenter, iterators::LineBreakIterator, scaffold::Utf8};
810
use std::ops::Range;
9-
use std::str::{CharIndices, Chars};
10-
use swash::text::cluster::Boundary;
1111

1212
/// Describes the state-of-preparation of a [`TextDisplay`][crate::TextDisplay]
1313
#[derive(Copy, Clone, Debug, Default, PartialEq, Eq, Ord, PartialOrd, Hash)]
@@ -68,27 +68,44 @@ impl<T: Clone> Iterator for OwningVecIter<T> {
6868
impl<T: Clone> ExactSizeIterator for OwningVecIter<T> {}
6969
impl<T: Clone> std::iter::FusedIterator for OwningVecIter<T> {}
7070

71+
/// Returns `true` when the last character of `text` has a hard-break line break
72+
/// class (mandatory break, carriage return, line feed or next line); `false` if `text` is empty.
73+
///
74+
/// Assumes `text` ends at a valid line break (not checked here). This filter is copied from icu_segmenter docs.
75+
pub(crate) fn ends_with_hard_break(text: &str) -> bool {
76+
text.chars().next_back().is_some_and(|c| {
77+
matches!(
78+
CodePointMapData::<LineBreak>::new().get(c),
79+
LineBreak::MandatoryBreak
80+
| LineBreak::CarriageReturn
81+
| LineBreak::LineFeed
82+
| LineBreak::NextLine
83+
)
84+
})
85+
}
86+
7187
/// Iterator over lines / paragraphs within the text
7288
///
7389
/// This iterator splits the input text into a sequence of "lines" at mandatory
7490
/// breaks (see [TR14#BK](https://www.unicode.org/reports/tr14/#BK)).
7591
/// Each item is a byte range into the input; the ranges cover the whole input text in order without overlap.
7692
pub struct LineIterator<'a> {
77-
analyzer: swash::text::Analyze<Chars<'a>>,
78-
char_indices: CharIndices<'a>,
93+
break_iter: LineBreakIterator<'static, 'a, Utf8>, // break positions reported by the line segmenter for `text`
94+
text: &'a str, // the input text being split
7995
start: usize, // byte offset at which the next yielded range begins
80-
len: usize,
8196
}
8297

8398
impl<'a> LineIterator<'a> {
8499
/// Construct an iterator over the lines of `text`
85100
#[inline]
86101
pub fn new(text: &'a str) -> Self {
102+
let segmenter = LineSegmenter::new_auto(Default::default());
103+
let mut break_iter = segmenter.segment_str(text);
104+
assert_eq!(break_iter.next(), Some(0)); // the segmenter always reports a break at 0; consume it here so iteration only sees later breaks
87105
LineIterator {
88-
analyzer: swash::text::analyze(text.chars()),
89-
char_indices: text.char_indices(),
106+
break_iter,
107+
text,
90108
start: 0,
91-
len: text.len(),
92109
}
93110
}
94111
}
@@ -97,24 +114,13 @@ impl<'a> Iterator for LineIterator<'a> {
97114
type Item = Range<usize>;
98115

99116
fn next(&mut self) -> Option<Self::Item> {
100-
if self.start >= self.len {
101-
return None;
102-
}
103-
104-
for (index, _) in self.char_indices.by_ref() {
105-
let (_, boundary) = self.analyzer.next().unwrap();
106-
107-
if index > 0 && boundary == Boundary::Mandatory {
117+
while let Some(index) = self.break_iter.next() { // walk the remaining break positions in order
118+
if ends_with_hard_break(&self.text[..index]) || index == self.text.len() { // a line ends after a hard-break character or at the end of the text
108119
let range = self.start..index;
109120
self.start = index;
110121
return Some(range);
111122
}
112123
}
113-
114-
debug_assert!(self.analyzer.next().is_none());
115-
116-
let range = self.start..self.len;
117-
self.start = self.len;
118-
Some(range)
124+
None // the break iterator is exhausted, so there are no more lines
119125
}
120126
}

0 commit comments

Comments
 (0)