Skip to content

Commit 160ed2e

Browse files
committed
slim parley_data
1 parent c73f68a commit 160ed2e

File tree

9 files changed

+30
-55
lines changed

9 files changed

+30
-55
lines changed

parley/src/analysis/cluster.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33

44
use alloc::vec::Vec;
55

6-
use crate::analysis::CompositeProps;
76
use icu_normalizer::properties::{CanonicalComposition, CanonicalDecomposition, Decomposed};
7+
use parley_data::Properties;
88

99
/// The maximum number of characters in a single cluster.
1010
const MAX_CLUSTER_SIZE: usize = 32;
@@ -117,7 +117,7 @@ impl CharCluster {
117117

118118
#[inline(always)]
119119
fn contributes_to_shaping(ch: char) -> bool {
120-
let props = CompositeProps.properties(ch as u32);
120+
let props = Properties::get(ch);
121121
crate::analysis::contributes_to_shaping(props.general_category(), props.script())
122122
}
123123

parley/src/analysis/mod.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ pub(crate) mod cluster;
55

66
use alloc::vec::Vec;
77
use core::marker::PhantomData;
8+
use parley_data::Properties;
89

910
use crate::resolve::RangedStyle;
1011
use crate::{Brush, LayoutContext, WordBreak};
@@ -13,7 +14,6 @@ use icu_properties::CodePointMapData;
1314
use icu_properties::props::{BidiMirroringGlyph, GeneralCategory, GraphemeClusterBreak, Script};
1415
use icu_segmenter::options::{LineBreakOptions, LineBreakWordOption, WordBreakInvariantOptions};
1516
use icu_segmenter::{LineSegmenter, WordSegmenter};
16-
use parley_data::CompositeProps;
1717

1818
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
1919
pub(crate) struct CharInfo {
@@ -361,7 +361,7 @@ pub(crate) fn analyze_text<B: Brush>(lcx: &mut LayoutContext<B>, text: &str) {
361361
// character's index, but we need our iterators to align, and the rest are simply
362362
// character-indexed.
363363
.fold(false, |is_mandatory_linebreak, (boundary, ch)| {
364-
let properties = CompositeProps.properties(ch as u32);
364+
let properties = Properties::get(ch);
365365
let script = properties.script();
366366
let grapheme_cluster_break = properties.grapheme_cluster_break();
367367
let bidi_class = properties.bidi_class();

parley_data/Cargo.toml

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,11 @@ publish = false
1010

1111
[features]
1212
default = ["baked"]
13-
baked = ["dep:zerovec"]
14-
datagen = []
13+
baked = ["dep:icu_collections", "dep:zerovec"]
1514

1615
[dependencies]
1716
icu_properties = { workspace = true }
18-
icu_collections = { workspace = true }
17+
icu_collections = { workspace = true, optional = true }
1918
zerovec = { workspace = true, optional = true }
2019

2120
[lints]

parley_data/README.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
## Cargo features
1010

1111
- `baked` *(default)* embeds pre-generated ICU4X and composite data from `src/generated`, enabling use in `no_std` targets without a filesystem.
12-
- `datagen` enables code so the crate can participate in regeneration workflows. This feature is intended for developer use when refreshing the baked data and principally used by `../parley_data_gen`.
1312

1413
## Regenerating the baked data
1514

parley_data/src/generated/composite/mod.rs

Lines changed: 0 additions & 8 deletions
This file was deleted.

parley_data/src/generated/mod.rs

Lines changed: 5 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

parley_data/src/lib.rs

Lines changed: 16 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -12,19 +12,6 @@ use icu_properties::props::{BidiClass, GeneralCategory, GraphemeClusterBreak, Sc
1212
#[cfg(feature = "baked")]
1313
pub mod generated;
1414

15-
/// Lookup for [`Properties`]
16-
#[derive(Clone, Debug, Copy)]
17-
pub struct CompositeProps;
18-
19-
#[cfg(feature = "baked")]
20-
impl CompositeProps {
21-
/// Returns the properties for a given character.
22-
#[inline(always)]
23-
pub fn properties(&self, ch: u32) -> Properties {
24-
Properties(generated::COMPOSITE.get32(ch))
25-
}
26-
}
27-
2815
/// Unicode character properties relevant for text analysis.
2916
#[derive(Copy, Clone, Debug)]
3017
pub struct Properties(u32);
@@ -51,8 +38,13 @@ impl Properties {
5138
const IS_MANDATORY_LINE_BREAK_SHIFT: u32 =
5239
Self::IS_REGION_INDICATOR_SHIFT + Self::IS_REGION_INDICATOR_BITS;
5340

54-
/// Packs the given arguments into a single u32.
55-
#[cfg(feature = "datagen")]
41+
#[cfg(feature = "baked")]
42+
/// Returns the properties for a given character.
43+
pub fn get(ch: char) -> Self {
44+
Self(generated::COMPOSITE.get(ch))
45+
}
46+
47+
/// Creates a new [`Properties`] by packing the given property values into a single `u32`.
5648
pub fn new(
5749
script: Script,
5850
gc: GeneralCategory,
@@ -81,7 +73,7 @@ impl Properties {
8173
}
8274

8375
#[inline(always)]
84-
fn get(&self, shift: u32, bits: u32) -> u32 {
76+
fn bits(&self, shift: u32, bits: u32) -> u32 {
8577
(self.0 >> shift) & ((1 << bits) - 1)
8678
}
8779

@@ -92,7 +84,7 @@ impl Properties {
9284
clippy::cast_possible_truncation,
9385
reason = "script data only occupies SCRIPT_BITS bits; we cast to `u16` to fulfil the `from_icu4c_value` contract."
9486
)]
95-
Script::from_icu4c_value(self.get(Self::SCRIPT_SHIFT, Self::SCRIPT_BITS) as u16)
87+
Script::from_icu4c_value(self.bits(Self::SCRIPT_SHIFT, Self::SCRIPT_BITS) as u16)
9688
}
9789

9890
/// Returns the general category for the character.
@@ -102,7 +94,7 @@ impl Properties {
10294
clippy::cast_possible_truncation,
10395
reason = "general category data only occupies GC_BITS bits."
10496
)]
105-
GeneralCategory::try_from(self.get(Self::GC_SHIFT, Self::GC_BITS) as u8).unwrap()
97+
GeneralCategory::try_from(self.bits(Self::GC_SHIFT, Self::GC_BITS) as u8).unwrap()
10698
}
10799

108100
/// Returns the grapheme cluster break for the character.
@@ -112,7 +104,7 @@ impl Properties {
112104
clippy::cast_possible_truncation,
113105
reason = "cluster break data only occupies GCB_BITS bits"
114106
)]
115-
GraphemeClusterBreak::from_icu4c_value(self.get(Self::GCB_SHIFT, Self::GCB_BITS) as u8)
107+
GraphemeClusterBreak::from_icu4c_value(self.bits(Self::GCB_SHIFT, Self::GCB_BITS) as u8)
116108
}
117109

118110
/// Returns the bidirectional class for the character.
@@ -122,13 +114,13 @@ impl Properties {
122114
clippy::cast_possible_truncation,
123115
reason = "bidi class data only occupies BIDI_BITS bits"
124116
)]
125-
BidiClass::from_icu4c_value(self.get(Self::BIDI_SHIFT, Self::BIDI_BITS) as u8)
117+
BidiClass::from_icu4c_value(self.bits(Self::BIDI_SHIFT, Self::BIDI_BITS) as u8)
126118
}
127119

128120
/// Returns whether the character is an emoji or pictograph.
129121
#[inline(always)]
130122
pub fn is_emoji_or_pictograph(&self) -> bool {
131-
self.get(
123+
self.bits(
132124
Self::IS_EMOJI_OR_PICTOGRAPH_SHIFT,
133125
Self::IS_EMOJI_OR_PICTOGRAPH_BITS,
134126
) != 0
@@ -137,7 +129,7 @@ impl Properties {
137129
/// Returns whether the character is a variation selector.
138130
#[inline(always)]
139131
pub fn is_variation_selector(&self) -> bool {
140-
self.get(
132+
self.bits(
141133
Self::IS_VARIATION_SELECTOR_SHIFT,
142134
Self::IS_VARIATION_SELECTOR_BITS,
143135
) != 0
@@ -146,7 +138,7 @@ impl Properties {
146138
/// Returns whether the character is a region indicator.
147139
#[inline(always)]
148140
pub fn is_region_indicator(&self) -> bool {
149-
self.get(
141+
self.bits(
150142
Self::IS_REGION_INDICATOR_SHIFT,
151143
Self::IS_REGION_INDICATOR_BITS,
152144
) != 0
@@ -155,7 +147,7 @@ impl Properties {
155147
/// Returns whether the character is a mandatory linebreak.
156148
#[inline(always)]
157149
pub fn is_mandatory_linebreak(&self) -> bool {
158-
self.get(
150+
self.bits(
159151
Self::IS_MANDATORY_LINE_BREAK_SHIFT,
160152
Self::IS_MANDATORY_LINE_BREAK_BITS,
161153
) != 0

parley_data_gen/Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,11 @@ repository.workspace = true
99
publish = false
1010

1111
[dependencies]
12-
databake = { workspace = true, features = ["derive"] }
12+
databake = { workspace = true }
1313
icu_codepointtrie_builder = { workspace = true, features = ["wasm"] }
1414
icu_collections = { workspace = true, features = ["databake"] }
1515
icu_properties = { workspace = true, features = ["compiled_data"] }
16-
parley_data = { workspace = true, default-features = false, features = ["datagen"] }
16+
parley_data = { workspace = true }
1717

1818
[lints]
1919
workspace = true

parley_data_gen/src/lib.rs

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -54,11 +54,7 @@ pub fn generate(out: std::path::PathBuf) {
5454
}
5555
.build();
5656

57-
let composite_dir = out.join("composite");
58-
if !composite_dir.exists() {
59-
std::fs::create_dir(&composite_dir).unwrap();
60-
}
61-
let mut file = BufWriter::new(std::fs::File::create(composite_dir.join("mod.rs")).unwrap());
57+
let mut file = BufWriter::new(std::fs::File::create(out.join("mod.rs")).unwrap());
6258

6359
writeln!(&mut file, "{COPYRIGHT_HEADER}").unwrap();
6460
writeln!(&mut file, "/// Backing data for the `CompositeProps`").unwrap();
@@ -77,10 +73,4 @@ pub fn generate(out: std::path::PathBuf) {
7773
)
7874
.unwrap();
7975
}
80-
81-
let mut file = BufWriter::new(std::fs::File::create(out.join("mod.rs")).unwrap());
82-
83-
writeln!(&mut file, "{COPYRIGHT_HEADER}").unwrap();
84-
writeln!(&mut file, "mod composite;").unwrap();
85-
writeln!(&mut file, "pub use composite::*;").unwrap();
8676
}

0 commit comments

Comments
 (0)