Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions composite_props.json
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the raw data fed to PackTab

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions parley_bench/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ parley = { workspace = true, default-features = true }
parley_dev = { workspace = true }
skrifa = { workspace = true }
tango-bench = "0.6"
unicode_data = { workspace = true, features = ["baked"] }
icu_provider = { workspace = true, features = ["sync"] }
icu_provider_blob = { workspace = true }

[[bench]]
name = "main"
Expand Down
4 changes: 2 additions & 2 deletions parley_bench/benches/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

use tango_bench::{tango_benchmarks, tango_main};

use parley_bench::benches::{defaults, styled};
use parley_bench::benches::{composite_lookup_latency, defaults, styled};

tango_benchmarks!(defaults(), styled());
tango_benchmarks!(defaults(), styled(), composite_lookup_latency());
tango_main!();
14 changes: 13 additions & 1 deletion parley_bench/src/benches.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
//!
//! This module provides a benchmark for the default style.

use crate::{ColorBrush, FONT_STACK, get_contexts, get_samples};
use crate::{ColorBrush, FONT_STACK, get_contexts, get_samples, lookup};
use parley::{
Alignment, AlignmentOptions, FontStack, FontStyle, FontWeight, Layout, RangedBuilder,
StyleProperty,
Expand Down Expand Up @@ -129,3 +129,15 @@ pub fn styled() -> Vec<Benchmark> {
})
.collect()
}

/// Benchmark the composite property lookup backends.
///
/// Registers a single benchmark named `"Composite lookup"` that folds the
/// shared code point samples through one lookup backend per iteration.
/// As written, the PackTab backend is the one being measured.
pub fn composite_lookup_latency() -> Vec<Benchmark> {
    let samples = lookup::codepoint_samples();
    // Resolved eagerly so the trie payload is already initialized when the
    // backend is swapped (see below). Underscore-prefixed because the PackTab
    // path does not read it, which would otherwise be an unused-variable warning.
    let _composite = lookup::composite_data();

    vec![benchmark_fn("Composite lookup", move |b| {
        b.iter(|| {
            // Change this to `lookup::checksum_trie(samples, _composite)` to
            // compare the CodePointTrie backend's performance with tango.
            black_box(lookup::checksum_packtab(samples));
        })
    })]
}
12 changes: 12 additions & 0 deletions parley_bench/src/bin/composite_packtab.rs
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These binaries were used to compare sizes

Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
// Copyright 2025 the Parley Authors
// SPDX-License-Identifier: Apache-2.0 OR MIT

//! Binary to exercise the PackTab composite lookup for size comparisons.

use parley_bench::lookup;

/// Runs the PackTab lookup over the shared samples and prints the checksum.
fn main() {
    let checksum = lookup::checksum_packtab(lookup::codepoint_samples());
    println!("PackTab checksum: {checksum}");
}
13 changes: 13 additions & 0 deletions parley_bench/src/bin/composite_trie.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// Copyright 2025 the Parley Authors
// SPDX-License-Identifier: Apache-2.0 OR MIT

//! Binary to exercise the CodePointTrie composite lookup for size comparisons.

use parley_bench::lookup;

/// Runs the CodePointTrie lookup over the shared samples and prints the checksum.
fn main() {
    // Arguments are evaluated left to right: samples first, then the trie data.
    let checksum = lookup::checksum_trie(lookup::codepoint_samples(), lookup::composite_data());
    println!("Composite trie checksum: {checksum}");
}
1 change: 1 addition & 0 deletions parley_bench/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ use parley::{
};

pub mod benches;
pub mod lookup;

/// A color brush.
#[derive(Clone, Copy, Debug, Default, PartialEq)]
Expand Down
62 changes: 62 additions & 0 deletions parley_bench/src/lookup.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// Copyright 2025 the Parley Authors
// SPDX-License-Identifier: Apache-2.0 OR MIT

//! Helpers for comparing composite property lookups.

use std::sync::OnceLock;

use icu_provider::buf::AsDeserializingBufferProvider;
use icu_provider::{DataMarker, DataPayload, DataRequest, DataResponse, DynamicDataProvider};
use unicode_data::{CompositePropsV1, CompositePropsV1Data};

/// Number of pseudo-random code points generated for the lookup samples.
const SAMPLE_LEN: usize = 4096;

/// Produces `SAMPLE_LEN` pseudo-random values in `0..0x11_0000`.
///
/// The values come from a fixed-seed linear congruential generator, so every
/// call yields the same sequence.
fn build_sample_codepoints() -> Vec<u32> {
    let mut lcg = 0x1234_5678_u32;
    (0..SAMPLE_LEN)
        .map(|_| {
            // Advance the generator, then reduce into the code point range.
            lcg = lcg.wrapping_mul(1_664_525).wrapping_add(1_013_904_223);
            lcg % 0x11_0000
        })
        .collect()
}

/// Deserializes the composite property payload from the static blob.
///
/// # Panics
///
/// Panics if the blob cannot be parsed or the composite data cannot be loaded
/// from it.
fn load_composite_payload() -> DataPayload<CompositePropsV1> {
    let blob = unicode_data::generated::COMPOSITE_BLOB;
    let blob_provider = icu_provider_blob::BlobDataProvider::try_new_from_static_blob(blob)
        .expect("Composite blob should deserialize");

    let loaded: DataResponse<CompositePropsV1> = blob_provider
        .as_deserializing()
        .load_data(CompositePropsV1::INFO, DataRequest::default())
        .expect("Composite data should load");

    loaded.payload
}

/// Returns the composite property data, loading it on first use.
///
/// The payload is stored in a process-wide `OnceLock`, so later calls return
/// the same reference without loading again.
pub fn composite_data() -> &'static CompositePropsV1Data<'static> {
    static PAYLOAD: OnceLock<DataPayload<CompositePropsV1>> = OnceLock::new();
    let payload = PAYLOAD.get_or_init(load_composite_payload);
    payload.get()
}

/// Returns the shared, deterministic slice of sample code points.
///
/// The samples are generated once by `build_sample_codepoints` and stored in a
/// process-wide `OnceLock`; later calls return the same slice.
pub fn codepoint_samples() -> &'static [u32] {
    static SAMPLES: OnceLock<Vec<u32>> = OnceLock::new();
    let generated = SAMPLES.get_or_init(build_sample_codepoints);
    generated.as_slice()
}

/// XORs together the packed properties that `composite` reports for each sample.
///
/// Returns `0` when `samples` is empty.
pub fn checksum_trie(samples: &[u32], composite: &'static CompositePropsV1Data<'static>) -> u32 {
    let mut checksum = 0_u32;
    for &cp in samples {
        checksum ^= u32::from(composite.properties(cp));
    }
    checksum
}

/// XORs together the properties that the PackTab tables report for each sample.
///
/// Returns `0` when `samples` is empty.
pub fn checksum_packtab(samples: &[u32]) -> u32 {
    let mut checksum = 0_u32;
    for &cp in samples {
        checksum ^= u32::from(unicode_data::packtab_properties(cp));
    }
    checksum
}
4 changes: 4 additions & 0 deletions unicode_data/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,15 @@ icu_locale = { workspace = true }
icu_properties = { workspace = true }
icu_collections = { workspace = true, features = ["serde"] }
icu_provider = { workspace = true, features = ["alloc"] }
icu_provider_blob = { workspace = true }
yoke = { workspace = true, features = ["derive"] }
zerofrom = { workspace = true }
unicode-bidi = { workspace = true, features = ["smallvec"] }
serde = { workspace = true }
databake = { workspace = true, features = ["derive"], optional = true }

[dev-dependencies]
icu_provider_blob = { workspace = true }

[lints]
workspace = true
70 changes: 70 additions & 0 deletions unicode_data/src/generate_packtab.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#!/usr/bin/env python3
"""Generate the packTab-compressed composite property table as Rust source.

MUST BE RUN IN PACKTAB REPO ROOT (packTab is imported from ./Lib).

Usage:
    generate_packtab.py [INPUT_JSON]

INPUT_JSON is the JSON array of per-codepoint property values. When omitted,
the original hard-coded location is used. The generated Rust code is written
to ./composite_props_packtab.rs.
"""

import sys
import json
from collections import Counter

sys.path.insert(0, 'Lib')

from packTab import pack_table, Code

# Machine-specific fallback kept for backward compatibility; pass the path as
# the first command-line argument on any other machine.
DEFAULT_INPUT = '/Users/taj/repos/parley/composite_props.json'
input_file = sys.argv[1] if len(sys.argv) > 1 else DEFAULT_INPUT

with open(input_file, 'r') as f:
    data = json.load(f)

# Fail with a clear message instead of an IndexError further down.
if not data:
    sys.exit(f"No property values found in {input_file}")

print(f"Loaded {len(data)} Unicode codepoint properties")

# Find the most common value to use as default
value_counts = Counter(data)
default = value_counts.most_common(1)[0][0]
default_count = value_counts[default]

print(f"Most common value: {default} (appears {default_count} times, {100*default_count/len(data):.2f}%)")
print(f"Unique values: {len(value_counts)}")
print()

print("Running packTab compression...")
print("This may take a moment for ~1.1M values...")

# Pack the table with compression level 3.
# NOTE(review): presumably higher levels favor smaller tables over fewer
# lookups -- confirm against the packTab documentation.
solution = pack_table(data, default=default, compression=3)

# Size of the input if it were stored as a flat u32 array.
uncompressed_bytes = len(data) * 4

print()
print("Compression results:")
print(f" Uncompressed: {uncompressed_bytes} bytes (u32 array)")
print(f" Compressed: {solution.cost} bytes")
print(f" Compression ratio: {uncompressed_bytes / solution.cost:.2f}x")
print(f" Lookups needed: {solution.nLookups}")
print(f" Extra operations: {solution.nExtraOps}")
print()

print("Generating Rust code...")
code = Code("composite_props")
solution.genCode(code, "get", language="rust", private=False)

output_file = './composite_props_packtab.rs'
print(f"Writing to {output_file}...")

with open(output_file, 'w') as f:
    f.write("// Auto-generated by packTab\n")
    f.write("// Unicode composite properties lookup table\n")
    f.write("//\n")
    f.write(f"// Original size: {len(data)} u32 values = {uncompressed_bytes} bytes\n")
    f.write(f"// Compressed size: {solution.cost} bytes\n")
    f.write(f"// Compression ratio: {uncompressed_bytes / solution.cost:.2f}x\n")
    f.write(f"// Lookups: {solution.nLookups}, Extra ops: {solution.nExtraOps}\n")
    f.write("//\n")
    f.write(f"// Default value: {default}\n")
    f.write("//\n")
    f.write("// Usage: composite_props_get(codepoint as usize) -> u32\n")
    f.write("\n")

    # Emit the generated lookup code into the same file, after the header.
    code.print_code(file=f, language="rust", private=False)

print(f"✓ Successfully generated {output_file}")
print()
print("Summary:")
print(f" Input: {len(data):,} codepoints")
print(f" Output: {solution.cost:,} bytes")
print(f" Savings: {uncompressed_bytes - solution.cost:,} bytes")
print(f" Space efficiency: {100 * solution.cost / uncompressed_bytes:.2f}% of original")
41 changes: 41 additions & 0 deletions unicode_data/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@

#![no_std]

#[cfg(test)]
extern crate std;

use icu_collections::codepointtrie::CodePointTrie;
use icu_properties::props::{GeneralCategory, GraphemeClusterBreak, Script};
use zerofrom::ZeroFrom;
Expand All @@ -16,6 +19,8 @@ use zerofrom::ZeroFrom;
#[cfg(feature = "baked")]
pub mod generated;

mod packtab;

/// A data provider of `CompositePropsV1`.
#[derive(Clone, Debug, Eq, PartialEq, yoke::Yokeable, ZeroFrom)]
#[cfg_attr(feature = "datagen", derive(databake::Bake))]
Expand Down Expand Up @@ -68,6 +73,13 @@ impl CompositePropsV1Data<'_> {
}
}

/// Looks up the composite properties for `ch` in the PackTab tables.
///
/// In debug builds this asserts that `ch` does not exceed `0x10_FFFF`; the
/// check is compiled out in release builds.
#[inline(always)]
pub fn packtab_properties(ch: u32) -> Properties {
    debug_assert!(ch <= 0x10_FFFF, "Invalid scalar value: {ch:#X}");
    let packed = packtab::composite_props_get(ch as usize);
    Properties(packed)
}

impl unicode_bidi::BidiDataSource for CompositePropsV1Data<'_> {
fn bidi_class(&self, cp: char) -> unicode_bidi::BidiClass {
self.properties(cp as u32).bidi_class()
Expand Down Expand Up @@ -258,6 +270,35 @@ impl From<Properties> for u32 {
}
}

#[cfg(all(test, feature = "baked"))]
mod tests {
    use super::*;
    use icu_provider::buf::AsDeserializingBufferProvider;
    use icu_provider::{DataMarker, DataRequest, DynamicDataProvider};

    /// Test to ensure that both packtab and trie return the same values
    /// for every code point from U+0000 through U+10FFFF.
    #[test]
    fn packtab_matches_trie() {
        let provider = icu_provider_blob::BlobDataProvider::try_new_from_static_blob(
            generated::COMPOSITE_BLOB,
        )
        .expect("Composite blob should deserialize");
        let response: icu_provider::DataResponse<CompositePropsV1> = provider
            .as_deserializing()
            .load_data(CompositePropsV1::INFO, DataRequest::default())
            .expect("Composite data should load");
        // Borrow the payload directly; `response` outlives the loop, so no
        // clone of the data is needed.
        let data = response.payload.get();

        for cp in 0_u32..=0x10_FFFF {
            let trie_value: u32 = data.properties(cp).into();
            let packtab_value: u32 = packtab_properties(cp).into();
            assert_eq!(
                trie_value, packtab_value,
                "Mismatch at code point U+{cp:04X}"
            );
        }
    }
}

#[cfg(feature = "datagen")]
fn unicode_to_unicode_bidi(bidi: icu_properties::props::BidiClass) -> unicode_bidi::BidiClass {
use icu_properties::props::BidiClass;
Expand Down
70 changes: 70 additions & 0 deletions unicode_data/src/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
// Copyright 2025 the Parley Authors
// SPDX-License-Identifier: Apache-2.0 OR MIT

//! Utility binary that dumps the composite property trie into a JSON array.

#[cfg(feature = "baked")]
use std::env;
#[cfg(feature = "baked")]
use std::fs::File;
#[cfg(feature = "baked")]
use std::io::{BufWriter, Write};
#[cfg(feature = "baked")]
use std::path::PathBuf;

#[cfg(feature = "baked")]
use icu_provider::buf::AsDeserializingBufferProvider;
#[cfg(feature = "baked")]
use icu_provider::{DataMarker, DataRequest, DataResponse, DynamicDataProvider};
#[cfg(feature = "baked")]
use unicode_data::{CompositePropsV1, CompositePropsV1Data};

/// Dumps the composite properties of every code point from 0 through
/// `0x10_FFFF` as a JSON array of integers.
///
/// The output path is the first command-line argument, defaulting to
/// `composite_props.json` in the current directory.
///
/// # Errors
///
/// Returns an error if the blob cannot be parsed, the composite data cannot
/// be loaded, or the output file cannot be created or fully written.
#[cfg(feature = "baked")]
fn main() -> Result<(), Box<dyn std::error::Error>> {
    const DEFAULT_OUTPUT: &str = "composite_props.json";
    const MAX_UNICODE_SCALAR: u32 = 0x10_FFFF;

    // `args_os` avoids the panic that `env::args` raises on non-UTF-8 arguments;
    // a path does not need to be valid Unicode.
    let output_path = env::args_os()
        .nth(1)
        .map_or_else(|| PathBuf::from(DEFAULT_OUTPUT), PathBuf::from);

    let provider = icu_provider_blob::BlobDataProvider::try_new_from_static_blob(
        unicode_data::generated::COMPOSITE_BLOB,
    )?;

    let response: DataResponse<CompositePropsV1> = provider
        .as_deserializing()
        .load_data(CompositePropsV1::INFO, DataRequest::default())?;
    let composite: &CompositePropsV1Data<'_> = response.payload.get();

    let file = File::create(&output_path)?;
    let mut writer = BufWriter::new(file);

    writer.write_all(b"[")?;
    for cp in 0_u32..=MAX_UNICODE_SCALAR {
        let value: u32 = composite.properties(cp).into();
        // Separator goes before every element except the first.
        if cp != 0 {
            writer.write_all(b",")?;
        }
        write!(writer, "{value}")?;
    }
    writer.write_all(b"]\n")?;
    // Flush explicitly: dropping a `BufWriter` discards any error from the
    // final write, which would let a truncated file be reported as success.
    writer.flush()?;

    println!(
        "Wrote {} composite property values to {}",
        MAX_UNICODE_SCALAR + 1,
        output_path.display()
    );

    Ok(())
}

/// Fallback entry point for builds without the `baked` feature: prints a
/// hint to stderr and exits with status 1.
#[cfg(not(feature = "baked"))]
fn main() {
    const MISSING_FEATURE_HINT: &str = "The unicode_data binary requires the `baked` feature. \
        Rebuild with `--features baked` (enabled by default).";
    eprintln!("{MISSING_FEATURE_HINT}");
    std::process::exit(1);
}
Loading
Loading