Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions build.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
use self::shared::ModifierSet;
use std::fmt::Write;
use std::iter::Peekable;
use std::path::Path;

type StrResult<T> = Result<T, String>;

#[path = "src/shared.rs"]
mod shared;

/// A module of definitions.
struct Module<'a>(Vec<(&'a str, Binding<'a>)>);

Expand All @@ -29,7 +33,7 @@ enum Def<'a> {
/// A symbol, either a leaf or with modifiers.
enum Symbol<'a> {
Single(char),
Multi(Vec<(&'a str, char)>),
Multi(Vec<(ModifierSet<&'a str>, char)>),
}

/// A single line during parsing.
Expand All @@ -40,7 +44,7 @@ enum Line<'a> {
ModuleStart(&'a str),
ModuleEnd,
Symbol(&'a str, Option<char>),
Variant(&'a str, char),
Variant(ModifierSet<&'a str>, char),
}

fn main() {
Expand Down Expand Up @@ -110,7 +114,7 @@ fn tokenize(line: &str) -> StrResult<Line> {
validate_ident(part)?;
}
let c = decode_char(tail.ok_or("missing char")?)?;
Line::Variant(rest, c)
Line::Variant(ModifierSet::new_unchecked(rest), c)
} else {
validate_ident(head)?;
let c = tail.map(decode_char).transpose()?;
Expand Down Expand Up @@ -167,7 +171,7 @@ fn parse<'a>(

let symbol = if variants.len() > 0 {
if let Some(c) = c {
variants.insert(0, ("", c));
variants.insert(0, (ModifierSet::default(), c));
}
Symbol::Multi(variants)
} else {
Expand Down
52 changes: 48 additions & 4 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,16 @@
/*!
Human-friendly notation for Unicode symbols.
*/
//! Human-friendly notation for Unicode symbols.
//!
//! ## Model
//! A [`Symbol`] is a collection of one or more _variants_.
//! Each variant is identified by a set of [_modifiers_](ModifierSet)
//! and has a single character as its value.
//! The modifiers themselves can in principle be any non-empty strings
//! that don't contain the character `.`, but codex only defines
//! ones that are entirely made of ASCII alphabetical characters.

pub use self::shared::ModifierSet;

mod shared;

/// A module of definitions.
#[derive(Debug, Copy, Clone)]
Expand Down Expand Up @@ -52,7 +62,41 @@ pub enum Symbol {
/// A symbol without modifiers.
Single(char),
/// A symbol with named modifiers. The symbol defaults to its first variant.
Multi(&'static [(&'static str, char)]),
Multi(&'static [(ModifierSet<&'static str>, char)]),
}

impl Symbol {
    /// Look up the character this symbol resolves to for the given modifiers.
    ///
    /// A `Single` symbol only resolves when `modifs` reports itself as empty;
    /// a `Multi` symbol delegates to [`ModifierSet::best_match_in`] over its
    /// variant table. Returns `None` when nothing matches.
    pub fn get(&self, modifs: ModifierSet<&str>) -> Option<char> {
        match self {
            Self::Single(c) => {
                if modifs.is_empty() {
                    Some(*c)
                } else {
                    None
                }
            }
            Self::Multi(list) => {
                let candidates = list.iter().copied();
                modifs.best_match_in(candidates)
            }
        }
    }

    /// Enumerate every `(modifier set, character)` pair of this symbol.
    ///
    /// A `Single` symbol yields exactly one pair whose modifier set is the
    /// default one; a `Multi` symbol yields its table entries in order.
    pub fn variants(&self) -> impl Iterator<Item = (ModifierSet<&str>, char)> {
        // The lone character of a `Single` symbol, if that is what we are.
        let lone = match self {
            Self::Single(c) => Some(*c),
            Self::Multi(_) => None,
        };
        // The variant table of a `Multi` symbol (empty for `Single`).
        let table: &[(ModifierSet<&str>, char)] = match self {
            Self::Single(_) => &[],
            Self::Multi(list) => *list,
        };

        // Exactly one of the two halves is non-empty.
        lone.into_iter()
            .map(|c| (ModifierSet::default(), c))
            .chain(table.iter().copied())
    }

    /// Enumerate the distinct modifiers used by any variant of this symbol.
    ///
    /// The modifiers are gathered into a `BTreeSet`, so each one is yielded
    /// once, in sorted order.
    pub fn modifiers(&self) -> impl Iterator<Item = &str> + '_ {
        let mut unique = std::collections::BTreeSet::new();
        for (set, _) in self.variants() {
            unique.extend(set);
        }
        unique.into_iter()
    }
}

/// A module that contains the other top-level modules.
Expand Down
126 changes: 126 additions & 0 deletions src/shared.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
use std::ops::Deref;

/// A set of modifiers.
#[derive(Debug, Copy, Clone)]
// note: the visibility needs to be `pub(crate)`,
// since build.rs outputs `ModifierSet(...)`
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is outdated since it uses new_unchecked

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, the point where it gets output is

codex/build.rs

Line 217 in 34edb09

Symbol::Multi(list) => write!(buf, "Multi(&{list:?})").unwrap(),

which uses the debug-formatting, which outputs ModifierSet(<string>).
Changing that would of course be possible, but it'd require constructing the slice manually instead of just using {:?} there.

pub struct ModifierSet<S>(pub(crate) S);

impl<S: Default> Default for ModifierSet<S> {
/// Construct the default modifier set.
///
/// This is typically the empty set,
/// though the remark from [`Self::new_unchecked`] applies
/// since `S::default()` could technically be anything.
fn default() -> Self {
Self(S::default())
}
}

impl<S: Deref<Target = str>> ModifierSet<S> {
/// Borrow the backing string, producing a modifier set backed by a `&str`.
pub fn as_deref(&self) -> ModifierSet<&str> {
    let borrowed: &str = &self.0;
    ModifierSet(borrowed)
}

/// Construct a modifier set from a string,
/// where modifiers are separated by the character `.`.
///
/// It is not unsafe to use this function wrongly, but it can produce
/// unexpected results down the line. Correct usage should ensure that
/// `s` does not contain any empty modifiers (i.e. the sequence `..`)
/// and that no modifier occurs twice.
pub fn new_unchecked(s: S) -> Self {
Copy link
Member

@laurmaedje laurmaedje Jun 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not a fan of the term "unchecked" as it invokes unsafety feelings (even if documented). Also new typically creates something empty in Rust lingo. How about just calling this from_dotted_str or something?

Copy link
Collaborator Author

@T0mstone T0mstone Jun 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also new typically creates something empty in Rust lingo.

Not necessarily, e.g. Path::new.

Self(s)
}

/// Report whether the backing string of `self` is empty.
pub fn is_empty(&self) -> bool {
    let raw: &str = &self.0;
    raw.is_empty()
}

/// Append the modifier `m` to the set without validating it.
///
/// A `.` separator is written first unless the backing string is still
/// empty. Misuse is not unsafe, but may lead to surprising results later
/// on: callers should make sure that `m` is non-empty and does not contain
/// the character `.`.
pub fn add_unchecked(&mut self, m: &str)
where
    S: for<'a> std::ops::AddAssign<&'a str>,
{
    // Only separate from a previous modifier if there is one.
    let needs_separator = !self.0.is_empty();
    if needs_separator {
        self.0 += ".";
    }
    self.0 += m;
}

/// Iterate over the list of modifiers in an arbitrary order.
pub fn iter(&self) -> impl Iterator<Item = &str> {
self.into_iter()
}

/// Report whether `m` is one of the modifiers yielded by [`Self::iter`].
pub fn contains(&self, m: &str) -> bool {
    for present in self.iter() {
        if present == m {
            return true;
        }
    }
    false
}

/// Report whether `other` contains every modifier of `self`.
pub fn is_subset(&self, other: ModifierSet<&str>) -> bool {
    // "All present" is the same as "none missing".
    let any_missing = self.iter().any(|m| !other.contains(m));
    !any_missing
}

/// Select the value whose modifier set matches `self` best.
///
/// Only entries whose modifier set is a superset of (or equal to) `self`
/// are considered. Among those, entries are ranked by, in order:
/// 1. the number of their modifiers that `self` also contains
///    (more is better), then
/// 2. their total number of modifiers (fewer is better).
///
/// On a tie the entry encountered first wins. Returns `None` when no entry
/// qualifies.
pub fn best_match_in<'a, T>(
    &self,
    variants: impl Iterator<Item = (ModifierSet<&'a str>, T)>,
) -> Option<T> {
    // Highest-ranked entry seen so far, kept together with its score.
    let mut winner: Option<((usize, std::cmp::Reverse<usize>), T)> = None;

    for (set, value) in variants {
        // Entries that do not cover all of our modifiers are not matches.
        if !self.is_subset(set) {
            continue;
        }

        let total = set.iter().count();
        let matching = set.iter().filter(|m| self.contains(m)).count();
        // `Reverse` makes a smaller total compare as the larger score.
        let score = (matching, std::cmp::Reverse(total));

        // Replace only on a strictly better score so earlier entries win ties.
        let improves = match &winner {
            None => true,
            Some((top, _)) => score > *top,
        };
        if improves {
            winner = Some((score, value));
        }
    }

    winner.map(|(_, value)| value)
}
}

impl<'a, S: Deref<Target = str>> IntoIterator for &'a ModifierSet<S> {
    type Item = &'a str;
    type IntoIter = std::str::Split<'a, char>;

    /// Iterate over the list of modifiers in an arbitrary order.
    ///
    /// The modifiers are the `.`-separated pieces of the backing string.
    /// An empty backing string denotes the empty set and yields no items.
    fn into_iter(self) -> Self::IntoIter {
        let mut iter = self.0.split('.');
        if self.0.is_empty() {
            // `"".split('.')` yields a single empty string. Discard it so that
            // the empty set has no modifiers instead of one empty modifier.
            let _ = iter.next();
        }
        iter
    }
}

impl<'a> IntoIterator for ModifierSet<&'a str> {
    type Item = &'a str;
    type IntoIter = std::str::Split<'a, char>;

    /// Iterate over the list of modifiers in an arbitrary order.
    ///
    /// The modifiers are the `.`-separated pieces of the backing string.
    /// An empty backing string denotes the empty set and yields no items.
    fn into_iter(self) -> Self::IntoIter {
        let mut iter = self.0.split('.');
        if self.0.is_empty() {
            // `"".split('.')` yields a single empty string. Discard it so that
            // the empty set has no modifiers instead of one empty modifier.
            let _ = iter.next();
        }
        iter
    }
}