Skip to content
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 9 additions & 4 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,13 @@ use std::fmt::Write;
use std::iter::Peekable;
use std::path::Path;

use self::shared::ModifierSet;

type StrResult<T> = Result<T, String>;

#[path = "src/shared.rs"]
mod shared;

/// A module of definitions.
struct Module<'a>(Vec<(&'a str, Binding<'a>)>);

Expand All @@ -29,7 +34,7 @@ enum Def<'a> {
/// A symbol, either a leaf or with modifiers.
enum Symbol<'a> {
Single(char),
Multi(Vec<(&'a str, char)>),
Multi(Vec<(ModifierSet<&'a str>, char)>),
}

/// A single line during parsing.
Expand All @@ -40,7 +45,7 @@ enum Line<'a> {
ModuleStart(&'a str),
ModuleEnd,
Symbol(&'a str, Option<char>),
Variant(&'a str, char),
Variant(ModifierSet<&'a str>, char),
}

fn main() {
Expand Down Expand Up @@ -110,7 +115,7 @@ fn tokenize(line: &str) -> StrResult<Line> {
validate_ident(part)?;
}
let c = decode_char(tail.ok_or("missing char")?)?;
Line::Variant(rest, c)
Line::Variant(ModifierSet::new_unchecked(rest), c)
} else {
validate_ident(head)?;
let c = tail.map(decode_char).transpose()?;
Expand Down Expand Up @@ -167,7 +172,7 @@ fn parse<'a>(

let symbol = if variants.len() > 0 {
if let Some(c) = c {
variants.insert(0, ("", c));
variants.insert(0, (ModifierSet::default(), c));
}
Symbol::Multi(variants)
} else {
Expand Down
51 changes: 47 additions & 4 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,15 @@
/*!
Human-friendly notation for Unicode symbols.
*/
//! Human-friendly notation for Unicode symbols.
//!
//! ## Model
//! A [`Symbol`] is a collection of one or more _variants_. Each variant is
//! identified by a set of [_modifiers_](ModifierSet) and has a single character
//! as its value. The modifiers themselves can in principle be any non-empty
//! strings that don't contain the character `.`, but codex only defines ones
//! that are entirely made of ASCII alphabetical characters.

pub use self::shared::ModifierSet;

mod shared;

/// A module of definitions.
#[derive(Debug, Copy, Clone)]
Expand Down Expand Up @@ -52,7 +61,41 @@ pub enum Symbol {
/// A symbol without modifiers.
Single(char),
/// A symbol with named modifiers. The symbol defaults to its first variant.
Multi(&'static [(&'static str, char)]),
Multi(&'static [(ModifierSet<&'static str>, char)]),
}

impl Symbol {
    /// Looks up the character this symbol yields for the modifier set
    /// `modifs`.
    ///
    /// A symbol without variants only answers to the empty modifier set. For
    /// a symbol with variants the choice is delegated to
    /// [`ModifierSet::best_match_in`]. Returns `None` when nothing matches.
    pub fn get(&self, modifs: ModifierSet<&str>) -> Option<char> {
        match *self {
            Self::Single(c) if modifs.is_empty() => Some(c),
            Self::Single(_) => None,
            Self::Multi(list) => modifs.best_match_in(list.iter().copied()),
        }
    }

    /// All variants of this symbol, as pairs of modifier set and character.
    ///
    /// A symbol without variants yields exactly one pair whose modifier set
    /// is the default one. Otherwise the pairs are yielded in the order in
    /// which they are stored.
    pub fn variants(&self) -> impl Iterator<Item = (ModifierSet<&str>, char)> {
        // Both shapes are driven through one small state machine so that a
        // single concrete iterator type can be returned.
        enum Source {
            One(std::iter::Once<char>),
            Many(std::slice::Iter<'static, (ModifierSet<&'static str>, char)>),
        }

        let mut source = match *self {
            Self::Single(c) => Source::One(std::iter::once(c)),
            Self::Multi(entries) => Source::Many(entries.iter()),
        };

        std::iter::from_fn(move || match source {
            Source::One(ref mut once) => once.next().map(|c| (ModifierSet::default(), c)),
            Source::Many(ref mut rest) => rest.next().copied(),
        })
    }

    /// Every modifier that occurs in at least one variant of this symbol.
    ///
    /// Each modifier is yielded once, in ascending string order.
    pub fn modifiers(&self) -> impl Iterator<Item = &str> + '_ {
        // Going through an ordered set both deduplicates and sorts.
        let unique: std::collections::BTreeSet<&str> =
            self.variants().flat_map(|(set, _)| set).collect();
        unique.into_iter()
    }
}

/// A module that contains the other top-level modules.
Expand Down
228 changes: 228 additions & 0 deletions src/shared.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,228 @@
use std::ops::Deref;

/// A set of modifiers.
///
/// The set is a thin wrapper around one string-like value of type `S`, in
/// which the individual modifiers are separated by the character `.`.
///
/// Beware: the derived [`PartialEq`], [`Eq`] and [`Hash`] implementations
/// operate on that wrapped value, so they depend on the order in which the
/// modifiers are written. This differs from what one would normally expect
/// of a set: for string-like `S`, `a.b` and `b.a` hold the same modifiers but
/// do not compare equal. To test for set-wise equality, use
/// [`iter`](Self::iter) and collect into a true set type like
/// [`HashSet`](std::collections::HashSet).
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub struct ModifierSet<S>(
// Note: the visibility needs to be `pub(crate)`, since build.rs outputs
// `ModifierSet(...)`.
pub(crate) S,
);

impl<S: Deref<Target = str>> ModifierSet<S> {
/// Constructs a modifier set from a string, where modifiers are separated by
/// the character `.`.
///
/// It is not unsafe to use this function wrongly, but it can produce
/// unexpected results down the line. Correct usage should ensure that `s`
/// does not contain any empty modifiers (i.e. the sequence `..`) and that
/// no modifier occurs twice.
pub fn new_unchecked(s: S) -> Self {
Copy link
Member

@laurmaedje laurmaedje Jun 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not a fan of the term "unchecked" as it invokes unsafety feelings (even if documented). Also new typically creates something empty in Rust lingo. How about just calling this from_dotted_str or something?

Copy link
Collaborator Author

@T0mstone T0mstone Jun 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also new typically creates something empty in Rust lingo.

Not necessarily, e.g. Path::new.

Self(s)
}

/// Whether `self` is empty.
///
/// This is the case exactly when the underlying string is empty, in which
/// case iterating over the set yields no modifiers.
pub fn is_empty(&self) -> bool {
self.0.is_empty()
}

/// Gets the string of modifiers separated by `.`.
///
/// The returned slice borrows from `self`; the modifiers appear in the order
/// in which they are stored.
pub fn as_str(&self) -> &str {
&self.0
}

/// Converts the underlying string to a slice.
///
/// The result is a borrowed `ModifierSet<&str>` view of the same modifiers;
/// nothing is copied besides the reference.
pub fn as_deref(&self) -> ModifierSet<&str> {
ModifierSet(&self.0)
}

/// Add a modifier to the set, without checking that it is a valid modifier.
///
/// The modifier `m` is appended to the end of the underlying string, preceded
/// by a `.` separator unless the set was empty before.
///
/// It is not unsafe to use this method wrongly, but that can produce
/// unexpected results down the line. Correct usage should ensure that
/// `m` is not empty and doesn't contain the character `.`.
pub fn add_unchecked(&mut self, m: &str)
where
S: for<'a> std::ops::AddAssign<&'a str>,
{
// A separator is only needed between modifiers, not before the first one.
if !self.0.is_empty() {
self.0 += ".";
}
self.0 += m;
}

/// Iterates over the list of modifiers in an arbitrary order.
///
/// This forwards to the [`IntoIterator`] implementation for
/// `&ModifierSet<S>`. Callers should not rely on any particular order.
pub fn iter(&self) -> impl Iterator<Item = &str> {
self.into_iter()
}

/// Checks whether `m` is one of the modifiers in this set.
///
/// The comparison is an exact string match against each stored modifier.
pub fn contains(&self, m: &str) -> bool {
    for modifier in self.iter() {
        if modifier == m {
            return true;
        }
    }
    false
}

/// Picks the value of the best-matching entry in `variants`.
///
/// Only entries whose modifier set is a superset of (or equal to) `self`
/// are considered at all. The remaining entries are ranked by, in order:
/// 1. The number of their modifiers that also occur in `self` (more is
///    better).
/// 2. Their total number of modifiers (fewer is better).
///
/// When several entries share the best rank, the one that comes first in
/// `variants` wins. Returns `None` if no entry qualifies.
///
/// Note: because every entry that reaches the ranking is already a superset of
/// `self`, the first criterion comes out the same for all of them as long
/// as no set lists a modifier twice; in that case the second criterion
/// decides.
pub fn best_match_in<'a, T>(
    &self,
    variants: impl Iterator<Item = (ModifierSet<&'a str>, T)>,
) -> Option<T> {
    // The best entry seen so far, together with its rank.
    let mut winner: Option<((usize, std::cmp::Reverse<usize>), T)> = None;

    for (set, value) in variants {
        if !self.is_subset(set) {
            continue;
        }

        // Count shared and total modifiers of the candidate in one pass.
        let (shared, size) = set.iter().fold((0usize, 0usize), |(shared, size), modifier| {
            (shared + usize::from(self.contains(modifier)), size + 1)
        });
        let rank = (shared, std::cmp::Reverse(size));

        // Strictly better only, so that earlier entries win ties.
        let improves = match &winner {
            Some((current, _)) => rank > *current,
            None => true,
        };
        if improves {
            winner = Some((rank, value));
        }
    }

    winner.map(|(_, value)| value)
}

/// Checks that every modifier of `self` also occurs in `other`.
///
/// An empty `self` is a subset of any `other`.
pub fn is_subset(&self, other: ModifierSet<&str>) -> bool {
    for modifier in self.iter() {
        if !other.contains(modifier) {
            return false;
        }
    }
    true
}
}

impl<S: Default> Default for ModifierSet<S> {
/// Constructs the default modifier set by wrapping `S::default()`.
///
/// This is typically the empty set, though the remark from
/// [`Self::new_unchecked`] applies since `S::default()` could technically
/// be anything.
fn default() -> Self {
Self(S::default())
}
}

impl<'a, S: Deref<Target = str>> IntoIterator for &'a ModifierSet<S> {
    type Item = &'a str;
    type IntoIter = std::str::Split<'a, char>;

    /// Yields each modifier of the borrowed set; the order is unspecified.
    fn into_iter(self) -> Self::IntoIter {
        let text: &'a str = &self.0;
        let mut parts = text.split('.');
        if text.is_empty() {
            // Splitting the empty string still produces one (empty) piece.
            // Consume it so that the empty set yields no modifiers.
            parts.next();
        }
        parts
    }
}

impl<'a> IntoIterator for ModifierSet<&'a str> {
type Item = &'a str;
type IntoIter = std::str::Split<'a, char>;

/// Iterate over the list of modifiers in an arbitrary order.
fn into_iter(self) -> Self::IntoIter {
let mut iter = self.0.split('.');
if self.0.is_empty() {
// empty the iterator
let _ = iter.next();
}
iter
}
}

#[cfg(test)]
mod tests {
// All tests use string-slice backed sets.
type ModifierSet = super::ModifierSet<&'static str>;

// The default set for `&str` must report itself as empty.
#[test]
fn default_is_empty() {
assert!(ModifierSet::default().is_empty());
}

// The number of yielded modifiers equals the number of `.`-separated parts,
// and the empty set yields none.
#[test]
fn iter_count() {
assert_eq!(ModifierSet::default().iter().count(), 0);
assert_eq!(ModifierSet::new_unchecked("a").iter().count(), 1);
assert_eq!(ModifierSet::new_unchecked("a.b").iter().count(), 2);
assert_eq!(ModifierSet::new_unchecked("a.b.c").iter().count(), 3);
}

// Subset checks must not depend on the order in which modifiers are written.
#[test]
fn subset() {
assert!(
ModifierSet::new_unchecked("a").is_subset(ModifierSet::new_unchecked("a.b"))
);
assert!(
ModifierSet::new_unchecked("a").is_subset(ModifierSet::new_unchecked("b.a"))
);
assert!(ModifierSet::new_unchecked("a.b")
.is_subset(ModifierSet::new_unchecked("b.c.a")));
}

// Pins the selection rules documented on `best_match_in`.
#[test]
fn best_match() {
// 1. more modifiers in common with self
// NOTE(review): `a.c` is not a superset of `a.b`, so it is already
// excluded by the subset filter before any scoring takes place.
assert_eq!(
ModifierSet::new_unchecked("a.b").best_match_in(
[
(ModifierSet::new_unchecked("a.c"), 1),
(ModifierSet::new_unchecked("a.b"), 2),
]
.into_iter()
),
Some(2)
);
// 2. fewer modifiers in general
assert_eq!(
ModifierSet::new_unchecked("a").best_match_in(
[
(ModifierSet::new_unchecked("a"), 1),
(ModifierSet::new_unchecked("a.b"), 2),
]
.into_iter()
),
Some(1)
);
// the first rule takes priority over the second
// NOTE(review): here too the candidate `a` is not a superset of `a.b`
// and is filtered out, so this case is decided by the subset filter.
assert_eq!(
ModifierSet::new_unchecked("a.b").best_match_in(
[
(ModifierSet::new_unchecked("a"), 1),
(ModifierSet::new_unchecked("a.b"), 2),
]
.into_iter()
),
Some(2)
);
// among multiple best matches, the first one is returned
assert_eq!(
ModifierSet::default().best_match_in(
[
(ModifierSet::new_unchecked("a"), 1),
(ModifierSet::new_unchecked("b"), 2)
]
.into_iter()
),
Some(1)
);
}
}