Skip to content

Commit 536dc49

Browse files
committed
Add new API
1 parent a3a16a7 commit 536dc49

File tree

3 files changed

+159
-7
lines changed

3 files changed

+159
-7
lines changed

build.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ enum Line<'a> {
2727
ModuleStart(&'a str),
2828
ModuleEnd,
2929
Symbol(&'a str, Option<char>),
30-
Variant(&'a str, char),
30+
Variant(ModifierSet<&'a str>, char),
3131
}
3232

3333
fn main() {
@@ -97,7 +97,7 @@ fn tokenize(line: &str) -> StrResult<Line> {
9797
validate_ident(part)?;
9898
}
9999
let c = decode_char(tail.ok_or("missing char")?)?;
100-
Line::Variant(rest, c)
100+
Line::Variant(ModifierSet(rest), c)
101101
} else {
102102
validate_ident(head)?;
103103
let c = tail.map(decode_char).transpose()?;
@@ -154,7 +154,7 @@ fn parse<'a>(
154154

155155
let symbol = if variants.len() > 0 {
156156
if let Some(c) = c {
157-
variants.insert(0, ("", c));
157+
variants.insert(0, (ModifierSet::empty(), c));
158158
}
159159
Symbol::Multi(variants)
160160
} else {

src/lib.rs

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,13 @@
11
/*!
22
Human-friendly notation for Unicode symbols.
3+
4+
## Model
5+
A [`Symbol`] is a collection of one or more _variants_.
6+
Each variant is identified by a set of _modifiers_ (see [`ModifierSet`])
7+
and has a single character as its value.
8+
The modifiers themselves can in principle be any non-empty strings
9+
that don't contain the character `.`, but codex only defines
10+
ones that are entirely made of ASCII alphabetical characters.
311
*/
412

513
include!("shared.rs");
@@ -26,6 +34,47 @@ impl Module {
2634
}
2735
}
2836

37+
impl<'a> ModifierSet<&'a str> {
38+
/// Iterate over the list of modifiers with the original lifetime.
39+
pub fn to_iter(self) -> impl Iterator<Item = &'a str> {
40+
self.0.split('.').filter(|s| !s.is_empty())
41+
}
42+
}
43+
44+
impl Symbol {
45+
/// Get the symbol's character for a given set of modifiers.
46+
pub fn get(&self, modifs: ModifierSet<&str>) -> Option<char> {
47+
match self {
48+
Self::Single(c) => modifs.is_empty().then_some(*c),
49+
Self::Multi(list) => modifs.best_match_in(list.iter().copied()),
50+
}
51+
}
52+
53+
/// The variants of this symbol, each as a pair of modifier set and character.
54+
pub fn variants(&self) -> impl Iterator<Item = (ModifierSet<&str>, char)> {
55+
enum Variants {
56+
Single(std::iter::Once<char>),
57+
Multi(std::slice::Iter<'static, (ModifierSet<&'static str>, char)>),
58+
}
59+
let mut iter = match self {
60+
Self::Single(c) => Variants::Single(std::iter::once(*c)),
61+
Self::Multi(sl) => Variants::Multi(sl.iter()),
62+
};
63+
std::iter::from_fn(move || match &mut iter {
64+
Variants::Single(iter) => Some((ModifierSet::empty(), iter.next()?)),
65+
Variants::Multi(iter) => iter.next().copied(),
66+
})
67+
}
68+
69+
/// Possible modifiers for this symbol.
70+
pub fn modifiers(&self) -> impl Iterator<Item = &str> + '_ {
71+
self.variants()
72+
.flat_map(|(m, _)| m.to_iter())
73+
.collect::<std::collections::BTreeSet<_>>()
74+
.into_iter()
75+
}
76+
}
77+
2978
/// A module that contains the other top-level modules.
3079
pub const ROOT: Module = Module(&[
3180
("emoji", Binding::new(Def::Module(EMOJI))),

src/shared.rs

Lines changed: 107 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
1+
use std::ops::{AddAssign, Deref};
2+
13
macro_rules! declare_types {
24
($(<$lt:lifetime>)?
35
$(derive($($Der:ident),*),)?
46
str = $s:ty,
5-
List = $L:ident<_>
7+
List = $List:ident<_>
68
) => {
79
/// A module of definitions.
810
$(#[derive($($Der),*)])?
9-
pub struct Module<$($lt)?>($L<($s, Binding<$($lt)?>)>);
11+
pub struct Module<$($lt)?>($List<($s, Binding<$($lt)?>)>);
1012

1113
/// A definition bound in a module, with metadata.
1214
$(#[derive($($Der),*)])?
@@ -38,8 +40,109 @@ macro_rules! declare_types {
3840
pub enum Symbol<$($lt)?> {
3941
/// A symbol without modifiers.
4042
Single(char),
41-
/// A symbol with named modifiers. The symbol defaults to its first variant.
42-
Multi($L<($s, char)>),
43+
/// A symbol with named modifiers.
44+
/// The symbol defaults to its first variant.
45+
Multi($List<(ModifierSet<$s>, char)>),
4346
}
4447
};
4548
}
49+
50+
/// A set of modifiers.
51+
#[derive(Debug, Copy, Clone)]
52+
pub struct ModifierSet<S>(S);
53+
54+
impl<S: Deref<Target = str>> ModifierSet<S> {
55+
/// Convert the underlying string to a slice.
56+
pub fn as_deref(&self) -> ModifierSet<&str> {
57+
ModifierSet(&self.0)
58+
}
59+
60+
/// Construct a modifier set from a string,
61+
/// where modifiers are separated by the character `.`.
62+
///
63+
/// It is not unsafe to use this function wrongly, but it can produce
64+
/// unexpected results down the line. Correct usage should ensure that
65+
/// `s` does not contain any empty modifiers (i.e. no leading or trailing `.` and no sequence `..`)
66+
/// and that no modifier occurs twice.
67+
pub fn new_unchecked(s: S) -> Self {
68+
Self(s)
69+
}
70+
71+
/// Construct an empty modifier set.
72+
pub fn empty() -> Self
73+
where
74+
S: Default,
75+
{
76+
Self(S::default())
77+
}
78+
79+
/// Whether `self` is empty.
80+
pub fn is_empty(&self) -> bool {
81+
self.0.is_empty()
82+
}
83+
84+
/// Add a modifier to the set, without checking that it is a valid modifier.
85+
///
86+
/// It is not unsafe to use this method wrongly, but that can produce
87+
/// unexpected results down the line. Correct usage should ensure that
88+
/// `m` is not empty and doesn't contain the character `.`.
89+
pub fn add_unchecked(&mut self, m: &str)
90+
where
91+
S: for<'a> AddAssign<&'a str>,
92+
{
93+
if !self.0.is_empty() {
94+
self.0 += ".";
95+
}
96+
self.0 += m;
97+
}
98+
99+
/// Iterate over the list of modifiers in an arbitrary order.
100+
pub fn iter(&self) -> impl Iterator<Item = &str> {
101+
self.0.split('.').filter(|s| !s.is_empty())
102+
}
103+
104+
/// Whether the set contains the modifier `m`.
105+
pub fn contains(&self, m: &str) -> bool {
106+
self.iter().any(|lhs| lhs == m)
107+
}
108+
109+
/// Whether all modifiers in `self` are also present in `other`.
110+
pub fn is_subset(&self, other: ModifierSet<&str>) -> bool {
111+
self.iter().all(|m| other.contains(m))
112+
}
113+
114+
/// Find the best match from the list.
115+
///
116+
/// To be considered a match, a variant's modifier set must be a superset of
117+
/// (or equal to) `self`. Among different matches, the best one is selected
118+
/// by the following two criteria (in order):
119+
/// 1. Number of modifiers in common with `self` (more is better).
120+
/// 2. Total number of modifiers (fewer is better).
121+
pub fn best_match_in<'a, T>(
122+
&self,
123+
variants: impl Iterator<Item = (ModifierSet<&'a str>, T)>,
124+
) -> Option<T> {
125+
let mut best = None;
126+
let mut best_score = None;
127+
128+
// Among the variants whose modifier set contains all of `self`'s modifiers, keep the best-scoring one.
129+
for candidate in variants.filter(|(set, _)| self.is_subset(*set)) {
130+
let mut matching = 0;
131+
let mut total = 0;
132+
for modifier in candidate.0.iter() {
133+
if self.contains(modifier) {
134+
matching += 1;
135+
}
136+
total += 1;
137+
}
138+
139+
let score = (matching, core::cmp::Reverse(total));
140+
if best_score.map_or(true, |b| score > b) {
141+
best = Some(candidate.1);
142+
best_score = Some(score);
143+
}
144+
}
145+
146+
best
147+
}
148+
}

0 commit comments

Comments
 (0)