1
1
use std::fmt;
2
2
use std::sync::atomic::{AtomicU32, Ordering};
3
3
4
- use tracing::instrument;
5
-
6
- use super::{Byte, Nfa, Ref, nfa};
4
+ use super::{Byte, Ref, Tree, Uninhabited};
7
5
use crate::Map;
8
6
9
- #[derive(PartialEq, Clone, Debug)]
7
+ #[derive(PartialEq)]
8
+ #[cfg_attr(test, derive(Clone))]
10
9
pub(crate) struct Dfa<R>
11
10
where
12
11
R: Ref,
13
12
{
14
13
pub(crate) transitions: Map<State, Transitions<R>>,
15
14
pub(crate) start: State,
16
- pub(crate) accepting : State,
15
+ pub(crate) accept : State,
17
16
}
18
17
19
18
#[derive(PartialEq, Clone, Debug)]
@@ -34,35 +33,15 @@ where
34
33
}
35
34
}
36
35
37
- impl<R> Transitions<R>
38
- where
39
- R: Ref,
40
- {
41
- #[cfg(test)]
42
- fn insert(&mut self, transition: Transition<R>, state: State) {
43
- match transition {
44
- Transition::Byte(b) => {
45
- self.byte_transitions.insert(b, state);
46
- }
47
- Transition::Ref(r) => {
48
- self.ref_transitions.insert(r, state);
49
- }
50
- }
51
- }
52
- }
53
-
54
- /// The states in a `Nfa` represent byte offsets.
36
+ /// The states in a [`Dfa`] represent byte offsets.
55
37
#[derive(Hash, Eq, PartialEq, PartialOrd, Ord, Copy, Clone)]
56
- pub(crate) struct State(u32);
38
+ pub(crate) struct State(pub(crate) u32);
57
39
58
- #[cfg(test)]
59
- #[derive(Hash, Eq, PartialEq, Clone, Copy)]
60
- pub(crate) enum Transition<R>
61
- where
62
- R: Ref,
63
- {
64
- Byte(Byte),
65
- Ref(R),
40
+ impl State {
41
+ pub(crate) fn new() -> Self {
42
+ static COUNTER: AtomicU32 = AtomicU32::new(0);
43
+ Self(COUNTER.fetch_add(1, Ordering::SeqCst))
44
+ }
66
45
}
67
46
68
47
impl fmt::Debug for State {
@@ -71,19 +50,6 @@ impl fmt::Debug for State {
71
50
}
72
51
}
73
52
74
- #[cfg(test)]
75
- impl<R> fmt::Debug for Transition<R>
76
- where
77
- R: Ref,
78
- {
79
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
80
- match &self {
81
- Self::Byte(b) => b.fmt(f),
82
- Self::Ref(r) => r.fmt(f),
83
- }
84
- }
85
- }
86
-
87
53
impl<R> Dfa<R>
88
54
where
89
55
R: Ref,
@@ -92,60 +58,167 @@ where
92
58
pub(crate) fn bool() -> Self {
93
59
let mut transitions: Map<State, Transitions<R>> = Map::default();
94
60
let start = State::new();
95
- let accepting = State::new();
61
+ let accept = State::new();
96
62
97
- transitions.entry(start).or_default().insert(Transition:: Byte(Byte ::Init(0x00)), accepting );
63
+ transitions.entry(start).or_default().byte_transitions. insert(Byte::Init(0x00), accept );
98
64
99
- transitions.entry(start).or_default().insert(Transition:: Byte(Byte ::Init(0x01)), accepting );
65
+ transitions.entry(start).or_default().byte_transitions. insert(Byte::Init(0x01), accept );
100
66
101
- Self { transitions, start, accepting }
67
+ Self { transitions, start, accept }
102
68
}
103
69
104
- #[instrument(level = "debug")]
105
- pub(crate) fn from_nfa(nfa: Nfa<R>) -> Self {
106
- let Nfa { transitions: nfa_transitions, start: nfa_start, accepting: nfa_accepting } = nfa;
70
+ pub(crate) fn unit() -> Self {
71
+ let transitions: Map<State, Transitions<R>> = Map::default();
72
+ let start = State::new();
73
+ let accept = start;
74
+
75
+ Self { transitions, start, accept }
76
+ }
107
77
108
- let mut dfa_transitions: Map<State, Transitions<R>> = Map::default();
109
- let mut nfa_to_dfa : Map<nfa:: State, State > = Map::default();
110
- let dfa_start = State::new();
111
- nfa_to_dfa.insert(nfa_start, dfa_start );
78
+ pub(crate) fn from_byte(byte: Byte) -> Self {
79
+ let mut transitions : Map<State, Transitions<R> > = Map::default();
80
+ let start = State::new();
81
+ let accept = State::new( );
112
82
113
- let mut queue = vec![(nfa_start, dfa_start)] ;
83
+ transitions.entry(start).or_default().byte_transitions.insert(byte, accept) ;
114
84
115
- while let Some((nfa_state, dfa_state)) = queue.pop() {
116
- if nfa_state == nfa_accepting {
117
- continue;
118
- }
85
+ Self { transitions, start, accept }
86
+ }
119
87
120
- for (nfa_transition, next_nfa_states) in nfa_transitions[&nfa_state].iter() {
121
- let dfa_transitions =
122
- dfa_transitions.entry(dfa_state).or_insert_with(Default::default);
123
-
124
- let mapped_state = next_nfa_states.iter().find_map(|x| nfa_to_dfa.get(x).copied());
125
-
126
- let next_dfa_state = match nfa_transition {
127
- &nfa::Transition::Byte(b) => *dfa_transitions
128
- .byte_transitions
129
- .entry(b)
130
- .or_insert_with(|| mapped_state.unwrap_or_else(State::new)),
131
- &nfa::Transition::Ref(r) => *dfa_transitions
132
- .ref_transitions
133
- .entry(r)
134
- .or_insert_with(|| mapped_state.unwrap_or_else(State::new)),
135
- };
136
-
137
- for &next_nfa_state in next_nfa_states {
138
- nfa_to_dfa.entry(next_nfa_state).or_insert_with(|| {
139
- queue.push((next_nfa_state, next_dfa_state));
140
- next_dfa_state
141
- });
88
+ pub(crate) fn from_ref(r: R) -> Self {
89
+ let mut transitions: Map<State, Transitions<R>> = Map::default();
90
+ let start = State::new();
91
+ let accept = State::new();
92
+
93
+ transitions.entry(start).or_default().ref_transitions.insert(r, accept);
94
+
95
+ Self { transitions, start, accept }
96
+ }
97
+
98
+ pub(crate) fn from_tree(tree: Tree<!, R>) -> Result<Self, Uninhabited> {
99
+ Ok(match tree {
100
+ Tree::Byte(b) => Self::from_byte(b),
101
+ Tree::Ref(r) => Self::from_ref(r),
102
+ Tree::Alt(alts) => {
103
+ // Convert each alternative, keeping only the inhabited ones.
104
+ let mut alts = alts.into_iter().map(Self::from_tree).filter_map(Result::ok);
105
+ // If there are no inhabited alternatives, return `Uninhabited`.
106
+ let dfa = alts.next().ok_or(Uninhabited)?;
107
+ // Combine the remaining alternatives with `dfa`.
108
+ alts.fold(dfa, |dfa, alt| dfa.union(alt, State::new))
109
+ }
110
+ Tree::Seq(elts) => {
111
+ let mut dfa = Self::unit();
112
+ for elt in elts.into_iter().map(Self::from_tree) {
113
+ dfa = dfa.concat(elt?);
142
114
}
115
+ dfa
143
116
}
117
+ })
118
+ }
119
+
120
+ /// Concatenate two `Dfa`s.
121
+ pub(crate) fn concat(self, other: Self) -> Self {
122
+ if self.start == self.accept {
123
+ return other;
124
+ } else if other.start == other.accept {
125
+ return self;
144
126
}
145
127
146
- let dfa_accepting = nfa_to_dfa[&nfa_accepting];
128
+ let start = self.start;
129
+ let accept = other.accept;
130
+
131
+ let mut transitions: Map<State, Transitions<R>> = self.transitions;
147
132
148
- Self { transitions: dfa_transitions, start: dfa_start, accepting: dfa_accepting }
133
+ for (source, transition) in other.transitions {
134
+ let fix_state = |state| if state == other.start { self.accept } else { state };
135
+ let entry = transitions.entry(fix_state(source)).or_default();
136
+ for (edge, destination) in transition.byte_transitions {
137
+ entry.byte_transitions.insert(edge, fix_state(destination));
138
+ }
139
+ for (edge, destination) in transition.ref_transitions {
140
+ entry.ref_transitions.insert(edge, fix_state(destination));
141
+ }
142
+ }
143
+
144
+ Self { transitions, start, accept }
145
+ }
146
+
147
+ /// Compute the union of two `Dfa`s.
148
+ pub(crate) fn union(self, other: Self, mut new_state: impl FnMut() -> State) -> Self {
149
+ // We implement `union` by lazily initializing a set of states
150
+ // corresponding to the product of states in `self` and `other`, and
151
+ // then add a transition between two of these states wherever `self` or
152
+ // `other` has a transition on the same byte or reference.
153
+
154
+ let a = self;
155
+ let b = other;
156
+
157
+ let accept = new_state();
158
+
159
+ let mut mapping: Map<(Option<State>, Option<State>), State> = Map::default();
160
+
161
+ let mut mapped = |(a_state, b_state)| {
162
+ if Some(a.accept) == a_state || Some(b.accept) == b_state {
163
+ // If either `a_state` or `b_state` is accepting, map to a
164
+ // common `accept` state.
165
+ accept
166
+ } else {
167
+ *mapping.entry((a_state, b_state)).or_insert_with(&mut new_state)
168
+ }
169
+ };
170
+
171
+ let start = mapped((Some(a.start), Some(b.start)));
172
+ let mut transitions: Map<State, Transitions<R>> = Map::default();
173
+ let mut queue = vec![(Some(a.start), Some(b.start))];
174
+ let empty_transitions = Transitions::default();
175
+
176
+ while let Some((a_src, b_src)) = queue.pop() {
177
+ let a_transitions =
178
+ a_src.and_then(|a_src| a.transitions.get(&a_src)).unwrap_or(&empty_transitions);
179
+ let b_transitions =
180
+ b_src.and_then(|b_src| b.transitions.get(&b_src)).unwrap_or(&empty_transitions);
181
+
182
+ let byte_transitions =
183
+ a_transitions.byte_transitions.keys().chain(b_transitions.byte_transitions.keys());
184
+
185
+ for byte_transition in byte_transitions {
186
+ let a_dst = a_transitions.byte_transitions.get(byte_transition).copied();
187
+ let b_dst = b_transitions.byte_transitions.get(byte_transition).copied();
188
+
189
+ assert!(a_dst.is_some() || b_dst.is_some());
190
+
191
+ let src = mapped((a_src, b_src));
192
+ let dst = mapped((a_dst, b_dst));
193
+
194
+ transitions.entry(src).or_default().byte_transitions.insert(*byte_transition, dst);
195
+
196
+ if !transitions.contains_key(&dst) {
197
+ queue.push((a_dst, b_dst))
198
+ }
199
+ }
200
+
201
+ let ref_transitions =
202
+ a_transitions.ref_transitions.keys().chain(b_transitions.ref_transitions.keys());
203
+
204
+ for ref_transition in ref_transitions {
205
+ let a_dst = a_transitions.ref_transitions.get(ref_transition).copied();
206
+ let b_dst = b_transitions.ref_transitions.get(ref_transition).copied();
207
+
208
+ assert!(a_dst.is_some() || b_dst.is_some());
209
+
210
+ let src = mapped((a_src, b_src));
211
+ let dst = mapped((a_dst, b_dst));
212
+
213
+ transitions.entry(src).or_default().ref_transitions.insert(*ref_transition, dst);
214
+
215
+ if !transitions.contains_key(&dst) {
216
+ queue.push((a_dst, b_dst))
217
+ }
218
+ }
219
+ }
220
+
221
+ Self { transitions, start, accept }
149
222
}
150
223
151
224
pub(crate) fn bytes_from(&self, start: State) -> Option<&Map<Byte, State>> {
@@ -159,24 +232,48 @@ where
159
232
pub(crate) fn refs_from(&self, start: State) -> Option<&Map<R, State>> {
160
233
Some(&self.transitions.get(&start)?.ref_transitions)
161
234
}
162
- }
163
235
164
- impl State {
165
- pub(crate) fn new() -> Self {
166
- static COUNTER: AtomicU32 = AtomicU32::new(0);
167
- Self(COUNTER.fetch_add(1, Ordering::SeqCst))
236
+ #[cfg(test)]
237
+ pub(crate) fn from_edges<B: Copy + Into<Byte>>(
238
+ start: u32,
239
+ accept: u32,
240
+ edges: &[(u32, B, u32)],
241
+ ) -> Self {
242
+ let start = State(start);
243
+ let accept = State(accept);
244
+ let mut transitions: Map<State, Transitions<R>> = Map::default();
245
+
246
+ for &(src, edge, dst) in edges {
247
+ let src = State(src);
248
+ let dst = State(dst);
249
+ let old = transitions.entry(src).or_default().byte_transitions.insert(edge.into(), dst);
250
+ assert!(old.is_none());
251
+ }
252
+
253
+ Self { start, accept, transitions }
168
254
}
169
255
}
170
256
171
- #[cfg(test)]
172
- impl<R> From<nfa::Transition<R>> for Transition <R>
257
+ /// Serialize the DFA using the Graphviz DOT format.
258
+ impl<R> fmt::Debug for Dfa <R>
173
259
where
174
260
R: Ref,
175
261
{
176
- fn from(nfa_transition: nfa::Transition<R>) -> Self {
177
- match nfa_transition {
178
- nfa::Transition::Byte(byte) => Transition::Byte(byte),
179
- nfa::Transition::Ref(r) => Transition::Ref(r),
262
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
263
+ writeln!(f, "digraph {{")?;
264
+ writeln!(f, " {:?} [shape = doublecircle]", self.start)?;
265
+ writeln!(f, " {:?} [shape = doublecircle]", self.accept)?;
266
+
267
+ for (src, transitions) in self.transitions.iter() {
268
+ for (t, dst) in transitions.byte_transitions.iter() {
269
+ writeln!(f, " {src:?} -> {dst:?} [label=\"{t:?}\"]")?;
270
+ }
271
+
272
+ for (t, dst) in transitions.ref_transitions.iter() {
273
+ writeln!(f, " {src:?} -> {dst:?} [label=\"{t:?}\"]")?;
274
+ }
180
275
}
276
+
277
+ writeln!(f, "}}")
181
278
}
182
279
}
0 commit comments