Skip to content

Commit 3d9e5b2

Browse files
AhoCorasick: Track the generation in which nodes are added
1 parent 29d98d3 commit 3d9e5b2

3 files changed

Lines changed: 38 additions & 8 deletions

File tree

include/libsemigroups/detail/aho-corasick-impl.hpp

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ namespace libsemigroups {
6565
// Private data
6666
////////////////////////////////////////////////////////////////////////
6767
private:
68+
mutable size_t _last_checked;
6869
uint32_t _height;
6970
index_type _link;
7071
index_type _parent;
@@ -97,6 +98,10 @@ namespace libsemigroups {
9798
// Getters - public
9899
////////////////////////////////////////////////////////////////////////
99100

101+
// Returns the value currently stored in _last_checked.
// NOTE(review): this appears to be the owning trie's generation() at the
// time the node was last visited; confirm against the callers that set it.
[[nodiscard]] size_t last_checked() const noexcept {
102+
return _last_checked;
103+
}
104+
100105
// Returns the stored _height, widened to size_t.
[[nodiscard]] size_t height() const noexcept {
101106
return _height;
102107
}
@@ -126,12 +131,22 @@ namespace libsemigroups {
126131
return _value;
127132
}
128133

134+
////////////////////////////////////////////////////////////////////////
135+
// Setters - public
136+
////////////////////////////////////////////////////////////////////////
137+
138+
// Stores val in _last_checked and returns a const reference to this node.
// This setter is const-qualified (and public) because _last_checked is a
// mutable member, so it can be updated through a const Node.
Node const& last_checked(size_t val) const noexcept {
139+
_last_checked = val;
140+
return *this;
141+
}
142+
129143
private:
130144
////////////////////////////////////////////////////////////////////////
131145
// Setters - private
132146
////////////////////////////////////////////////////////////////////////
133147

134-
// All setters are private to avoid corrupting the objects.
148+
// All setters of non-mutable members are private to avoid corrupting
149+
// the objects.
135150

136151
Node const& height(size_t val) noexcept {
137152
_height = val;
@@ -162,6 +177,7 @@ namespace libsemigroups {
162177
std::vector<index_type> _inactive_nodes_index;
163178
std::vector<index_type> _node_indices_to_update;
164179
std::unordered_set<index_type> _terminal_nodes_index;
180+
mutable size_t _generation;
165181

166182
// TODO(1): it seems likely that the positions of the active nodes in
167183
// _all_nodes will become scattered and disordered over time, and so it'd
@@ -187,6 +203,14 @@ namespace libsemigroups {
187203

188204
~AhoCorasickImpl();
189205

206+
// Increases the _generation counter by one. Callable on a const object
// because _generation is a mutable member.
void increment_generation() const noexcept {
207+
++_generation;
208+
}
209+
210+
// Returns the current value of the _generation counter.
size_t generation() const noexcept {
211+
return _generation;
212+
}
213+
190214
// Returns the number of columns of the _children table.
// NOTE(review): presumably there is one column per letter of the alphabet;
// confirm against the definition of _children.
size_t alphabet_size() const noexcept {
191215
return _children.number_of_cols();
192216
}
@@ -407,7 +431,7 @@ namespace libsemigroups {
407431
Word const& w);
408432

409433
} // namespace aho_corasick_impl
410-
} // namespace detail
434+
} // namespace detail
411435
} // namespace libsemigroups
412436

413437
#include "aho-corasick-impl.tpp"

include/libsemigroups/detail/aho-corasick-impl.tpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ namespace libsemigroups {
2929
LIBSEMIGROUPS_ASSERT(val != nullptr);
3030
index_type current = root;
3131
for (auto it = first; it != last; ++it) {
32+
_all_nodes[current].last_checked(generation());
3233
index_type next = _children.get(current, *it);
3334
if (next == UNDEFINED) {
3435
// index of next node added
@@ -40,7 +41,7 @@ namespace libsemigroups {
4041
if (inserted) {
4142
_all_nodes[current].value(val);
4243
}
43-
44+
_all_nodes[current].last_checked(generation());
4445
return {current, inserted};
4546
}
4647

@@ -309,5 +310,5 @@ namespace libsemigroups {
309310
}
310311

311312
} // namespace aho_corasick_impl
312-
} // namespace detail
313+
} // namespace detail
313314
} // namespace libsemigroups

src/detail/aho-corasick-impl.cpp

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,8 @@ namespace libsemigroups {
3939
////////////////////////////////////////////////////////////////////////
4040

4141
AhoCorasickImpl::Node::Node(index_type parent, letter_type a)
42-
: _height(),
42+
: _last_checked(0),
43+
_height(),
4344
_link(),
4445
_parent(),
4546
_parent_letter(),
@@ -50,7 +51,8 @@ namespace libsemigroups {
5051

5152
typename AhoCorasickImpl::Node&
5253
AhoCorasickImpl::Node::init(index_type parent, letter_type a) noexcept {
53-
_height = parent == UNDEFINED ? 0 : UNDEFINED;
54+
_height = parent == UNDEFINED ? 0 : UNDEFINED;
55+
_last_checked = 0;
5456
if (_parent == root || _parent == UNDEFINED) {
5557
_link = root;
5658
} else {
@@ -77,7 +79,8 @@ namespace libsemigroups {
7779
_active_nodes_index({root}),
7880
_inactive_nodes_index(),
7981
_node_indices_to_update(),
80-
_terminal_nodes_index() {}
82+
_terminal_nodes_index(),
83+
_generation(0) {}
8184

8285
AhoCorasickImpl& AhoCorasickImpl::init() {
8386
init(0);
@@ -99,7 +102,8 @@ namespace libsemigroups {
99102
_active_nodes_index({root}),
100103
_inactive_nodes_index(),
101104
_node_indices_to_update(),
102-
_terminal_nodes_index() {}
105+
_terminal_nodes_index(),
106+
_generation(0){};
103107

104108
AhoCorasickImpl& AhoCorasickImpl::init(size_t num_letters) {
105109
LIBSEMIGROUPS_ASSERT(!_all_nodes.empty());
@@ -124,6 +128,7 @@ namespace libsemigroups {
124128
+ _inactive_nodes_index.size()
125129
== _all_nodes.size());
126130
LIBSEMIGROUPS_ASSERT(_children.number_of_rows() == _all_nodes.size());
131+
_generation = 0;
127132

128133
return *this;
129134
}

0 commit comments

Comments
 (0)