Skip to content

Commit 144f43c

Browse files
committed
WIP - add: trie
1 parent 24ce90b commit 144f43c

File tree

5 files changed

+448
-0
lines changed

5 files changed

+448
-0
lines changed

include/ext/meta/concepts.hpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
// Copyright - 2020 - Jan Christoph Uhde <[email protected]>
2+
#ifndef EXT_META_CONCEPTS_HEADER
3+
#define EXT_META_CONCEPTS_HEADER
4+
#include <concepts>
5+
6+
namespace ext::meta {
7+
8+
template <typename Container>
9+
concept Associative = requires(Container cont) {
10+
typename Container::key_type;
11+
typename Container::mapped_type;
12+
{ cont.begin() } -> std::same_as<typename Container::iterator>;
13+
{ cont.end() } -> std::same_as<typename Container::iterator>;
14+
};
15+
16+
} // namespace ext::meta
17+
#endif // EXT_META_CONCEPTS_HEADER

include/ext/structures/trie.hpp

Lines changed: 234 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,234 @@
1+
// Copyright - Jan Christoph Uhde <[email protected]>
2+
#ifndef EXT_STRUCTURES_TRIE_HEADER
3+
#define EXT_STRUCTURES_TRIE_HEADER
4+
5+
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <map>
#include <memory>
#include <optional>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

#include <ext/meta/concepts.hpp>
12+
13+
namespace ext::structures {
14+
namespace detail_trie {
15+
16+
/// Copies the characters of a string into a vector, preserving their order.
///
/// @param str  text to copy (embedded NUL characters are copied as well)
/// @return vector holding one element per character of `str`
inline std::vector<char> str2vec(std::string const& str) {
    // Iterator-range construction sizes the buffer once instead of growing
    // it on every push_back.
    return std::vector<char>(str.begin(), str.end());
}
21+
22+
/// Builds a string from the elements of a vector, preserving their order.
///
/// @tparam T   element type; each element is converted to `char`
/// @param vec  elements to copy
/// @return string holding one character per element of `vec`
template <typename T>
std::string vec2str(std::vector<T> const& vec) {
    // Iterator-range construction allocates once instead of growing the
    // string on every push_back.
    return std::string(vec.begin(), vec.end());
}
28+
29+
/// Looks up `key` in a map that is held through a possibly-null unique_ptr.
///
/// @param map  owning pointer to the map; may be null
/// @param key  key to search for
/// @return pointer to the mapped value, or nullptr when `map` is null or
///         does not contain `key`
template <typename Key, typename Value>
Value* find_or_null(std::unique_ptr<std::map<Key, Value>>& map, Key const& key) {
    if (!map) {
        return nullptr;
    }

    auto const pos = map->find(key);
    if (pos == map->end()) {
        return nullptr;
    }
    return &(pos->second);
}
36+
37+
// node in trie
38+
/// Single node of the trie.
///
/// A node carries a sequence of labels (`prefix`), an optional payload
/// (`value`), a flag (`is_word`) and a lazily allocated map from label to
/// child node.
template <typename Label, typename Value>
class node {
    public:
    using value_t = Value;
    using label_t = Label;
    using key_t = std::vector<label_t>;
    using children_t = std::map<label_t, std::unique_ptr<node>>;

    key_t prefix;                         // labels stored directly on this node
    std::unique_ptr<children_t> children; // null until ensure_children() is called
    std::unique_ptr<value_t> value;       // optional payload; may be null
    bool is_word = false;                 // set through assign_data(); presumably marks the end of a stored key

    /// Allocates the child map if it does not exist yet.
    void ensure_children() {
        if (children == nullptr)
            this->children = std::make_unique<children_t>();
    }

    /// Overwrites prefix, payload and word flag of this node.
    ///
    /// Both `prefix` and `value` are moved from, so the caller's objects are
    /// left in a moved-from state (`value` is null afterwards).
    void assign_data(key_t& prefix, std::unique_ptr<value_t>& value, bool isw = true) {
        this->prefix = std::move(prefix);
        this->is_word = isw;
        this->value = std::move(value);
    }

    /// Adds a new child reachable through `label` and fills it via assign_data().
    ///
    /// @param label    label under which the child is registered
    /// @param prefix   moved into the child on success
    /// @param value    moved into the child on success
    /// @param is_word  word flag for the child
    /// @return `{child, true}` on success; `{this, false}` when a child with
    ///         `label` already exists, in which case `prefix` and `value`
    ///         are left untouched
    std::pair<node*, bool>
    insert_node(label_t const& label, key_t& prefix, std::unique_ptr<value_t>& value, bool is_word) {
        ensure_children();

        // Probe first so that a duplicate label does not cost a heap
        // allocation for a node that would be thrown away immediately.
        auto pos = this->children->lower_bound(label);
        if (pos != this->children->end() && !this->children->key_comp()(label, pos->first))
            return {this, false};

        pos = this->children->emplace_hint(pos, label, std::make_unique<node>());
        auto rv = pos->second.get();
        rv->assign_data(prefix, value, is_word);
        return {rv, true};
    }
};
74+
75+
// description of how a node will be split
76+
/// Plain record produced by split_info() describing how a node's prefix and
/// a key to insert relate to each other.
template <class Key>
struct split {
    using key_t = Key;
    using label_t = typename Key::value_type;

    // labels shared by the node's prefix and the key to insert
    key_t parent_prefix;

    // first label of the node's prefix after the shared part, if any
    std::optional<label_t> split_label;
    // what remains of the node's prefix after split_label
    key_t split_prefix;

    // first label of the key to insert after the shared part, if any
    std::optional<label_t> insert_label;
    // what remains of the key to insert after insert_label
    key_t insert_prefix;
};
87+
88+
/// Returns the length of the longest common leading run of `a` and `b`,
/// i.e. the first index at which they differ or at which the shorter one ends.
template <typename Key>
[[nodiscard]] constexpr std::size_t find_split_point(Key const& a, Key const& b) {
    auto const limit = std::min(a.size(), b.size());

    std::size_t common = 0;
    for (; common < limit && a[common] == b[common]; ++common) {
        // all work happens in the loop header
    }
    return common;
}
99+
100+
// function to calculate the splits
101+
template <typename Key>
102+
[[nodiscard]] split<Key> split_info(Key const& parent_prefix, Key const& insert_prefix) {
103+
std::size_t sp = find_split_point(parent_prefix, insert_prefix);
104+
105+
auto in_beg = insert_prefix.begin();
106+
auto in_sp = in_beg + sp;
107+
auto in_sp_cpy = in_sp;
108+
auto in_end = insert_prefix.end();
109+
110+
auto pa_beg = parent_prefix.begin();
111+
auto pa_sp = pa_beg + sp;
112+
auto pa_end = parent_prefix.end();
113+
114+
std::optional<typename Key::value_type> split_label;
115+
if (pa_sp != pa_end)
116+
split_label = *(pa_sp++);
117+
118+
std::optional<typename Key::value_type> insert_label;
119+
if (in_sp != in_end)
120+
insert_label = *(in_sp++);
121+
122+
return split<Key>{
123+
.parent_prefix = Key(in_beg, in_sp_cpy),
124+
.split_label = std::move(split_label),
125+
.split_prefix = Key(pa_sp, pa_end),
126+
.insert_label = std::move(insert_label),
127+
.insert_prefix = Key(in_sp, in_end),
128+
};
129+
}
130+
131+
132+
} // namespace detail_trie
133+
134+
template <typename Label, typename Value = std::uint8_t>
135+
class trie {
136+
public:
137+
using label_t = Label;
138+
using value_t = Value;
139+
140+
using node_t = detail_trie::node<label_t, value_t>;
141+
using key_t = typename node_t::key_t;
142+
using children_t = typename node_t::children_t;
143+
144+
[[nodiscard]] std::pair<node_t*, bool> insert(key_t const& key, std::unique_ptr<value_t> value = nullptr) {
145+
using namespace detail_trie;
146+
147+
auto [insert_parent, insert_prefix] = find_insert_parent(root.get(), key);
148+
if (insert_parent == nullptr)
149+
return {nullptr, false};
150+
151+
auto& ip = *insert_parent;
152+
153+
if (insert_prefix.empty()) {
154+
// must be inserted into ip
155+
if (ip.is_word)
156+
return {insert_parent, false};
157+
158+
ip.assign_data(insert_prefix, value);
159+
return {insert_parent, true};
160+
} else if (!ip.is_word && ip.prefix.empty() && (ip.children == nullptr || ip.children->empty())) [[unlikely]] {
161+
ip.assign_data(insert_prefix, value);
162+
return {insert_parent, true};
163+
}
164+
165+
split<key_t> si = split_info(insert_parent->prefix, insert_prefix);
166+
if (!si.insert_label.has_value())
167+
return {insert_parent, false};
168+
169+
// create split if neccessary
170+
if (si.split_label.has_value()) {
171+
auto split_children = std::move(ip.children);
172+
ip.children = std::make_unique<children_t>();
173+
174+
auto [split_node, ok] = ip.insert_node(si.split_label.value(), si.split_prefix, ip.value, ip.is_word);
175+
if (!ok) {
176+
ip.children = std::move(ip.children); // restore state
177+
throw std::logic_error("this insert into the trie not fail");
178+
}
179+
180+
split_node->children = std::move(split_children);
181+
ip.prefix = si.parent_prefix;
182+
ip.is_word = false;
183+
ip.value = nullptr;
184+
}
185+
186+
return ip.insert_node(si.insert_label.value(), si.insert_prefix, value, true);
187+
}
188+
189+
#ifndef EXT_TEST
190+
private:
191+
#endif
192+
193+
[[nodiscard]] static std::pair<node_t*, key_t> find_insert_parent(node_t* start, key_t key) {
194+
std::pair<node_t*, key_t> rv(start, {});
195+
196+
if (start == nullptr || key.empty())
197+
return rv;
198+
199+
auto key_it = key.begin();
200+
auto key_end = key.end();
201+
do {
202+
// first check the prefix
203+
// return if the key to insert is shorter or does not match
204+
{
205+
auto key_it_copy = key_it; // use this
206+
for (auto const& label : rv.first->prefix) {
207+
if (key_it == key_end || label != *key_it) {
208+
std::move(key_it_copy, key_end, std::back_inserter(rv.second));
209+
return rv;
210+
}
211+
++key_it;
212+
}
213+
}
214+
// check if there is a child that matches or break
215+
{
216+
auto next = find_or_null(rv.first->children, *key_it);
217+
if (next != nullptr) {
218+
rv.first = next->get();
219+
++key_it;
220+
} else
221+
break;
222+
}
223+
} while (key_it != key_end);
224+
225+
std::move(key_it, key_end, std::back_inserter(rv.second));
226+
return rv;
227+
}
228+
229+
// member vars
230+
std::unique_ptr<node_t> root = std::make_unique<node_t>();
231+
};
232+
233+
} // namespace ext::structures
234+
#endif

include_files.cmake

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,10 @@ set(ext-basics-header
77
"include/ext/memory/tagged_pointer.hpp"
88
"include/ext/meta/basic.hpp"
99
"include/ext/meta/basic_old.hpp"
10+
"include/ext/meta/concepts.hpp"
1011
"include/ext/structures/binary_index_tree.hpp"
1112
"include/ext/structures/lru_cache.hpp"
13+
"include/ext/structures/trie.hpp"
1214
"include/ext/util/basic.hpp"
1315
"include/ext/util/bit_tricks.hpp"
1416
"include/ext/util/cast.hpp"

tests/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ set(test-files
2121

2222
"structures_binary_index_tree"
2323
"structures_lru_cache"
24+
"structures_trie"
2425

2526
"util_basic"
2627
"util_cast"

0 commit comments

Comments
 (0)