1+ #ifndef EMP_OEE_STATS_H
2+ #define EMP_OEE_STATS_H
3+
4+ #include " Systematics.h"
5+ #include " bloom_filter.hpp"
6+ #include " base/vector.h"
7+ #include " base/Ptr.h"
8+ #include " tools/set_utils.h"
9+ #include " tools/vector_utils.h"
10+
11+ #include < deque>
12+
13+ namespace emp {
14+
15+ // Setup possible types for keeping track of what we've seen for novelty
16+
// Exact (no false positives) record of every skeleton seen so far, backed by
// a std::set. Offers the same constructor/insert/contains interface as
// SeenBloomFilter so the two can be swapped as the SEEN_TYPE of a tracker.
//
// SKEL_TYPE must be usable as a std::set key (i.e. comparable with operator<).
template <typename SKEL_TYPE>
class SeenSet {
  std::set<SKEL_TYPE> s;

public:
  using skel_t = SKEL_TYPE;

  // Placeholders to ensure that constructor signature is same as bloom filter.
  // Both arguments are ignored; they are left unnamed so that they do not
  // trigger unused-parameter warnings.
  SeenSet(int /* placeholder_1 */ = 200000, double /* placeholder_2 */ = 0.0001) { ; }

  // Record val as seen. Inserting a value that is already present is a no-op.
  void insert(const skel_t & val) { s.insert(val); }

  // Return true if val has previously been passed to insert().
  // Const-qualified so it can be called through a const reference.
  bool contains(const skel_t & val) const { return s.count(val) != 0; }
};
27+
28+ class SeenBloomFilter {
29+ bloom_filter b;
30+
31+ public:
32+
33+ using skel_t = std::string;
34+ SeenBloomFilter (int bloom_count = 200000 , double false_positive = 0.0001 ) {
35+ bloom_parameters parameters;
36+
37+ // How many elements roughly do we expect to insert?
38+ parameters.projected_element_count = bloom_count;
39+
40+ // Maximum tolerable false positive probability? (0,1)
41+ parameters.false_positive_probability = false_positive;
42+
43+ if (!parameters)
44+ {
45+ std::cout << " Error - Invalid set of bloom filter parameters!" << std::endl;
46+ }
47+
48+ parameters.compute_optimal_parameters ();
49+ b = bloom_filter (parameters);
50+ }
51+
52+ void insert (const skel_t & val) {b.insert (val);}
53+ bool contains (const skel_t & val) {return b.contains (val);}
54+ };
55+
56+
57+ template <typename SYSTEMATICS_TYPE, typename SKEL_TYPE = typename SYSTEMATICS_TYPE::info_t , typename SEEN_TYPE = SeenSet<SKEL_TYPE>>
58+ class OEETracker {
59+ private:
60+ using systematics_t = SYSTEMATICS_TYPE;
61+ using taxon_t = typename systematics_t ::taxon_t ;
62+ using info_t = typename systematics_t ::info_t ;
63+ using hash_t = typename Ptr<taxon_t >::hash_t ;
64+ using fun_calc_complexity_t = std::function<double (const SKEL_TYPE&)>;
65+ using fun_calc_data_t = std::function<SKEL_TYPE(info_t &)>; // TODO: Allow other skeleton types
66+
67+ struct snapshot_info_t {
68+ Ptr<taxon_t > taxon = nullptr ; // This is what the systematics manager has
69+ Ptr<SKEL_TYPE> skel = nullptr ;
70+ int count = 0 ; // Count of this taxon at time of snapshot
71+
72+ ~snapshot_info_t () {if (skel){skel.Delete ();}}
73+ // bool operator==(const snapshot_info_t & other) const {return other.taxon == taxon;}
74+ };
75+
76+ std::deque<emp::vector<snapshot_info_t >> snapshots;
77+ std::deque<int > snapshot_times;
78+ Ptr<systematics_t > systematics_manager;
79+
80+ std::map<SKEL_TYPE, int > prev_coal_set;
81+ // std::unordered_set<SKEL_TYPE> seen;
82+
83+ fun_calc_data_t skeleton_fun;
84+ fun_calc_complexity_t complexity_fun;
85+ int generation_interval = 10 ;
86+ int resolution = 10 ;
87+
88+ DataManager<double , data::Current, data::Info> data_nodes;
89+ SEEN_TYPE seen;
90+ bool prune_top;
91+
92+ public:
93+ OEETracker (Ptr<systematics_t > s, fun_calc_data_t d, fun_calc_complexity_t c, bool remove_top = false , int bloom_count = 200000 , double bloom_false_positive = .0001 ) :
94+ systematics_manager (s), skeleton_fun(d), complexity_fun(c), seen(bloom_count, bloom_false_positive), prune_top(remove_top) {
95+
96+ emp_assert (s->GetStoreAncestors (), " OEE tracker only works with systematics manager where store_ancestor is set to true" );
97+
98+ data_nodes.New (" change" );
99+ data_nodes.New (" novelty" );
100+ data_nodes.New (" diversity" );
101+ data_nodes.New (" complexity" );
102+
103+ }
104+
105+ ~OEETracker () {}
106+
107+ int GetResolution () const {return resolution;}
108+ int GetGenerationInterval () const {return generation_interval;}
109+
110+ void SetResolution (int r) {resolution = r;}
111+ void SetGenerationInterval (int g) {generation_interval = g;}
112+
113+ void Update (size_t gen, int ud = -1 ) {
114+ if (Mod ((int )gen, resolution) == 0 ) {
115+ if (ud == -1 ) {
116+ ud = gen;
117+ }
118+ auto & sys_active = systematics_manager->GetActive ();
119+
120+ snapshots.emplace_back (sys_active.size ());
121+ int i = 0 ;
122+ for (auto tax : sys_active) {
123+ snapshots.back ()[i].taxon = tax;
124+ info_t info = tax->GetInfo ();
125+ snapshots.back ()[i].skel .New (skeleton_fun (info));
126+ snapshots.back ()[i].count = tax->GetNumOrgs ();
127+ i++;
128+ }
129+
130+ snapshot_times.push_back (ud);
131+ if ((int )snapshots.size () > generation_interval/resolution + 1 ) {
132+ if (prune_top) {
133+ systematics_manager->RemoveBefore (snapshot_times.front () - 1 );
134+ }
135+ snapshot_times.pop_front ();
136+
137+ snapshots.pop_front ();
138+ }
139+ CalcStats (ud);
140+ }
141+ }
142+
143+ void CalcStats (size_t ud) {
144+ std::map<SKEL_TYPE, int > coal_set = CoalescenceFilter (ud);
145+ int change = 0 ;
146+ int novelty = 0 ;
147+ double most_complex = 0 ;
148+ double diversity = 0 ;
149+ if (coal_set.size () > 0 ) {
150+ diversity = Entropy (coal_set, [](std::pair<SKEL_TYPE, int > entry){return entry.second ;});
151+ }
152+
153+ for (auto & tax : coal_set) {
154+ if (!Has (prev_coal_set, tax.first )) {
155+ change++;
156+ }
157+ if (!seen.contains (tax.first )) {
158+ novelty++;
159+ seen.insert (tax.first );
160+ }
161+ double complexity = complexity_fun (tax.first );
162+ if (complexity > most_complex) {
163+ most_complex = complexity;
164+ }
165+ }
166+
167+ data_nodes.Get (" change" ).Add (change);
168+ data_nodes.Get (" novelty" ).Add (novelty);
169+ data_nodes.Get (" diversity" ).Add (diversity);
170+ data_nodes.Get (" complexity" ).Add (most_complex);
171+
172+ std::swap (prev_coal_set, coal_set);
173+ }
174+
175+ std::map<SKEL_TYPE, int > CoalescenceFilter (size_t ud) {
176+
177+ emp_assert (emp::Mod (generation_interval, resolution) == 0 , " Generation interval must be a multiple of resolution" , generation_interval, resolution);
178+
179+ std::map<SKEL_TYPE, int > res;
180+
181+ if ((int )snapshots.size () <= generation_interval/resolution) {
182+ return res;
183+ }
184+
185+ std::set<Ptr<taxon_t >> extant_canopy_roots = systematics_manager->GetCanopyExtantRoots (snapshot_times.front ());
186+ for ( snapshot_info_t & t : snapshots.front ()) {
187+ if (Has (extant_canopy_roots, t.taxon )) {
188+ if (Has (res, *(t.skel ))) {
189+ res[*(t.skel )] += t.count ;
190+ } else {
191+ res[*(t.skel )] = t.count ;
192+ }
193+ }
194+ }
195+
196+ return res;
197+ }
198+
199+
200+ Ptr<DataNode<double , data::Current, data::Info>> GetDataNode (const std::string & name) {
201+ return &(data_nodes.Get (name));
202+ }
203+
204+ };
205+
206+ // Helper function for skeletonization when organism is a sequence of
207+
208+ // Assumes org is sequence of inst_type
209+ template <typename ORG_TYPE, typename INST_TYPE>
210+ emp::vector<INST_TYPE> Skeletonize (ORG_TYPE & org, const INST_TYPE null_value, std::function<double (ORG_TYPE&)> fit_fun) {
211+ emp_assert (org.size () > 0 , " Empty org passed to skeletonize" );
212+
213+ emp::vector<INST_TYPE> skeleton;
214+ // Some fitness functions may require the org to be const and smoe may require it to not be
215+ // We can let the compiler deducce whetehr ORG_TYPE is const or not.
216+ // But the test org really needs to not be const
217+ typename std::remove_const<ORG_TYPE>::type test_org = ORG_TYPE (org);
218+ double fitness = fit_fun (test_org);
219+
220+ for (int i = 0 ; i < (int )org.size (); i++) {
221+ test_org[i] = null_value;
222+ double new_fitness = fit_fun (test_org);
223+ if (new_fitness < fitness) {
224+ skeleton.push_back (org[i]);
225+ }
226+ test_org[i] = org[i];
227+ }
228+
229+ return skeleton;
230+ }
231+
232+
233+ }
234+
235+ #endif
0 commit comments