|
| 1 | +#ifndef TSVQ_H |
| 2 | +#define TSVQ_H |
| 3 | + |
| 4 | +#include "StdIncludes.h" |
| 5 | + |
| 6 | +#include "Node.h" |
| 7 | +#include "threadpool.hpp" |
| 8 | +#include "threadpool/size_policies.hpp" |
| 9 | + |
| 10 | +using namespace boost::threadpool; |
| 11 | + |
| 12 | +template <typename T, typename ClustererType, typename DistanceType, typename ProtoType> |
| 13 | +class TSVQ { |
| 14 | +private: |
| 15 | + // The order of this tree |
| 16 | + int _m; |
| 17 | + |
| 18 | + // The order of this tree |
| 19 | + int _depth; |
| 20 | + |
| 21 | + // The root of the tree. |
| 22 | + Node<T> *_root; |
| 23 | + |
| 24 | + ClustererType _clusterer; |
| 25 | + DistanceType _distF; |
| 26 | + ProtoType _protoF; |
| 27 | + |
| 28 | +public: |
| 29 | + TSVQ(int order, int depth, int maxiters) : _m(order), _depth(depth), |
| 30 | + _root(new Node<T>()) { |
| 31 | + _clusterer.setNumClusters(_m); |
| 32 | + _clusterer.setMaxIters(maxiters); |
| 33 | + } |
| 34 | + |
| 35 | + int getClusterCount() { |
| 36 | + return clusterCount(_root); |
| 37 | + } |
| 38 | + |
| 39 | + int getObjCount() { |
| 40 | + return objCount(_root); |
| 41 | + } |
| 42 | + |
| 43 | + int getLevelCount() { |
| 44 | + return levelCount(_root); |
| 45 | + } |
| 46 | + |
| 47 | + int getMaxLevelCount() { |
| 48 | + return maxLevelCount(_root); |
| 49 | + } |
| 50 | + |
| 51 | + int getMinLevelCount() { |
| 52 | + return minLevelCount(_root); |
| 53 | + } |
| 54 | + |
| 55 | + void printStats() { |
| 56 | + std::cout << "\nNumber of objects: " << getObjCount(); |
| 57 | + std::cout << "\nCluster count: " << getClusterCount(); |
| 58 | + std::cout << "\nLevel count (node 0): " << getLevelCount(); |
| 59 | + std::cout << "\nMax depth: " << getMaxLevelCount(); |
| 60 | + std::cout << "\nRMSE: " << getRMSE(); |
| 61 | + } |
| 62 | + |
| 63 | + void cluster(vector<T*> &data) { |
| 64 | + // make the root a leaf containing all data |
| 65 | + _root->addAll(data); |
| 66 | + cluster(_root, _depth); |
| 67 | + } |
| 68 | + |
| 69 | + void cluster(Node<T>* current, int depth) { |
| 70 | + if (depth == 1) { |
| 71 | + return; |
| 72 | + } else { |
| 73 | + vector<Cluster<T>*> clusters = _clusterer.cluster(current->getKeys()); |
| 74 | + current->clearKeysAndChildren(); |
| 75 | + for (Cluster<T>* c : clusters) { |
| 76 | + Node<T>* child = new Node<T>(); |
| 77 | + child->addAll(c->getNearestList()); |
| 78 | + current->add(c->getCentroid(), child); |
| 79 | + } |
| 80 | + for (Node<T>* n : current->getChildren()) { |
| 81 | + cluster(n, depth - 1); |
| 82 | + } |
| 83 | + } |
| 84 | + } |
| 85 | + |
| 86 | + double getRMSE() { |
| 87 | + return RMSE(); |
| 88 | + } |
| 89 | + |
| 90 | + |
| 91 | +private: |
| 92 | + |
| 93 | + double RMSE() { |
| 94 | + double RMSE = sumSquaredError(NULL, _root); |
| 95 | + int size = getObjCount(); |
| 96 | + RMSE /= size; |
| 97 | + RMSE = sqrt(RMSE); |
| 98 | + return RMSE; |
| 99 | + } |
| 100 | + |
| 101 | + double sumSquaredError(T* parentKey, Node<T> *child) { |
| 102 | + |
| 103 | + double distance = 0.0; |
| 104 | + double dis; |
| 105 | + |
| 106 | + if (child->isLeaf()) { |
| 107 | + vector<T*> &keys = child->getKeys(); |
| 108 | + for (T* key : keys) { |
| 109 | + dis = _distF(key, parentKey); |
| 110 | + distance += dis * dis; |
| 111 | + } |
| 112 | + } else { |
| 113 | + int numEntries = child->size(); |
| 114 | + |
| 115 | + vector<T*> &keys = child->getKeys(); |
| 116 | + vector<Node<T>*> &children = child->getChildren(); |
| 117 | + |
| 118 | + for (int i = 0; i < numEntries; i++) { |
| 119 | + distance += sumSquaredError(keys[i], children[i]); |
| 120 | + } |
| 121 | + } |
| 122 | + |
| 123 | + return distance; |
| 124 | + } |
| 125 | + |
| 126 | + int objCount(Node<T>* current) { |
| 127 | + |
| 128 | + if (current->isLeaf()) { |
| 129 | + return current->size(); |
| 130 | + } else { |
| 131 | + int localCount = 0; |
| 132 | + vector<Node<T>*>& children = current->getChildren(); |
| 133 | + for (Node<T> *child : children) { |
| 134 | + localCount += objCount(child); |
| 135 | + |
| 136 | + } |
| 137 | + return localCount; |
| 138 | + } |
| 139 | + } |
| 140 | + |
| 141 | + int clusterCount(Node<T>* current) { |
| 142 | + if (current->isLeaf()) { |
| 143 | + return 1; |
| 144 | + } else { |
| 145 | + int localCount = 0; |
| 146 | + vector<Node<T>*>& children = current->getChildren(); |
| 147 | + for (Node<T> *child : children) { |
| 148 | + localCount += clusterCount(child); |
| 149 | + } |
| 150 | + return localCount; |
| 151 | + } |
| 152 | + } |
| 153 | + |
| 154 | + int levelCount(Node<T>* current) { |
| 155 | + if (current->isLeaf()) { |
| 156 | + return 1; |
| 157 | + } else { |
| 158 | + return levelCount(current->getChild(0)) + 1; |
| 159 | + } |
| 160 | + } |
| 161 | + |
| 162 | + int maxLevelCount(Node<T>* current) { |
| 163 | + if (current->isLeaf()) { |
| 164 | + return 1; |
| 165 | + } |
| 166 | + else { |
| 167 | + int count = 0; |
| 168 | + int maxCount = 0; |
| 169 | + vector<Node<T>*>& children = current->getChildren(); |
| 170 | + for (Node<T> *child : children) { |
| 171 | + count = maxLevelCount(child); |
| 172 | + if (count > maxCount) maxCount = count; |
| 173 | + } |
| 174 | + return maxCount + 1; |
| 175 | + } |
| 176 | + } |
| 177 | + |
| 178 | + int minLevelCount(Node<T>* current) { |
| 179 | + if (current->isLeaf()) { |
| 180 | + return 1; |
| 181 | + } |
| 182 | + else { |
| 183 | + int count = 0; |
| 184 | + int minCount = 0; |
| 185 | + vector<Node<T>*>& children = current->getChildren(); |
| 186 | + for (Node<T> *child : children) { |
| 187 | + count = maxLevelCount(child); |
| 188 | + if (count < minCount) minCount = count; |
| 189 | + } |
| 190 | + return minCount + 1; |
| 191 | + } |
| 192 | + } |
| 193 | + |
| 194 | + |
| 195 | +}; |
| 196 | + |
| 197 | + |
| 198 | +#endif /* TSVQ_H */ |
| 199 | + |
0 commit comments