Skip to content

Commit 4370da6

Browse files
authored
Attempt to fix bugs involving null pointers, zero slopes, and expansion factors equal to 1 (#29)
1. In function "expand_root" (alex.h), all pointers must be assigned, which resolves issue #26. 2. The case where the slope equals 0 must be checked carefully to prevent infinite values from arising. 3. In functions "significant_cost_deviation" and "catastrophic_cost" (alex_nodes.h), if the slope equals 0, all keys of the data node are equal and the node should not be split; otherwise, a single key would have to be mapped to different data nodes. 4. In function "expand_root" (alex.h), the computation of the expansion factor should be more precise for long long int, especially around the "ceil" function; otherwise the expansion factor may be equal to 1. Co-authored-by: Zhaoyan Sun <[email protected]>
1 parent 7f4cc98 commit 4370da6

File tree

4 files changed

+63
-32
lines changed

4 files changed

+63
-32
lines changed

src/core/alex.h

Lines changed: 39 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1362,10 +1362,16 @@ class Alex {
13621362
T new_domain_max = istats_.key_domain_max_;
13631363
data_node_type* outermost_node;
13641364
if (expand_left) {
1365-
auto key_difference = static_cast<double>(istats_.key_domain_min_ -
1365+
if constexpr (std::is_integral<T>::value){
1366+
T key_difference = istats_.key_domain_min_ - std::min(key, get_min_key());
1367+
expansion_factor = pow_2_round_up((key_difference + domain_size - 1) / domain_size + 1);
1368+
}
1369+
else{
1370+
auto key_difference = static_cast<double>(istats_.key_domain_min_ -
13661371
std::min(key, get_min_key()));
1367-
expansion_factor = pow_2_round_up(static_cast<int>(
1368-
std::ceil((key_difference + domain_size) / domain_size)));
1372+
expansion_factor = pow_2_round_up(static_cast<int>(
1373+
std::ceil((key_difference + domain_size) / domain_size)));
1374+
}
13691375
// Check for overflow. To avoid overflow on signed types while doing
13701376
// this check, we do comparisons using half of the relevant quantities.
13711377
T half_expandable_domain =
@@ -1382,10 +1388,16 @@ class Alex {
13821388
istats_.num_keys_below_key_domain = 0;
13831389
outermost_node = first_data_node();
13841390
} else {
1385-
auto key_difference = static_cast<double>(std::max(key, get_max_key()) -
1391+
if constexpr (std::is_integral<T>::value){
1392+
T key_difference = std::max(key, get_max_key()) - istats_.key_domain_max_;
1393+
expansion_factor = pow_2_round_up((key_difference + domain_size - 1) / domain_size + 1);
1394+
}
1395+
else{
1396+
auto key_difference = static_cast<double>(std::max(key, get_max_key()) -
13861397
istats_.key_domain_max_);
1387-
expansion_factor = pow_2_round_up(static_cast<int>(
1388-
std::ceil((key_difference + domain_size) / domain_size)));
1398+
expansion_factor = pow_2_round_up(static_cast<int>(
1399+
std::ceil((key_difference + domain_size) / domain_size)));
1400+
}
13891401
// Check for overflow. To avoid overflow on signed types while doing
13901402
// this check, we do comparisons using half of the relevant quantities.
13911403
T half_expandable_domain =
@@ -1483,10 +1495,10 @@ class Alex {
14831495
int left_boundary = outermost_node->lower_bound(left_boundary_value);
14841496
data_node_type* next = outermost_node;
14851497
for (int i = new_nodes_end; i > new_nodes_start; i -= n) {
1486-
if (i <= in_bounds_new_nodes_start) {
1487-
// Do not initialize nodes that fall outside the key type's domain
1488-
break;
1489-
}
1498+
// if (i <= in_bounds_new_nodes_start) {
1499+
// // Do not initialize nodes that fall outside the key type's domain
1500+
// break;
1501+
// }
14901502
int right_boundary = left_boundary;
14911503
if (i - n <= in_bounds_new_nodes_start) {
14921504
left_boundary = 0;
@@ -1512,10 +1524,10 @@ class Alex {
15121524
int right_boundary = outermost_node->lower_bound(right_boundary_value);
15131525
data_node_type* prev = nullptr;
15141526
for (int i = new_nodes_start; i < new_nodes_end; i += n) {
1515-
if (i >= in_bounds_new_nodes_end) {
1516-
// Do not initialize nodes that fall outside the key type's domain
1517-
break;
1518-
}
1527+
// if (i >= in_bounds_new_nodes_end) {
1528+
// // Do not initialize nodes that fall outside the key type's domain
1529+
// break;
1530+
// }
15191531
int left_boundary = right_boundary;
15201532
if (i + n >= in_bounds_new_nodes_end) {
15211533
right_boundary = outermost_node->data_capacity_;
@@ -1585,13 +1597,19 @@ class Alex {
15851597
bucketID - (bucketID % repeats); // first bucket with same child
15861598
int end_bucketID =
15871599
start_bucketID + repeats; // first bucket with different child
1588-
double left_boundary_value =
1589-
(start_bucketID - parent->model_.b_) / parent->model_.a_;
1590-
double right_boundary_value =
1591-
(end_bucketID - parent->model_.b_) / parent->model_.a_;
1592-
new_node->model_.a_ =
1593-
1.0 / (right_boundary_value - left_boundary_value) * fanout;
1594-
new_node->model_.b_ = -new_node->model_.a_ * left_boundary_value;
1600+
if (parent->model_.a_ == 0){
1601+
new_node->model_.a_ = 0;
1602+
new_node->model_.b_ = -1.0 * (start_bucketID - parent->model_.b_) / repeats;
1603+
}
1604+
else{
1605+
double left_boundary_value =
1606+
(start_bucketID - parent->model_.b_) / parent->model_.a_;
1607+
double right_boundary_value =
1608+
(end_bucketID - parent->model_.b_) / parent->model_.a_;
1609+
new_node->model_.a_ =
1610+
1.0 / (right_boundary_value - left_boundary_value) * fanout;
1611+
new_node->model_.b_ = -new_node->model_.a_ * left_boundary_value;
1612+
}
15951613

15961614
// Create new data nodes
15971615
if (used_fanout_tree_nodes.empty()) {

src/core/alex_base.h

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -127,8 +127,14 @@ class LinearModelBuilder {
127127

128128
// If floating point precision errors, fit spline
129129
if (model_->a_ <= 0) {
130-
model_->a_ = (y_max_ - y_min_) / (x_max_ - x_min_);
131-
model_->b_ = -static_cast<double>(x_min_) * model_->a_;
130+
if (x_max_ - x_min_ == 0){
131+
model_->a_ = 0;
132+
model_->b_ = static_cast<double>(y_sum_) / count_;
133+
}
134+
else{
135+
model_->a_ = (y_max_ - y_min_) / (x_max_ - x_min_);
136+
model_->b_ = -static_cast<double>(x_min_) * model_->a_;
137+
}
132138
}
133139
}
134140

src/core/alex_fanout_tree.h

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -362,13 +362,19 @@ int find_best_fanout_existing_node(const AlexModelNode<T, P>* parent,
362362
bucketID - (bucketID % repeats); // first bucket with same child
363363
int end_bucketID =
364364
start_bucketID + repeats; // first bucket with different child
365-
double left_boundary_value =
366-
(start_bucketID - parent->model_.b_) / parent->model_.a_;
367-
double right_boundary_value =
368-
(end_bucketID - parent->model_.b_) / parent->model_.a_;
369365
LinearModel<T> base_model;
370-
base_model.a_ = 1.0 / (right_boundary_value - left_boundary_value);
371-
base_model.b_ = -1.0 * base_model.a_ * left_boundary_value;
366+
if (parent->model_.a_ == 0){
367+
base_model.a_ = 0;
368+
base_model.b_ = -1.0 * (start_bucketID - parent->model_.b_) / repeats;
369+
}
370+
else{
371+
double left_boundary_value =
372+
(start_bucketID - parent->model_.b_) / parent->model_.a_;
373+
double right_boundary_value =
374+
(end_bucketID - parent->model_.b_) / parent->model_.a_;
375+
base_model.a_ = 1.0 / (right_boundary_value - left_boundary_value);
376+
base_model.b_ = -1.0 * base_model.a_ * left_boundary_value;
377+
}
372378

373379
for (int fanout = 1, fanout_tree_level = 0; fanout <= max_fanout;
374380
fanout *= 2, fanout_tree_level++) {

src/core/alex_nodes.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1410,7 +1410,8 @@ class AlexDataNode : public AlexNode<T, P> {
14101410
}
14111411
builder.build();
14121412

1413-
double rel_change_in_a = std::abs((model->a_ - prev_a) / prev_a);
1413+
double rel_change_in_a = prev_a == 0 ? (model->a_ != 0)
1414+
: std::abs((model->a_ - prev_a) / prev_a);
14141415
double abs_change_in_b = std::abs(model->b_ - prev_b);
14151416
double rel_change_in_b = std::abs(abs_change_in_b / prev_b);
14161417
if (verbose) {
@@ -1659,14 +1660,14 @@ class AlexDataNode : public AlexNode<T, P> {
16591660
// splitting
16601661
inline bool significant_cost_deviation() const {
16611662
double emp_cost = empirical_cost();
1662-
return emp_cost > kNodeLookupsWeight && emp_cost > 1.5 * this->cost_;
1663+
return this->model_.a_ != 0 && emp_cost > kNodeLookupsWeight && emp_cost > 1.5 * this->cost_;
16631664
}
16641665

16651666
// Returns true if cost is catastrophically high and we want to force a split
16661667
// The heuristic for this is if the number of shifts per insert (expected or
16671668
// empirical) is over 100
16681669
inline bool catastrophic_cost() const {
1669-
return shifts_per_insert() > 100 || expected_avg_shifts_ > 100;
1670+
return this->model_.a_ != 0 && shifts_per_insert() > 100 || expected_avg_shifts_ > 100;
16701671
}
16711672

16721673
// First value in returned pair is fail flag:

0 commit comments

Comments
 (0)