Skip to content
This repository was archived by the owner on Sep 27, 2019. It is now read-only.

Commit 57b10c0

Browse files
committed
Use IPS4o in-place sorting
1 parent 6abdb85 commit 57b10c0

38 files changed

+3293
-1708
lines changed

src/codegen/code_context.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,7 @@ class InstructionCounts : public llvm::ModulePass {
142142
}
143143

144144
void DumpStats() const {
145+
#ifndef NDEBUG
145146
LOG_DEBUG("# functions: %" PRId64 " (%" PRId64
146147
" external), # blocks: %" PRId64 ", # instructions: %" PRId64,
147148
func_count_, external_func_count_, basic_block_count_,
@@ -150,6 +151,7 @@ class InstructionCounts : public llvm::ModulePass {
150151
const char *inst_name = llvm::Instruction::getOpcodeName(iter.first);
151152
LOG_DEBUG("↳ %s: %" PRId64, inst_name, iter.second);
152153
}
154+
#endif
153155
}
154156

155157
private:

src/codegen/compact_storage.cpp

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212

1313
#include "codegen/compact_storage.h"
1414

15+
#include "ips4o/ips4o.hpp"
16+
1517
#include "codegen/type/sql_type.h"
1618

1719
namespace peloton {
@@ -138,11 +140,10 @@ llvm::Type *CompactStorage::Setup(CodeGen &codegen,
138140

139141
// Sort the entries by decreasing size. This minimizes storage overhead due to
140142
// padding (potentially) added by LLVM.
141-
// TODO: Does this help?
142-
std::sort(storage_format_.begin(), storage_format_.end(),
143-
[](const EntryInfo &left, const EntryInfo &right) {
144-
return right.num_bytes < left.num_bytes;
145-
});
143+
ips4o::sort(storage_format_.begin(), storage_format_.end(),
144+
[](const EntryInfo &left, const EntryInfo &right) {
145+
return right.num_bytes < left.num_bytes;
146+
});
146147

147148
// Now we construct the LLVM type of this storage space. First comes bytes
148149
// to manage the null bitmap. Then all the data elements.

src/codegen/updateable_storage.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212

1313
#include "codegen/updateable_storage.h"
1414

15-
#include "pdqsort/pdqsort.h"
15+
#include "ips4o/ips4o.hpp"
1616

1717
#include "codegen/lang/if.h"
1818
#include "codegen/type/sql_type.h"
@@ -66,11 +66,11 @@ llvm::Type *UpdateableStorage::Finalize(CodeGen &codegen) {
6666
}
6767

6868
// Sort the entries by decreasing size
69-
pdqsort(storage_format_.begin(), storage_format_.end(),
70-
[](const CompactStorage::EntryInfo &left,
71-
const CompactStorage::EntryInfo &right) {
72-
return right.num_bytes < left.num_bytes;
73-
});
69+
ips4o::sort(storage_format_.begin(), storage_format_.end(),
70+
[](const CompactStorage::EntryInfo &left,
71+
const CompactStorage::EntryInfo &right) {
72+
return right.num_bytes < left.num_bytes;
73+
});
7474

7575
// Now we construct the LLVM type of this storage space
7676
std::vector<llvm::Type *> llvm_types;

src/codegen/util/sorter.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
#include <queue>
1616

17-
#include "pdqsort/pdqsort.h"
17+
#include "ips4o/ips4o.hpp"
1818

1919
#include "common/synchronization/count_down_latch.h"
2020
#include "common/timer.h"
@@ -88,7 +88,7 @@ void Sorter::Sort() {
8888

8989
// Sort the sucker
9090
auto cmp = [this](char *l, char *r) { return cmp_func_(l, r) < 0; };
91-
pdqsort(tuples_.begin(), tuples_.end(), cmp);
91+
ips4o::sort(tuples_.begin(), tuples_.end(), cmp);
9292

9393
// Setup pointers
9494
tuples_start_ = tuples_.data();
@@ -170,7 +170,7 @@ void Sorter::SortParallel(
170170
// Sort the local separators and choose the median
171171
char *separator = nullptr;
172172
if (idx < separators.size() - 1) {
173-
pdqsort(separators[idx].begin(), separators[idx].end(), cmp);
173+
ips4o::sort(separators[idx].begin(), separators[idx].end(), cmp);
174174
separator = separators[idx][sorters.size() / 2];
175175
}
176176

src/threadpool/worker_pool.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ void WorkerFunc(std::string thread_name, std::atomic_bool *is_running,
2424
constexpr auto kMinPauseTime = std::chrono::microseconds(1);
2525
constexpr auto kMaxPauseTime = std::chrono::microseconds(1000);
2626

27-
LOG_DEBUG("Thread %s starting ...", thread_name.c_str());
27+
LOG_INFO("Thread %s starting ...", thread_name.c_str());
2828

2929
auto pause_time = kMinPauseTime;
3030
while (is_running->load() || !task_queue->IsEmpty()) {
@@ -39,7 +39,7 @@ void WorkerFunc(std::string thread_name, std::atomic_bool *is_running,
3939
}
4040
}
4141

42-
LOG_DEBUG("Thread %s exiting ...", thread_name.c_str());
42+
LOG_INFO("Thread %s exiting ...", thread_name.c_str());
4343
}
4444

4545
} // namespace

test/codegen/sorter_test.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ TEST_F(SorterTest, ParallelSortTest) {
139139
sorter_.SortParallel(thread_states, 0);
140140

141141
timer.Stop();
142-
LOG_DEBUG("Parallel sort took: %.2lf ms", timer.GetDuration());
142+
LOG_INFO("Parallel sort took: %.2lf ms", timer.GetDuration());
143143

144144
// Check main sorter is sorted
145145
CheckSorted(sorter_, true);

third_party/ips4o/LICENSE

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
BSD 2-Clause License
2+
3+
Copyright © 2017, Michael Axtmann <[email protected]>
4+
Copyright © 2017, Daniel Ferizovic <[email protected]>
5+
Copyright © 2017, Sascha Witt <[email protected]>
6+
All rights reserved.
7+
8+
Redistribution and use in source and binary forms, with or without
9+
modification, are permitted provided that the following conditions are met:
10+
11+
* Redistributions of source code must retain the above copyright notice, this
12+
list of conditions and the following disclaimer.
13+
14+
* Redistributions in binary form must reproduce the above copyright notice,
15+
this list of conditions and the following disclaimer in the documentation
16+
and/or other materials provided with the distribution.
17+
18+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

third_party/ips4o/README.md

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# In-place Parallel Super Scalar Samplesort (IPS⁴o)
2+
3+
This is the implementation of the algorithm presented in the [eponymous paper](https://arxiv.org/abs/1705.02257),
4+
which contains an in-depth description of its inner workings, as well as an extensive experimental performance evaluation.
5+
Here's the abstract:
6+
7+
> We present a sorting algorithm that works in-place, executes in parallel, is
8+
> cache-efficient, avoids branch-mispredictions, and performs work O(n log n) for
9+
> arbitrary inputs with high probability. The main algorithmic contributions are
10+
> new ways to make distribution-based algorithms in-place: On the practical side,
11+
> by using coarse-grained block-based permutations, and on the theoretical side,
12+
> we show how to eliminate the recursion stack. Extensive experiments show that
13+
> our algorithm IPS⁴o scales well on a variety of multi-core machines. We
14+
> outperform our closest in-place competitor by a factor of up to 3. Even as
15+
> a sequential algorithm, we are up to 1.5 times faster than the closest
16+
> sequential competitor, BlockQuicksort.
17+
18+
## Usage
19+
20+
```C++
21+
#include "ips4o.hpp"
22+
23+
// sort sequentially
24+
ips4o::sort(begin, end[, comparator])
25+
26+
// sort in parallel (uses OpenMP if available, std::thread otherwise)
27+
ips4o::parallel::sort(begin, end[, comparator])
28+
```
29+
30+
Make sure to compile with C++14 support. Currently, the code does not compile on Windows.
31+
32+
For the parallel algorithm, you need to enable either OpenMP (`-fopenmp`) or C++ threads (e.g., `-pthread`).
33+
You also need a CPU that supports 16-byte compare-and-exchange instructions.
34+
If you get undefined references to `__atomic_fetch_add_16`, either set your CPU correctly (e.g., `-march=native`),
35+
enable the instructions explicitly (`-mcx16`), or try linking against GCC's libatomic (`-latomic`).

third_party/ips4o/ips4o.hpp

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
/******************************************************************************
2+
* ips4o.hpp
3+
*
4+
* In-place Parallel Super Scalar Samplesort (IPS⁴o)
5+
*
6+
******************************************************************************
7+
* BSD 2-Clause License
8+
*
9+
* Copyright © 2017, Michael Axtmann <[email protected]>
10+
* Copyright © 2017, Daniel Ferizovic <[email protected]>
11+
* Copyright © 2017, Sascha Witt <[email protected]>
12+
* All rights reserved.
13+
*
14+
* Redistribution and use in source and binary forms, with or without
15+
* modification, are permitted provided that the following conditions are met:
16+
*
17+
* * Redistributions of source code must retain the above copyright notice, this
18+
* list of conditions and the following disclaimer.
19+
*
20+
* * Redistributions in binary form must reproduce the above copyright notice,
21+
* this list of conditions and the following disclaimer in the documentation
22+
* and/or other materials provided with the distribution.
23+
*
24+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25+
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26+
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
27+
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
28+
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29+
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30+
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
31+
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
32+
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33+
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34+
*****************************************************************************/
35+
36+
#pragma once
37+
38+
#include "ips4o/ips4o.hpp"

third_party/ips4o/ips4o/base_case.hpp

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
/******************************************************************************
2+
* ips4o/base_case.hpp
3+
*
4+
* In-place Parallel Super Scalar Samplesort (IPS⁴o)
5+
*
6+
******************************************************************************
7+
* BSD 2-Clause License
8+
*
9+
* Copyright © 2017, Michael Axtmann <[email protected]>
10+
* Copyright © 2017, Daniel Ferizovic <[email protected]>
11+
* Copyright © 2017, Sascha Witt <[email protected]>
12+
* All rights reserved.
13+
*
14+
* Redistribution and use in source and binary forms, with or without
15+
* modification, are permitted provided that the following conditions are met:
16+
*
17+
* * Redistributions of source code must retain the above copyright notice, this
18+
* list of conditions and the following disclaimer.
19+
*
20+
* * Redistributions in binary form must reproduce the above copyright notice,
21+
* this list of conditions and the following disclaimer in the documentation
22+
* and/or other materials provided with the distribution.
23+
*
24+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25+
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26+
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
27+
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
28+
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29+
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30+
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
31+
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
32+
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33+
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34+
*****************************************************************************/
35+
36+
#pragma once
37+
38+
#include <algorithm>
39+
#include <cstddef>
40+
#include <utility>
41+
42+
#include "ips4o_fwd.hpp"
43+
#include "utils.hpp"
44+
45+
namespace ips4o {
46+
namespace detail {
47+
48+
/**
49+
* Insertion sort.
*
* Sorts the range [begin, end) in place, using comp(a, b) to decide whether
* a must be ordered before b. Elements are relocated with move operations.
*
* The iterator type must support `begin + 1`, `it - 1` and `it < end`, i.e.
* random-access style arithmetic and ordering.
*
* Precondition: the range is non-empty (begin < end). This is only stated to
* the compiler through IPS4O_ASSUME_NOT; it is not checked here.
50+
*/
51+
template <class It, class Comp>
52+
void insertionSort(const It begin, const It end, Comp comp) {
// NOTE(review): IPS4O_ASSUME_NOT is defined outside this file (presumably in
// utils.hpp). It looks like an optimizer hint that the condition never holds;
// confirm its exact semantics there before relying on it as a runtime check.
53+
IPS4O_ASSUME_NOT(begin >= end);
54+
55+
// Invariant: [begin, it) is already sorted; each pass inserts *it into it.
for (It it = begin + 1; it < end; ++it) {
56+
auto val = std::move(*it);
57+
if (comp(val, *begin)) {
// val orders before the current first element: shift the whole sorted
// prefix one slot to the right and drop val at the front.
58+
std::move_backward(begin, it, it + 1);
59+
*begin = std::move(val);
60+
} else {
// val does not order before *begin, so the scan below stops at begin at
// the latest (comp(val, *begin) is false) and needs no bounds check.
61+
auto cur = it;
62+
for (auto next = it - 1; comp(val, *next); --next) {
// Shift the larger element one slot right and move the hole left.
63+
*cur = std::move(*next);
64+
cur = next;
65+
}
// cur now marks the hole where val belongs.
66+
*cur = std::move(val);
67+
}
68+
}
69+
}
70+
71+
/**
72+
* Wrapper for base case sorter, for easier swapping.
*
* Sorts [begin, end) by delegating to detail::insertionSort, forwarding the
* comparator unchanged. An empty range returns immediately, so the delegate
* is only ever called with at least one element.
73+
*/
74+
template <class It, class Comp>
75+
inline void baseCaseSort(It begin, It end, Comp&& comp) {
// Guard the empty range: insertionSort assumes begin < end.
76+
if (begin == end) return;
// The iterators are local copies, so they can be moved into the call.
77+
detail::insertionSort(std::move(begin), std::move(end), std::forward<Comp>(comp));
78+
}
79+
80+
81+
} // namespace detail
82+
} // namespace ips4o

0 commit comments

Comments
 (0)