Skip to content

Commit 1ddb71b

Browse files
Atoi demo and parallel extraction (compress) (#76)
Basic demo code submission. Functional, passes basic testing, and behaves equivalently to atoi, strlen, etc. from libc. Needs more rigorous unit testing to be truly shippable. ---- * Necessary introduction of binary operators to BooleanSWAR * Fixes compile-time bug * Implementation of compress, leading spaces, benchmarking * Atoi benchmarked * Repairs parallel suffix * Re-enables building Robin Hood tests * Proper benchmark of parallel extraction * Needs to #include array * Update inc/zoo/swar/SWAR.h * Update test/swar/BasicOperations.cpp * Fix broken test (compile error, and mislabel of SWAR<4,64t> as S16_64, causing conflict) --------- Co-authored-by: Eddie <eddie see email elsewhere> Co-authored-by: Scottbruceheart <[email protected]> Co-authored-by: Scott Bruceheart <[email protected]>
1 parent d7c0180 commit 1ddb71b

File tree

13 files changed

+569
-78
lines changed

13 files changed

+569
-78
lines changed

benchmark/CMakeLists.txt

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,9 +56,19 @@ add_executable(
5656
set_xcode_properties(zoo-google-benchmark)
5757

5858
add_executable(
59-
zoo-atoi-benchmark benchmark_main.cpp bm-swar.cpp atoi.cpp
59+
zoo-atoi-benchmark benchmark_main.cpp bm-swar.cpp atoi.cpp swar/compress.cpp
6060
)
6161
set_xcode_properties(zoo-atoi-benchmark)
6262

6363
target_link_libraries(zoo-google-benchmark benchmark::benchmark)
6464
target_link_libraries(zoo-atoi-benchmark benchmark::benchmark)
65+
66+
# atoi.cpp built as a shared library, plus a benchmark executable that links
# against that library instead of compiling atoi.cpp into itself.
add_library(zoo-atoi-implementations SHARED atoi.cpp)
add_executable(zoo-atoi-benchmark-from-dynamic-library
    benchmark_main.cpp
    bm-swar.cpp
)
target_link_libraries(zoo-atoi-benchmark-from-dynamic-library
    zoo-atoi-implementations
    benchmark::benchmark
)

benchmark/atoi-corpus.h

Lines changed: 164 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@
44
#include <vector>
55
#include <string>
66
#include <cstring>
7+
#include <array>
78
#include <random>
9+
#include <math.h>
810

911
struct Corpus8DecimalDigits {
1012
std::vector<int> asNumbers_;
@@ -85,7 +87,7 @@ struct CorpusStringLength {
8587

8688
while(count--) {
8789
auto length = strSize(generator);
88-
sizes.push_back(length);
90+
sizes.push_back(length + 1);
8991
for(auto i = length; i--; ) {
9092
allCharacters.append(1, characters(generator));
9193
}
@@ -143,10 +145,171 @@ struct CorpusStringLength {
143145
AVX2_STRLEN_CORPUS_X_LIST \
144146
NEON_STRLEN_CORPUS_X_LIST
145147

148+
// Benchmark corpus of NUL-terminated strings, stored back to back in a single
// buffer, each starting with a random run of whitespace characters.
//
// skips_[i] is the length of entry i including its terminating NUL, so adding
// it to the address of entry i gives the address of entry i + 1.
struct CorpusLeadingSpaces {
    constexpr static auto CountOfSpaceCharactersAvailable = 6;
    // The whitespace characters the leading runs are drawn from.
    constexpr static inline std::array<char, CountOfSpaceCharactersAvailable> Spaces =
        { ' ', '\n', '\t', '\r', '\f', '\v' };
    std::vector<int> skips_;
    std::string characters_;

    CorpusLeadingSpaces(std::vector<int> &&skips, std::string &&cs):
        skips_{std::move(skips)}, characters_{std::move(cs)}
    {}

    // Builds a corpus of 1031 entries using the given random number generator.
    // Each entry is: a geometrically distributed number of whitespace
    // characters, then a geometrically distributed number of arbitrary bytes
    // (0..255, so they may themselves be whitespace or NUL), then a NUL.
    template<typename G>
    static auto makeCorpus(G &generator) {
        auto remaining = 1031; // see Corpus8DecimalDigits for why 1031
        std::vector<int> sizes;
        std::string allCharacters;
        std::geometric_distribution<>
            // mean of 28 spaces: unrepresentatively very large, but will
            // cross the 32 boundary to test 32-byte techniques
            spacesCount(1.0/29),
            extraCharacters(0.5);
        std::uniform_int_distribution<>
            spacer(0, CountOfSpaceCharactersAvailable - 1),
            moreCharacters(0, 255);

        while(remaining--) {
            const auto spaces = spacesCount(generator);
            for(auto i = spaces; i--; ) {
                allCharacters.push_back(Spaces[spacer(generator)]);
            }
            // The number of trailing bytes comes from extraCharacters;
            // moreCharacters only supplies the byte values themselves.
            const auto extra = extraCharacters(generator);
            for(auto i = extra; i--; ) {
                allCharacters.push_back(static_cast<char>(moreCharacters(generator)));
            }
            sizes.push_back(spaces + extra + 1); // + 1 for the terminator
            allCharacters.push_back('\0');
        }
        return CorpusLeadingSpaces(std::move(sizes), std::move(allCharacters));
    }

    // Cursor over the entries: cp points at the current entry, skips at its
    // length, and sentinel one past the last length.
    struct Iterator {
        int *skips, *sentinel;
        char *cp;

        // Moves cp past the current entry and advances to the next length.
        Iterator &operator++() {
            cp += *skips++;
            return *this;
        }

        // Pointer to the first character of the current entry.
        char *operator*() {
            return cp;
        }

        // Advances, then reports whether there is still an entry to visit.
        auto next() noexcept {
            ++(*this);
            return sentinel != skips;
        }
    };

    // Iterator positioned at the first entry.
    Iterator commence() {
        return {
            skips_.data(), skips_.data() + skips_.size(), characters_.data()
        };
    }
};
213+
214+
// X-list: each entry pairs a type-name suffix with the function to invoke.
#define LEADING_SPACES_CORPUS_X_LIST \
    X(GLIB_Spaces, spaces_glibc) \
    X(ZooSpaces, zoo::leadingSpacesCount)
215+
216+
// Replaceable hook that CorpusAtoi::makeCorpus calls with every generated
// entry: a pointer to its first character and its length. The default
// implementation does nothing.
// `inline` because this header defines the variable: without it, including
// the header from more than one translation unit is a multiple definition.
inline void (*consumeStrPtr)(const char *, unsigned) =
    [](const char *, unsigned) {};
220+
221+
struct CorpusAtoi {
222+
constexpr static auto CountOfSpaceCharactersAvailable = 6;
223+
constexpr static inline std::array<char, CountOfSpaceCharactersAvailable> Spaces =
224+
{ ' ', '\n', '\t', '\r', '\f', '\v' };
225+
std::vector<int> skips_;
226+
std::string characters_;
227+
228+
CorpusAtoi(std::vector<int> &&skips, std::string &&cs):
229+
skips_{std::move(skips)}, characters_{std::move(cs)}
230+
{}
231+
232+
template<typename G>
233+
static auto makeCorpus(G &generator) {
234+
auto count = 1031; // see Corpus8DecimalDigits for why 1031
235+
std::vector<int> sizes;
236+
std::string allCharacters;
237+
std::geometric_distribution
238+
spacesCount(0.5),
239+
insignificantZeros(0.9);
240+
std::uniform_real_distribution numberLogarithmBase10(-2.0, 9.2);
241+
// a maximum of 10^9.2 is ~1.6 billion, within the range.
242+
// negative "logarithms" are for indicating negative numbers up to
243+
// -10^2, or -100
244+
std::uniform_int_distribution
245+
postNumber('9' + 1, 255),
246+
spacer(0, CountOfSpaceCharactersAvailable - 1);
247+
char conversionBuffer[20];
248+
249+
while(count--) {
250+
auto currentLength = allCharacters.size();
251+
auto count = spacesCount(generator);
252+
for(auto i = count; i--; ) {
253+
allCharacters.append(1, Spaces[spacer(generator)]);
254+
}
255+
auto logBase10 = numberLogarithmBase10(generator);
256+
int negativeSign;
257+
if(0.0 <= logBase10) {
258+
negativeSign = 0;
259+
} else {
260+
allCharacters.append(1, '-');
261+
logBase10 = -logBase10;
262+
negativeSign = 1;
263+
}
264+
auto iz = insignificantZeros(generator);
265+
for(auto i = iz; i--; ) {
266+
allCharacters.append(1, '0');
267+
}
268+
int number = exp(logBase10 * M_LN10);
269+
auto n = sprintf(conversionBuffer, "%d%c", number, postNumber(generator));
270+
if(n < 0) { throw 0; }
271+
allCharacters.append(conversionBuffer);
272+
sizes.push_back(count + negativeSign + iz + n);
273+
consumeStrPtr(allCharacters.c_str() + currentLength, count + negativeSign + iz + n);
274+
}
275+
return CorpusStringLength(std::move(sizes), std::move(allCharacters));
276+
}
277+
278+
struct Iterator {
279+
int *skips, *sentinel;
280+
char *cp;
281+
282+
Iterator &operator++() {
283+
cp += *skips++;
284+
return *this;
285+
}
286+
287+
char *operator*() {
288+
return cp;
289+
}
290+
291+
auto next() noexcept {
292+
++(*this);
293+
return sentinel != skips;
294+
}
295+
};
296+
297+
Iterator commence() {
298+
return {
299+
skips_.data(), skips_.data() + skips_.size(), characters_.data()
300+
};
301+
}
302+
};
303+
304+
// X-list: each entry pairs a type-name suffix with the function to invoke.
#define ATOI_CORPUS_X_LIST \
    X(GLIBC_atoi, atoi) \
    X(ZOO_ATOI, zoo::c_strToI) \
    X(COMPARE_ATOI, zoo::compareAtoi)
306+
146307
#define X(Typename, FunctionToCall) \
147308
struct Invoke##Typename { int operator()(const char *p) { return FunctionToCall(p); } };
148309

149310
PARSE8BYTES_CORPUS_X_LIST
150311
STRLEN_CORPUS_X_LIST
312+
LEADING_SPACES_CORPUS_X_LIST
313+
ATOI_CORPUS_X_LIST
151314

152315
#undef X

0 commit comments

Comments
 (0)