|
4 | 4 | #include <vector>
|
5 | 5 | #include <string>
|
6 | 6 | #include <cstring>
|
| 7 | +#include <array> |
7 | 8 | #include <random>
|
| 9 | +#include <math.h> |
8 | 10 |
|
9 | 11 | struct Corpus8DecimalDigits {
|
10 | 12 | std::vector<int> asNumbers_;
|
@@ -85,7 +87,7 @@ struct CorpusStringLength {
|
85 | 87 |
|
86 | 88 | while(count--) {
|
87 | 89 | auto length = strSize(generator);
|
88 |
| - sizes.push_back(length); |
| 90 | + sizes.push_back(length + 1); |
89 | 91 | for(auto i = length; i--; ) {
|
90 | 92 | allCharacters.append(1, characters(generator));
|
91 | 93 | }
|
@@ -143,10 +145,171 @@ struct CorpusStringLength {
|
143 | 145 | AVX2_STRLEN_CORPUS_X_LIST \
|
144 | 146 | NEON_STRLEN_CORPUS_X_LIST
|
145 | 147 |
|
/// Benchmark corpus of strings that start with a run of whitespace.
///
/// All entries live back to back in `characters_`; every entry is made of
/// leading whitespace, a few filler bytes and a terminating '\0'.
/// `skips_[i]` is the total number of bytes entry `i` occupies (terminator
/// included), that is, the distance from the start of entry `i` to the start
/// of entry `i + 1`.
struct CorpusLeadingSpaces {
    constexpr static auto CountOfSpaceCharactersAvailable = 6;
    /// The characters used to build the leading whitespace run.
    constexpr static inline std::array<char, CountOfSpaceCharactersAvailable> Spaces =
        { ' ', '\n', '\t', '\r', '\f', '\v' };
    std::vector<int> skips_;
    std::string characters_;

    /// Takes ownership of the per-entry sizes and of the concatenated bytes.
    CorpusLeadingSpaces(std::vector<int> &&skips, std::string &&cs):
        skips_{std::move(skips)}, characters_{std::move(cs)}
    {}

    /// Builds a corpus of 1031 entries drawing all randomness from `generator`.
    ///
    /// @param generator a uniform random bit generator usable with the
    ///        `<random>` distributions
    /// @return the populated CorpusLeadingSpaces
    template<typename G>
    static auto makeCorpus(G &generator) {
        constexpr auto EntryCount = 1031; // see Corpus8DecimalDigits for why 1031
        std::vector<int> sizes;
        sizes.reserve(EntryCount);
        std::string allCharacters;
        // p = 1/29 gives a mean of 28 leading spaces: unrepresentatively very
        // large, but will cross the 32 boundary to test 32-byte techniques
        std::geometric_distribution<> spacesCount(1.0/29);
        // how many filler bytes follow the whitespace run
        std::geometric_distribution<> extraCharacters(0.5);
        std::uniform_int_distribution<> spacer(0, CountOfSpaceCharactersAvailable - 1);
        // value of each filler byte; any byte is possible, including '\0'
        // and whitespace
        std::uniform_int_distribution<> moreCharacters(0, 255);

        for(auto remaining = EntryCount; remaining--; ) {
            const auto spaces = spacesCount(generator);
            for(auto i = spaces; i--; ) {
                allCharacters.append(1, Spaces[spacer(generator)]);
            }
            const auto extra = extraCharacters(generator);
            for(auto i = extra; i--; ) {
                allCharacters.append(1, static_cast<char>(moreCharacters(generator)));
            }
            // + 1 accounts for the terminator appended right below
            sizes.push_back(spaces + extra + 1);
            allCharacters.append(1, '\0');
        }
        return CorpusLeadingSpaces(std::move(sizes), std::move(allCharacters));
    }

    /// Cursor over the entries: `*it` is the start of the current entry.
    struct Iterator {
        int *skips, *sentinel;
        char *cp;

        /// Moves to the next entry by jumping over the current one.
        Iterator &operator++() {
            cp += *skips++;
            return *this;
        }

        char *operator*() {
            return cp;
        }

        /// Advances and reports whether there is still an entry to visit.
        auto next() noexcept {
            ++(*this);
            return sentinel != skips;
        }
    };

    /// Returns an iterator positioned on the first entry.
    Iterator commence() {
        return {
            skips_.data(), skips_.data() + skips_.size(), characters_.data()
        };
    }
};
| 213 | + |
// X-macro list of the leading-whitespace implementations to benchmark.
// Each entry is X(Typename, FunctionToCall); the meaning of X is supplied
// where the list is expanded.
#define LEADING_SPACES_CORPUS_X_LIST \
    X(GLIB_Spaces, spaces_glibc) \
    X(ZooSpaces, zoo::leadingSpacesCount)
| 215 | + |
/// Replaceable hook that receives the start of a freshly generated corpus
/// entry and its length in bytes. The default does nothing; assign another
/// function to inspect entries while a corpus is being built.
/// The pointer refers to a buffer that keeps growing, so it must not be
/// retained after the call returns.
/// `inline` keeps a single definition when several translation units include
/// this file.
inline void (*consumeStrPtr)(const char *, unsigned) =
    [](const char *, unsigned) {};
| 220 | + |
/// Benchmark corpus of textual integers for atoi-like functions.
///
/// All entries live back to back in `characters_`. Every entry is: optional
/// leading whitespace, an optional '-', optional insignificant zeros, the
/// decimal digits of the number and one trailing byte in the range
/// ['9' + 1, 255] that ends the number. Entries are not individually
/// NUL-terminated. `skips_[i]` is the number of bytes entry `i` occupies.
struct CorpusAtoi {
    constexpr static auto CountOfSpaceCharactersAvailable = 6;
    /// The characters used to build the leading whitespace run.
    constexpr static inline std::array<char, CountOfSpaceCharactersAvailable> Spaces =
        { ' ', '\n', '\t', '\r', '\f', '\v' };
    std::vector<int> skips_;
    std::string characters_;

    /// Takes ownership of the per-entry sizes and of the concatenated bytes.
    CorpusAtoi(std::vector<int> &&skips, std::string &&cs):
        skips_{std::move(skips)}, characters_{std::move(cs)}
    {}

    /// Builds a corpus of 1031 entries drawing all randomness from `generator`.
    /// Each finished entry is also reported through `consumeStrPtr`.
    ///
    /// @param generator a uniform random bit generator usable with the
    ///        `<random>` distributions
    /// @return the populated CorpusAtoi
    template<typename G>
    static auto makeCorpus(G &generator) {
        constexpr auto EntryCount = 1031; // see Corpus8DecimalDigits for why 1031
        std::vector<int> sizes;
        sizes.reserve(EntryCount);
        std::string allCharacters;
        std::geometric_distribution<> spacesCount(0.5);
        std::geometric_distribution<> insignificantZeros(0.9);
        // a maximum of 10^9.2 is ~1.6 billion, within the range.
        // negative "logarithms" are for indicating negative numbers up to
        // -10^2, or -100
        std::uniform_real_distribution<> numberLogarithmBase10(-2.0, 9.2);
        // the byte that follows the digits: never a digit, sign or whitespace
        std::uniform_int_distribution<> postNumber('9' + 1, 255);
        std::uniform_int_distribution<> spacer(0, CountOfSpaceCharactersAvailable - 1);

        for(auto remaining = EntryCount; remaining--; ) {
            const auto entryStart = allCharacters.size();
            const auto spaces = spacesCount(generator);
            for(auto i = spaces; i--; ) {
                allCharacters.append(1, Spaces[spacer(generator)]);
            }
            auto logBase10 = numberLogarithmBase10(generator);
            int negativeSign = 0;
            if(logBase10 < 0.0) {
                allCharacters.append(1, '-');
                logBase10 = -logBase10;
                negativeSign = 1;
            }
            const auto iz = insignificantZeros(generator);
            allCharacters.append(static_cast<std::string::size_type>(iz), '0');
            // 10^logBase10, truncated toward zero
            const int number = static_cast<int>(exp(logBase10 * M_LN10));
            const auto digits = std::to_string(number);
            allCharacters.append(digits);
            allCharacters.append(1, static_cast<char>(postNumber(generator)));
            // digits plus the single byte that follows them
            const int n = static_cast<int>(digits.size()) + 1;
            const auto entryLength = spaces + negativeSign + iz + n;
            sizes.push_back(entryLength);
            consumeStrPtr(allCharacters.c_str() + entryStart, entryLength);
        }
        return CorpusAtoi(std::move(sizes), std::move(allCharacters));
    }

    /// Cursor over the entries: `*it` is the start of the current entry.
    struct Iterator {
        int *skips, *sentinel;
        char *cp;

        /// Moves to the next entry by jumping over the current one.
        Iterator &operator++() {
            cp += *skips++;
            return *this;
        }

        char *operator*() {
            return cp;
        }

        /// Advances and reports whether there is still an entry to visit.
        auto next() noexcept {
            ++(*this);
            return sentinel != skips;
        }
    };

    /// Returns an iterator positioned on the first entry.
    Iterator commence() {
        return {
            skips_.data(), skips_.data() + skips_.size(), characters_.data()
        };
    }
};
| 303 | + |
// X-macro list of the string-to-integer implementations to benchmark.
// Each entry is X(Typename, FunctionToCall); the meaning of X is supplied
// where the list is expanded.
#define ATOI_CORPUS_X_LIST \
    X(GLIBC_atoi, atoi) \
    X(ZOO_ATOI, zoo::c_strToI) \
    X(COMPARE_ATOI, zoo::compareAtoi)
| 306 | + |
146 | 307 | #define X(Typename, FunctionToCall) \
|
147 | 308 | struct Invoke##Typename { int operator()(const char *p) { return FunctionToCall(p); } };
|
148 | 309 |
|
149 | 310 | PARSE8BYTES_CORPUS_X_LIST
|
150 | 311 | STRLEN_CORPUS_X_LIST
|
| 312 | +LEADING_SPACES_CORPUS_X_LIST |
| 313 | +ATOI_CORPUS_X_LIST |
151 | 314 |
|
152 | 315 | #undef X
|
0 commit comments