-
Notifications
You must be signed in to change notification settings - Fork 43
Description
When compiling https://github.com/iboB/picobench/ v2.8.0 the compiler as of the fix_omake_real branch fails to compile several things.
Some of these are reasonable, some of these are not.
I have no idea if the push_back on a freshly constructed item is a good thing or not, but emplace_back should be used instead in those cases.
As of right now, occparse crashes on my memcpy.cpp file:
#include <ctype.h>
#include <stdint.h>
#include <inttypes.h>
#include <stdlib.h>
#include <stdio.h>
#define PICOBENCH_IMPLEMENT_WITH_MAIN
#include <picobench.h>
#define WORD_TYPE size_t
/**
 * Baseline byte-at-a-time copy.
 *
 * Copies the first sz bytes of s2 into s1 in ascending address order.
 *
 * @param s1 destination buffer; must have room for sz bytes
 * @param s2 source buffer; must provide sz readable bytes
 * @param sz number of bytes to copy (0 copies nothing)
 * @return s1
 */
void* memcpy_basic(void* s1, const void* s2, size_t sz)
{
    char* out = static_cast<char*>(s1);
    const char* in = static_cast<const char*>(s2);
    const char* const stop = in + sz;

    // Walk both cursors forward until the source cursor reaches the end.
    while (in != stop)
    {
        *out++ = *in++;
    }
    return s1;
}
// Copies sz bytes from s2 to s1 and returns s1.
// Copies shorter than 64 bytes are delegated to memcpy_basic; everything else
// is done by a single `rep movsb` issued from the inline-assembly block below.
// NOTE(review): the block uses the brace-delimited `asm { ... }` dialect and
// 32-bit registers, so this presumably only builds for a 32-bit x86 target
// with a compiler that accepts that syntax - confirm.
void* memcpy_repmovs(void* s1, const void* s2, size_t sz)
{
// Small copies go through the plain byte loop instead of the string instruction.
if (sz < 64)
{
return memcpy_basic(s1, s2, sz);
}
// Register setup for the string move: esi = source pointer, edi = destination
// pointer, ecx = byte count. `cld` clears the direction flag so the pointers
// advance upward, then `rep movsb` copies ecx bytes from [esi] to [edi].
// NOTE(review): esi/edi/ecx are overwritten here; presumably the compiler
// saves and restores what it needs around the asm block - confirm.
asm
{
mov esi, [s2];
mov edi, [s1];
mov ecx, [sz];
cld;
rep movsb;
}
return s1;
}
// Fallback so this function still builds if the file-level WORD_TYPE
// definition is moved or removed; it is a no-op when WORD_TYPE is already set.
#ifndef WORD_TYPE
#define WORD_TYPE size_t
#endif

/**
 * Copies sz bytes from src to dest, moving whole WORD_TYPE words when possible.
 *
 * Strategy:
 *   1. If the copy is large enough and both pointers have the same offset
 *      within a word, copy single bytes until both are word-aligned.
 *   2. Copy as many whole words as fit.
 *   3. Copy whatever is left byte by byte.
 * Small copies, and copies whose source and destination are misaligned by
 * different amounts (so they can never both be word-aligned at once), skip
 * steps 1-2 and are handled entirely by step 3.
 *
 * Fixes relative to the previous version:
 *   - the alignment prologue copied bytes [1..k] instead of [0..k-1] (and
 *     silently copied nothing for k > 3 when SIZE_WIDTH was not defined);
 *   - the word loop added a byte offset to a WORD_TYPE pointer, which scaled
 *     the offset by sizeof(WORD_TYPE) and accessed memory out of bounds;
 *   - debugging printf calls with invalid format specifiers were removed from
 *     the copy loops.
 *
 * @param dest destination buffer; must have room for sz bytes
 * @param src  source buffer; must provide sz readable bytes
 * @param sz   number of bytes to copy
 * @return dest
 */
void* memcpy_aligned(void* dest, const void* src, size_t sz)
{
    char* out = (char*)dest;
    const char* in = (const char*)src;
    const size_t word = sizeof(WORD_TYPE);
    const size_t dest_offset = (size_t)((uintptr_t)dest % word);
    const size_t src_offset = (size_t)((uintptr_t)src % word);
    size_t i = 0;

    // Garbage heuristic: below 128 bytes the alignment bookkeeping is not
    // assumed to pay off, so only the trailing byte loop runs.
    if (sz >= 128 && dest_offset == src_offset)
    {
        // Prologue: advance to the next word boundary one byte at a time.
        if (dest_offset != 0)
        {
            const size_t head = word - dest_offset;
            for (; i < head; i++)
            {
                out[i] = in[i];
            }
        }

        // Bulk: i is a byte offset, so the offset is applied to the char
        // pointers before reinterpreting the address as a word.
        // `sz - i >= word` cannot underflow because i never exceeds sz here.
        for (; sz - i >= word; i += word)
        {
            *(WORD_TYPE*)(out + i) = *(const WORD_TYPE*)(in + i);
        }
    }

    // Epilogue: remaining bytes (or the whole buffer on the fallback path).
    for (; i < sz; i++)
    {
        out[i] = in[i];
    }
    return dest;
}
void* memcpy_aligned_repmovsd(void* s1, const void* s2, size_t sz)
{
char* dest = (char*)s1;
const char* src = (const char*)s2;
size_t current_copy_location = 0;
// Garbage hueristic
if (sz < 128)
{
return memcpy_basic(s1, s2, sz);
}
size_t total_copied = 0;
uintptr_t end = (uintptr_t)s1;
uintptr_t start = (uintptr_t)s2;
size_t climb_point = (end % sizeof(WORD_TYPE));
size_t amount_to_copy = sizeof(WORD_TYPE) - climb_point;
if (climb_point != 0 && climb_point == (start % sizeof(WORD_TYPE)))
{
for (size_t i = 0; i < amount_to_copy && i < sz; i++)
{
((char*)s1)[i] = ((const char*)s2)[i];
current_copy_location++;
}
}
total_copied += current_copy_location;
{
WORD_TYPE* s1_start = (WORD_TYPE*)((char*)s1 + current_copy_location);
WORD_TYPE* s2_start = (WORD_TYPE*)((char*)s2 + current_copy_location);
// For the stride length, we need to get the total number we can actually go through minus the amount we needed to chop off
// for alignment Let's say we need to copy 69 bytes, our alignment is 4.
// Let's say we start at address 2, to get to our alignment point (address 4) we need to go through 2 bytes
// Afterwards, we need to copy 67 bytes.
// The first loop copies our 2 bytes.
// This loop copies our next 64 bytes in WORD_TYPE chunks (actually DWORD chunks)
// The final loop needs to copy 3 bytes
// From this, we know that the sz - current_copy_location is the "starting stride"
// Starting stride in our example will be 67, we need to knock it down to 64, how?
// Subtract out the modulus of our starting stride
size_t start_stride = sz - current_copy_location;
size_t stride_length = start_stride - (start_stride % sizeof(WORD_TYPE));
size_t stride_length_prepped = stride_length / sizeof(WORD_TYPE);
// Starting stride
asm {
mov edi, [s1_start]
mov esi, [s2_start]
mov ecx, [stride_length]
cld
rep movsd
}
total_copied += stride_length;
current_copy_location += stride_length;
}
for (size_t i = current_copy_location; i < sz; i++)
{
((char*)s1)[i] = ((const char*)s2)[i];
current_copy_location++;
total_copied++;
}
return s1;
}
/**
 * Benchmark body for the library memcpy.
 *
 * Allocates three buffers of state.iterations() bytes, fills the source and a
 * reference copy with the repeating pattern i % 256, times a single memcpy
 * call between start_timer() and stop_timer(), and reports through
 * set_result() whether source and destination both still match the reference.
 */
void test_memcpy(picobench::state& state)
{
    const size_t byte_count = state.iterations();
    char* const input = static_cast<char*>(malloc(byte_count));
    char* const reference = static_cast<char*>(malloc(byte_count));
    char* const output = static_cast<char*>(malloc(byte_count));

    for (size_t pos = 0; pos < byte_count; ++pos)
    {
        input[pos] = pos % 256;
        reference[pos] = input[pos];
    }

    // Only the copy itself is timed.
    state.start_timer();
    memcpy(output, input, byte_count);
    state.stop_timer();

    // Scan until the first byte where either buffer disagrees with the reference.
    size_t checked = 0;
    while (checked < byte_count
           && reference[checked] == input[checked]
           && reference[checked] == output[checked])
    {
        ++checked;
    }
    const bool all_match = (checked == byte_count);
    state.set_result(all_match);

    free(input);
    free(reference);
    free(output);
}
PICOBENCH(test_memcpy);
/**
 * Benchmark body for memcpy_basic.
 *
 * Allocates three buffers of state.iterations() bytes, fills the source and a
 * reference copy with the repeating pattern i % 256, times a single
 * memcpy_basic call between start_timer() and stop_timer(), and reports
 * through set_result() whether source and destination both still match the
 * reference.
 */
void test_memcpy_basic(picobench::state& state)
{
    const size_t byte_count = state.iterations();
    char* const input = static_cast<char*>(malloc(byte_count));
    char* const reference = static_cast<char*>(malloc(byte_count));
    char* const output = static_cast<char*>(malloc(byte_count));

    for (size_t pos = 0; pos < byte_count; ++pos)
    {
        input[pos] = pos % 256;
        reference[pos] = input[pos];
    }

    // Only the copy itself is timed.
    state.start_timer();
    memcpy_basic(output, input, byte_count);
    state.stop_timer();

    // Scan until the first byte where either buffer disagrees with the reference.
    size_t checked = 0;
    while (checked < byte_count
           && reference[checked] == input[checked]
           && reference[checked] == output[checked])
    {
        ++checked;
    }
    const bool all_match = (checked == byte_count);
    state.set_result(all_match);

    free(input);
    free(reference);
    free(output);
}
PICOBENCH(test_memcpy_basic);
/**
 * Benchmark body for memcpy_repmovs.
 *
 * Allocates three buffers of state.iterations() bytes, fills the source and a
 * reference copy with the repeating pattern i % 256, times a single
 * memcpy_repmovs call between start_timer() and stop_timer(), and reports
 * through set_result() whether source and destination both still match the
 * reference.
 */
void test_memcpy_repmovs(picobench::state& state)
{
    const size_t byte_count = state.iterations();
    char* const input = static_cast<char*>(malloc(byte_count));
    char* const reference = static_cast<char*>(malloc(byte_count));
    char* const output = static_cast<char*>(malloc(byte_count));

    for (size_t pos = 0; pos < byte_count; ++pos)
    {
        input[pos] = pos % 256;
        reference[pos] = input[pos];
    }

    // Only the copy itself is timed.
    state.start_timer();
    memcpy_repmovs(output, input, byte_count);
    state.stop_timer();

    // Scan until the first byte where either buffer disagrees with the reference.
    size_t checked = 0;
    while (checked < byte_count
           && reference[checked] == input[checked]
           && reference[checked] == output[checked])
    {
        ++checked;
    }
    const bool all_match = (checked == byte_count);
    state.set_result(all_match);

    free(input);
    free(reference);
    free(output);
}
PICOBENCH(test_memcpy_repmovs);
(Yes, I know memcpy_aligned does not work; I'm trying to figure out why. I also recognize that these aren't particularly efficient in terms of computation. I'll get them down at some point, but these are baselines.)
Either way, this file produces the stack trace below. With the latest master (and the latest hashing branch), occparse dereferences a null pointer in Parser::TemplateDeduceArgsFromArgs when it tries to perform checks on the type's BaseType:
6e8dba: Parser::__ispointer(Parser::Type*) + 0x6 module: types.cpp, line: 488
5070d8: Parser::Type::IsPtr() + 0x19 module: types.cpp, line: 493
506e4a: Parser::Type::IsArray() + 0x1c module: types.cpp, line: 434
477a57: Parser::TemplateDeduceArgsFromArgs(Parser::sym*, Parser::CallSite*) + 0xf1f module: templatededuce.cpp, line: 1514
48f436: Parser::detemplate(Parser::sym*, Parser::CallSite*, Parser::Type*) + 0x12b module: overload.cpp, line: 4049
490862: Parser::insertFuncs(Parser::sym**, list<Parser::sym*, allocator<Parser::sym*>>&, Parser::CallSite*, Parser::Type*, int) + 0x520 module: overload.cpp, line: 4334
493591: Parser::GetOverloadedFunction(Parser::Type**, Parser::expr**, Parser::sym*, Parser::CallSite*, Parser::Type*, int, bool, int) + 0xc82 module: overload.cpp, line: 4931
4546be: Parser::expression_arguments(Parser::LexList*, Parser::sym*, Parser::Type**, Parser::expr**, int) + 0xc67 module: expr.cpp, line: 4536
45d768: Parser::expression_postfix(Parser::LexList*, Parser::sym*, Parser::Type*, Parser::Type**, Parser::expr**, bool*, int) + 0x463 module: expr.cpp, line: 7509
45e0ce: Parser::expression_unary(Parser::LexList*, Parser::sym*, Parser::Type*, Parser::Type**, Parser::expr**, bool*, int) + 0x4fb module: expr.cpp, line: 7725