Skip to content

Supporting picobench #1107

@chuggafan

Description

@chuggafan

When compiling picobench (https://github.com/iboB/picobench/) v2.8.0, the compiler built from the fix_omake_real branch fails to compile several constructs.
Some of these are reasonable, some of these are not.
I have no idea if the push_back on a freshly constructed item is a good thing or not, but emplace_back should be used instead in those cases.

As of right now, occparse crashes on my memcpy.cpp file:

#include <ctype.h>
#include <stdint.h>
#include <inttypes.h>
#include <stdlib.h>
#include <stdio.h>
#define PICOBENCH_IMPLEMENT_WITH_MAIN
#include <picobench.h>
#define WORD_TYPE size_t
// Baseline copy: moves sz bytes from s2 to s1 one byte at a time, in
// ascending address order, and returns s1.
void* memcpy_basic(void* s1, const void* s2, size_t sz)
{
    char* out = (char*)s1;
    const char* in = (const char*)s2;
    size_t remaining = sz;
    while (remaining != 0)
    {
        *out++ = *in++;
        --remaining;
    }
    return s1;
}
// Copies sz bytes from s2 to s1 using the x86 `rep movsb` string instruction
// and returns s1. Copies shorter than 64 bytes are delegated to memcpy_basic.
// NOTE(review): the asm block uses the 32-bit registers esi/edi/ecx, so this
// presumably targets a 32-bit x86 build -- confirm before building elsewhere.
void* memcpy_repmovs(void* s1, const void* s2, size_t sz)
{
    // Small copies take the plain byte loop instead of the string instruction.
    if (sz < 64)
    {
        return memcpy_basic(s1, s2, sz);
    }
    // Load source into esi, destination into edi and the byte count into ecx,
    // clear the direction flag so the addresses ascend, then let `rep movsb`
    // move ecx bytes from [esi] to [edi].
    asm
    {
        mov esi, [s2];
        mov edi, [s1];
        mov ecx, [sz];
        cld;
        rep movsb;
    }
    return s1;
}
void* memcpy_aligned(void* dest, const void* src, size_t sz)
{
    size_t current_copy_location = 0;
    // Garbage hueristic
    if (sz < 128)
    {
        return memcpy_basic(dest, src, sz);
    }
    uintptr_t end = (uintptr_t)dest;
    uintptr_t start = (uintptr_t)src;
    size_t climb_point = (end % sizeof(WORD_TYPE));
    if (start % sizeof(WORD_TYPE) != 0 && climb_point == (start % sizeof(WORD_TYPE)))
    {
        size_t amount_to_copy = sizeof(WORD_TYPE) - climb_point;
        switch (amount_to_copy)
        {
#if SIZE_WIDTH > 32
            case 7:
                ((char*)dest)[7] = ((const char*)src)[7];
            case 6:
                ((char*)dest)[6] = ((const char*)src)[6];
            case 5:
                ((char*)dest)[5] = ((const char*)src)[5];
            case 4:
                ((char*)dest)[4] = ((const char*)src)[4];
#endif
            case 3:
                ((char*)dest)[3] = ((const char*)src)[3];
            case 2:
                ((char*)dest)[2] = ((const char*)src)[2];
            case 1:
                ((char*)dest)[1] = ((const char*)src)[1];
            default:
                break;
        }
        current_copy_location = amount_to_copy;
        printf("Climb point is: %zu, amount_to_copy is: %zu\n", climb_point, amount_to_copy);
    }
    size_t i = current_copy_location;
    for (; i <= (sz - sizeof(WORD_TYPE)); i += sizeof(WORD_TYPE))
    {
        WORD_TYPE* dest_temp = (WORD_TYPE*)dest;
        const WORD_TYPE* src_temp = (const WORD_TYPE*)src;
        *(dest_temp + i) = *(src_temp + i);
        printf("Setting location at %zu, src_temp is: %x\n", i, src_temp[i]);
        for (size_t j = 0; j < sizeof(WORD_TYPE); j++)
        {
            printf("Expected at %zu value %dhh, actually has %dhh\n", i + j, ((const char*)src)[i + j], ((char*)dest)[i + j]);
        }
    }
    for (; i < sz; i++)
    {
        ((char*)dest)[i] = ((const char*)src)[i];
    }
    return dest;
}
// Copies sz bytes from s2 to s1 and returns s1, using `rep movsd` for the bulk
// of the data. Copies shorter than 128 bytes are delegated to memcpy_basic.
//
// Fix relative to the previous version: ecx is now loaded with the number of
// dwords to move (stride_length_prepped) instead of the number of bytes
// (stride_length). `rep movsd` moves ecx 4-byte units, so passing the byte
// count copied four times too much data and overran both buffers. The stride
// is also computed in 4-byte units explicitly, so it matches what movsd moves
// regardless of sizeof(WORD_TYPE). Unused locals were dropped.
//
// NOTE(review): the asm block uses the 32-bit registers esi/edi/ecx, so this
// presumably targets a 32-bit x86 build -- confirm before building elsewhere.
void* memcpy_aligned_repmovsd(void* s1, const void* s2, size_t sz)
{
    // Garbage heuristic: small copies are not worth the alignment bookkeeping.
    if (sz < 128)
    {
        return memcpy_basic(s1, s2, sz);
    }
    size_t current_copy_location = 0;

    uintptr_t end = (uintptr_t)s1;
    uintptr_t start = (uintptr_t)s2;
    size_t climb_point = (end % sizeof(WORD_TYPE));
    size_t amount_to_copy = sizeof(WORD_TYPE) - climb_point;
    // Head: byte-copy up to the next word boundary, but only when source and
    // destination share the same non-zero misalignment.
    if (climb_point != 0 && climb_point == (start % sizeof(WORD_TYPE)))
    {
        for (size_t i = 0; i < amount_to_copy && i < sz; i++)
        {
            ((char*)s1)[i] = ((const char*)s2)[i];
            current_copy_location++;
        }
    }
    {
        WORD_TYPE* s1_start = (WORD_TYPE*)((char*)s1 + current_copy_location);
        WORD_TYPE* s2_start = (WORD_TYPE*)((char*)s2 + current_copy_location);

        // For the stride length, we need the total number of bytes left after
        // the alignment head, rounded down to a whole number of dwords.
        // Example: copy 69 bytes with 4-byte alignment, starting at address 2.
        //   - The head loop copies 2 bytes to reach address 4.
        //   - 67 bytes remain; this block copies 64 of them as 16 dwords.
        //   - The final loop copies the last 3 bytes.
        // So the "starting stride" is sz - current_copy_location (67), and
        // subtracting its remainder modulo the dword size yields 64.
        const size_t dword_size = sizeof(uint32_t);  // movsd moves 4 bytes per step
        size_t start_stride = sz - current_copy_location;

        size_t stride_length = start_stride - (start_stride % dword_size);
        size_t stride_length_prepped = stride_length / dword_size;
        // Destination in edi, source in esi, dword count in ecx; clear the
        // direction flag so addresses ascend, then move ecx dwords.
        asm {
            mov edi, [s1_start]
            mov esi, [s2_start]
            mov ecx, [stride_length_prepped]
            cld
            rep movsd
        }
        current_copy_location += stride_length;
    }
    // Tail: whatever is left over after the last whole dword.
    for (size_t i = current_copy_location; i < sz; i++)
    {
        ((char*)s1)[i] = ((const char*)s2)[i];
    }
    return s1;
}
// Benchmark body for the library memcpy. The buffer size is taken from
// state.iterations(); only the memcpy call itself is inside the timed region.
// Afterwards the destination and source are compared against an untouched
// reference copy and the outcome is reported through state.set_result().
void test_memcpy(picobench::state& state)
{
    size_t byte_count = state.iterations();

    char* src_buf = (char*)malloc(byte_count);
    char* reference = (char*)malloc(byte_count);
    char* dst_buf = (char*)malloc(byte_count);

    // Fill the source with a repeating 0..255 pattern and mirror it into the
    // reference buffer.
    size_t fill = 0;
    while (fill < byte_count)
    {
        src_buf[fill] = fill % 256;
        reference[fill] = src_buf[fill];
        ++fill;
    }

    state.start_timer();
    memcpy(dst_buf, src_buf, byte_count);
    state.stop_timer();

    // Stop at the first byte where either buffer disagrees with the reference.
    bool all_match = true;
    for (size_t pos = 0; all_match && pos < byte_count; ++pos)
    {
        all_match = (reference[pos] == src_buf[pos]) && (reference[pos] == dst_buf[pos]);
    }
    state.set_result(all_match);

    free(src_buf);
    free(reference);
    free(dst_buf);
}
PICOBENCH(test_memcpy);
// Benchmark body for memcpy_basic. The buffer size is taken from
// state.iterations(); only the memcpy_basic call itself is inside the timed
// region. Afterwards the destination and source are compared against an
// untouched reference copy and the outcome is reported through
// state.set_result().
void test_memcpy_basic(picobench::state& state)
{
    size_t byte_count = state.iterations();

    char* src_buf = (char*)malloc(byte_count);
    char* reference = (char*)malloc(byte_count);
    char* dst_buf = (char*)malloc(byte_count);

    // Fill the source with a repeating 0..255 pattern and mirror it into the
    // reference buffer.
    size_t fill = 0;
    while (fill < byte_count)
    {
        src_buf[fill] = fill % 256;
        reference[fill] = src_buf[fill];
        ++fill;
    }

    state.start_timer();
    memcpy_basic(dst_buf, src_buf, byte_count);
    state.stop_timer();

    // Stop at the first byte where either buffer disagrees with the reference.
    bool all_match = true;
    for (size_t pos = 0; all_match && pos < byte_count; ++pos)
    {
        all_match = (reference[pos] == src_buf[pos]) && (reference[pos] == dst_buf[pos]);
    }
    state.set_result(all_match);

    free(src_buf);
    free(reference);
    free(dst_buf);
}
PICOBENCH(test_memcpy_basic);
// Benchmark body for memcpy_repmovs. The buffer size is taken from
// state.iterations(); only the memcpy_repmovs call itself is inside the timed
// region. Afterwards the destination and source are compared against an
// untouched reference copy and the outcome is reported through
// state.set_result().
void test_memcpy_repmovs(picobench::state& state)
{
    size_t byte_count = state.iterations();

    char* src_buf = (char*)malloc(byte_count);
    char* reference = (char*)malloc(byte_count);
    char* dst_buf = (char*)malloc(byte_count);

    // Fill the source with a repeating 0..255 pattern and mirror it into the
    // reference buffer.
    size_t fill = 0;
    while (fill < byte_count)
    {
        src_buf[fill] = fill % 256;
        reference[fill] = src_buf[fill];
        ++fill;
    }

    state.start_timer();
    memcpy_repmovs(dst_buf, src_buf, byte_count);
    state.stop_timer();

    // Stop at the first byte where either buffer disagrees with the reference.
    bool all_match = true;
    for (size_t pos = 0; all_match && pos < byte_count; ++pos)
    {
        all_match = (reference[pos] == src_buf[pos]) && (reference[pos] == dst_buf[pos]);
    }
    state.set_result(all_match);

    free(src_buf);
    free(reference);
    free(dst_buf);
}
PICOBENCH(test_memcpy_repmovs);

(Yes, I know memcpy_aligned does not work; I'm still trying to figure out why. I also recognize that these implementations aren't particularly efficient computationally, and I'll tighten them up at some point, but for now they serve as baselines.)

Either way, with the latest master (and the latest hashing branch), compiling this file crashes occparse in Parser::TemplateDeduceArgsFromArgs: it dereferences a null pointer while performing checks on the type's BaseType, producing the following stack trace:

6e8dba: Parser::__ispointer(Parser::Type*) + 0x6  module: types.cpp, line: 488
                        5070d8: Parser::Type::IsPtr() + 0x19  module: types.cpp, line: 493
                        506e4a: Parser::Type::IsArray() + 0x1c  module: types.cpp, line: 434
                        477a57: Parser::TemplateDeduceArgsFromArgs(Parser::sym*, Parser::CallSite*) + 0xf1f  module: templatededuce.cpp, line: 1514
                        48f436: Parser::detemplate(Parser::sym*, Parser::CallSite*, Parser::Type*) + 0x12b  module: overload.cpp, line: 4049
                        490862: Parser::insertFuncs(Parser::sym**, list<Parser::sym*, allocator<Parser::sym*>>&, Parser::CallSite*, Parser::Type*, int) + 0x520  module: overload.cpp, line: 4334
                        493591: Parser::GetOverloadedFunction(Parser::Type**, Parser::expr**, Parser::sym*, Parser::CallSite*, Parser::Type*, int, bool, int) + 0xc82  module: overload.cpp, line: 4931
                        4546be: Parser::expression_arguments(Parser::LexList*, Parser::sym*, Parser::Type**, Parser::expr**, int) + 0xc67  module: expr.cpp, line: 4536
                        45d768: Parser::expression_postfix(Parser::LexList*, Parser::sym*, Parser::Type*, Parser::Type**, Parser::expr**, bool*, int) + 0x463  module: expr.cpp, line: 7509
                        45e0ce: Parser::expression_unary(Parser::LexList*, Parser::sym*, Parser::Type*, Parser::Type**, Parser::expr**, bool*, int) + 0x4fb  module: expr.cpp, line: 7725

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions