Skip to content

Consider adding UUIDv5 support? #11

@mofosyne

Description

@mofosyne

This is what I wrote; it worked well for me using your sha1 implementation.
Might be something to consider including. (or maybe it should be a separate C module?)

// Namespace UUID that is hashed ahead of the data when deriving a UUIDv5.
// The first macro is the canonical 8-4-4-4-12 text form; the second is the
// same 16 bytes written as a comma-separated list for use as an array initializer.
// Keep the two in sync if the namespace ever changes.
#define UUID_NAMESPACE_LLAMA_CPP "ef001206-dadc-5f6d-a15f-3359e577d4e5"
#define UUID_NAMESPACE_LLAMA_CPP_HEX 0xef, 0x00, 0x12, 0x06, 0xda, 0xdc, 0x5f, 0x6d, 0xa1, 0x5f, 0x33, 0x59, 0xe5, 0x77, 0xd4, 0xe5

...

// Turn an already-computed SHA-1 digest into the 16 bytes of a version-5 UUID.
// Ref: https://www.rfc-editor.org/rfc/rfc9562.html#section-5.5
//
// sha1_digest: SHA-1 output; only its first 16 bytes are consumed. The caller is
//              responsible for having hashed the expected namespace into it.
// uuid:        receives the resulting 16 UUID bytes.
static void generate_uuidv5(const unsigned char sha1_digest[20], unsigned char uuid[16]) {
    const unsigned char *src = sha1_digest;
    unsigned char *dst = uuid;
    unsigned char *const end = uuid + 16;

    // Truncate the digest to the UUID length.
    while (dst != end) {
        *dst++ = *src++;
    }

    // Version field: force the high nibble of byte 6 to 0x5, keep the low nibble.
    uuid[6] = (unsigned char)((uuid[6] & 0x0F) | 0x50);

    // Variant field: force the top two bits of byte 8 to 0b10, keep the low six bits.
    uuid[8] = (unsigned char)((uuid[8] & 0x3F) | 0x80);
}

// Computes a UUIDv5 over the tensor data and prints it to stdout as
// "UUIDv5  <uuid>". The hash is SHA-1 over the llama.cpp namespace bytes
// followed by the data fed in the tensor loop.
// Returns true on the path shown here; parts of the body are elided ("..."),
// so other return paths may exist.
static bool gguf_uuid(const hash_params & hash_params) {
...
    // sha1 init
    SHA1_CTX sha1_model_hash_ctx;
    SHA1Init(&sha1_model_hash_ctx);

    // UUIDv5 hashes the namespace first, then the name (here: the tensor bytes).
    unsigned char const uuidv5_namespace[] = {UUID_NAMESPACE_LLAMA_CPP_HEX};
    SHA1Update( &sha1_model_hash_ctx, (unsigned char const *)uuidv5_namespace, sizeof(uuidv5_namespace));

    // Feed each tensor's bytes into the same running hash.
    // NOTE(review): n_tensors, raw_data and n_bytes come from the elided code — confirm their origin.
    for (int i = 0; i < n_tensors; ++i) {
...
        SHA1Update( &sha1_model_hash_ctx, (unsigned char const *)raw_data, n_bytes);
...
    }

    // NOTE(review): buffer is 21 bytes; SHA1Final presumably writes a 20-byte digest — confirm against the sha1 API.
    unsigned char result[21];
    SHA1Final(result, &sha1_model_hash_ctx);

    // Only the first 16 digest bytes are used; version/variant bits are set by generate_uuidv5.
    unsigned char uuid[16];
    generate_uuidv5(result, uuid);

    // 32 hex digits + 4 hyphens = 36 characters, plus the terminating NUL = 37.
    char string_buffer[37] = {0};
    sprintf(string_buffer, "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
        uuid[0], uuid[1], uuid[2], uuid[3],
        uuid[4], uuid[5], uuid[6], uuid[7],
        uuid[8], uuid[9], uuid[10], uuid[11],
        uuid[12], uuid[13], uuid[14], uuid[15]);
    printf("UUIDv5  %s\n", string_buffer);
...
    return true;
}

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions