Skip to content

CUDA compiler out of heap space #13086

@CommanderLake

Description

@CommanderLake

Two static constexpr functions in mmq.cuh, mmq_get_dp4a_tile_x_sizes and mmq_get_mma_tile_x_k, cause the CUDA compiler to run out of heap space because of their long chains of cascaded conditional (`?:`) operators. I don't know whether this has been fixed in a later compiler release, but I'm using CUDA 11.4. Rewriting both functions with switch statements avoids the problem; this is how they should be written in mmq.cuh:

// Selects the DP4A tile_x_sizes entry for a quantization type.
//
// Each supported ggml_type is mapped to one of the MMQ_DP4A_TXS_* definitions;
// any type not listed below yields an all-zero tile_x_sizes{0, 0, 0}.
//
// The mapping is written as a switch instead of one long chained ?: expression
// because the chained form was reported to exhaust the compiler's heap on
// CUDA 11.4.
//
// NOTE(review): mmq_y is not referenced directly in this body; presumably the
// MMQ_DP4A_TXS_* macros expand to expressions that use it -- confirm before
// renaming or removing the parameter.
static constexpr __host__ __device__ tile_x_sizes mmq_get_dp4a_tile_x_sizes(ggml_type type, int mmq_y) {
    switch (type) {
        // Types with a dedicated tile layout.
        case GGML_TYPE_Q4_0: return MMQ_DP4A_TXS_Q4_0;
        case GGML_TYPE_Q4_1: return MMQ_DP4A_TXS_Q4_1;
        case GGML_TYPE_Q5_1: return MMQ_DP4A_TXS_Q8_1;
        case GGML_TYPE_Q2_K: return MMQ_DP4A_TXS_Q2_K;
        case GGML_TYPE_Q3_K: return MMQ_DP4A_TXS_Q3_K;
        case GGML_TYPE_Q4_K: return MMQ_DP4A_TXS_Q4_K;
        case GGML_TYPE_Q5_K: return MMQ_DP4A_TXS_Q5_K;
        case GGML_TYPE_Q6_K: return MMQ_DP4A_TXS_Q6_K;

        // Types that share the Q8_0 tile layout.
        case GGML_TYPE_Q5_0:    case GGML_TYPE_Q8_0:
        case GGML_TYPE_IQ2_XXS: case GGML_TYPE_IQ3_XXS:
        case GGML_TYPE_IQ3_S:   case GGML_TYPE_IQ1_S:
        case GGML_TYPE_IQ4_XS:  case GGML_TYPE_IQ4_NL:
            return MMQ_DP4A_TXS_Q8_0;

        // Types that share the Q8_0_16 tile layout.
        case GGML_TYPE_IQ2_XS:  case GGML_TYPE_IQ2_S:
            return MMQ_DP4A_TXS_Q8_0_16;

        default:
            break;
    }

    // Type not handled above: all-zero sizes.
    return tile_x_sizes{0, 0, 0};
}
// Selects the MMQ_MMA_TILE_X_K_* constant for a quantization type.
//
// Several types share the same constant and are grouped together below; any
// type not listed yields 0.
//
// The mapping is written as a switch instead of one long chained ?: expression
// because the chained form was reported to exhaust the compiler's heap on
// CUDA 11.4.
static constexpr __host__ __device__ int mmq_get_mma_tile_x_k(ggml_type type) {
    switch (type) {
        // Types that use the Q8_1 constant.
        case GGML_TYPE_Q4_1: case GGML_TYPE_Q5_1:
        case GGML_TYPE_Q4_K: case GGML_TYPE_Q5_K:
            return MMQ_MMA_TILE_X_K_Q8_1;

        // Types that use the Q8_0 constant.
        case GGML_TYPE_Q4_0:    case GGML_TYPE_Q5_0:
        case GGML_TYPE_Q8_0:    case GGML_TYPE_IQ2_XXS:
        case GGML_TYPE_IQ3_XXS: case GGML_TYPE_IQ3_S:
        case GGML_TYPE_IQ1_S:   case GGML_TYPE_IQ4_XS:
        case GGML_TYPE_IQ4_NL:
            return MMQ_MMA_TILE_X_K_Q8_0;

        // IQ2_XS and IQ2_S return the same constant as Q3_K.
        case GGML_TYPE_Q3_K: case GGML_TYPE_IQ2_XS: case GGML_TYPE_IQ2_S:
            return MMQ_MMA_TILE_X_K_Q3_K;

        case GGML_TYPE_Q2_K: return MMQ_MMA_TILE_X_K_Q2_K;
        case GGML_TYPE_Q6_K: return MMQ_MMA_TILE_X_K_Q6_K;

        default:
            break;
    }

    // Type not handled above.
    return 0;
}

Metadata

Metadata

Labels

No labels
No labels

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions