Skip to content

Commit bafb2e9

Browse files
slaren, ckastner
authored and committed
ggml-cpu: Delay some initializations until function is called
When using GGML_BACKEND_DL=ON, these initializations might use instructions that are not supported by the current CPU.
1 parent 4358c2d commit bafb2e9

File tree

1 file changed

+15
-14
lines changed

1 file changed

+15
-14
lines changed

ggml/src/ggml-cpu/repack.cpp

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1397,44 +1397,45 @@ template <typename BLOC_TYPE, int64_t INTER_SIZE, int64_t NB_COLS, ggml_type PAR
13971397
}
13981398
};
13991399

1400-
// instance for Q4
1401-
static const tensor_traits<block_q4_0, 4, 4, GGML_TYPE_Q8_0> q4_0_4x4_q8_0;
1402-
static const tensor_traits<block_q4_0, 8, 4, GGML_TYPE_Q8_0> q4_0_4x8_q8_0;
1403-
static const tensor_traits<block_q4_0, 8, 8, GGML_TYPE_Q8_0> q4_0_8x8_q8_0;
1404-
static const tensor_traits<block_q4_K, 8, 8, GGML_TYPE_Q8_K> q4_K_8x8_q8_K;
1405-
1406-
// instance for IQ4
1407-
static const tensor_traits<block_iq4_nl, 4, 4, GGML_TYPE_Q8_0> iq4_nl_4x4_q8_0;
1408-
14091400
} // namespace ggml::cpu::repack
14101401

14111402
static const ggml::cpu::tensor_traits * ggml_repack_get_optimal_repack_type(const struct ggml_tensor * cur) {
1403+
1404+
// instance for Q4
1405+
static const ggml::cpu::repack::tensor_traits<block_q4_0, 4, 4, GGML_TYPE_Q8_0> q4_0_4x4_q8_0;
1406+
static const ggml::cpu::repack::tensor_traits<block_q4_0, 8, 4, GGML_TYPE_Q8_0> q4_0_4x8_q8_0;
1407+
static const ggml::cpu::repack::tensor_traits<block_q4_0, 8, 8, GGML_TYPE_Q8_0> q4_0_8x8_q8_0;
1408+
static const ggml::cpu::repack::tensor_traits<block_q4_K, 8, 8, GGML_TYPE_Q8_K> q4_K_8x8_q8_K;
1409+
1410+
// instance for IQ4
1411+
static const ggml::cpu::repack::tensor_traits<block_iq4_nl, 4, 4, GGML_TYPE_Q8_0> iq4_nl_4x4_q8_0;
1412+
14121413
if (cur->type == GGML_TYPE_Q4_0) {
14131414
if (ggml_cpu_has_avx2() || (ggml_cpu_has_sve() && ggml_cpu_has_matmul_int8() && ggml_cpu_get_sve_cnt() == QK8_0)) {
14141415
if (cur->ne[1] % 8 == 0) {
1415-
return &ggml::cpu::repack::q4_0_8x8_q8_0;
1416+
return &q4_0_8x8_q8_0;
14161417
}
14171418
}
14181419
if (ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) {
14191420
if (cur->ne[1] % 4 == 0) {
1420-
return &ggml::cpu::repack::q4_0_4x8_q8_0;
1421+
return &q4_0_4x8_q8_0;
14211422
}
14221423
}
14231424
if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) {
14241425
if (cur->ne[1] % 4 == 0) {
1425-
return &ggml::cpu::repack::q4_0_4x4_q8_0;
1426+
return &q4_0_4x4_q8_0;
14261427
}
14271428
}
14281429
} else if (cur->type == GGML_TYPE_Q4_K) {
14291430
if (ggml_cpu_has_avx2()) {
14301431
if (cur->ne[1] % 8 == 0) {
1431-
return &ggml::cpu::repack::q4_K_8x8_q8_K;
1432+
return &q4_K_8x8_q8_K;
14321433
}
14331434
}
14341435
} else if (cur->type == GGML_TYPE_IQ4_NL) {
14351436
if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) {
14361437
if (cur->ne[1] % 4 == 0) {
1437-
return &ggml::cpu::repack::iq4_nl_4x4_q8_0;
1438+
return &iq4_nl_4x4_q8_0;
14381439
}
14391440
}
14401441
}

0 commit comments

Comments
 (0)