
Commit 7fe28ce

Merge branch 'ggml-org:master' into master

2 parents: bc05c26 + f6da8cb

File tree: 6 files changed, +59 −53 lines

.clang-format

Lines changed: 1 addition & 1 deletion

@@ -22,7 +22,7 @@ AllowShortIfStatementsOnASingleLine: Never
 AllowShortLambdasOnASingleLine: Inline
 AllowShortLoopsOnASingleLine: false
 AlwaysBreakBeforeMultilineStrings: true
-BinPackArguments: false
+BinPackArguments: true
 BinPackParameters: false # OnePerLine
 BitFieldColonSpacing: Both
 BreakBeforeBraces: Custom # Attach
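For context, a minimal illustration of what this switch controls (not part of the commit; all names are invented, and the calls are assumed to exceed the ColumnLimit): with BinPackArguments: false, an overflowing call gets one argument per line, while true packs arguments onto as few lines as possible.

```cpp
// Illustration only; configure_backend and the parameter names are invented.
void configure_backend(int device_index, int max_batch, int num_threads, int cache_mb);

void example(int device_index, int max_batch, int num_threads, int cache_mb) {
    // BinPackArguments: false -- one argument per line once the call no longer fits:
    configure_backend(device_index,
                      max_batch,
                      num_threads,
                      cache_mb);

    // BinPackArguments: true (the new setting) -- arguments packed onto as few lines as possible:
    configure_backend(device_index, max_batch,
                      num_threads, cache_mb);
}
```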

common/arg.cpp

Lines changed: 3 additions & 3 deletions

@@ -1548,11 +1548,11 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         {"-fa", "--flash-attn"}, "FA",
         string_format("set Flash Attention use ('on', 'off', or 'auto', default: '%s')", llama_flash_attn_type_name(params.flash_attn_type)),
         [](common_params & params, const std::string & value) {
-            if (value == "on" || value == "enabled") {
+            if (value == "on" || value == "enabled" || value == "1") {
                 params.flash_attn_type = LLAMA_FLASH_ATTN_TYPE_ENABLED;
-            } else if (value == "off" || value == "disabled") {
+            } else if (value == "off" || value == "disabled" || value == "0") {
                 params.flash_attn_type = LLAMA_FLASH_ATTN_TYPE_DISABLED;
-            } else if (value == "auto") {
+            } else if (value == "auto" || value == "-1") {
                 params.flash_attn_type = LLAMA_FLASH_ATTN_TYPE_AUTO;
             } else {
                 throw std::runtime_error(string_format("error: unkown value for --flash-attn: '%s'\n", value.c_str()));
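A standalone sketch of the values accepted after this change (the helper function and enum are illustrative, not llama.cpp API; only the string values and their mapping to enabled/disabled/auto come from the diff):

```cpp
#include <stdexcept>
#include <string>

enum class flash_attn_setting { ENABLED, DISABLED, AUTO };

// Mirrors the updated lambda in common/arg.cpp: the numeric aliases
// "1", "0" and "-1" now map to on/off/auto alongside the word forms.
static flash_attn_setting parse_flash_attn(const std::string & value) {
    if (value == "on"   || value == "enabled"  || value == "1")  { return flash_attn_setting::ENABLED;  }
    if (value == "off"  || value == "disabled" || value == "0")  { return flash_attn_setting::DISABLED; }
    if (value == "auto" || value == "-1")                        { return flash_attn_setting::AUTO;     }
    throw std::runtime_error("unknown value for --flash-attn: '" + value + "'");
}
```

In practice, invocations such as -fa 1, -fa 0, or --flash-attn -1 should now behave like on, off, and auto respectively.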

ggml/src/ggml-cann/aclnn_ops.cpp

Lines changed: 43 additions & 45 deletions

@@ -1767,10 +1767,10 @@ void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
         case GGML_TYPE_F16: {
             aclTensor* acl_src0 = ggml_cann_create_tensor(src0);
             ggml_cann_pool_alloc src_buffer_allocator(
-                ctx.pool(), ggml_nelements(src0) * sizeof(float_t));
+                ctx.pool(), ggml_nelements(src0) * sizeof(float));
             void* src_trans_buffer = src_buffer_allocator.get();
             size_t src_trans_nb[GGML_MAX_DIMS];
-            src_trans_nb[0] = sizeof(float_t);
+            src_trans_nb[0] = sizeof(float);
             for (int i = 1; i < GGML_MAX_DIMS; i++) {
                 src_trans_nb[i] = src_trans_nb[i - 1] * src0->ne[i - 1];
             }
@@ -1814,14 +1814,14 @@ void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
 
             // [3,4,5,64] -> [3,4,5,2,32]
             dequant_ne = weight_ne;
-            dequant_nb[0] = sizeof(float_t);
+            dequant_nb[0] = sizeof(float);
             for (int i = 1; i < GGML_MAX_DIMS + 1; i++) {
                 dequant_nb[i] = dequant_nb[i - 1] * dequant_ne[i - 1];
             }
 
             scale_offset = ggml_nelements(src0) * sizeof(int8_t);
             ggml_cann_pool_alloc dequant_buffer_allocator(
-                ctx.pool(), ggml_nelements(src0) * sizeof(float_t));
+                ctx.pool(), ggml_nelements(src0) * sizeof(float));
 
             aclTensor* acl_weight_tensor = ggml_cann_create_tensor(
                 src0->data, ACL_INT8, sizeof(int8_t), weight_ne, weight_nb,
@@ -1830,11 +1830,11 @@ void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
                 src0->data, ACL_FLOAT16, sizeof(uint16_t), scale_ne, scale_nb,
                 GGML_MAX_DIMS + 1, ACL_FORMAT_ND, scale_offset);
             aclTensor* dequant_tensor = ggml_cann_create_tensor(
-                dequant_buffer_allocator.get(), ACL_FLOAT, sizeof(float_t),
+                dequant_buffer_allocator.get(), ACL_FLOAT, sizeof(float),
                 dequant_ne, dequant_nb, GGML_MAX_DIMS + 1);
 
             aclnn_mul(ctx, acl_weight_tensor, acl_scale_tensor, dequant_tensor);
-            dequant_nb[0] = sizeof(float_t);
+            dequant_nb[0] = sizeof(float);
             dequant_ne = src0->ne;
             for (int i = 1; i < GGML_MAX_DIMS; i++) {
                 dequant_nb[i] = dequant_nb[i - 1] * src0->ne[i - 1];
@@ -2282,8 +2282,8 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
 
     int64_t theta_scale_length = src0->ne[0] / 2;
     int64_t theta_scale_ne[] = {theta_scale_length, 1, 1, 1};
-    size_t theta_scale_nb[] = {sizeof(float_t), sizeof(float_t), sizeof(float_t),
-                               theta_scale_length * sizeof(float_t)};
+    size_t theta_scale_nb[] = {sizeof(float), sizeof(float), sizeof(float),
+                               theta_scale_length * sizeof(float)};
 
     GGML_ASSERT(src1->type == GGML_TYPE_I32);
     int64_t position_length = src1->ne[0];
@@ -2293,7 +2293,7 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
 
     int64_t theta_ne[] = {theta_scale_length, 1, position_length, 1};
     size_t theta_nb[GGML_MAX_DIMS];
-    theta_nb[0] = sizeof(float_t);
+    theta_nb[0] = sizeof(float);
     for (int i = 1; i < GGML_MAX_DIMS; i++) {
         theta_nb[i] = theta_nb[i - 1] * theta_ne[i - 1];
     }
@@ -2314,10 +2314,10 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
         if (ctx.rope_cache.theta_scale_cache != nullptr) {
             ACL_CHECK(aclrtFree(ctx.rope_cache.theta_scale_cache));
         }
-        ACL_CHECK(aclrtMalloc(&ctx.rope_cache.theta_scale_cache, theta_scale_length * sizeof(float_t), ACL_MEM_MALLOC_HUGE_FIRST));
+        ACL_CHECK(aclrtMalloc(&ctx.rope_cache.theta_scale_cache, theta_scale_length * sizeof(float), ACL_MEM_MALLOC_HUGE_FIRST));
 
         acl_theta_scale_tensor =
-            ggml_cann_create_tensor(ctx.rope_cache.theta_scale_cache, ACL_FLOAT, sizeof(float_t),
+            ggml_cann_create_tensor(ctx.rope_cache.theta_scale_cache, ACL_FLOAT, sizeof(float),
                                     theta_scale_ne, theta_scale_nb, GGML_MAX_DIMS);
 
         float start = 0;
@@ -2383,20 +2383,20 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
     } else {
         // use cache
         acl_theta_scale_tensor =
-            ggml_cann_create_tensor(ctx.rope_cache.theta_scale_cache, ACL_FLOAT, sizeof(float_t),
+            ggml_cann_create_tensor(ctx.rope_cache.theta_scale_cache, ACL_FLOAT, sizeof(float),
                                     theta_scale_ne, theta_scale_nb, GGML_MAX_DIMS);
     }
 
     ggml_cann_pool_alloc freq_fac_res_allocator(ctx.pool());
     // freq_factors
     if (src2) {
-        freq_fac_res_allocator.alloc(theta_scale_length * sizeof(float_t));
+        freq_fac_res_allocator.alloc(theta_scale_length * sizeof(float));
         void* freq_fac_res_ptr = freq_fac_res_allocator.get();
         aclTensor* acl_freq_factors_tensor = ggml_cann_create_tensor(
             src2->data, ggml_cann_type_mapping(src2->type),
             ggml_type_size(src2->type), theta_scale_ne, theta_scale_nb, GGML_MAX_DIMS);
         aclTensor* acl_freq_fac_res_tensor = ggml_cann_create_tensor(
-            freq_fac_res_ptr, ACL_FLOAT, sizeof(float_t),
+            freq_fac_res_ptr, ACL_FLOAT, sizeof(float),
             theta_scale_ne, theta_scale_nb, GGML_MAX_DIMS);
         aclnn_div(ctx, acl_theta_scale_tensor, acl_freq_factors_tensor, acl_freq_fac_res_tensor);
         std::swap(acl_theta_scale_tensor, acl_freq_fac_res_tensor);
@@ -2411,29 +2411,29 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
     // power * position
     int64_t theta_length = theta_scale_length * position_length;
     ggml_cann_pool_alloc theta_allocator(ctx.pool(),
-                                         theta_length * sizeof(float_t));
+                                         theta_length * sizeof(float));
     void* theta_buffer = theta_allocator.get();
 
     aclTensor* acl_theta_tensor =
-        ggml_cann_create_tensor(theta_buffer, ACL_FLOAT, sizeof(float_t),
+        ggml_cann_create_tensor(theta_buffer, ACL_FLOAT, sizeof(float),
                                 theta_ne, theta_nb, GGML_MAX_DIMS);
     aclnn_mul(ctx, acl_position_tensor, acl_theta_scale_tensor,
               acl_theta_tensor);
 
     // sin/cos
     ggml_cann_pool_alloc sin_allocator(ctx.pool(),
-                                       theta_length * sizeof(float_t));
+                                       theta_length * sizeof(float));
     void* sin_buffer = sin_allocator.get();
     aclTensor* acl_sin_tensor = ggml_cann_create_tensor(
-        sin_buffer, ACL_FLOAT, sizeof(float_t), theta_ne, theta_nb,
+        sin_buffer, ACL_FLOAT, sizeof(float), theta_ne, theta_nb,
         GGML_MAX_DIMS, ACL_FORMAT_ND);
     aclnn_sin(ctx, acl_theta_tensor, acl_sin_tensor);
 
     ggml_cann_pool_alloc cos_allocator(ctx.pool(),
-                                       theta_length * sizeof(float_t));
+                                       theta_length * sizeof(float));
     void* cos_buffer = cos_allocator.get();
     aclTensor* acl_cos_tensor = ggml_cann_create_tensor(
-        cos_buffer, ACL_FLOAT, sizeof(float_t), theta_ne, theta_nb,
+        cos_buffer, ACL_FLOAT, sizeof(float), theta_ne, theta_nb,
         GGML_MAX_DIMS, ACL_FORMAT_ND);
     aclnn_cos(ctx, acl_theta_tensor, acl_cos_tensor);
 
@@ -2449,15 +2449,15 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
 
     int64_t sin_reshape_ne[4] = {src0->ne[0], 1, src0->ne[2], 1};
     size_t sin_reshape_nb[GGML_MAX_DIMS];
-    sin_reshape_nb[0] = sizeof(float_t);
+    sin_reshape_nb[0] = sizeof(float);
     for (int i = 1; i < GGML_MAX_DIMS; i++) {
         sin_reshape_nb[i] = sin_reshape_nb[i - 1] * sin_reshape_ne[i - 1];
     }
     aclTensor* acl_sin_repeat_tensor =
-        ggml_cann_create_tensor(sin_tensor_buffer, ACL_FLOAT, sizeof(float_t),
+        ggml_cann_create_tensor(sin_tensor_buffer, ACL_FLOAT, sizeof(float),
                                 sin_reshape_ne, sin_reshape_nb, GGML_MAX_DIMS);
     aclTensor* acl_cos_repeat_tensor =
-        ggml_cann_create_tensor(cos_tensor_buffer, ACL_FLOAT, sizeof(float_t),
+        ggml_cann_create_tensor(cos_tensor_buffer, ACL_FLOAT, sizeof(float),
                                 sin_reshape_ne, sin_reshape_nb, GGML_MAX_DIMS);
 
     // repeat
@@ -2543,15 +2543,15 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
 
     int64_t sin_reshape_ne[4] = {ne00, 1, ne02, 1};
     size_t sin_reshape_nb[GGML_MAX_DIMS];
-    sin_reshape_nb[0] = sizeof(float_t);
+    sin_reshape_nb[0] = sizeof(float);
     for (int i = 1; i < GGML_MAX_DIMS; i++) {
         sin_reshape_nb[i] = sin_reshape_nb[i - 1] * sin_reshape_ne[i - 1];
     }
     aclTensor* acl_sin_reshape_tensor =
-        ggml_cann_create_tensor(sin_tensor_buffer, ACL_FLOAT, sizeof(float_t),
+        ggml_cann_create_tensor(sin_tensor_buffer, ACL_FLOAT, sizeof(float),
                                 sin_reshape_ne, sin_reshape_nb, GGML_MAX_DIMS);
     aclTensor* acl_cos_reshape_tensor =
-        ggml_cann_create_tensor(cos_tensor_buffer, ACL_FLOAT, sizeof(float_t),
+        ggml_cann_create_tensor(cos_tensor_buffer, ACL_FLOAT, sizeof(float),
                                 sin_reshape_ne, sin_reshape_nb, GGML_MAX_DIMS);
 
     aclTensor* acl_src = ggml_cann_create_tensor(src0);
@@ -2566,7 +2566,7 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     void* minus_one_scale_buffer = nullptr;
     ggml_cann_pool_alloc roll_allocator(ctx.pool(), ggml_nbytes(src0));
     ggml_cann_pool_alloc minus_one_scale_allocator(
-        ctx.pool(), sizeof(float_t) * src0->ne[0]);
+        ctx.pool(), sizeof(float) * src0->ne[0]);
     if (!is_neox) {
         // roll input: [q0,q1,q2,q3,...] -> [q1,q0,q3,q2,...]
         input_roll_buffer = roll_allocator.get();
@@ -2596,13 +2596,13 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
 
         int64_t minus_one_ne[4] = {src0->ne[0], 1, 1, 1};
         size_t minus_one_nb[GGML_MAX_DIMS];
-        minus_one_nb[0] = sizeof(float_t);
+        minus_one_nb[0] = sizeof(float);
         for (int i = 1; i < GGML_MAX_DIMS; i++) {
             minus_one_nb[i] = minus_one_nb[i - 1] * minus_one_ne[i - 1];
         }
         acl_minus_one_tensor = aclnn_values(
-            ctx, minus_one_scale_buffer, sizeof(float_t) * src0->ne[0],
-            minus_one_ne, GGML_MAX_DIMS, ACL_FLOAT, sizeof(float_t), 1);
+            ctx, minus_one_scale_buffer, sizeof(float) * src0->ne[0],
+            minus_one_ne, GGML_MAX_DIMS, ACL_FLOAT, sizeof(float), 1);
         int64_t dim = 3;
         int64_t* index = new int64_t[src0->ne[0]];
         for (int i = 0; i < src0->ne[0]; i++) {
@@ -2630,22 +2630,22 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
         minus_one_scale_buffer = minus_one_scale_allocator.get();
         int64_t minus_one_ne[4] = {src0->ne[0], 1, 1, 1};
         size_t minus_one_nb[GGML_MAX_DIMS];
-        minus_one_nb[0] = sizeof(float_t);
+        minus_one_nb[0] = sizeof(float);
         for (int i = 1; i < GGML_MAX_DIMS; i++) {
             minus_one_nb[i] = minus_one_nb[i - 1] * minus_one_ne[i - 1];
         }
         acl_minus_one_tensor = aclnn_values(
-            ctx, minus_one_scale_buffer, sizeof(float_t) * src0->ne[0],
-            minus_one_ne, GGML_MAX_DIMS, ACL_FLOAT, sizeof(float_t), 1);
+            ctx, minus_one_scale_buffer, sizeof(float) * src0->ne[0],
+            minus_one_ne, GGML_MAX_DIMS, ACL_FLOAT, sizeof(float), 1);
         // -1 * first half
         int64_t first_half_ne[4] = {src0->ne[0] / 2, 1, 1, 1};
         size_t first_half_nb[GGML_MAX_DIMS];
-        first_half_nb[0] = sizeof(float_t);
+        first_half_nb[0] = sizeof(float);
         for (int i = 1; i < GGML_MAX_DIMS; i++) {
             first_half_nb[i] = first_half_nb[i - 1] * first_half_ne[i - 1];
         }
         aclTensor* acl_first_half_tensor = ggml_cann_create_tensor(
-            minus_one_scale_buffer, ACL_FLOAT, sizeof(float_t), first_half_ne,
+            minus_one_scale_buffer, ACL_FLOAT, sizeof(float), first_half_ne,
             first_half_nb, GGML_MAX_DIMS);
         bool inplace = true;
         float scale = -1;
@@ -2685,28 +2685,28 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
         // TODO: ne0 != n_dims in mode2
     } else if (src0->type == GGML_TYPE_F16) {
         size_t input_fp32_nb[GGML_MAX_DIMS];
-        input_fp32_nb[0] = sizeof(float_t);
+        input_fp32_nb[0] = sizeof(float);
        for (int i = 1; i < GGML_MAX_DIMS; i++) {
            input_fp32_nb[i] = input_fp32_nb[i - 1] * dst->ne[i - 1];
        }
        ggml_cann_pool_alloc fp32_allocator1(
-            ctx.pool(), ggml_nelements(dst) * sizeof(float_t));
+            ctx.pool(), ggml_nelements(dst) * sizeof(float));
        void* input_fp32_buffer1 = fp32_allocator1.get();
        aclTensor* input_fp32_tensor1 = ggml_cann_create_tensor(
-            input_fp32_buffer1, ACL_FLOAT, sizeof(float_t), dst->ne,
+            input_fp32_buffer1, ACL_FLOAT, sizeof(float), dst->ne,
            input_fp32_nb, GGML_MAX_DIMS);
        ggml_cann_pool_alloc fp32_allocator2(
-            ctx.pool(), ggml_nelements(dst) * sizeof(float_t));
+            ctx.pool(), ggml_nelements(dst) * sizeof(float));
        void* input_fp32_buffer2 = fp32_allocator2.get();
        aclTensor* input_fp32_tensor2 = ggml_cann_create_tensor(
-            input_fp32_buffer2, ACL_FLOAT, sizeof(float_t), dst->ne,
+            input_fp32_buffer2, ACL_FLOAT, sizeof(float), dst->ne,
            input_fp32_nb, GGML_MAX_DIMS);
 
        ggml_cann_pool_alloc fp32_allocator(
-            ctx.pool(), ggml_nelements(dst) * sizeof(float_t));
+            ctx.pool(), ggml_nelements(dst) * sizeof(float));
        output_fp32_buffer = fp32_allocator.get();
        aclTensor* output_fp32_tensor = ggml_cann_create_tensor(
-            output_fp32_buffer, ACL_FLOAT, sizeof(float_t), dst->ne,
+            output_fp32_buffer, ACL_FLOAT, sizeof(float), dst->ne,
            input_fp32_nb, GGML_MAX_DIMS);
        aclnn_mul(ctx, acl_src, acl_cos_reshape_tensor, input_fp32_tensor1);
        aclnn_mul(ctx, acl_input_roll_mul_scale_tensor, acl_sin_reshape_tensor,
@@ -2803,16 +2803,14 @@ void ggml_cann_conv_transpose_1d(ggml_backend_cann_context& ctx, ggml_tensor* ds
     aclIntArray *padding = aclCreateIntArray(paddingVal, 1);
     int64_t dilationVal[] = {1};
     aclIntArray *dilation = aclCreateIntArray(dilationVal, 1);
-    bool transposed = true;
-    int64_t groups = 1;
     int8_t cubeMathType = 0;
 
 #ifdef ASCEND_310P
     cubeMathType = 1;
 #endif
 
     GGML_CANN_CALL_ACLNN_OP(ctx, Convolution, acl_input, acl_weight, nullptr, stride,
-        padding, dilation, transposed, padding, groups, acl_dst, cubeMathType);
+        padding, dilation, true, padding, 1, acl_dst, cubeMathType);
 
     ggml_cann_release_resources(ctx, acl_weight, acl_dst, stride, padding, dilation);
 }
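The bulk of this file's changes swap float_t for float when sizing ACL_FLOAT buffers and strides. A small self-contained check of why that matters, under the assumption that the concern is the platform-dependent width of float_t (it tracks FLT_EVAL_METHOD and may alias double on some targets):

```cpp
#include <cfloat>
#include <cmath>
#include <cstdio>

// std::float_t is whatever type the implementation prefers for evaluating
// float expressions; depending on FLT_EVAL_METHOD it may be float or double.
// Sizing a 32-bit ACL_FLOAT buffer with sizeof(float) removes that dependence.
int main() {
    std::printf("FLT_EVAL_METHOD:      %d\n", FLT_EVAL_METHOD);
    std::printf("sizeof(float):        %zu\n", sizeof(float));
    std::printf("sizeof(std::float_t): %zu\n", sizeof(std::float_t)); // 8 if float_t aliases double
}
```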

ggml/src/ggml-cann/ggml-cann.cpp

Lines changed: 3 additions & 1 deletion

@@ -2479,12 +2479,14 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
         case GGML_OP_ARGMAX:
         case GGML_OP_COS:
         case GGML_OP_SIN:
-        case GGML_OP_CONV_TRANSPOSE_1D:
         case GGML_OP_LOG:
         case GGML_OP_MEAN:
         case GGML_OP_PAD_REFLECT_1D:
         case GGML_OP_COUNT_EQUAL:
             return true;
+        case GGML_OP_CONV_TRANSPOSE_1D:
+            // TODO: ((weightL - 1) * dilationW - padLeft)=1336 should not be larger than 255.
+            return (op->src[0]->ne[0] - 1) <= 255;
         case GGML_OP_SCALE:
             float bias;
             memcpy(&bias, (const float *)(op->op_params) + 1, sizeof(float));
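Restated as a hedged, standalone predicate (the helper name is invented; the dilation/padding assumptions come from the TODO comment in the diff, which ties the 255 limit to (weightL - 1) * dilationW - padLeft):

```cpp
#include "ggml.h"

// Sketch of the new support rule: with dilation 1 and no left padding, the CANN
// Convolution kernel's constraint (weightL - 1) * dilationW - padLeft <= 255
// reduces to ne[0] - 1 <= 255, where src[0] holds the convolution kernel.
static bool cann_supports_conv_transpose_1d(const ggml_tensor * op) {
    return (op->src[0]->ne[0] - 1) <= 255;
}
```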

ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp

Lines changed: 7 additions & 1 deletion

@@ -854,7 +854,13 @@ void write_output_files() {
         fputs(len.c_str(), src);
     }
 
-    for (const std::string& btype : {"f16", "f32", "q8_1"}) {
+    std::vector<std::string> btypes = {"f16", "f32"};
+
+#if defined(GGML_VULKAN_INTEGER_DOT_GLSLC_SUPPORT)
+    btypes.push_back("q8_1");
+#endif
+
+    for (const std::string& btype : btypes) {
         for (const auto& tname : type_names) {
             if (btype == "q8_1" && !is_legacy_quant(tname)) {
                 continue;

ggml/src/gguf.cpp

Lines changed: 2 additions & 2 deletions

@@ -273,7 +273,7 @@ struct gguf_reader {
     }
 
     bool read(std::string & dst) const {
-        uint64_t size = -1;
+        uint64_t size = 0;
         if (!read(size)) {
             return false;
         }
@@ -523,7 +523,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
 
         // tensor shape
        {
-            uint32_t n_dims = -1;
+            uint32_t n_dims = 0;
             ok = ok && gr.read(n_dims);
             if (n_dims > GGML_MAX_DIMS) {
                 GGML_LOG_ERROR("%s: tensor '%s' has invalid number of dimensions: %" PRIu32 " > %" PRIu32 "\n",
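A brief, self-contained illustration of the change in defaults (my reading, not stated in the commit: assigning -1 to an unsigned variable wraps to the type's maximum, so the old "poison" default was effectively UINT64_MAX / UINT32_MAX; 0 is a more conservative default and avoids the implicit signed-to-unsigned conversion):

```cpp
#include <cinttypes>
#include <cstdint>
#include <cstdio>

int main() {
    uint64_t size_old = -1; // wraps to UINT64_MAX (18446744073709551615)
    uint64_t size_new = 0;  // the new default
    std::printf("old default: %" PRIu64 "\nnew default: %" PRIu64 "\n",
                size_old, size_new);
}
```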
