Commit 382655c

Revert "ggml: allow casting between f32 and i32 (ggml-org#15783)"
1 parent b504460 commit 382655c

11 files changed: +3 -236 lines changed
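For context: ggml-org#15783 made GGML_TYPE_I32 a valid source/target for ggml_cast and the backend copy kernels, and this commit removes that support again. Below is a minimal sketch of the graph-level usage that the revert disables, assuming a build that still contains #15783 (the wrapper function is illustrative, not part of the library):

#include "ggml.h"

// Illustrative wrapper (not library code): with ggml-org#15783 applied,
// GGML_TYPE_I32 was an accepted ggml_cast target and the fractional part
// was discarded; after this revert the cast is rejected as unsupported.
static struct ggml_tensor * cast_f32_to_i32(struct ggml_context * ctx,
                                            struct ggml_tensor * t_f32) {
    return ggml_cast(ctx, t_f32, GGML_TYPE_I32);
}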

ggml/include/ggml-cpu.h

Lines changed: 0 additions & 1 deletion

@@ -134,7 +134,6 @@ extern "C" {
     GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cpu_reg(void);

     GGML_BACKEND_API void ggml_cpu_fp32_to_fp32(const float *, float *, int64_t);
-    GGML_BACKEND_API void ggml_cpu_fp32_to_i32 (const float *, int32_t *, int64_t);
     GGML_BACKEND_API void ggml_cpu_fp32_to_fp16(const float *, ggml_fp16_t *, int64_t);
     GGML_BACKEND_API void ggml_cpu_fp16_to_fp32(const ggml_fp16_t *, float *, int64_t);
     GGML_BACKEND_API void ggml_cpu_fp32_to_bf16(const float *, ggml_bf16_t *, int64_t);

ggml/include/ggml.h

Lines changed: 0 additions & 1 deletion

@@ -1429,7 +1429,6 @@ extern "C" {
             struct ggml_tensor * a,
             struct ggml_tensor * b);

-    // note: casting from f32 to i32 will discard the fractional part
     GGML_API struct ggml_tensor * ggml_cast(
             struct ggml_context * ctx,
             struct ggml_tensor * a,

ggml/src/ggml-cpu/ggml-cpu.c

Lines changed: 1 addition & 14 deletions

@@ -377,9 +377,6 @@ static const struct ggml_type_traits_cpu type_traits_cpu[GGML_TYPE_COUNT] = {
         .vec_dot_type = GGML_TYPE_Q8_K,
         .nrows = 1,
     },
-    [GGML_TYPE_I32] = {
-        .from_float = (ggml_from_float_t) ggml_cpu_fp32_to_i32,
-    },
 };

 const struct ggml_type_traits_cpu * ggml_get_type_traits_cpu(enum ggml_type type) {

@@ -3546,10 +3543,7 @@ struct ggml_cplan ggml_graph_plan(
                 if (ggml_is_quantized(node->type) ||
                     // F16 -> BF16 and BF16 -> F16 copies go through intermediate F32
                     (node->src[0]->type == GGML_TYPE_F16 && node->src[1] && node->src[1]->type == GGML_TYPE_BF16) ||
-                    (node->src[0]->type == GGML_TYPE_BF16 && node->src[1] && node->src[1]->type == GGML_TYPE_F16) ||
-                    // conversion between F32 and I32
-                    (node->src[0]->type == GGML_TYPE_F32 && node->src[1] && node->src[1]->type == GGML_TYPE_I32) ||
-                    (node->src[0]->type == GGML_TYPE_I32 && node->src[1] && node->src[1]->type == GGML_TYPE_F32)) {
+                    (node->src[0]->type == GGML_TYPE_BF16 && node->src[1] && node->src[1]->type == GGML_TYPE_F16)) {
                     cur = ggml_type_size(GGML_TYPE_F32) * node->ne[0] * n_tasks;
                 }
             } break;

@@ -4130,13 +4124,6 @@ void ggml_cpu_fp32_to_bf16(const float * x, ggml_bf16_t * y, int64_t n) {
     }
 }

-void ggml_cpu_fp32_to_i32(const float * x, int32_t * y, int64_t n) {
-    int64_t i = 0;
-    for (; i < n; ++i) {
-        y[i] = x[i];
-    }
-}
-
 void ggml_cpu_bf16_to_fp32(const ggml_bf16_t * x, float * y, int64_t n) {
     int64_t i = 0;
 #if defined(__AVX2__)
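For reference, the deleted conversion is a plain scalar loop whose semantics come from C's float-to-integer conversion: truncation toward zero, with out-of-range values being undefined behavior (the deleted code did not guard against that either). A standalone restatement, not library code:

#include <cstdint>

// Restatement of the deleted ggml_cpu_fp32_to_i32 loop: the implicit
// float -> int32_t conversion truncates toward zero, e.g. 2.9f -> 2 and
// -2.9f -> -2; values outside the int32_t range are undefined behavior.
static void fp32_to_i32_sketch(const float * x, int32_t * y, int64_t n) {
    for (int64_t i = 0; i < n; ++i) {
        y[i] = (int32_t) x[i];
    }
}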

ggml/src/ggml-cpu/ops.cpp

Lines changed: 0 additions & 160 deletions

@@ -776,24 +776,6 @@ static void ggml_compute_forward_dup_f32(
                         id += ne00 * (ne01 - ir1);
                     }
                 }
-            } else if (dst->type == GGML_TYPE_I32) {
-                size_t id = 0;
-                int32_t * dst_ptr = (int32_t *) dst->data;
-
-                for (int i03 = 0; i03 < ne03; i03++) {
-                    for (int i02 = 0; i02 < ne02; i02++) {
-                        id += ne00 * ir0;
-                        for (int i01 = ir0; i01 < ir1; i01++) {
-                            for (int i00 = 0; i00 < ne00; i00++) {
-                                const float * src0_ptr = (float *) ((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03);
-
-                                dst_ptr[id] = *src0_ptr;
-                                id++;
-                            }
-                        }
-                        id += ne00 * (ne01 - ir1);
-                    }
-                }
             } else {
                 GGML_ABORT("fatal error"); // TODO: implement
             }

@@ -965,144 +947,6 @@ static void ggml_compute_forward_dup_f32(
                 }
             }
         }
-    } else if (dst->type == GGML_TYPE_I32) {
-        for (int64_t i03 = 0; i03 < ne03; i03++) {
-            for (int64_t i02 = 0; i02 < ne02; i02++) {
-                i10 += ne00 * ir0;
-                while (i10 >= ne0) {
-                    i10 -= ne0;
-                    if (++i11 == ne1) {
-                        i11 = 0;
-                        if (++i12 == ne2) {
-                            i12 = 0;
-                            if (++i13 == ne3) {
-                                i13 = 0;
-                            }
-                        }
-                    }
-                }
-                for (int64_t i01 = ir0; i01 < ir1; i01++) {
-                    for (int64_t i00 = 0; i00 < ne00; i00++) {
-                        const char * src0_ptr = ((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03);
-                        char * dst_ptr = ((char *) dst->data + i10*nb0 + i11*nb1 + i12*nb2 + i13*nb3);
-
-                        *(int32_t *) dst_ptr = *(const float *) src0_ptr;
-
-                        if (++i10 == ne0) {
-                            i10 = 0;
-                            if (++i11 == ne1) {
-                                i11 = 0;
-                                if (++i12 == ne2) {
-                                    i12 = 0;
-                                    if (++i13 == ne3) {
-                                        i13 = 0;
-                                    }
-                                }
-                            }
-                        }
-                    }
-                }
-                i10 += ne00 * (ne01 - ir1);
-                while (i10 >= ne0) {
-                    i10 -= ne0;
-                    if (++i11 == ne1) {
-                        i11 = 0;
-                        if (++i12 == ne2) {
-                            i12 = 0;
-                            if (++i13 == ne3) {
-                                i13 = 0;
-                            }
-                        }
-                    }
-                }
-            }
-        }
-    } else {
-        GGML_ABORT("fatal error"); // TODO: implement
-    }
-}
-
-static void ggml_compute_forward_dup_i32(
-        const ggml_compute_params * params,
-        ggml_tensor * dst) {
-
-    const ggml_tensor * src0 = dst->src[0];
-
-    GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));
-
-    GGML_TENSOR_UNARY_OP_LOCALS
-
-    const int ith = params->ith; // thread index
-    const int nth = params->nth; // number of threads
-
-    // parallelize by rows
-    const int nr = ne01;
-    // number of rows per thread
-    const int dr = (nr + nth - 1) / nth;
-    // row range for this thread
-    const int ir0 = dr * ith;
-    const int ir1 = MIN(ir0 + dr, nr);
-
-    // dst counters
-
-    int64_t i10 = 0;
-    int64_t i11 = 0;
-    int64_t i12 = 0;
-    int64_t i13 = 0;
-
-    // TODO: not optimal, but works
-    if (dst->type == GGML_TYPE_F32) {
-        for (int64_t i03 = 0; i03 < ne03; i03++) {
-            for (int64_t i02 = 0; i02 < ne02; i02++) {
-                i10 += ne00 * ir0;
-                while (i10 >= ne0) {
-                    i10 -= ne0;
-                    if (++i11 == ne1) {
-                        i11 = 0;
-                        if (++i12 == ne2) {
-                            i12 = 0;
-                            if (++i13 == ne3) {
-                                i13 = 0;
-                            }
-                        }
-                    }
-                }
-                for (int64_t i01 = ir0; i01 < ir1; i01++) {
-                    for (int64_t i00 = 0; i00 < ne00; i00++) {
-                        const char * src0_ptr = ((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03);
-                        char * dst_ptr = ((char *) dst->data + i10*nb0 + i11*nb1 + i12*nb2 + i13*nb3);
-
-                        *(float *) dst_ptr = *(const int32_t *) src0_ptr;
-
-                        if (++i10 == ne0) {
-                            i10 = 0;
-                            if (++i11 == ne1) {
-                                i11 = 0;
-                                if (++i12 == ne2) {
-                                    i12 = 0;
-                                    if (++i13 == ne3) {
-                                        i13 = 0;
-                                    }
-                                }
-                            }
-                        }
-                    }
-                }
-                i10 += ne00 * (ne01 - ir1);
-                while (i10 >= ne0) {
-                    i10 -= ne0;
-                    if (++i11 == ne1) {
-                        i11 = 0;
-                        if (++i12 == ne2) {
-                            i12 = 0;
-                            if (++i13 == ne3) {
-                                i13 = 0;
-                            }
-                        }
-                    }
-                }
-            }
-        }
     } else {
         GGML_ABORT("fatal error"); // TODO: implement
     }

@@ -1333,10 +1177,6 @@ void ggml_compute_forward_dup(
             {
                 ggml_compute_forward_dup_f32(params, dst);
             } break;
-        case GGML_TYPE_I32:
-            {
-                ggml_compute_forward_dup_i32(params, dst);
-            } break;
         default:
             {
                 if (ggml_is_quantized(src0->type) && dst->type == GGML_TYPE_F32) {
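One thing worth noting in the deleted strided-copy loops: the destination indices i10..i13 are advanced like an odometer, with i10 as the fastest digit wrapping at ne0 and carrying upward. A condensed sketch of that pattern, with a helper name of my own (the original inlines it at every step):

#include <cstdint>

// Odometer-style index advance as inlined by the deleted dup loops:
// when a digit reaches its extent it wraps to 0 and carries into the next.
static void advance_dst_index(int64_t & i10, int64_t & i11, int64_t & i12, int64_t & i13,
                              int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3) {
    if (++i10 == ne0) {
        i10 = 0;
        if (++i11 == ne1) {
            i11 = 0;
            if (++i12 == ne2) {
                i12 = 0;
                if (++i13 == ne3) {
                    i13 = 0;
                }
            }
        }
    }
}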

ggml/src/ggml-cuda/convert.cuh

Lines changed: 0 additions & 2 deletions

@@ -38,8 +38,6 @@ template<typename dst_t, typename src_t>
         return __float2bfloat16(float(x));
     } else if constexpr(std::is_same_v<src_t, nv_bfloat16>) {
         return __bfloat162float(x);
-    } else if constexpr(std::is_same_v<dst_t, int32_t>) {
-        return int32_t(x);
     } else {
         return float(x);
     }
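The helper in convert.cuh selects a conversion at compile time with if constexpr on the source and destination types. A host-side C++ analogue of the branch this commit removes (the function name is illustrative, not the library's):

#include <cstdint>
#include <type_traits>

// Host-side analogue of the removed if-constexpr branch: when dst_t is
// int32_t, convert by explicit integer cast; otherwise fall back to the
// generic float conversion that the revert keeps.
template <typename dst_t, typename src_t>
static dst_t convert_value(src_t x) {
    if constexpr (std::is_same_v<dst_t, int32_t>) {
        return int32_t(x); // the branch deleted by this commit
    } else {
        return float(x);   // generic fallback, still present after the revert
    }
}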

ggml/src/ggml-cuda/cpy.cu

Lines changed: 0 additions & 4 deletions

@@ -374,10 +374,6 @@ void ggml_cuda_cpy(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, gg
         ggml_cpy_flt_cuda<nv_bfloat16, half> (src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream, dest_ptrs_d, graph_cpynode_index);
     } else if (src0->type == GGML_TYPE_BF16 && src1->type == GGML_TYPE_F32) {
         ggml_cpy_flt_cuda<nv_bfloat16, float> (src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream, dest_ptrs_d, graph_cpynode_index);
-    } else if (src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_I32) {
-        ggml_cpy_flt_cuda<float, int32_t> (src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream, dest_ptrs_d, graph_cpynode_index);
-    } else if (src0->type == GGML_TYPE_I32 && src1->type == GGML_TYPE_F32) {
-        ggml_cpy_flt_cuda<int32_t, float> (src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream, dest_ptrs_d, graph_cpynode_index);
     } else {
         GGML_ABORT("%s: unsupported type combination (%s to %s)\n", __func__,
                 ggml_type_name(src0->type), ggml_type_name(src1->type));

ggml/src/ggml-cuda/ggml-cuda.cu

Lines changed: 0 additions & 6 deletions

@@ -3479,12 +3479,6 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
             if (src0_type == GGML_TYPE_F32 && src1_type == GGML_TYPE_IQ4_NL) {
                 return true;
             }
-            if (src0_type == GGML_TYPE_F32 && src1_type == GGML_TYPE_I32) {
-                return true;
-            }
-            if (src0_type == GGML_TYPE_I32 && src1_type == GGML_TYPE_F32) {
-                return true;
-            }
             if (src0_type == src1_type && ggml_is_contiguous(op->src[0]) && ggml_is_contiguous(op->src[1])) {
                 return true;
             }

ggml/src/ggml-metal/ggml-metal.m

Lines changed: 0 additions & 15 deletions

@@ -498,8 +498,6 @@ - (void) dealloc {
     GGML_METAL_KERNEL_TYPE_CPY_F16_F32,
     GGML_METAL_KERNEL_TYPE_CPY_BF16_F32,
     GGML_METAL_KERNEL_TYPE_CPY_BF16_BF16,
-    GGML_METAL_KERNEL_TYPE_CPY_F32_I32,
-    GGML_METAL_KERNEL_TYPE_CPY_I32_F32,
     GGML_METAL_KERNEL_TYPE_CPY_F32_Q8_0,
     GGML_METAL_KERNEL_TYPE_CPY_F32_Q4_0,
     GGML_METAL_KERNEL_TYPE_CPY_F32_Q4_1,

@@ -1435,8 +1433,6 @@ @implementation GGMLMetalClass
         GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_F16_F16, cpy_f16_f16, true);
         GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_BF16_F32, cpy_bf16_f32, use_bfloat);
         GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_BF16_BF16, cpy_bf16_bf16, use_bfloat);
-        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_F32_I32, cpy_f32_i32, true);
-        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_I32_F32, cpy_i32_f32, true);
         GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_F32_Q8_0, cpy_f32_q8_0, true);
         GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_F32_Q4_0, cpy_f32_q4_0, true);
         GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_F32_Q4_1, cpy_f32_q4_1, true);

@@ -1993,7 +1989,6 @@ static bool ggml_metal_supports_op(const struct ggml_backend_metal_device_contex
                 case GGML_TYPE_Q5_0:
                 case GGML_TYPE_Q5_1:
                 case GGML_TYPE_IQ4_NL:
-                case GGML_TYPE_I32:
                     return true;
                 default:
                     return false;

@@ -2026,8 +2021,6 @@ static bool ggml_metal_supports_op(const struct ggml_backend_metal_device_contex
                 default:
                     return false;
                 }
-            case GGML_TYPE_I32:
-                return op->type == GGML_TYPE_F32;
             default:
                 return false;
             };

@@ -5472,7 +5465,6 @@ static int ggml_metal_encode_node(

                 switch (dstt) {
                     case GGML_TYPE_F32: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_F32_F32].pipeline; break;
-                    case GGML_TYPE_I32: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_F32_I32].pipeline; break;
                     case GGML_TYPE_F16: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_F32_F16].pipeline; break;
                     case GGML_TYPE_BF16: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_F32_BF16].pipeline; break;
                     case GGML_TYPE_Q8_0: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_F32_Q8_0].pipeline; break;

@@ -5484,13 +5476,6 @@ static int ggml_metal_encode_node(
                     default: GGML_ABORT("not implemented");
                 };
             } break;
-        case GGML_TYPE_I32:
-            {
-                switch (dstt) {
-                    case GGML_TYPE_F32: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_I32_F32].pipeline; break;
-                    default: GGML_ABORT("not implemented");
-                };
-            } break;
         case GGML_TYPE_F16:
             {
                 switch (dstt) {

ggml/src/ggml-metal/ggml-metal.metal

Lines changed: 0 additions & 2 deletions

@@ -5607,8 +5607,6 @@ typedef decltype(kernel_cpy<float, float>) kernel_cpy_t;

 template [[host_name("kernel_cpy_f32_f32")]] kernel kernel_cpy_t kernel_cpy<float, float>;
 template [[host_name("kernel_cpy_f32_f16")]] kernel kernel_cpy_t kernel_cpy<float, half>;
-template [[host_name("kernel_cpy_f32_i32")]] kernel kernel_cpy_t kernel_cpy<float, int32_t>;
-template [[host_name("kernel_cpy_i32_f32")]] kernel kernel_cpy_t kernel_cpy<int32_t, float>;
 #if defined(GGML_METAL_USE_BF16)
 template [[host_name("kernel_cpy_f32_bf16")]] kernel kernel_cpy_t kernel_cpy<float, bfloat>;
 #endif
