Skip to content

Commit 9c43dca

Browse files
committed
warnings
1 parent 8eaa479 commit 9c43dca

File tree

1 file changed

+16
-33
lines changed

1 file changed

+16
-33
lines changed

ggml/src/ggml-tp/ggml-tp.cpp

Lines changed: 16 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -507,7 +507,7 @@ static void ensure_reduce_split_views(const ggml_tensor *tensor) {
507507
reduce_split_view->buffer = wrapped->buffer;
508508
reduce_split_view->view_src = wrapped;
509509
reduce_split_view->view_offs = col_offset * wrapped->nb[0];
510-
reduce_split_view->data = wrapped->data + reduce_split_view->view_offs;
510+
reduce_split_view->data = (char *)wrapped->data + reduce_split_view->view_offs;
511511
reduce_split_view->ne[0] = splits.split[j];
512512

513513
col_offset += splits.split[j];
@@ -644,7 +644,7 @@ static void ensure_rejoined(const ggml_tensor *reason, const ggml_tensor * src)
644644

645645
static int memdiff_index(const void *a, const void *b, size_t length) {
646646
for (size_t i = 0; i < length; ++i) {
647-
if (((char*)a)[i] != ((char*)b)[i]) {
647+
if (((const char*)a)[i] != ((const char*)b)[i]) {
648648
return (int)i; // return index of first difference
649649
}
650650
}
@@ -757,7 +757,6 @@ static ggml_status reduce_gathered_tensors(ggml_cgraph * backend_graph, int devi
757757
return GGML_STATUS_SUCCESS;
758758
}
759759

760-
auto be = ggml_parallel_backends[device_index];
761760
ggml_tensor * wrapped = extra->tensors[device_index];
762761

763762
// when reducing a tensor, the actual op (sub or add) is contained in reduce_op_tensors
@@ -781,17 +780,16 @@ static ggml_status reduce_gathered_tensors(ggml_cgraph * backend_graph, int devi
781780
return GGML_STATUS_SUCCESS;
782781
}
783782

784-
void set_tensor(ggml_backend_t be, ggml_tensor * tensor, float value) {
783+
static void set_tensor(ggml_backend_t be, ggml_tensor * tensor, float value) {
785784
std::unique_ptr<float, decltype(&std::free)> data(static_cast<float*>(std::malloc(ggml_nbytes(tensor))), &std::free);
786785

787-
for (size_t i = 0; i < ggml_nelements(tensor); i++) {
786+
for (int64_t i = 0; i < ggml_nelements(tensor); i++) {
788787
data.get()[i] = value;
789788
}
790789
be->iface.set_tensor_async(be, tensor, data.get(), 0, ggml_nbytes(tensor));
791790
}
792791

793792
static ggml_tensor* ggml_backend_tp_node_compute_split(int device_index, ggml_tensor * tensor) {
794-
auto be = ggml_parallel_backends[device_index];
795793
auto extra = (ggml_tensor_parallel_extra *)tensor->extra;
796794

797795
auto wrapped = extra->tensors[device_index];
@@ -842,6 +840,7 @@ static void ggml_backend_tp_buffer_compute_graph(ggml_cgraph * cgraph, std::func
842840

843841
static void ggml_backend_tp_buffer_graph_compute_one(struct compute_thread * thread) {
844842
auto startTime = std::chrono::high_resolution_clock::now();
843+
GGML_UNUSED(startTime);
845844
auto cgraph = thread->cgraph;
846845

847846
struct ggml_init_params params = {
@@ -903,7 +902,7 @@ static void ggml_backend_tp_buffer_graph_compute_one(struct compute_thread * thr
903902
view_src = view_src->view_src;
904903
}
905904
if (!be->iface.cpy_tensor2d_async(be, other_be, view_src, rejoined_tensor_view)) {
906-
GGML_ABORT("Failed to copy tensor %s from device %d to device %d\n", tensor->name, device_index, other_device_index);
905+
GGML_ABORT("Failed to copy tensor %s from device %d to device %ld\n", tensor->name, device_index, other_device_index);
907906
// TODO, this is recoverable if something like this is implemented:
908907
// ggml_backend_tensor2d_copy(view_src, rejoined_tensor_view);
909908
}
@@ -929,19 +928,18 @@ static void ggml_backend_tp_buffer_graph_compute_one(struct compute_thread * thr
929928
pending_extra->rejoined[device_index] = true;
930929
}
931930
return true;
931+
GGML_UNUSED(node_index);
932932
};
933933

934934
auto compute = [&](int node_index, ggml_tensor * tensor, ggml_tensor_parallel_extra * extra) {
935935
auto wrapped = ggml_backend_tp_node_compute_split(device_index, tensor);
936936
if (extra->split_tensors != GGML_TP_SPLIT_VIEW) {
937937
backend_graph->nodes[backend_graph->n_nodes++] = wrapped;
938938
}
939-
else {
940-
int i = 0;
941-
}
942939
extra->computed[device_index] = true;
943940

944941
return true;
942+
GGML_UNUSED(node_index);
945943
};
946944

947945
ggml_backend_tp_buffer_compute_graph(cgraph, gather_pending, compute, flush_compute);
@@ -986,7 +984,6 @@ static void do_init(size_t node_index, ggml_tensor * tensor, ggml_tensor_paralle
986984
auto create_default_tensors_for = [](ggml_tensor * tensor, ggml_tensor_parallel_extra * extra) {
987985
extra->split_tensors = GGML_TP_SPLIT_NONE;
988986
for (size_t j = 0; j < ggml_parallel_devices.size(); j++) {
989-
auto dev = ggml_parallel_devices[j];
990987
auto wrapped = ggml_backend_tp_clone_tensor(tensor);
991988
extra->tensors[j] = wrapped;
992989
}
@@ -999,7 +996,6 @@ static void do_init(size_t node_index, ggml_tensor * tensor, ggml_tensor_paralle
999996
auto create_reduce_tensors = [&]() {
1000997
extra->split_tensors = GGML_TP_SPLIT_REDUCE;
1001998
for (size_t j = 0; j < ggml_parallel_devices.size(); j++) {
1002-
auto dev = ggml_parallel_devices[j];
1003999
auto wrapped = ggml_backend_tp_clone_tensor(tensor);
10041000
extra->tensors[j] = wrapped;
10051001
}
@@ -1021,7 +1017,6 @@ static void do_init(size_t node_index, ggml_tensor * tensor, ggml_tensor_paralle
10211017
extra->split_tensors = GGML_TP_SPLIT_ROWS;
10221018
auto splits = get_row_splits(dims);
10231019
for (size_t j = 0; j < ggml_parallel_devices.size(); j++) {
1024-
auto dev = ggml_parallel_devices[j];
10251020
auto wrapped = prepare_wrapped(tensor, dims);
10261021
extra->tensors[j] = wrapped;
10271022

@@ -1042,7 +1037,6 @@ static void do_init(size_t node_index, ggml_tensor * tensor, ggml_tensor_paralle
10421037
extra->split_tensors = GGML_TP_SPLIT_COLUMNS;
10431038
auto splits = get_col_splits(dims);
10441039
for (size_t j = 0; j < ggml_parallel_devices.size(); j++) {
1045-
auto dev = ggml_parallel_devices[j];
10461040
auto wrapped = prepare_wrapped(tensor, dims);
10471041
extra->tensors[j] = wrapped;
10481042

@@ -1064,7 +1058,6 @@ static void do_init(size_t node_index, ggml_tensor * tensor, ggml_tensor_paralle
10641058
extra->split_tensors = GGML_TP_SPLIT_DIM2;
10651059
auto splits = get_dim_splits(dims->ne[2]);
10661060
for (size_t j = 0; j < ggml_parallel_devices.size(); j++) {
1067-
auto dev = ggml_parallel_devices[j];
10681061
auto wrapped = prepare_wrapped(tensor, dims);
10691062
extra->tensors[j] = wrapped;
10701063

@@ -1117,15 +1110,9 @@ static void do_init(size_t node_index, ggml_tensor * tensor, ggml_tensor_paralle
11171110
reduce_op->buffer = wrapped->buffer;
11181111
reduce_op->view_src = wrapped;
11191112
reduce_op->view_offs = col_offset * wrapped->nb[0];
1120-
reduce_op->data = wrapped->data + reduce_op->view_offs;
1113+
reduce_op->data = (char *)wrapped->data + reduce_op->view_offs;
11211114
reduce_op->ne[0] = splits.split[j];
11221115

1123-
// the reduce was rejoined, and the
1124-
auto reduce = reduce_extra->tensors[j];
1125-
if (reduce_extra->has_rejoin) {
1126-
reduce = reduce_extra->rejoined_tensor_views[j][j];
1127-
}
1128-
11291116
// create a col split view of the reduced tensor
11301117
ensure_reduce_split_views(reduce_tensor);
11311118

@@ -1596,9 +1583,6 @@ static void do_init(size_t node_index, ggml_tensor * tensor, ggml_tensor_paralle
15961583
// one split, one not split
15971584
auto split_tensors = src0_split_tensors ? src0_split_tensors : src1_split_tensors;
15981585
if (split_tensors == GGML_TP_SPLIT_COLUMNS) {
1599-
if (src0_extra->has_rejoin || src1_extra->has_rejoin) {
1600-
int i = 0;
1601-
}
16021586
ensure_column_split(src0);
16031587
ensure_column_split(src1);
16041588
create_column_split_tensors();
@@ -1886,7 +1870,7 @@ static enum ggml_status ggml_backend_tp_graph_compute(ggml_backend_t backend, gg
18861870
continue;
18871871
}
18881872

1889-
wrapped->data = wrapped->src[0]->data + wrapped->view_offs;
1873+
wrapped->data = (char *)wrapped->src[0]->data + wrapped->view_offs;
18901874
wrapped->buffer = wrapped->src[0]->buffer;
18911875
}
18921876
}
@@ -2100,6 +2084,9 @@ static ggml_backend_i ggml_backend_tp_interface = {
21002084
/* .graph_compute = */ ggml_backend_tp_graph_compute,
21012085
/* .event_record = */ NULL,
21022086
/* .event_wait = */ NULL,
2087+
/* .set_tensor2d_async = */ NULL,
2088+
/* .get_tensor2d_async = */ NULL,
2089+
/* .cpy_tensor2d_async = */ NULL,
21032090
};
21042091

21052092
static ggml_backend_dev_t ggml_backend_tp_reg_get_device(ggml_backend_reg_t reg, size_t index);
@@ -2223,7 +2210,7 @@ static void ensure_weight_column_split(ggml_tensor * weight) {
22232210
std::unique_ptr<char, decltype(&std::free)> data(
22242211
static_cast<char*>(std::malloc(size)), &std::free);
22252212
size_t offset = 0;
2226-
for (int j = 0; j < ggml_parallel_devices.size(); j++) {
2213+
for (size_t j = 0; j < ggml_parallel_devices.size(); j++) {
22272214
auto wrapped = extra->tensors[j];
22282215
auto buft = wrapped->buffer;
22292216
auto wrapped_size = ggml_nbytes(wrapped);
@@ -2251,7 +2238,7 @@ static void ensure_weight_column_split(ggml_tensor * weight) {
22512238
auto splits = get_dim_splits(blocks_per_row);
22522239

22532240
offset = 0;
2254-
for (int j = 0; j < ggml_parallel_devices.size(); j++) {
2241+
for (size_t j = 0; j < ggml_parallel_devices.size(); j++) {
22552242
auto wrapped = extra->tensors[j];
22562243
wrapped->ne[0] = splits.split[j] * elements_per_block;
22572244
wrapped->ne[1] = weight->ne[1];
@@ -2352,6 +2339,7 @@ static enum ggml_status ggml_backend_tp_finish_init_tensor(ggml_tensor *tensor)
23522339
GGML_ABORT("ggml_backend_tp_buffer_init_tensor: init_tensor failed for tensor %s\n", tensor->name);
23532340
}
23542341
}
2342+
23552343
return GGML_STATUS_SUCCESS;
23562344
}
23572345

@@ -2506,7 +2494,6 @@ static size_t ggml_backend_tp_buffer_type_get_alloc_size(ggml_backend_buffer_typ
25062494
// to get cleanly divisible splits, make sure the allocation alignment is a multiple of the number of devices
25072495
max_alloc_size = ggml_align_size(max_alloc_size, ggml_backend_tp_buffer_type_get_alignment(buft) * ggml_parallel_devices.size());
25082496
return max_alloc_size;
2509-
// return ggml_nbytes(tensor);
25102497
}
25112498

25122499
static ggml_backend_buffer_type_i ggml_backend_tp_buffer_type_interface = {
@@ -2549,10 +2536,6 @@ static bool ggml_backend_tp_device_supports_op(ggml_backend_dev_t dev, const str
25492536
GGML_UNUSED(dev);
25502537
GGML_UNUSED(op);
25512538

2552-
if (op->op == GGML_OP_MUL_MAT_ID) {
2553-
return false;
2554-
}
2555-
25562539
auto buft = op->buffer ? op->buffer->buft : nullptr;
25572540
if (buft && (!ggml_backend_buft_is_tp_split(buft) && !ggml_backend_buft_is_tp(buft))) {
25582541
return false;

0 commit comments

Comments
 (0)