@@ -1271 +1277 @@
             GGML_ABORT("Tensor %s has view source tensors, which are not supported for tensor parallelism.\n", tensor->name);
         }

-        if (src0_extra->split_tensors && src0_extra->split_tensors != GGML_TP_SPLIT_COLUMNS) {
-            GGML_ABORT("Tensor %s has unsupported op %s for tensor parallelism, src0 is split but not as columns.\n", tensor->name, ggml_op_name(tensor->op));
-            // technically, this is not a problem, but it is not expected.
-            // ensure_rejoined(tensor, src0);
-        }
-
         auto src0_split_tensors = src0_extra->has_rejoin ? GGML_TP_SPLIT_NONE : src0_extra->split_tensors;

+        if (src0_split_tensors == GGML_TP_SPLIT_VIEW) {
+            // make this into columns and create views into it
+            auto src0_viewsrc = src0->view_src;
+            auto src0_viewsrc_extra = (ggml_tensor_parallel_extra *) src0_viewsrc->extra;
+            if (src0_viewsrc_extra->split_tensors != GGML_TP_SPLIT_COLUMNS) {
+                GGML_ABORT("Tensor %s has unsupported op %s for tensor parallelism, src0 is split but not as columns.\n", tensor->name, ggml_op_name(tensor->op));
+            }
+
+            if (src0_viewsrc->ne[0] % tensor->ne[0]) {
+                GGML_ABORT("Tensor %s has unsupported op %s for tensor parallelism, src0 is split as view but not evenly divisible by the rope head count.\n", tensor->name, ggml_op_name(tensor->op));
+            }
+
+            if (src0_extra->tensors[0]) {
+                GGML_ABORT("Reshape Tensor %s has already been initialized, but is being initialized again.\n", tensor->name);
+            }
+
+            // rope input is initially on columns.
+            // input to rope is split [8192,1,1,1], per gpu it is [4096,1,1,1]
+            // the input is then reshaped [128,64,1,1] per gpu it is [128,32,1,1]
+            // this effectively splits it on the num heads 64->32 heads.
+            // the output from rope is [128,64,1,1] per gpu it is [128,32,1,1]
+            // this means that the rope output is now split on rows.
+            // this actually works out just fine because the rows can be gotten then added together.
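The comment block above is the heart of the GGML_TP_SPLIT_VIEW handling: a rope input that is column-split across GPUs becomes split over attention heads (rows) once it is reshaped to [head_dim, n_heads]. Below is a minimal standalone sketch of that shape bookkeeping, not ggml code; the sizes (8192-wide input, 128 head dim, 2 GPUs) are taken from the example values in the comments, and the divisibility assert mirrors the ne[0] check in the hunk above.

// Standalone illustration only -- none of this is ggml API.
// Shows how a column split of the rope input turns into a head (row) split
// after the per-GPU reshape to [head_dim, n_heads_per_gpu].
#include <cassert>
#include <cstdio>

int main() {
    const int n_gpus   = 2;
    const int n_embd   = 8192;                // full rope input: [8192,1,1,1]
    const int head_dim = 128;                 // ne[0] of the reshape target

    const int n_heads = n_embd / head_dim;    // 64 heads in total

    // column split: each GPU holds a contiguous slice of the 8192 columns
    assert(n_embd % n_gpus == 0);
    const int n_embd_per_gpu = n_embd / n_gpus;             // 4096

    // the same reshape applied per GPU: [4096] -> [128, 32];
    // this assert corresponds to the ne[0] divisibility check above
    assert(n_embd_per_gpu % head_dim == 0);
    const int n_heads_per_gpu = n_embd_per_gpu / head_dim;  // 32

    printf("full   : [%d] -> [%d, %d]\n", n_embd, head_dim, n_heads);
    printf("per GPU: [%d] -> [%d, %d]\n", n_embd_per_gpu, head_dim, n_heads_per_gpu);
    // head_dim stays intact and only the head count is halved, so the
    // original column split now behaves like a split over heads (rows).
    return 0;
}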
@@ -1581 +1632 @@
             create_reduce_tensors();
+            set_src_tensor(0, GGML_TP_SPLIT_REDUCE);
         }
         else {
             GGML_ABORT("Tensor %s has unsupported op %s for tensor parallelism, src0 is split but not as columns or rows.\n", tensor->name, ggml_op_name(tensor->op));
         }
+        set_src_tensor(1, GGML_TP_SPLIT_NONE);
+        check_srcs();
         break;
     }

     case GGML_OP_VIEW:
     case GGML_OP_PERMUTE:
     case GGML_OP_RESHAPE: {
+        auto src0_split_tensors = src0_extra->has_rejoin ? GGML_TP_SPLIT_NONE : src0_extra->split_tensors;
         // if split, skip, make the downstream op make sense of it, as some graphs combine a bunch of reshapes/permutes/views.
-        if (!extra->split_tensors) {
+        if (!src0_split_tensors) {
             create_default_tensors();
         }
+        else {
+            GGML_LOG_WARN("Tensor %s has unsupported op %s for tensor parallelism, src0 is split.\n", tensor->name, ggml_op_name(tensor->op));
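Regarding the GGML_TP_SPLIT_REDUCE branch above: when the dimension that a matrix multiply contracts over is split across GPUs, each GPU computes a full-sized but partial output, and those partials have to be summed across GPUs, which is what the reduce tensors created by create_reduce_tensors() are for (the earlier rope comment says the same thing: the rows "can be gotten then added together"). The toy C++ sketch below demonstrates that identity with a 2x4 weight and two simulated GPUs; it is purely illustrative and does not use the ggml API.

// Standalone illustration only -- not ggml code.
// Splitting the contracted (inner) dimension across "GPUs" yields partial
// outputs of the full size that must be summed (the reduce step).
#include <cassert>
#include <cstdio>
#include <vector>

// y = W * x, with W stored row-major as [n_out x n_in] and x of size [n_in]
static std::vector<float> matvec(const std::vector<float> &W,
                                 const std::vector<float> &x,
                                 int n_out, int n_in) {
    std::vector<float> y(n_out, 0.0f);
    for (int r = 0; r < n_out; ++r)
        for (int c = 0; c < n_in; ++c)
            y[r] += W[r * n_in + c] * x[c];
    return y;
}

int main() {
    const int n_in = 4, n_out = 2, n_gpus = 2;
    const std::vector<float> W = {1, 2, 3, 4,
                                  5, 6, 7, 8};   // [2 x 4]
    const std::vector<float> x = {1, 1, 2, 2};   // [4]

    // reference result on a single device
    const std::vector<float> y_ref = matvec(W, x, n_out, n_in);

    // split the inner dimension: each "GPU" holds half of W's columns and the
    // matching half of x, and produces a full-sized partial output
    const int n_in_split = n_in / n_gpus;
    std::vector<float> y_sum(n_out, 0.0f);
    for (int g = 0; g < n_gpus; ++g) {
        std::vector<float> Wg(n_out * n_in_split), xg(n_in_split);
        for (int r = 0; r < n_out; ++r)
            for (int c = 0; c < n_in_split; ++c)
                Wg[r * n_in_split + c] = W[r * n_in + g * n_in_split + c];
        for (int c = 0; c < n_in_split; ++c)
            xg[c] = x[g * n_in_split + c];

        const std::vector<float> y_part = matvec(Wg, xg, n_out, n_in_split);
        for (int r = 0; r < n_out; ++r)
            y_sum[r] += y_part[r];   // the "reduce" (sum across GPUs)
    }

    for (int r = 0; r < n_out; ++r)
        assert(y_sum[r] == y_ref[r]);
    printf("summed partials match the single-device result\n");
    return 0;
}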