Skip to content

Commit ff41be4

Browse files
committed
cleanup
1 parent 9c43dca commit ff41be4

File tree

1 file changed

+6
-3
lines changed

1 file changed

+6
-3
lines changed

ggml/src/ggml-tp/ggml-tp.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1244,7 +1244,7 @@ static void do_init(size_t node_index, ggml_tensor * tensor, ggml_tensor_paralle
12441244
ggml_backend_tp_finish_init_tensor(tensor);
12451245
};
12461246

1247-
bool force_rejoin = true;
1247+
bool force_rejoin = false;
12481248
if (force_rejoin) {
12491249
for (int i = 0; i < GGML_MAX_SRC; i++) {
12501250
auto src = tensor->src[i];
@@ -1710,9 +1710,10 @@ static void do_init(size_t node_index, ggml_tensor * tensor, ggml_tensor_paralle
17101710

17111711
case GGML_OP_CPY: {
17121712
// the src1 is the destination, and has already been created.
1713-
// it maybe op NONE or op VIEW. without graph introspection.
1713+
// it may be op NONE or op VIEW. without graph inspection.
17141714
// it is possible to use this cpy op to make the src1 tensor tree
17151715
// split, but this is simpler for now.
1716+
// the min split amount in supports_op also affects this.
17161717
ensure_init_from_viewsrc(src0, src0_extra);
17171718
ensure_init_from_viewsrc(src1, src1_extra);
17181719
ensure_rejoined(tensor, src0);
@@ -2575,7 +2576,9 @@ static bool ggml_backend_tp_device_supports_op(ggml_backend_dev_t dev, const str
25752576
return true;
25762577
}
25772578

2578-
return src0->ne[1] >= 1024;
2579+
// using something too small reduces performance due to additional rejoins.
2580+
// return src0->ne[1] >= 2048;
2581+
return src0->ne[1] >= 4096;
25792582
return src0->ne[1] >= 8192;
25802583
}
25812584

0 commit comments

Comments
 (0)