|
64 | 64 | bool use_fusion; |
65 | 65 | bool use_concurrency; |
66 | 66 | bool use_shared_buffers; |
| 67 | + bool use_graph_optimize; |
67 | 68 |
|
68 | 69 | int debug_graph; |
69 | 70 | int debug_fusion; |
|
88 | 89 | /*.use_fusion =*/ true, |
89 | 90 | /*.use_concurrency =*/ true, |
90 | 91 | /*.use_shared_buffers =*/ true, |
| 92 | + /*.use_graph_optimize =*/ true, |
91 | 93 | /*.debug_graph =*/ 0, |
92 | 94 | /*.debug_fusion =*/ 0, |
93 | 95 | /*.fuse_cnt =*/ { 0 }, |
|
149 | 151 | ctx->use_shared_buffers = false; |
150 | 152 | } |
151 | 153 |
|
| 154 | + ctx->use_graph_optimize = true; |
| 155 | + |
| 156 | + if (getenv("GGML_METAL_GRAPH_OPTIMIZE_DISABLE") != NULL) { |
| 157 | + ctx->use_graph_optimize = false; |
| 158 | + } |
| 159 | + |
152 | 160 | memset(ctx->fuse_cnt, 0, sizeof(ctx->fuse_cnt)); |
153 | 161 |
|
154 | 162 | ctx->max_size = ctx->mtl_device.maxBufferLength; |
@@ -1105,6 +1113,7 @@ @implementation GGMLMetalClass |
1105 | 1113 | GGML_LOG_INFO("%s: use fusion = %s\n", __func__, ctx_dev->use_fusion ? "true" : "false"); |
1106 | 1114 | GGML_LOG_INFO("%s: use concurrency = %s\n", __func__, ctx_dev->use_concurrency ? "true" : "false"); |
1107 | 1115 | GGML_LOG_INFO("%s: use shared buffers = %s\n", __func__, ctx_dev->use_shared_buffers ? "true" : "false"); |
| 1116 | + GGML_LOG_INFO("%s: use graph optimize = %s\n", __func__, ctx_dev->use_graph_optimize ? "true" : "false"); |
1108 | 1117 | GGML_LOG_INFO("%s: hasUnifiedMemory = %s\n", __func__, ctx_dev->mtl_device.hasUnifiedMemory ? "true" : "false"); |
1109 | 1118 |
|
1110 | 1119 | ctx->capture_next_compute = false; |
@@ -6726,11 +6735,13 @@ static enum ggml_status ggml_backend_metal_graph_compute(ggml_backend_t backend, |
6726 | 6735 | } |
6727 | 6736 |
|
// Graph-optimize entry point for the Metal backend.
//
// In the added ('+') rows, the device context is read from backend->device->context
// and ggml_metal_graph_optimize(cgraph) is called only when that context's
// use_graph_optimize flag is true; otherwise cgraph is left untouched.
// In the removed ('-') rows, backend was unused and the optimize call was unconditional.
//
// NOTE(review): use_graph_optimize appears to default to true and to be cleared when the
// GGML_METAL_GRAPH_OPTIMIZE_DISABLE environment variable is set (see the init code
// earlier in this file) - confirm that code writes the same context object read here.
6728 | 6737 | static void ggml_backend_metal_graph_optimize(ggml_backend_t backend, struct ggml_cgraph * cgraph) { |
6729 | | - GGML_UNUSED(backend); |
| 6738 | + struct ggml_backend_metal_device_context * ctx_dev = backend->device->context; |
6730 | 6739 |
|
6731 | 6740 | //const int64_t t_start = ggml_time_us(); |
6732 | 6741 |
|
6733 | | - ggml_metal_graph_optimize(cgraph); |
// Opt-out gate: skip the optimization pass entirely when the flag is false.
| 6742 | + if (ctx_dev->use_graph_optimize) { |
| 6743 | + ggml_metal_graph_optimize(cgraph); |
| 6744 | + } |
6734 | 6745 |
|
6735 | 6746 | //printf("%s: initial graph optimize took %.3f ms\n", __func__, (ggml_time_us() - t_start) / 1000.0); |
6736 | 6747 | } |
|
0 commit comments