|  | 
#include "ggml.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"
#include "ggml-metal.h"

#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <thread>
#include <vector>
|  | 9 | + | 
|  | 10 | +int main(int argc, char ** argv) { | 
|  | 11 | +    int n_op = 1024; | 
|  | 12 | +    int n_iter = 128; | 
|  | 13 | + | 
|  | 14 | +    if (argc > 1) { | 
|  | 15 | +        n_op = std::atoi(argv[1]); | 
|  | 16 | +    } | 
|  | 17 | + | 
|  | 18 | +    if (argc > 2) { | 
|  | 19 | +        n_iter = std::atoi(argv[2]); | 
|  | 20 | +    } | 
|  | 21 | + | 
|  | 22 | +    printf("%s: n_op = %d, n_iter = %d\n", __func__, n_op, n_iter); | 
|  | 23 | + | 
|  | 24 | +    const int ne00 = 8; | 
|  | 25 | +    const int ne01 = 8; | 
|  | 26 | +    const int ne11 = 8; | 
|  | 27 | + | 
|  | 28 | +    std::vector<float> data0(ne00*ne01, 1.0f); | 
|  | 29 | +    std::vector<float> data1(ne00*ne01, 1.0f/ne00); | 
|  | 30 | + | 
|  | 31 | +    ggml_backend_t backend = ggml_backend_metal_init(); | 
|  | 32 | +    if (!backend) { | 
|  | 33 | +        fprintf(stderr, "%s: ggml_backend_metal_init() failed\n", __func__); | 
|  | 34 | +        return 1; | 
|  | 35 | +    } | 
|  | 36 | + | 
|  | 37 | +    const size_t ctx_size = 2 * ggml_tensor_overhead(); | 
|  | 38 | + | 
|  | 39 | +    struct ggml_init_params params = { | 
|  | 40 | +        /*.mem_size   =*/ ctx_size, | 
|  | 41 | +        /*.mem_buffer =*/ NULL, | 
|  | 42 | +        /*.no_alloc   =*/ true, | 
|  | 43 | +    }; | 
|  | 44 | +    struct ggml_context * ctx = ggml_init(params); | 
|  | 45 | + | 
|  | 46 | +    struct ggml_tensor * t0 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, ne00, ne01); | 
|  | 47 | +    struct ggml_tensor * t1 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, ne00, ne11); | 
|  | 48 | + | 
|  | 49 | +    ggml_backend_buffer_t buffer = ggml_backend_alloc_ctx_tensors(ctx, backend); | 
|  | 50 | + | 
|  | 51 | +    ggml_backend_tensor_set(t0, data0.data(), 0, ggml_nbytes(t0)); | 
|  | 52 | +    ggml_backend_tensor_set(t1, data1.data(), 0, ggml_nbytes(t1)); | 
|  | 53 | + | 
|  | 54 | +    struct ggml_cgraph * gf = NULL; | 
|  | 55 | + | 
|  | 56 | +    struct ggml_context * ctx_cgraph = NULL; | 
|  | 57 | + | 
|  | 58 | +    // create a dummy compute graph: | 
|  | 59 | +    // | 
|  | 60 | +    // x = mul_mat(t0, t1) | 
|  | 61 | +    // x = x * 1.0f | 
|  | 62 | +    // x = mul_mat(x, t1) | 
|  | 63 | +    // x = x * 1.0f | 
|  | 64 | +    // ... repeat n_op times ... | 
|  | 65 | +    // | 
|  | 66 | +    { | 
|  | 67 | +        struct ggml_init_params params0 = { | 
|  | 68 | +            /*.mem_size   =*/ 4*n_op*ggml_tensor_overhead() + ggml_graph_overhead(), | 
|  | 69 | +            /*.mem_buffer =*/ NULL, | 
|  | 70 | +            /*.no_alloc   =*/ true, | 
|  | 71 | +        }; | 
|  | 72 | +        ctx_cgraph = ggml_init(params0); | 
|  | 73 | + | 
|  | 74 | +        gf = ggml_new_graph_custom(ctx_cgraph, 4*n_op, false); | 
|  | 75 | + | 
|  | 76 | +        struct ggml_tensor * cur = ggml_mul_mat(ctx_cgraph, t0, t1); | 
|  | 77 | +        cur = ggml_scale(ctx_cgraph, cur, 1.0f); | 
|  | 78 | + | 
|  | 79 | +        for (int i = 0; i < n_op - 1; i++) { | 
|  | 80 | +            cur = ggml_mul_mat(ctx_cgraph, cur, t1); | 
|  | 81 | +            cur = ggml_scale(ctx_cgraph, cur, 1.0f); | 
|  | 82 | +        } | 
|  | 83 | + | 
|  | 84 | +        ggml_build_forward_expand(gf, cur); | 
|  | 85 | +    } | 
|  | 86 | + | 
|  | 87 | +    printf("%s: graph nodes = %d\n", __func__, ggml_graph_n_nodes(gf)); | 
|  | 88 | + | 
|  | 89 | +    ggml_gallocr_t allocr = ggml_gallocr_new(ggml_backend_get_default_buffer_type(backend)); | 
|  | 90 | +    ggml_gallocr_alloc_graph(allocr, gf); | 
|  | 91 | + | 
|  | 92 | +    for (int n_thread = 1; n_thread < std::thread::hardware_concurrency(); n_thread++) { | 
|  | 93 | +        ggml_backend_metal_set_n_cb(backend, n_thread); | 
|  | 94 | + | 
|  | 95 | +        // warm-up | 
|  | 96 | +        ggml_backend_graph_compute(backend, gf); | 
|  | 97 | + | 
|  | 98 | +        const int64_t t_start = ggml_time_us(); | 
|  | 99 | + | 
|  | 100 | +        for (int iter = 0; iter < n_iter; iter++) { | 
|  | 101 | +            ggml_backend_graph_compute(backend, gf); | 
|  | 102 | +        } | 
|  | 103 | + | 
|  | 104 | +        const int64_t t_end = ggml_time_us(); | 
|  | 105 | + | 
|  | 106 | +        // actual trace | 
|  | 107 | +        if (n_thread == 4) { | 
|  | 108 | +            ggml_backend_metal_capture_next_compute(backend); | 
|  | 109 | +            ggml_backend_graph_compute(backend, gf); | 
|  | 110 | +            ggml_backend_metal_capture_next_compute(backend); | 
|  | 111 | +            ggml_backend_graph_compute(backend, gf); | 
|  | 112 | +            ggml_backend_metal_capture_next_compute(backend); | 
|  | 113 | +            ggml_backend_graph_compute(backend, gf); | 
|  | 114 | + | 
|  | 115 | +            printf("%s: trace dumped\n", __func__); | 
|  | 116 | +        } | 
|  | 117 | + | 
|  | 118 | +        printf("%s: n_thread = %d, time = %f ms\n", __func__, n_thread, (t_end - t_start) / 1000.0 / n_iter); | 
|  | 119 | +    } | 
|  | 120 | + | 
|  | 121 | +    { | 
|  | 122 | +        struct ggml_tensor * res = ggml_graph_node(gf, -1); | 
|  | 123 | + | 
|  | 124 | +        std::vector<float> data(res->ne[0] * res->ne[1], 0.0f); | 
|  | 125 | + | 
|  | 126 | +        ggml_backend_tensor_get(res, data.data(), 0, ggml_nbytes(res)); | 
|  | 127 | + | 
|  | 128 | +        for (int i1 = 0; i1 < res->ne[1]; i1++) { | 
|  | 129 | +            for (int i0 = 0; i0 < res->ne[0]; i0++) { | 
|  | 130 | +                printf("%f ", data[i1*res->ne[0] + i0]); | 
|  | 131 | +            } | 
|  | 132 | +            printf("\n"); | 
|  | 133 | +        } | 
|  | 134 | +    } | 
|  | 135 | + | 
|  | 136 | +    // 11. Free memory and exit | 
|  | 137 | +    ggml_free(ctx_cgraph); | 
|  | 138 | +    ggml_gallocr_free(allocr); | 
|  | 139 | +    ggml_free(ctx); | 
|  | 140 | +    ggml_backend_buffer_free(buffer); | 
|  | 141 | +    ggml_backend_free(backend); | 
|  | 142 | +    return 0; | 
|  | 143 | +} | 
|  | 144 | + | 
0 commit comments