Skip to content

Commit ad60149

Browse files
author
zhouwg
committed
tests: a simple backend ut
1 parent f777a73 commit ad60149

File tree

2 files changed

+329
-0
lines changed

2 files changed

+329
-0
lines changed

tests/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ llama_target_and_test(test-chat-template.cpp)
137137
# llama_target_and_test(test-opt.cpp) # SLOW
138138
llama_target_and_test(test-gguf.cpp)
139139
llama_target_and_test(test-backend-ops.cpp)
140+
llama_target_and_test(simple-backend-ut.cpp)
140141

141142
llama_target_and_test(test-model-load-cancel.cpp LABEL "model")
142143
llama_target_and_test(test-autorelease.cpp LABEL "model")

tests/simple-backend-ut.cpp

Lines changed: 328 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,328 @@
1+
/*
2+
* Copyright (c) 2023-2024 The ggml authors
3+
*
4+
* Permission is hereby granted, free of charge, to any person obtaining a copy
5+
* of this software and associated documentation files (the "Software"), to
6+
* deal in the Software without restriction, including without limitation the
7+
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
8+
* sell copies of the Software, and to permit persons to whom the Software is
9+
* furnished to do so, subject to the following conditions:
10+
*
11+
* The above copyright notice and this permission notice shall be included in
12+
* all copies or substantial portions of the Software.
13+
*
14+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19+
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
20+
* IN THE SOFTWARE.
21+
*/
22+
#include <stdio.h>
23+
#include <stdlib.h>
24+
#include <stdint.h>
25+
#include <string.h>
26+
#include <stddef.h>
27+
#include <unistd.h>
28+
#include <inttypes.h>
29+
#include <math.h>
30+
#include <time.h>
31+
#include <unistd.h>
32+
#include <dlfcn.h>
33+
#include <fcntl.h>
34+
#include <sys/stat.h>
35+
#include <limits.h>
36+
#include <signal.h>
37+
#include <fcntl.h>
38+
#include <sys/types.h>
39+
40+
#include <string>
41+
#include <vector>
42+
#include <thread>
43+
#include <mutex>
44+
#include <map>
45+
#include <set>
46+
#include <tuple>
47+
#include <queue>
48+
#include <fstream>
49+
#include <iostream>
50+
#include <iomanip>
51+
#include <sstream>
52+
#include <chrono>
53+
#include <memory>
54+
#include <regex>
55+
#include <random>
56+
#include <functional>
57+
#include <unordered_map>
58+
#include <condition_variable>
59+
#include <cassert>
60+
#include <unordered_set>
61+
#include <utility>
62+
63+
#include "ggml.h"
64+
#include "ggml-cpu.h"
65+
#include "ggml-alloc.h"
66+
#include "ggml-backend.h"
67+
68+
#define LOG_BUF_LEN 4096
69+
#define TENSOR_DUMP(tensor) tensor_dump(tensor, #tensor)
70+
71+
72+
static bool ggml_graph_compute_helper(
73+
struct ggml_backend * backend,
74+
struct ggml_cgraph * graph,
75+
std::vector<uint8_t> & buf,
76+
int n_threads,
77+
ggml_abort_callback abort_callback,
78+
void * abort_callback_data) {
79+
struct ggml_cplan plan = ggml_graph_plan(graph, n_threads, NULL);
80+
81+
plan.abort_callback = abort_callback;
82+
plan.abort_callback_data = abort_callback_data;
83+
84+
if (plan.work_size > 0) {
85+
buf.resize(plan.work_size);
86+
plan.work_data = buf.data();
87+
}
88+
89+
if (nullptr != backend)
90+
return ggml_backend_graph_compute(backend, graph) == GGML_STATUS_SUCCESS;
91+
else
92+
return ggml_graph_compute(graph, &plan);
93+
}
94+
95+
96+
static void tensor_dump_elements(const ggml_tensor * tensor) {
97+
float value = 0;
98+
std::ostringstream tmposs;
99+
if (tensor->type == GGML_TYPE_F32) {
100+
for (int h = 0; h < tensor->ne[3]; h++) {
101+
for (int i = 0; i < tensor->ne[2]; i++) {
102+
for (int j = 0; j < tensor->ne[1]; j++) {
103+
for (int k = 0; k < tensor->ne[0]; k++) {
104+
value = ((float *) tensor->data)[h * tensor->ne[2] + i * tensor->ne[1] +
105+
j * tensor->ne[0] + k];
106+
tmposs << std::setw(8) << std::fixed << std::setprecision(2) << value
107+
<< " ";
108+
}
109+
if (strlen(tmposs.str().c_str()) <= (LOG_BUF_LEN - 96)) {
110+
printf("%s\n", tmposs.str().c_str());
111+
}
112+
tmposs.clear();
113+
tmposs.str("");
114+
}
115+
}
116+
}
117+
}
118+
119+
printf("\n");
120+
}
121+
122+
123+
// Dump a tensor: header line with the caller-supplied name and the tensor's
// own name, then type/shape/stride metadata, then the element values.
static void tensor_dump(const ggml_tensor * tensor, const char * name) {
    printf("dump ggml tensor %s(%s)\n", name, tensor->name);

    const int64_t * ne = tensor->ne;
    const size_t  * nb = tensor->nb;
    printf("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi)\n",
           name,
           tensor->type, ggml_type_name(tensor->type),
           ne[0], ne[1], ne[2],
           nb[0], nb[1], nb[2]);

    tensor_dump_elements(tensor);
    printf("\n");
}
134+
135+
136+
static uint32_t get_tensor_rank(const ggml_tensor * tensor) {
137+
uint32_t rank = 0;
138+
for (int i = 0; i < GGML_MAX_DIMS; i++) {
139+
if ((0 != tensor->ne[i]) && (1 != tensor->ne[i])) {
140+
rank++;
141+
}
142+
}
143+
return rank;
144+
}
145+
146+
147+
static uint32_t get_tensor_data_size(const ggml_tensor * tensor) {
148+
size_t data_size = ggml_row_size(tensor->type, tensor->ne[0]);
149+
size_t n_dims = get_tensor_rank(tensor);
150+
for (size_t i = 1; i < n_dims; i++) {
151+
data_size *= tensor->ne[i];
152+
}
153+
printf("get_tensor_data_size %ld", data_size);
154+
printf("ggml_nbytes(tensor) %ld", ggml_nbytes(tensor));
155+
156+
return ggml_nbytes(tensor);
157+
}
158+
159+
160+
// Print the command-line usage for this unit test.
static void show_usage() {
    printf(" "
           "\nUsage: simple-backend-ut [options]\n"
           "\n"
           "Options:\n"
           " -t GGML_OP_ADD / GGML_OP_MUL / GGML_OP_MULMAT\n"
           " ?/h print usage information\n\n");
}
169+
170+
171+
int main(int argc, char * argv[]) {
172+
size_t ctx_size = 0;
173+
int sizey = 4;
174+
int sizex = 4;
175+
int num_threads = 4;
176+
int n_ggml_op_type = GGML_OP_ADD;
177+
178+
struct ggml_context * ctx = nullptr;
179+
struct ggml_cgraph * gf = nullptr;
180+
struct ggml_tensor * src0 = nullptr;
181+
struct ggml_tensor * src1 = nullptr;
182+
struct ggml_tensor * dst = nullptr;
183+
ggml_backend_t backend = nullptr;
184+
ggml_backend_buffer_t buffer= nullptr;
185+
ggml_type qtype = GGML_TYPE_F32;
186+
std::vector<uint8_t> work_buffer;
187+
188+
for (int i = 1; i < argc; i++) {
189+
if (0 == strcmp(argv[i], "-t")) {
190+
if (i + 1 < argc) {
191+
if (0 == memcmp(argv[i + 1], "GGML_OP_ADD", 11)) {
192+
n_ggml_op_type = GGML_OP_ADD;
193+
} else if (0 == memcmp(argv[i + 1], "GGML_OP_MUL_MAT", 15)) {
194+
n_ggml_op_type = GGML_OP_MUL_MAT;
195+
} else if (0 == memcmp(argv[i + 1], "GGML_OP_MUL", 11)) {
196+
n_ggml_op_type = GGML_OP_MUL;
197+
} else {
198+
show_usage();
199+
return 1;
200+
}
201+
i++;
202+
}
203+
} else {
204+
show_usage();
205+
return 1;
206+
}
207+
}
208+
209+
printf("Testing %zu devices\n\n", ggml_backend_dev_count());
210+
for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
211+
ggml_backend_dev_t dev = ggml_backend_dev_get(i);
212+
213+
printf("Backend %zu/%zu: %s\n", i + 1, ggml_backend_dev_count(),
214+
ggml_backend_dev_name(dev));
215+
216+
if (ggml_backend_dev_type(dev) == GGML_BACKEND_DEVICE_TYPE_CPU) {
217+
printf(" Skipping CPU backend\n");
218+
continue;
219+
}
220+
221+
backend = ggml_backend_dev_init(dev, reinterpret_cast<const char *>(i));
222+
GGML_ASSERT(backend != NULL);
223+
if (backend != nullptr) {
224+
printf("%s: initialize %s backend\n", __func__, ggml_backend_dev_name(dev));
225+
}
226+
227+
printf(" Device description: %s\n", ggml_backend_dev_description(dev));
228+
size_t free, total;
229+
ggml_backend_dev_memory(dev, &free, &total);
230+
printf(" Device memory: %zu MB (%zu MB free)\n", total / 1024 / 1024, free / 1024 / 1024);
231+
printf("\n");
232+
}
233+
234+
ggml_backend_t backend_cpu = nullptr;
235+
backend_cpu = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, nullptr);
236+
if (nullptr == backend_cpu) {
237+
printf("failed to initialize cpu backend\n");
238+
exit(1);
239+
} else {
240+
printf("succeed to initialize cpu backend\n");
241+
}
242+
243+
printf("ggml op:%d(%s)", n_ggml_op_type, ggml_op_name((enum ggml_op) n_ggml_op_type));
244+
245+
ctx_size += 1024 * 1024 * 32;
246+
printf("allocating Memory of size %zi bytes, %zi MB\n", ctx_size,
247+
(ctx_size / 1024 / 1024));
248+
249+
struct ggml_init_params params = {
250+
/*.mem_size =*/ ctx_size,
251+
/*.mem_buffer =*/ NULL,
252+
/* no_alloc =*/ 0
253+
};
254+
255+
ctx = ggml_init(params);
256+
if (!ctx) {
257+
printf("ggml_init() failed\n");
258+
return 2;
259+
}
260+
261+
if (qtype != GGML_TYPE_F32) {
262+
sizex = ggml_blck_size(qtype);
263+
}
264+
265+
printf("creating new tensors\n");
266+
src0 = ggml_new_tensor_2d(ctx, qtype, sizey, sizex);
267+
src1 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, sizey, sizex);
268+
269+
ggml_set_input(src0);
270+
ggml_set_input(src1);
271+
switch (n_ggml_op_type) {
272+
case GGML_OP_ADD:
273+
dst = ggml_add(ctx, src0, src1);
274+
break;
275+
case GGML_OP_MUL:
276+
dst = ggml_mul(ctx, src0, src1);
277+
break;
278+
case GGML_OP_MUL_MAT:
279+
dst = ggml_mul_mat(ctx, src0, src1);
280+
break;
281+
default:
282+
printf("ggml op %d(%s) not supported", n_ggml_op_type,
283+
ggml_op_name((enum ggml_op) n_ggml_op_type));
284+
ggml_free(ctx);
285+
ggml_backend_free(backend);
286+
ggml_backend_free(backend_cpu);
287+
return 3;
288+
}
289+
290+
ggml_set_output(dst);
291+
292+
printf("creating compute graph\n");
293+
gf = ggml_new_graph(ctx);
294+
ggml_build_forward_expand(gf, dst);
295+
296+
ggml_set_f32(src0, 1.0f);
297+
ggml_set_f32(src1, 2.0f);
298+
ggml_set_f32(dst, 0.0f);
299+
300+
ggml_graph_compute_helper(backend, gf, work_buffer, num_threads, nullptr, nullptr);
301+
if (get_tensor_data_size(dst) < (100 * 100)) {
302+
printf("dump result tensors:\n");
303+
TENSOR_DUMP(src0);
304+
TENSOR_DUMP(src1);
305+
TENSOR_DUMP(dst);
306+
} else {
307+
printf("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi, %5zi)\n",
308+
src0->name,
309+
src0->type, ggml_type_name(src0->type), src0->ne[0], src0->ne[1], src0->ne[2], src0->ne[3],
310+
src0->nb[0], src0->nb[1], src0->nb[2], src0->nb[3]);
311+
printf("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi, %5zi)\n",
312+
src1->name,
313+
src1->type, ggml_type_name(src1->type), src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3],
314+
src1->nb[0], src1->nb[1], src1->nb[2], src1->nb[3]);
315+
printf("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi, %5zi)\n",
316+
dst->name,
317+
dst->type, ggml_type_name(dst->type), dst->ne[0], dst->ne[1], dst->ne[2], dst->ne[3], dst->nb[0],
318+
dst->nb[1], dst->nb[2], dst->nb[3]);
319+
}
320+
TENSOR_DUMP(dst);
321+
322+
ggml_free(ctx);
323+
ggml_backend_buffer_free(buffer);
324+
ggml_backend_free(backend);
325+
ggml_backend_free(backend_cpu);
326+
327+
return 0;
328+
}

0 commit comments

Comments
 (0)