
Commit ad4b2d7

Add simple test to choose the right datatype based on the supported OUT_PROD datatype implementation.
Signed-off-by: Marcus Edel <[email protected]>
1 parent 7b0b9af commit ad4b2d7
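
Summary of the change: instead of always forcing the training KV cache to f32, both training examples now probe the active GPU backend devices with a tiny dummy GGML_OP_OUT_PROD node (f16 src0, f32 src1) via ggml_backend_dev_supports_op, and only fall back to f32 k/v cache types when a device reports that it cannot run the op. The sketch below condenses that probe into a single per-device check; the helper name device_supports_out_prod_f16 and the standalone framing are illustrative only (the commit's actual helper is training_supports_out_prod_f16, shown in the diffs), and the small 4x3, k = 2 shape is arbitrary.

    #include "ggml.h"
    #include "ggml-backend.h"

    // Condensed, per-device version of the probe added in this commit
    // (hypothetical name; the commit's helper loops over params.devices).
    // Only the tensor types, shapes/strides, op and sources matter for the
    // support query; no data is allocated or computed.
    static bool device_supports_out_prod_f16(ggml_backend_dev_t dev) {
        const int64_t ne0 = 4, ne1 = 3, k = 2;   // arbitrary small shape

        struct ggml_tensor src0 = {};            // f16 operand
        struct ggml_tensor src1 = {};            // f32 operand
        struct ggml_tensor dst  = {};            // f32 result node carrying the op

        src0.type = GGML_TYPE_F16;
        src1.type = GGML_TYPE_F32;
        dst.type  = GGML_TYPE_F32;

        src0.ne[0] = ne0; src0.ne[1] = k;   src0.ne[2] = 1; src0.ne[3] = 1;
        src1.ne[0] = ne1; src1.ne[1] = k;   src1.ne[2] = 1; src1.ne[3] = 1;
        dst.ne[0]  = ne0; dst.ne[1]  = ne1; dst.ne[2]  = 1; dst.ne[3]  = 1;

        src0.nb[0] = sizeof(ggml_fp16_t); src0.nb[1] = src0.nb[0] * ne0;
        src0.nb[2] = src0.nb[1] * k;      src0.nb[3] = src0.nb[2];

        src1.nb[0] = sizeof(float);       src1.nb[1] = src1.nb[0] * ne1;
        src1.nb[2] = src1.nb[1] * k;      src1.nb[3] = src1.nb[2];

        dst.nb[0]  = sizeof(float);       dst.nb[1]  = dst.nb[0] * ne0;
        dst.nb[2]  = dst.nb[1] * ne1;     dst.nb[3]  = dst.nb[2];

        dst.op     = GGML_OP_OUT_PROD;   // the op whose f16 support is being tested
        dst.src[0] = &src0;
        dst.src[1] = &src1;

        return ggml_backend_dev_supports_op(dev, &dst);
    }

In main(), the examples call training_supports_out_prod_f16(params) and gate the existing cache-type downgrade on its result, as the hunks below show.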

File tree

3 files changed: +154 -18 lines changed


examples/training/finetune-lora.cpp

Lines changed: 77 additions & 7 deletions
@@ -2,6 +2,7 @@
 #include "common.h"
 #include "log.h"
 #include "llama.h"
+#include "ggml-backend.h"
 
 #include <cmath>
 #include <cstdio>
@@ -54,6 +55,72 @@ static uint32_t parse_lora_modules(const std::string& modules_str) {
     return target_modules;
 }
 
+static bool training_supports_out_prod_f16(const common_params & params) {
+    std::vector<ggml_backend_dev_t> devices;
+
+    if (!params.devices.empty()) {
+        devices.assign(params.devices.begin(), params.devices.end());
+    } else {
+        ggml_backend_dev_t gpu = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_GPU);
+        if (gpu) {
+            devices.push_back(gpu);
+        }
+    }
+
+    if (devices.empty()) {
+        return true;
+    }
+
+    constexpr int64_t ne0 = 4;
+    constexpr int64_t ne1 = 3;
+    constexpr int64_t k   = 2;
+
+    struct ggml_tensor src0 = {};
+    struct ggml_tensor src1 = {};
+    struct ggml_tensor dst  = {};
+
+    src0.type = GGML_TYPE_F16;
+    src1.type = GGML_TYPE_F32;
+    dst.type  = GGML_TYPE_F32;
+
+    src0.ne[0] = ne0; src0.ne[1] = k;   src0.ne[2] = 1; src0.ne[3] = 1;
+    src1.ne[0] = ne1; src1.ne[1] = k;   src1.ne[2] = 1; src1.ne[3] = 1;
+    dst.ne [0] = ne0; dst.ne [1] = ne1; dst.ne [2] = 1; dst.ne [3] = 1;
+
+    src0.nb[0] = sizeof(ggml_fp16_t);
+    src0.nb[1] = src0.nb[0] * ne0;
+    src0.nb[2] = src0.nb[1] * k;
+    src0.nb[3] = src0.nb[2] * 1;
+
+    src1.nb[0] = sizeof(float);
+    src1.nb[1] = src1.nb[0] * ne1;
+    src1.nb[2] = src1.nb[1] * k;
+    src1.nb[3] = src1.nb[2] * 1;
+
+    dst.nb[0] = sizeof(float);
+    dst.nb[1] = dst.nb[0] * ne0;
+    dst.nb[2] = dst.nb[1] * ne1;
+    dst.nb[3] = dst.nb[2] * 1;
+
+    dst.op     = GGML_OP_OUT_PROD;
+    dst.src[0] = &src0;
+    dst.src[1] = &src1;
+
+    for (ggml_backend_dev_t dev : devices) {
+        if (dev == nullptr) {
+            continue;
+        }
+        if (ggml_backend_dev_type(dev) != GGML_BACKEND_DEVICE_TYPE_GPU) {
+            continue;
+        }
+        if (!ggml_backend_dev_supports_op(dev, &dst)) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
 static void print_lora_usage() {
     printf("\nLoRA Fine-tuning Parameters:\n");
     printf("  --lora-rank N    LoRA rank (default: 8, range: 1-512)\n");
@@ -124,13 +191,16 @@ int main(int argc, char ** argv) {
         LOG_INF("%s: force disabling memory mapping because it would result in-read-only pointers to the weights\n", __func__);
         params.use_mmap = false;
     }
-    if (params.cache_type_k != GGML_TYPE_F32) {
-        LOG_INF("%s: force changing k cache type to f32 due to a lack of f16 support for OUT_PROD\n", __func__);
-        params.cache_type_k = GGML_TYPE_F32;
-    }
-    if (params.cache_type_v != GGML_TYPE_F32) {
-        LOG_INF("%s: force changing v cache type to f32 due to a lack of f16 support for OUT_PROD\n", __func__);
-        params.cache_type_v = GGML_TYPE_F32;
+    const bool supports_out_prod_f16 = training_supports_out_prod_f16(params);
+    if (!supports_out_prod_f16) {
+        if (params.cache_type_k != GGML_TYPE_F32) {
+            LOG_INF("%s: force changing k cache type to f32 due to a lack of f16 support for OUT_PROD\n", __func__);
+            params.cache_type_k = GGML_TYPE_F32;
+        }
+        if (params.cache_type_v != GGML_TYPE_F32) {
+            LOG_INF("%s: force changing v cache type to f32 due to a lack of f16 support for OUT_PROD\n", __func__);
+            params.cache_type_v = GGML_TYPE_F32;
+        }
     }
 
     common_init();

examples/training/finetune.cpp

Lines changed: 77 additions & 7 deletions
@@ -2,6 +2,7 @@
 #include "common.h"
 #include "log.h"
 #include "llama.h"
+#include "ggml-backend.h"
 
 #include <cmath>
 #include <cstdio>
@@ -13,6 +14,72 @@
 #pragma warning(disable: 4244 4267) // possible loss of data
 #endif
 
+static bool training_supports_out_prod_f16(const common_params & params) {
+    std::vector<ggml_backend_dev_t> devices;
+
+    if (!params.devices.empty()) {
+        devices.assign(params.devices.begin(), params.devices.end());
+    } else {
+        ggml_backend_dev_t gpu = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_GPU);
+        if (gpu) {
+            devices.push_back(gpu);
+        }
+    }
+
+    if (devices.empty()) {
+        return true;
+    }
+
+    constexpr int64_t ne0 = 4;
+    constexpr int64_t ne1 = 3;
+    constexpr int64_t k   = 2;
+
+    struct ggml_tensor src0 = {};
+    struct ggml_tensor src1 = {};
+    struct ggml_tensor dst  = {};
+
+    src0.type = GGML_TYPE_F16;
+    src1.type = GGML_TYPE_F32;
+    dst.type  = GGML_TYPE_F32;
+
+    src0.ne[0] = ne0; src0.ne[1] = k;   src0.ne[2] = 1; src0.ne[3] = 1;
+    src1.ne[0] = ne1; src1.ne[1] = k;   src1.ne[2] = 1; src1.ne[3] = 1;
+    dst.ne [0] = ne0; dst.ne [1] = ne1; dst.ne [2] = 1; dst.ne [3] = 1;
+
+    src0.nb[0] = sizeof(ggml_fp16_t);
+    src0.nb[1] = src0.nb[0] * ne0;
+    src0.nb[2] = src0.nb[1] * k;
+    src0.nb[3] = src0.nb[2] * 1;
+
+    src1.nb[0] = sizeof(float);
+    src1.nb[1] = src1.nb[0] * ne1;
+    src1.nb[2] = src1.nb[1] * k;
+    src1.nb[3] = src1.nb[2] * 1;
+
+    dst.nb[0] = sizeof(float);
+    dst.nb[1] = dst.nb[0] * ne0;
+    dst.nb[2] = dst.nb[1] * ne1;
+    dst.nb[3] = dst.nb[2] * 1;
+
+    dst.op     = GGML_OP_OUT_PROD;
+    dst.src[0] = &src0;
+    dst.src[1] = &src1;
+
+    for (ggml_backend_dev_t dev : devices) {
+        if (dev == nullptr) {
+            continue;
+        }
+        if (ggml_backend_dev_type(dev) != GGML_BACKEND_DEVICE_TYPE_GPU) {
+            continue;
+        }
+        if (!ggml_backend_dev_supports_op(dev, &dst)) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
 int main(int argc, char ** argv) {
     common_params params;
     params.escape = false;
@@ -26,13 +93,16 @@ int main(int argc, char ** argv) {
             __func__);
         params.use_mmap = false;
     }
-    if (params.cache_type_k != GGML_TYPE_F32) {
-        LOG_INF("%s: force changing k cache type to f32 due to a lack of f16 support for OUT_PROD\n", __func__);
-        params.cache_type_k = GGML_TYPE_F32;
-    }
-    if (params.cache_type_v != GGML_TYPE_F32) {
-        LOG_INF("%s: force changing v cache type to f32 due to a lack of f16 support for OUT_PROD\n", __func__);
-        params.cache_type_v = GGML_TYPE_F32;
+    const bool supports_out_prod_f16 = training_supports_out_prod_f16(params);
+    if (!supports_out_prod_f16) {
+        if (params.cache_type_k != GGML_TYPE_F32) {
+            LOG_INF("%s: force changing k cache type to f32 due to a lack of f16 support for OUT_PROD\n", __func__);
+            params.cache_type_k = GGML_TYPE_F32;
+        }
+        if (params.cache_type_v != GGML_TYPE_F32) {
+            LOG_INF("%s: force changing v cache type to f32 due to a lack of f16 support for OUT_PROD\n", __func__);
+            params.cache_type_v = GGML_TYPE_F32;
+        }
     }
 
     common_init();

src/CMakeLists.txt

Lines changed: 0 additions & 4 deletions
@@ -53,8 +53,4 @@ if (BUILD_SHARED_LIBS)
     set_target_properties(llama PROPERTIES POSITION_INDEPENDENT_CODE ON)
     target_compile_definitions(llama PRIVATE LLAMA_BUILD)
    target_compile_definitions(llama PUBLIC LLAMA_SHARED)
-    if (ANDROID OR (UNIX AND CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
-        message(STATUS "Linking llama with c++_shared for Android/Termux compatibility")
-        target_link_libraries(llama PUBLIC c++_shared)
-    endif()
 endif()
