Skip to content

Commit 0b7d564

Browse files
authored
Merge branch 'ggml-org:master' into master
2 parents 811bef9 + b2ba81d commit 0b7d564

File tree

25 files changed

+1717
-489
lines changed

25 files changed

+1717
-489
lines changed

.github/workflows/build.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,7 @@ jobs:
207207
- name: ccache
208208
uses: ggml-org/[email protected]
209209
with:
210-
key: ubuntu-cpu-cmake
210+
key: ubuntu-cpu-cmake-${{ matrix.build }}
211211
evict-old-files: 1d
212212

213213
- name: Build Dependencies

.github/workflows/release.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ jobs:
150150
- name: ccache
151151
uses: ggml-org/[email protected]
152152
with:
153-
key: ubuntu-cpu-cmake
153+
key: ubuntu-cpu-cmake-${{ matrix.build }}
154154
evict-old-files: 1d
155155

156156
- name: Dependencies

common/arg.cpp

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,13 @@
5454
#endif
5555
#define LLAMA_MAX_URL_LENGTH 2084 // Maximum URL Length in Chrome: 2083
5656

57+
// isatty
58+
#if defined(_WIN32)
59+
#include <io.h>
60+
#else
61+
#include <unistd.h>
62+
#endif
63+
5764
using json = nlohmann::ordered_json;
5865

5966
std::initializer_list<enum llama_example> mmproj_examples = {
@@ -100,6 +107,14 @@ static void write_file(const std::string & fname, const std::string & content) {
100107
}
101108
}
102109

110+
static bool is_output_a_tty() {
111+
#if defined(_WIN32)
112+
return _isatty(_fileno(stdout));
113+
#else
114+
return isatty(1);
115+
#endif
116+
}
117+
103118
common_arg & common_arg::set_examples(std::initializer_list<enum llama_example> examples) {
104119
this->examples = std::move(examples);
105120
return *this;
@@ -652,7 +667,11 @@ static std::string show_masked_url(const common_url & parts) {
652667
return parts.scheme + "://" + (parts.user.empty() ? "" : "****:****@") + parts.host + parts.path;
653668
}
654669

655-
static void print_progress(size_t current, size_t total) { // TODO isatty
670+
static void print_progress(size_t current, size_t total) {
671+
if (!is_output_a_tty()) {
672+
return;
673+
}
674+
656675
if (!total) {
657676
return;
658677
}

ggml/include/ggml.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,8 @@
237237
#define GGML_EXIT_SUCCESS 0
238238
#define GGML_EXIT_ABORTED 1
239239

240+
// TODO: convert to enum https://github.com/ggml-org/llama.cpp/pull/16187#discussion_r2388538726
241+
#define GGML_ROPE_TYPE_NORMAL 0
240242
#define GGML_ROPE_TYPE_NEOX 2
241243
#define GGML_ROPE_TYPE_MROPE 8
242244
#define GGML_ROPE_TYPE_VISION 24

ggml/src/ggml-opencl/ggml-opencl.cpp

Lines changed: 75 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -2889,10 +2889,7 @@ static bool ggml_opencl_supports_op(ggml_backend_dev_t dev, const struct ggml_te
28892889
case GGML_OP_REPEAT:
28902890
return op->src[0]->type == GGML_TYPE_F32 && op->type == GGML_TYPE_F32; // Assuming F32 for now, can be expanded
28912891
case GGML_OP_PAD:
2892-
return op->src[0]->type == GGML_TYPE_F32 && op->type == GGML_TYPE_F32 &&
2893-
op->src[0]->ne[3] == 1 && op->ne[3] == 1 &&
2894-
(ggml_get_op_params_i32(op, 0) == 0) && (ggml_get_op_params_i32(op, 2) == 0) &&
2895-
(ggml_get_op_params_i32(op, 4) == 0) && (ggml_get_op_params_i32(op, 6) == 0);
2892+
return op->src[0]->type == GGML_TYPE_F32 && op->type == GGML_TYPE_F32;
28962893
case GGML_OP_UPSCALE:
28972894
return op->src[0]->type == GGML_TYPE_F32 && op->type == GGML_TYPE_F32;
28982895
case GGML_OP_CONV_2D:
@@ -4222,15 +4219,19 @@ static void ggml_cl_get_rows(ggml_backend_t backend, const ggml_tensor * src0, c
42224219
GGML_ASSERT(dst);
42234220
GGML_ASSERT(dst->extra);
42244221

4225-
const int ne00 = src0 ? src0->ne[0] : 0;
4226-
const cl_ulong nb01 = src0 ? src0->nb[1] : 0;
4227-
const cl_ulong nb02 = src0 ? src0->nb[2] : 0;
4228-
const int ne10 = src1 ? src1->ne[0] : 0;
4229-
const cl_ulong nb10 = src1 ? src1->nb[0] : 0;
4230-
const int ne11 = src1 ? src1->ne[1] : 0;
4231-
const cl_ulong nb11 = src1 ? src1->nb[1] : 0;
4232-
const cl_ulong nb1 = dst ? dst->nb[1] : 0;
4233-
const cl_ulong nb2 = dst ? dst->nb[2] : 0;
4222+
const int ne00 = src0->ne[0];
4223+
const cl_ulong nb01 = src0->nb[1];
4224+
const cl_ulong nb02 = src0->nb[2];
4225+
const cl_ulong nb03 = src0->nb[3];
4226+
const int ne10 = src1->ne[0];
4227+
const cl_ulong nb10 = src1->nb[0];
4228+
const int ne11 = src1->ne[1];
4229+
const int ne12 = src1->ne[2];
4230+
const cl_ulong nb11 = src1->nb[1];
4231+
const cl_ulong nb12 = src1->nb[2];
4232+
const cl_ulong nb1 = dst->nb[1];
4233+
const cl_ulong nb2 = dst->nb[2];
4234+
const cl_ulong nb3 = dst->nb[3];
42344235

42354236
ggml_backend_opencl_context *backend_ctx = (ggml_backend_opencl_context *)backend->context;
42364237

@@ -4267,14 +4268,17 @@ static void ggml_cl_get_rows(ggml_backend_t backend, const ggml_tensor * src0, c
42674268
CL_CHECK(clSetKernelArg(kernel, 6, sizeof(int), &ne00));
42684269
CL_CHECK(clSetKernelArg(kernel, 7, sizeof(cl_ulong), &nb01));
42694270
CL_CHECK(clSetKernelArg(kernel, 8, sizeof(cl_ulong), &nb02));
4270-
CL_CHECK(clSetKernelArg(kernel, 9, sizeof(int), &ne10));
4271-
CL_CHECK(clSetKernelArg(kernel, 10, sizeof(cl_ulong), &nb10));
4272-
CL_CHECK(clSetKernelArg(kernel, 11, sizeof(cl_ulong), &nb11));
4273-
CL_CHECK(clSetKernelArg(kernel, 12, sizeof(cl_ulong), &nb1));
4274-
CL_CHECK(clSetKernelArg(kernel, 13, sizeof(cl_ulong), &nb2));
4275-
4276-
size_t global_work_size[] = {(size_t)ne10, (size_t)ne11, 1};
4277-
size_t local_work_size[] = {1, 1, 1};
4271+
CL_CHECK(clSetKernelArg(kernel, 9, sizeof(cl_ulong), &nb03));
4272+
CL_CHECK(clSetKernelArg(kernel, 10, sizeof(int), &ne10));
4273+
CL_CHECK(clSetKernelArg(kernel, 11, sizeof(cl_ulong), &nb10));
4274+
CL_CHECK(clSetKernelArg(kernel, 12, sizeof(cl_ulong), &nb11));
4275+
CL_CHECK(clSetKernelArg(kernel, 13, sizeof(cl_ulong), &nb12));
4276+
CL_CHECK(clSetKernelArg(kernel, 14, sizeof(cl_ulong), &nb1));
4277+
CL_CHECK(clSetKernelArg(kernel, 15, sizeof(cl_ulong), &nb2));
4278+
CL_CHECK(clSetKernelArg(kernel, 16, sizeof(cl_ulong), &nb3));
4279+
4280+
size_t global_work_size[] = {(size_t)ne10*64, (size_t)ne11, (size_t)ne12};
4281+
size_t local_work_size[] = {64, 1, 1};
42784282

42794283
backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, local_work_size, dst);
42804284
}
@@ -5874,7 +5878,6 @@ static void ggml_cl_pad(ggml_backend_t backend, const ggml_tensor * src0, ggml_t
58745878
GGML_ASSERT(dst->extra);
58755879
GGML_ASSERT(src0->type == GGML_TYPE_F32);
58765880
GGML_ASSERT(dst->type == GGML_TYPE_F32);
5877-
GGML_ASSERT(src0->ne[3] == 1 && dst->ne[3] == 1);
58785881

58795882
ggml_backend_opencl_context *backend_ctx = (ggml_backend_opencl_context *)backend->context;
58805883

@@ -5892,28 +5895,67 @@ static void ggml_cl_pad(ggml_backend_t backend, const ggml_tensor * src0, ggml_t
58925895
const int s_ne0 = src0->ne[0];
58935896
const int s_ne1 = src0->ne[1];
58945897
const int s_ne2 = src0->ne[2];
5898+
const int s_ne3 = src0->ne[3];
5899+
5900+
const int s_nb0 = src0->nb[0];
5901+
const int s_nb1 = src0->nb[1];
5902+
const int s_nb2 = src0->nb[2];
5903+
const int s_nb3 = src0->nb[3];
58955904

58965905
const int d_ne0 = dst->ne[0];
58975906
const int d_ne1 = dst->ne[1];
58985907
const int d_ne2 = dst->ne[2];
5908+
const int d_ne3 = dst->ne[3];
5909+
5910+
const int d_nb0 = dst->nb[0];
5911+
const int d_nb1 = dst->nb[1];
5912+
const int d_nb2 = dst->nb[2];
5913+
const int d_nb3 = dst->nb[3];
5914+
5915+
const int lp0 = ((const int*)(dst->op_params))[0];
5916+
const int rp0 = ((const int*)(dst->op_params))[1];
5917+
const int lp1 = ((const int*)(dst->op_params))[2];
5918+
const int rp1 = ((const int*)(dst->op_params))[3];
5919+
const int lp2 = ((const int*)(dst->op_params))[4];
5920+
const int rp2 = ((const int*)(dst->op_params))[5];
5921+
const int lp3 = ((const int*)(dst->op_params))[6];
5922+
const int rp3 = ((const int*)(dst->op_params))[7];
58995923

59005924
cl_kernel kernel = backend_ctx->kernel_pad;
59015925

5902-
CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), &extra_src0->data_device));
5903-
CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_ulong), &off_src0));
5904-
CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), &extra_dst->data_device));
5905-
CL_CHECK(clSetKernelArg(kernel, 3, sizeof(cl_ulong), &off_dst));
5906-
CL_CHECK(clSetKernelArg(kernel, 4, sizeof(int), &s_ne0));
5907-
CL_CHECK(clSetKernelArg(kernel, 5, sizeof(int), &s_ne1));
5908-
CL_CHECK(clSetKernelArg(kernel, 6, sizeof(int), &s_ne2));
5909-
CL_CHECK(clSetKernelArg(kernel, 7, sizeof(int), &d_ne0));
5910-
CL_CHECK(clSetKernelArg(kernel, 8, sizeof(int), &d_ne1));
5911-
CL_CHECK(clSetKernelArg(kernel, 9, sizeof(int), &d_ne2));
5926+
CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), &extra_src0->data_device));
5927+
CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_ulong), &off_src0));
5928+
CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), &extra_dst->data_device));
5929+
CL_CHECK(clSetKernelArg(kernel, 3, sizeof(cl_ulong), &off_dst));
5930+
CL_CHECK(clSetKernelArg(kernel, 4, sizeof(int), &s_ne0));
5931+
CL_CHECK(clSetKernelArg(kernel, 5, sizeof(int), &s_ne1));
5932+
CL_CHECK(clSetKernelArg(kernel, 6, sizeof(int), &s_ne2));
5933+
CL_CHECK(clSetKernelArg(kernel, 7, sizeof(int), &s_ne3));
5934+
CL_CHECK(clSetKernelArg(kernel, 8, sizeof(cl_ulong), &s_nb0));
5935+
CL_CHECK(clSetKernelArg(kernel, 9, sizeof(cl_ulong), &s_nb1));
5936+
CL_CHECK(clSetKernelArg(kernel, 10, sizeof(cl_ulong), &s_nb2));
5937+
CL_CHECK(clSetKernelArg(kernel, 11, sizeof(cl_ulong), &s_nb3));
5938+
CL_CHECK(clSetKernelArg(kernel, 12, sizeof(int), &d_ne0));
5939+
CL_CHECK(clSetKernelArg(kernel, 13, sizeof(int), &d_ne1));
5940+
CL_CHECK(clSetKernelArg(kernel, 14, sizeof(int), &d_ne2));
5941+
CL_CHECK(clSetKernelArg(kernel, 15, sizeof(int), &d_ne3));
5942+
CL_CHECK(clSetKernelArg(kernel, 16, sizeof(cl_ulong), &d_nb0));
5943+
CL_CHECK(clSetKernelArg(kernel, 17, sizeof(cl_ulong), &d_nb1));
5944+
CL_CHECK(clSetKernelArg(kernel, 18, sizeof(cl_ulong), &d_nb2));
5945+
CL_CHECK(clSetKernelArg(kernel, 19, sizeof(cl_ulong), &d_nb3));
5946+
CL_CHECK(clSetKernelArg(kernel, 20, sizeof(int), &lp0));
5947+
CL_CHECK(clSetKernelArg(kernel, 21, sizeof(int), &rp0));
5948+
CL_CHECK(clSetKernelArg(kernel, 22, sizeof(int), &lp1));
5949+
CL_CHECK(clSetKernelArg(kernel, 23, sizeof(int), &rp1));
5950+
CL_CHECK(clSetKernelArg(kernel, 24, sizeof(int), &lp2));
5951+
CL_CHECK(clSetKernelArg(kernel, 25, sizeof(int), &rp2));
5952+
CL_CHECK(clSetKernelArg(kernel, 26, sizeof(int), &lp3));
5953+
CL_CHECK(clSetKernelArg(kernel, 27, sizeof(int), &rp3));
59125954

59135955
size_t lws0 = 64;
59145956
size_t gws0 = (( (size_t)d_ne0 + lws0 - 1 ) / lws0) * lws0;
59155957

5916-
size_t global_work_size[] = { gws0, (size_t)d_ne1, (size_t)d_ne2 };
5958+
size_t global_work_size[] = { gws0, (size_t)d_ne1, (size_t)d_ne2*d_ne3 };
59175959
size_t local_work_size[] = { lws0, 1, 1 };
59185960

59195961
size_t * local_work_size_ptr = local_work_size;

ggml/src/ggml-opencl/kernels/get_rows.cl

Lines changed: 36 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -69,26 +69,34 @@ kernel void kernel_get_rows_f32(
6969
int ne00,
7070
ulong nb01,
7171
ulong nb02,
72+
ulong nb03,
7273
int ne10,
7374
ulong nb10,
7475
ulong nb11,
76+
ulong nb12,
7577
ulong nb1,
76-
ulong nb2
78+
ulong nb2,
79+
ulong nb3
7780
) {
7881
src0 = (global void*)((global char*)src0 + offset0);
7982
src1 = (global int*)((global char*)src1 + offset1);
8083
dst = (global float*)((global char*)dst + offsetd);
8184

8285
int i10 = get_group_id(0);
8386
int i11 = get_group_id(1);
87+
int i12 = get_group_id(2);
8488

85-
int r = ((global int *) ((global char *) src1 + i11*nb11 + i10*nb10))[0];
89+
int r = ((global int *) ((global char *) src1 + i12*nb12 + i11*nb11 + i10*nb10))[0];
8690

8791
int i02 = i11;
92+
int i03 = i12;
8893

8994
for (int ind = get_local_id(0); ind < ne00; ind += get_local_size(0)) {
90-
((global float *) ((global char *) dst + i11*nb2 + i10*nb1))[ind] =
91-
((global float *) ((global char *) src0 + r*nb01 + i02*nb02))[ind];
95+
if (ind >= ne00) {
96+
return;
97+
}
98+
((global float *) ((global char *) dst + i12*nb3 + i11*nb2 + i10*nb1))[ind] =
99+
((global float *) ((global char *) src0 + r*nb01 + i02*nb02 + i03*nb03))[ind];
92100
}
93101
}
94102

@@ -102,26 +110,34 @@ kernel void kernel_get_rows_f16(
102110
int ne00,
103111
ulong nb01,
104112
ulong nb02,
113+
ulong nb03,
105114
int ne10,
106115
ulong nb10,
107116
ulong nb11,
117+
ulong nb12,
108118
ulong nb1,
109-
ulong nb2
119+
ulong nb2,
120+
ulong nb3
110121
) {
111122
src0 = (global void*)((global char*)src0 + offset0);
112123
src1 = (global int*)((global char*)src1 + offset1);
113124
dst = (global float*)((global char*)dst + offsetd);
114125

115126
int i10 = get_group_id(0);
116127
int i11 = get_group_id(1);
128+
int i12 = get_group_id(2);
117129

118-
int r = ((global int32_t *) ((global char *) src1 + i11*nb11 + i10*nb10))[0];
130+
int r = ((global int32_t *) ((global char *) src1 + i12*nb12 + i11*nb11 + i10*nb10))[0];
119131

120132
int i02 = i11;
133+
int i03 = i12;
121134

122135
for (int ind = get_local_id(0); ind < ne00; ind += get_local_size(0)) {
123-
((global float *) ((global char *) dst + i11*nb2 + i10*nb1))[ind] =
124-
((global half *) ((global char *) src0 + r*nb01 + i02*nb02))[ind];
136+
if (ind >= ne00) {
137+
return;
138+
}
139+
((global float *) ((global char *) dst + i12*nb3 + i11*nb2 + i10*nb1))[ind] =
140+
((global half *) ((global char *) src0 + r*nb01 + i02*nb02 + i03*nb03))[ind];
125141
}
126142
}
127143

@@ -135,11 +151,14 @@ kernel void kernel_get_rows_q4_0(
135151
int ne00,
136152
ulong nb01,
137153
ulong nb02,
154+
ulong nb03,
138155
int ne10,
139156
ulong nb10,
140157
ulong nb11,
158+
ulong nb12,
141159
ulong nb1,
142-
ulong nb2
160+
ulong nb2,
161+
ulong nb3
143162
) {
144163
src0 = (global void*)((global char*)src0 + offset0);
145164
src1 = (global int*)((global char*)src1 + offset1);
@@ -149,15 +168,20 @@ kernel void kernel_get_rows_q4_0(
149168

150169
int i10 = get_group_id(0);
151170
int i11 = get_group_id(1);
171+
int i12 = get_group_id(2);
152172

153-
int r = ((global int32_t *) ((global char *) src1 + i11*nb11 + i10*nb10))[0];
173+
int r = ((global int32_t *) ((global char *) src1 + i12*nb12 + i11*nb11 + i10*nb10))[0];
154174

155175
int i02 = i11;
176+
int i03 = i12;
156177

157178
for (int ind = get_local_id(0); ind < ne00/16; ind += get_local_size(0)) {
158179
float16 temp;
180+
if (ind >= ne00) {
181+
return;
182+
}
159183
dequantize_q4_0_f32(
160-
((global struct block_q4_0 *) ((global char *) src0 + r*nb01 + i02*nb02)) + ind/NL, ind%NL, &temp);
161-
*(((global float16 *) ((global char *) dst + i11*nb2 + i10*nb1)) + ind) = temp;
184+
((global struct block_q4_0 *) ((global char *) src0 + r*nb01 + i02*nb02 + i03*nb03)) + ind/NL, ind%NL, &temp);
185+
*(((global float16 *) ((global char *) dst + i12*nb3 + i11*nb2 + i10*nb1)) + ind) = temp;
162186
}
163187
}

0 commit comments

Comments
 (0)