
Commit ac0366a

Merge commit 'd7f5f4e578d1f60b0835d1734a50438c309b3e5c' into concedo_experimental

# Conflicts:
#	.github/ISSUE_TEMPLATE/010-bug-compilation.yml
#	.github/ISSUE_TEMPLATE/011-bug-results.yml
#	.github/labeler.yml
#	.github/workflows/build.yml
#	docs/docker.md
#	examples/simple-chat/simple-chat.cpp
#	ggml/src/ggml-cann/aclnn_ops.cpp
#	ggml/src/ggml-cpu/CMakeLists.txt
#	ggml/src/ggml-metal/CMakeLists.txt
#	ggml/src/ggml-opencl/CMakeLists.txt
#	ggml/src/ggml-opencl/ggml-opencl.cpp
#	ggml/src/ggml-sycl/ggml-sycl.cpp
#	scripts/sync-ggml.last
#	tests/test-backend-ops.cpp

2 parents ae0c6b0 + d7f5f4e

File tree

18 files changed: +660 -47 lines

ggml/include/ggml.h

Lines changed: 39 additions & 2 deletions
@@ -320,6 +320,13 @@
 extern "C" {
 #endif

+    // Function type used in fatal error callbacks
+    typedef void (*ggml_abort_callback_t)(const char * error_message);
+
+    // Set the abort callback (passing null will restore original abort functionality: printing a message to stdout)
+    // Returns the old callback for chaining
+    GGML_API ggml_abort_callback_t ggml_set_abort_callback(ggml_abort_callback_t callback);
+
     GGML_NORETURN GGML_ATTRIBUTE_FORMAT(3, 4)
     GGML_API void ggml_abort(const char * file, int line, const char * fmt, ...);

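A minimal sketch of how a caller might use this new hook (the handler and its logging are illustrative, not part of the commit; only the declarations above are from the diff):

    #include <stdio.h>
    #include "ggml.h"

    static ggml_abort_callback_t g_prev_abort = NULL;

    // Illustrative handler: log the fatal error, then chain to the
    // previously installed callback so existing behavior is preserved.
    static void my_abort_handler(const char * error_message) {
        fprintf(stderr, "[app] ggml fatal: %s\n", error_message);
        if (g_prev_abort) {
            g_prev_abort(error_message);
        }
    }

    int main(void) {
        g_prev_abort = ggml_set_abort_callback(my_abort_handler); // returns the old callback
        // ... any GGML_ABORT(...) now reaches my_abort_handler ...
        ggml_set_abort_callback(g_prev_abort); // restore before shutdown
        return 0;
    }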
@@ -488,6 +495,7 @@ extern "C" {
         GGML_OP_CONV_TRANSPOSE_1D,
         GGML_OP_IM2COL,
         GGML_OP_IM2COL_BACK,
+        GGML_OP_CONV_2D,
         GGML_OP_CONV_2D_DW,
         GGML_OP_CONV_TRANSPOSE_2D,
         GGML_OP_POOL_1D,
@@ -1826,6 +1834,17 @@
             struct ggml_tensor  * b,
             int                   stride);

+    GGML_API struct ggml_tensor * ggml_conv_2d_direct(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,   // convolution kernel [KW, KH, IC, OC]
+            struct ggml_tensor  * b,   // input data [W, H, C, N]
+            int                   s0,  // stride dimension 0
+            int                   s1,  // stride dimension 1
+            int                   p0,  // padding dimension 0
+            int                   p1,  // padding dimension 1
+            int                   d0,  // dilation dimension 0
+            int                   d1); // dilation dimension 1
+
     enum ggml_op_pool {
         GGML_OP_POOL_MAX,
         GGML_OP_POOL_AVG,
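A hedged usage sketch for the new direct convolution (tensor sizes are illustrative; assumes the usual ggml context/graph workflow, with ggml_graph_compute_with_ctx from the CPU backend):

    #include "ggml.h"
    #include "ggml-cpu.h"

    int main(void) {
        struct ggml_init_params ip = {
            /*.mem_size   =*/ 256u * 1024 * 1024,
            /*.mem_buffer =*/ NULL,
            /*.no_alloc   =*/ false,
        };
        struct ggml_context * ctx = ggml_init(ip);

        // kernel [KW=3, KH=3, IC=8, OC=16], input [W=64, H=64, C=8, N=1]
        struct ggml_tensor * k = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 3, 3, 8, 16);
        struct ggml_tensor * x = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 64, 64, 8, 1);

        // stride 1, padding 1, dilation 1 -> output [64, 64, 16, 1]
        struct ggml_tensor * y = ggml_conv_2d_direct(ctx, k, x, 1, 1, 1, 1, 1, 1);

        struct ggml_cgraph * gf = ggml_new_graph(ctx);
        ggml_build_forward_expand(gf, y);
        ggml_graph_compute_with_ctx(ctx, gf, /*n_threads=*/4);

        ggml_free(ctx);
        return 0;
    }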
@@ -1868,6 +1887,12 @@
     enum ggml_scale_mode {
         GGML_SCALE_MODE_NEAREST  = 0,
         GGML_SCALE_MODE_BILINEAR = 1,
+
+        GGML_SCALE_MODE_COUNT
+    };
+
+    enum ggml_scale_flag {
+        GGML_SCALE_FLAG_ALIGN_CORNERS = (1 << 8)
     };

     // interpolate
@@ -1880,14 +1905,26 @@

     // interpolate
     // interpolate scale to specified dimensions
-    GGML_API struct ggml_tensor * ggml_upscale_ext(
+    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_upscale_ext(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
             int                   ne0,
             int                   ne1,
             int                   ne2,
             int                   ne3,
-            enum ggml_scale_mode  mode);
+            enum ggml_scale_mode  mode),
+        "use ggml_interpolate instead");
+
+    // Up- or downsamples the input to the specified size.
+    // 2D scale modes (eg. bilinear) are applied to the first two dimensions.
+    GGML_API struct ggml_tensor * ggml_interpolate(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            int64_t               ne0,
+            int64_t               ne1,
+            int64_t               ne2,
+            int64_t               ne3,
+            uint32_t              mode); // ggml_scale_mode [ | ggml_scale_flag...]

     // pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0]
     GGML_API struct ggml_tensor * ggml_pad(
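Since mode is now a packed field (the ggml_scale_mode in the low byte, flags above it; note GGML_SCALE_FLAG_ALIGN_CORNERS = 1 << 8), callers can OR them together. An illustrative call, assuming a valid ctx and a 4-D tensor t:

    // bilinear resize of the first two dims to 128x128, matching
    // PyTorch interpolate(..., align_corners=True)
    struct ggml_tensor * out = ggml_interpolate(
            ctx, t,
            128, 128, t->ne[2], t->ne[3],
            GGML_SCALE_MODE_BILINEAR | GGML_SCALE_FLAG_ALIGN_CORNERS);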

ggml/src/ggml-cpu/ggml-cpu.c

Lines changed: 10 additions & 1 deletion
@@ -1198,7 +1198,7 @@ static void ggml_compute_forward_mul_mat_one_chunk(
     }
 }

-static void ggml_compute_forward_mul_mat(
+void ggml_compute_forward_mul_mat(
         const struct ggml_compute_params * params,
               struct ggml_tensor * dst) {

@@ -1880,6 +1880,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) {
             {
                 ggml_compute_forward_im2col_back_f32(params, tensor);
             } break;
+        case GGML_OP_CONV_2D:
+            {
+                ggml_compute_forward_conv_2d(params, tensor);
+            } break;
         case GGML_OP_CONV_2D_DW:
             {
                 ggml_compute_forward_conv_2d_dw(params, tensor);
@@ -2242,6 +2246,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
             } break;
         case GGML_OP_IM2COL:
         case GGML_OP_IM2COL_BACK:
+        case GGML_OP_CONV_2D:
         case GGML_OP_CONV_2D_DW:
         case GGML_OP_CONV_TRANSPOSE_1D:
         case GGML_OP_CONV_TRANSPOSE_2D:
@@ -2775,6 +2780,10 @@ struct ggml_cplan ggml_graph_plan(
                         GGML_ABORT("fatal error");
                     }
                 } break;
+            case GGML_OP_CONV_2D:
+                {
+                    cur = GGML_IM2COL_WORK_SIZE;
+                } break;
             case GGML_OP_CONV_TRANSPOSE_2D:
                {
                    const int64_t ne00 = node->src[0]->ne[0]; // W

ggml/src/ggml-cpu/ops.cpp

Lines changed: 193 additions & 7 deletions
@@ -3,6 +3,7 @@
 #include "ggml-cpu.h"
 #include "ggml-impl.h"
 #include "binary-ops.h"
+#include "ggml.h"
 #include "unary-ops.h"
 #include "vec.h"

@@ -6545,6 +6546,186 @@ void ggml_compute_forward_im2col_back_f32(
     }
 }

+static void ggml_call_mul_mat(ggml_type type, const ggml_compute_params * params, int64_t m, int64_t n, int64_t k,
+                              void * a, void * b, float * c) {
+    const ggml_type_traits * traits = ggml_get_type_traits(type);
+    struct ggml_tensor src1 = {};
+    src1.type = type;
+    src1.ne[0] = k;
+    src1.ne[1] = m;
+    src1.ne[2] = 1;
+    src1.ne[3] = 1;
+    src1.nb[0] = traits->type_size;
+    src1.nb[1] = k * traits->type_size;
+    src1.nb[2] = src1.nb[1];
+    src1.nb[3] = src1.nb[2];
+    src1.data  = a;
+
+    struct ggml_tensor src0 = {};
+    src0.type = type;
+    src0.ne[0] = k;
+    src0.ne[1] = n;
+    src0.ne[2] = 1;
+    src0.ne[3] = 1;
+    src0.nb[0] = traits->type_size;
+    src0.nb[1] = k * traits->type_size;
+    src0.nb[2] = src0.nb[1];
+    src0.nb[3] = src0.nb[2];
+    src0.data  = b;
+
+    struct ggml_tensor dst = {};
+    dst.ne[0] = n;
+    dst.ne[1] = m;
+    dst.ne[2] = 1;
+    dst.ne[3] = 1;
+    dst.nb[0] = sizeof(float);
+    dst.nb[1] = n * sizeof(float);
+    dst.nb[2] = dst.nb[1];
+    dst.nb[3] = dst.nb[2];
+    dst.data   = c;
+    dst.src[0] = &src0;
+    dst.src[1] = &src1;
+
+    ggml_compute_forward_mul_mat(params, &dst);
+}
+
+// ggml_compute_forward_conv_2d
+
+static void ggml_compute_forward_conv_2d_impl(const ggml_compute_params * params,
+                                              const ggml_tensor * kernel,  // [KW, KH, IC, OC]
+                                              const ggml_tensor * src,     // [W, H, C, N]
+                                              ggml_tensor * dst,           // [OW, OH, OC, N]
+                                              ggml_type kernel_type) {
+
+    GGML_ASSERT(ggml_is_contiguous(kernel));
+    GGML_ASSERT(kernel_type == GGML_TYPE_F16 || kernel_type == GGML_TYPE_F32);
+    GGML_ASSERT(kernel->type == kernel_type);
+
+    const ggml_type_traits * traits = ggml_get_type_traits(kernel_type);
+
+    const int32_t stride_x   = dst->op_params[0];
+    const int32_t stride_y   = dst->op_params[1];
+    const int32_t pad_x      = dst->op_params[2];
+    const int32_t pad_y      = dst->op_params[3];
+    const int32_t dilation_x = dst->op_params[4];
+    const int32_t dilation_y = dst->op_params[5];
+
+    const int64_t c_in  = src->ne[2];
+    const int64_t c_out = kernel->ne[3];
+    GGML_ASSERT(c_in == kernel->ne[2]);
+
+    const int64_t src_w = src->ne[0];
+    const int64_t src_h = src->ne[1];
+    const int64_t knl_w = kernel->ne[0];
+    const int64_t knl_h = kernel->ne[1];
+    const int64_t dst_w = dst->ne[0];
+    const int64_t dst_h = dst->ne[1];
+
+    const float * src_data = (float *) src->data;
+    void  * knl_data       = kernel->data;
+    float * dst_data       = (float *) dst->data;
+
+    const int64_t knl_n       = knl_w * knl_h * c_in;
+    const int64_t patch_total = dst->ne[3] * dst_w * dst_h;
+
+    const int64_t space_per_patch   = knl_n * traits->type_size + c_out * sizeof(float);
+    const int64_t batch_size        = params->wsize / space_per_patch;
+    const int64_t patches_per_batch = batch_size > 8 ? (batch_size / 8) * 8 : batch_size;
+    const int64_t batch_n           = (patch_total + patches_per_batch - 1) / patches_per_batch;
+
+    GGML_ASSERT(patches_per_batch > 0 && batch_size >= 1);
+
+    void * tmp = params->wdata;
+
+    for (int64_t batch_i = 0; batch_i < batch_n; ++batch_i) {
+
+        const int64_t patch_start_batch = batch_i * patches_per_batch;
+        const int64_t patch_end_batch   = std::min(patch_start_batch + patches_per_batch,
+                                                   patch_total);
+        const int64_t patch_n           = patch_end_batch - patch_start_batch;
+
+        const int64_t patch_per_thread = (patch_n + params->nth - 1) / params->nth;
+        const int64_t patch_start      = patch_start_batch + params->ith * patch_per_thread;
+        const int64_t patch_end        = std::min(patch_start + patch_per_thread, patch_end_batch);
+
+        //im2col for a patch
+        for (int64_t p = patch_start; p < patch_end; ++p) {
+            const int64_t batch_n = p / (dst_w * dst_h);
+            const int64_t src_x   = (p / dst_w) % dst_h;
+            const int64_t src_y   = p % dst_w;
+
+            const float * src_base = (const float *)((const char *)src_data + batch_n * src->nb[3]);
+            char * dst_row = (char *) tmp + (p % patches_per_batch) * knl_n * traits->type_size;
+
+            for (int64_t ic = 0; ic < c_in; ++ic) {
+                for (int64_t ky = 0; ky < knl_h; ++ky) {
+                    for (int64_t kx = 0; kx < knl_w; ++kx) {
+                        const int64_t sy = src_x * stride_y + ky * dilation_y - pad_y;
+                        const int64_t sx = src_y * stride_x + kx * dilation_x - pad_x;
+
+                        int64_t dst_idx = ic * (knl_h * knl_w) + ky * knl_w + kx;
+
+                        float src_val;
+                        if (sy < 0 || sy >= src_h || sx < 0 || sx >= src_w) {
+                            src_val = 0.0f;
+                        } else {
+                            const float * src_ptr = (const float *)((const char *)src_base + sx * src->nb[0] + sy * src->nb[1] + ic * src->nb[2]);
+                            src_val = *src_ptr;
+                        }
+
+                        char * element_ptr = dst_row + dst_idx * traits->type_size;
+                        if (kernel_type == GGML_TYPE_F32) {
+                            *(float *) element_ptr = src_val;
+                        } else if (kernel_type == GGML_TYPE_F16) {
+                            *(ggml_fp16_t *) element_ptr = GGML_CPU_FP32_TO_FP16(src_val);
+                        }
+                    }
+                }
+            }
+        } // patches handled by this thread
+
+        ggml_barrier(params->threadpool);
+
+        float * gemm_output = (float *) ((char *) tmp + patches_per_batch * knl_n * traits->type_size);
+
+        GGML_ASSERT(gemm_output + patch_n * c_out <= (float*)tmp + params->wsize);
+
+        // GEMM: patches[patch_n, knl_n] × kernel[knl_n, c_out] = output[patch_n, c_out]
+        ggml_call_mul_mat(kernel_type, params, patch_n, c_out, knl_n, tmp, knl_data, gemm_output);
+
+        ggml_barrier(params->threadpool);
+
+
+        //permute back [OC, N, OH, OW] to [N, OC, OH, OW]
+        const int64_t permute_per_thread = (patch_n + params->nth - 1) / params->nth;
+        const int64_t permute_start      = params->ith * permute_per_thread;
+        const int64_t permute_end        = std::min(permute_start + permute_per_thread, patch_n);
+
+        for (int64_t i = permute_start; i < permute_end; ++i) {
+            const int64_t p       = patch_start_batch + i;
+            const int64_t batch_n = p / (dst_w * dst_h);
+            const int64_t dst_y   = (p / dst_w) % dst_h;
+            const int64_t dst_x   = p % dst_w;
+
+            for (int64_t oc = 0; oc < c_out; ++oc) {
+                const float value = gemm_output[i * c_out + oc];
+                float * dst_ptr = (float *)((char *)dst_data + dst_x * dst->nb[0] + dst_y * dst->nb[1] + oc * dst->nb[2] + batch_n * dst->nb[3]);
+                *dst_ptr = value;
+            }
+        }
+    }
+}
+
+void ggml_compute_forward_conv_2d(
+        const ggml_compute_params * params,
+        ggml_tensor * dst) {
+
+    const ggml_tensor * src0 = dst->src[0];
+    const ggml_tensor * src1 = dst->src[1];
+
+    ggml_compute_forward_conv_2d_impl(params, src0, src1, dst, src0->type);
+}
+
 // ggml_compute_forward_conv_transpose_2d

 void ggml_compute_forward_conv_transpose_2d(
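For readers checking the im2col + GEMM path above, here is a naive single-element reference it is equivalent to (an illustrative sketch, not code from the commit; indices assume contiguous F32 tensors with the same layouts as the diff: src is [W, H, C] per batch with W fastest, kernel is [KW, KH, IC, OC]):

    #include <stdint.h>

    // Reference: one output element dst[oy][ox] for output channel oc,
    // with out-of-bounds source reads acting as zero padding.
    static float conv2d_ref_at(const float * src, const float * knl,
                               int64_t src_w, int64_t src_h, int64_t c_in,
                               int64_t knl_w, int64_t knl_h,
                               int64_t ox, int64_t oy, int64_t oc,
                               int32_t s0, int32_t s1, int32_t p0, int32_t p1,
                               int32_t d0, int32_t d1) {
        float acc = 0.0f;
        for (int64_t ic = 0; ic < c_in; ++ic) {
            for (int64_t ky = 0; ky < knl_h; ++ky) {
                for (int64_t kx = 0; kx < knl_w; ++kx) {
                    const int64_t sy = oy * s1 + ky * d1 - p1;
                    const int64_t sx = ox * s0 + kx * d0 - p0;
                    if (sy < 0 || sy >= src_h || sx < 0 || sx >= src_w) {
                        continue; // zero padding
                    }
                    acc += src[(ic * src_h + sy) * src_w + sx]
                         * knl[((oc * c_in + ic) * knl_h + ky) * knl_w + kx];
                }
            }
        }
        return acc;
    }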
@@ -7095,12 +7276,13 @@ static void ggml_compute_forward_upscale_f32(

     GGML_TENSOR_UNARY_OP_LOCALS

-    const float sf0 = (float)ne0/src0->ne[0];
-    const float sf1 = (float)ne1/src0->ne[1];
-    const float sf2 = (float)ne2/src0->ne[2];
-    const float sf3 = (float)ne3/src0->ne[3];
+    float sf0 = (float)ne0/src0->ne[0];
+    float sf1 = (float)ne1/src0->ne[1];
+    float sf2 = (float)ne2/src0->ne[2];
+    float sf3 = (float)ne3/src0->ne[3];

-    const ggml_scale_mode mode = (ggml_scale_mode) ggml_get_op_params_i32(dst, 0);
+    const int32_t mode_flags = ggml_get_op_params_i32(dst, 0);
+    const ggml_scale_mode mode = (ggml_scale_mode) (mode_flags & 0xFF);

     if (mode == GGML_SCALE_MODE_NEAREST) {
         for (int64_t i3 = 0; i3 < ne3; i3++) {
@@ -7121,8 +7303,12 @@
             }
         }
     } else if (mode == GGML_SCALE_MODE_BILINEAR) {
-        // setting a pixel offset of 0 would replicate the behavior of pytorch interpolate with align_corners=True
-        const float pixel_offset = 0.5f;
+        float pixel_offset = 0.5f;
+        if (mode_flags & GGML_SCALE_FLAG_ALIGN_CORNERS) {
+            pixel_offset = 0.0f;
+            sf0 = (float)(ne0 - 1) / (src0->ne[0] - 1);
+            sf1 = (float)(ne1 - 1) / (src0->ne[1] - 1);
+        }

         for (int64_t i3 = 0; i3 < ne3; i3++) {
             const int64_t i03 = i3 / sf3;
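The effect of the new flag on coordinate mapping, as a small standalone worked example (illustrative, not part of the commit):

    #include <stdio.h>

    // Sketch of the source-coordinate math in the bilinear path above.
    int main(void) {
        const int ne_in = 4, ne_out = 8;

        // default: half-pixel centers, sf = ne_out / ne_in
        const float sf = (float) ne_out / ne_in;                 // 2.0
        printf("default:       x_dst=7 -> x_src=%.3f\n", (7 + 0.5f) / sf - 0.5f); // 3.250

        // GGML_SCALE_FLAG_ALIGN_CORNERS: pixel_offset = 0 and
        // sf = (ne_out - 1) / (ne_in - 1), so endpoints map to endpoints
        const float sf_ac = (float) (ne_out - 1) / (ne_in - 1);  // 7/3
        printf("align_corners: x_dst=7 -> x_src=%.3f\n", 7 / sf_ac); // 3.000
        return 0;
    }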

ggml/src/ggml-cpu/ops.h

Lines changed: 5 additions & 0 deletions
@@ -20,6 +20,9 @@

 static const size_t CACHE_LINE_SIZE_F32 = CACHE_LINE_SIZE/sizeof(float);

+// Work buffer size for im2col operations in CONV2D
+#define GGML_IM2COL_WORK_SIZE (16 * 1024 * 1024)
+
 #ifdef __cplusplus
 extern "C" {
 #endif
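To make the 16 MiB constant concrete, a worked example of the batching math from ops.cpp (the kernel and channel counts here are hypothetical):

    #include <stdint.h>
    #include <stdio.h>

    // Mirrors the sizing logic in ggml_compute_forward_conv_2d_impl.
    int main(void) {
        const int64_t knl_n             = 3 * 3 * 8;             // F32 3x3 kernel, 8 input channels
        const int64_t c_out             = 16;
        const int64_t space_per_patch   = knl_n * 4 + c_out * 4; // 352 bytes per patch
        const int64_t batch_size        = (16 * 1024 * 1024) / space_per_patch; // 47662
        const int64_t patches_per_batch = (batch_size / 8) * 8;  // 47656, multiple of 8
        printf("%lld patches per work-buffer batch\n", (long long) patches_per_batch);
        return 0;
    }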
@@ -65,6 +68,7 @@ void ggml_compute_forward_clamp(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_conv_transpose_1d(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_im2col(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_im2col_back_f32(const struct ggml_compute_params * params, struct ggml_tensor * dst);
+void ggml_compute_forward_conv_2d(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_conv_transpose_2d(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_conv_2d_dw(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_pool_1d(const struct ggml_compute_params * params, struct ggml_tensor * dst);
@@ -107,6 +111,7 @@ void ggml_compute_forward_custom(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_cross_entropy_loss(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_cross_entropy_loss_back(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_opt_step_adamw(const struct ggml_compute_params * params, struct ggml_tensor * dst);
+void ggml_compute_forward_mul_mat(const struct ggml_compute_params * params, struct ggml_tensor * dst);

 #ifdef __cplusplus
 }

ggml/src/ggml-metal/ggml-metal.metal

Lines changed: 3 additions & 0 deletions
@@ -138,6 +138,7 @@ void quantize_q4_0(device const float * src, device block_q4_0 & dst) {
 }

 void quantize_q4_1(device const float * src, device block_q4_1 & dst) {
+#pragma METAL fp math_mode(safe)
     float min = FLT_MAX;
     float max = -FLT_MAX;

@@ -203,6 +204,7 @@ void quantize_q5_0(device const float * src, device block_q5_0 & dst) {
 }

 void quantize_q5_1(device const float * src, device block_q5_1 & dst) {
+#pragma METAL fp math_mode(safe)
     float max = src[0];
     float min = src[0];

@@ -239,6 +241,7 @@ void quantize_q5_1(device const float * src, device block_q5_1 & dst) {
 }

 void quantize_iq4_nl(device const float * src, device block_iq4_nl & dst) {
+#pragma METAL fp math_mode(safe)
     float amax = 0.0f; // absolute max
     float max = 0.0f;
