
Commit 1c28a2b

ggerganov authored and Nexesenex committed

sync : ggml

1 parent ee705df · commit 1c28a2b

File tree: 20 files changed, +1360 -175 lines changed


ggml/include/ggml-backend.h

Lines changed: 1 addition & 0 deletions
@@ -63,6 +63,7 @@ extern "C" {
     GGML_API void ggml_backend_tensor_set_async(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
     GGML_API void ggml_backend_tensor_get_async(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);

+    // "offset" refers to the offset of the tensor data for setting/getting data
     GGML_API GGML_CALL void ggml_backend_tensor_set( struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
     GGML_API GGML_CALL void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
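
The added comment settles an easy point of confusion: offset is a byte offset into the tensor's own data, not into the enclosing backend buffer. A minimal sketch, assuming a contiguous 2D F32 tensor t with 16 elements per row (names illustrative):

    float row[16] = { 0 };                              // one row of data to upload
    size_t off = 1*t->nb[1];                            // nb[1] = byte stride between rows
    ggml_backend_tensor_set(t, row, off, sizeof(row));  // write row 1
    ggml_backend_tensor_get(t, row, off, sizeof(row));  // read it back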

ggml/include/ggml.h

Lines changed: 85 additions & 38 deletions
@@ -227,7 +227,7 @@
 #include <stdio.h>

 #define GGML_FILE_MAGIC   0x67676d6c // "ggml"
-#define GGML_FILE_VERSION 1
+#define GGML_FILE_VERSION 2

 #define GGML_QNT_VERSION        2    // bump this on quantization format changes
 #define GGML_QNT_VERSION_FACTOR 1000 // do not change this
@@ -562,6 +562,8 @@ extern "C" {
         GGML_OP_SQR,
         GGML_OP_SQRT,
         GGML_OP_LOG,
+        GGML_OP_SIN,
+        GGML_OP_COS,
         GGML_OP_SUM,
         GGML_OP_SUM_ROWS,
         GGML_OP_MEAN,
@@ -603,9 +605,11 @@ extern "C" {
         GGML_OP_CLAMP,
         GGML_OP_CONV_TRANSPOSE_1D,
         GGML_OP_IM2COL,
+        GGML_OP_IM2COL_BACK,
         GGML_OP_CONV_TRANSPOSE_2D,
         GGML_OP_POOL_1D,
         GGML_OP_POOL_2D,
+        GGML_OP_POOL_2D_BACK,
         GGML_OP_UPSCALE, // nearest interpolate
         GGML_OP_PAD,
         GGML_OP_ARANGE,
@@ -1106,6 +1110,22 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor  * a);

+    GGML_API struct ggml_tensor * ggml_sin(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
+    GGML_API struct ggml_tensor * ggml_sin_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
+    GGML_API struct ggml_tensor * ggml_cos(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
+    GGML_API struct ggml_tensor * ggml_cos_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
     // return scalar
     GGML_API struct ggml_tensor * ggml_sum(
             struct ggml_context * ctx,
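
ggml_sin/ggml_cos (and their _inplace variants) follow the standard unary-op pattern. A minimal CPU-backend sketch, assuming the usual context/graph workflow from this repo (sizes and thread count illustrative):

    struct ggml_init_params params = {
        /*.mem_size   =*/ 16*1024*1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    struct ggml_tensor * x = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);
    ggml_set_f32(x, 0.5f);

    struct ggml_tensor * y = ggml_sin(ctx, x); // or ggml_cos / *_inplace

    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, y);
    ggml_graph_compute_with_ctx(ctx, gf, /*n_threads=*/1);

    // ggml_get_f32_1d(y, 0) ~= 0.4794, i.e. sin(0.5)
    ggml_free(ctx);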
@@ -1760,34 +1780,49 @@ extern "C" {
             float                 min,
             float                 max);

+    // im2col
+    // converts data into a format that effectively results in a convolution when combined with matrix multiplication
     GGML_API struct ggml_tensor * ggml_im2col(
             struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b,
-            int                   s0,
-            int                   s1,
-            int                   p0,
-            int                   p1,
-            int                   d0,
-            int                   d1,
-            bool                  is_2D,
-            enum ggml_type        dst_type);
+            struct ggml_tensor  * a,  // convolution kernel
+            struct ggml_tensor  * b,  // data
+            int                   s0, // stride dimension 0
+            int                   s1, // stride dimension 1
+            int                   p0, // padding dimension 0
+            int                   p1, // padding dimension 1
+            int                   d0, // dilation dimension 0
+            int                   d1, // dilation dimension 1
+            bool                  is_2D,
+            enum ggml_type        dst_type);
+
+    GGML_API struct ggml_tensor * ggml_im2col_back(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,  // convolution kernel
+            struct ggml_tensor  * b,  // gradient of im2col output
+            int64_t             * ne, // shape of im2col input
+            int                   s0, // stride dimension 0
+            int                   s1, // stride dimension 1
+            int                   p0, // padding dimension 0
+            int                   p1, // padding dimension 1
+            int                   d0, // dilation dimension 0
+            int                   d1, // dilation dimension 1
+            bool                  is_2D);

     GGML_API struct ggml_tensor * ggml_conv_depthwise_2d(
             struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b,
-            int                   s0,
-            int                   s1,
-            int                   p0,
-            int                   p1,
-            int                   d0,
-            int                   d1);
+            struct ggml_tensor  * a,  // convolution kernel
+            struct ggml_tensor  * b,  // data
+            int                   s0, // stride dimension 0
+            int                   s1, // stride dimension 1
+            int                   p0, // padding dimension 0
+            int                   p1, // padding dimension 1
+            int                   d0, // dilation dimension 0
+            int                   d1); // dilation dimension 1

     GGML_API struct ggml_tensor * ggml_conv_1d(
             struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b,
+            struct ggml_tensor  * a,  // convolution kernel
+            struct ggml_tensor  * b,  // data
             int                   s0, // stride
             int                   p0, // padding
             int                   d0); // dilation
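
The new header comment names the core trick: im2col unrolls each receptive field of the input into one row, after which the convolution itself is a single matrix multiplication against the flattened kernel. A simplified sketch of that composition (it mirrors how ggml_conv_2d is assembled internally; the final 4D reshape of the result is elided, and ctx/a/b and the stride/padding/dilation ints are assumed from the signature above):

    // a: kernel [KW, KH, IC, OC], b: data [W, H, IC, N]
    struct ggml_tensor * cols = ggml_im2col(ctx, a, b, s0, s1, p0, p1, d0, d1,
                                            /*is_2D=*/true, GGML_TYPE_F16); // [N, OH, OW, IC*KH*KW]
    // one matmul against the flattened kernel evaluates every output position
    struct ggml_tensor * out  = ggml_mul_mat(ctx,
            ggml_reshape_2d(ctx, cols, cols->ne[0], cols->ne[3]*cols->ne[2]*cols->ne[1]),
            ggml_reshape_2d(ctx, a,    a->ne[0]*a->ne[1]*a->ne[2], a->ne[3]));

ggml_im2col_back is the matching gradient op: given the gradient of the im2col output (b) and the shape ne of the original input, it scatters gradients back onto the input positions, which is what makes these convolutions trainable end to end.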
@@ -1796,29 +1831,29 @@ extern "C" {
     // alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d)
     GGML_API struct ggml_tensor* ggml_conv_1d_ph(
             struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b,
-            int                   s,
-            int                   d);
+            struct ggml_tensor  * a,  // convolution kernel
+            struct ggml_tensor  * b,  // data
+            int                   s,  // stride
+            int                   d); // dilation

     GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
             struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b,
-            int                   s0,
-            int                   p0,
-            int                   d0);
+            struct ggml_tensor  * a,  // convolution kernel
+            struct ggml_tensor  * b,  // data
+            int                   s0, // stride
+            int                   p0, // padding
+            int                   d0); // dilation

     GGML_API struct ggml_tensor * ggml_conv_2d(
             struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b,
-            int                   s0,
-            int                   s1,
-            int                   p0,
-            int                   p1,
-            int                   d0,
-            int                   d1);
+            struct ggml_tensor  * a,  // convolution kernel
+            struct ggml_tensor  * b,  // data
+            int                   s0, // stride dimension 0
+            int                   s1, // stride dimension 1
+            int                   p0, // padding dimension 0
+            int                   p1, // padding dimension 1
+            int                   d0, // dilation dimension 0
+            int                   d1); // dilation dimension 1


     // kernel size is a->ne[0] x a->ne[1]
@@ -1880,6 +1915,18 @@ extern "C" {
             float                 p0,
             float                 p1);

+    GGML_API struct ggml_tensor * ggml_pool_2d_back(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * af, // "a"/input used in forward pass
+            enum ggml_op_pool     op,
+            int                   k0,
+            int                   k1,
+            int                   s0,
+            int                   s1,
+            float                 p0,
+            float                 p1);
+
     // nearest interpolate
     // multiplies ne0 and ne1 by scale factor
     // used in stable-diffusion
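
Per the af comment, ggml_pool_2d_back takes the gradient with respect to the pooled output as a and the original forward input as af (max pooling needs it to locate which element produced each maximum). A hedged pairing sketch, with inp and grad_out as illustrative tensor names:

    // forward: 2x2 max pooling, stride 2, no padding
    struct ggml_tensor * pooled  = ggml_pool_2d(ctx, inp, GGML_OP_POOL_MAX, 2, 2, 2, 2, 0, 0);
    // backward: grad_out has pooled's shape; grad_in comes back with inp's shape
    struct ggml_tensor * grad_in = ggml_pool_2d_back(ctx, grad_out, inp, GGML_OP_POOL_MAX, 2, 2, 2, 2, 0, 0);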

ggml/src/ggml-cuda.cu

Lines changed: 20 additions & 1 deletion
@@ -15,8 +15,10 @@
 #include "ggml-cuda/binbcast.cuh"
 #include "ggml-cuda/clamp.cuh"
 #include "ggml-cuda/concat.cuh"
+#include "ggml-cuda/conv-transpose-1d.cuh"
 #include "ggml-cuda/convert.cuh"
 #include "ggml-cuda/cpy.cuh"
+#include "ggml-cuda/cross-entropy-loss.cuh"
 #include "ggml-cuda/diagmask.cuh"
 #include "ggml-cuda/dmmv.cuh"
 #include "ggml-cuda/fattn.cuh"
@@ -36,7 +38,6 @@
 #include "ggml-cuda/tsembd.cuh"
 #include "ggml-cuda/unary.cuh"
 #include "ggml-cuda/upscale.cuh"
-#include "ggml-cuda/conv-transpose-1d.cuh"

 #include <algorithm>
 #include <array>
@@ -2807,6 +2808,9 @@ static bool ggml_cuda_compute_forward(ggml_backend_cuda_context & ctx, struct gg
         case GGML_OP_ADD:
             ggml_cuda_op_add(ctx, dst);
             break;
+        case GGML_OP_SUB:
+            ggml_cuda_op_sub(ctx, dst);
+            break;
         case GGML_OP_MULTI_ADD:
             ggml_cuda_op_multi_add(ctx, dst);
             break;
@@ -2911,6 +2915,12 @@ static bool ggml_cuda_compute_forward(ggml_backend_cuda_context & ctx, struct gg
         case GGML_OP_SQRT:
             ggml_cuda_op_sqrt(ctx, dst);
             break;
+        case GGML_OP_SIN:
+            ggml_cuda_op_sin(ctx, dst);
+            break;
+        case GGML_OP_COS:
+            ggml_cuda_op_cos(ctx, dst);
+            break;
         case GGML_OP_CLAMP:
             ggml_cuda_op_clamp(ctx, dst);
             break;
@@ -2953,6 +2963,9 @@ static bool ggml_cuda_compute_forward(ggml_backend_cuda_context & ctx, struct gg
         case GGML_OP_FLASH_ATTN_EXT:
             ggml_cuda_flash_attn_ext(ctx, dst);
             break;
+        case GGML_OP_CROSS_ENTROPY_LOSS:
+            ggml_cuda_cross_entropy_loss(ctx, dst);
+            break;
         default:
             return false;
     }
@@ -3274,6 +3287,7 @@ GGML_CALL static enum ggml_status ggml_backend_cuda_graph_compute(ggml_backend_t
             assert(node->buffer->buft == ggml_backend_cuda_buffer_type(cuda_ctx->device));
             for (int j = 0; j < GGML_MAX_SRC; j++) {
                 if (node->src[j] != nullptr) {
+                    assert(node->src[j]->buffer);
                     assert(node->src[j]->buffer->buft == ggml_backend_cuda_buffer_type(cuda_ctx->device) || ggml_backend_buffer_is_cuda_split(node->src[j]->buffer));
                 }
             }
@@ -3562,6 +3576,7 @@ GGML_CALL static bool ggml_backend_cuda_supports_op(ggml_backend_t backend, cons
         case GGML_OP_TRANSPOSE:
         case GGML_OP_NORM:
         case GGML_OP_ADD:
+        case GGML_OP_SUB:
         case GGML_OP_MULTI_ADD:
         case GGML_OP_MUL:
         case GGML_OP_DIV:
@@ -3571,6 +3586,8 @@ GGML_CALL static bool ggml_backend_cuda_supports_op(ggml_backend_t backend, cons
         case GGML_OP_SOFTCAP:
         case GGML_OP_SQR:
         case GGML_OP_SQRT:
+        case GGML_OP_SIN:
+        case GGML_OP_COS:
         case GGML_OP_CLAMP:
         case GGML_OP_CONT:
         case GGML_OP_DIAG_MASK_INF:
@@ -3622,6 +3639,8 @@ GGML_CALL static bool ggml_backend_cuda_supports_op(ggml_backend_t backend, cons
             }
             return ggml_cuda_info().devices[cuda_ctx->device].cc >= CC_VOLTA &&
                 op->src[1]->type == GGML_TYPE_F16 && op->src[2]->type == GGML_TYPE_F16;
+        case GGML_OP_CROSS_ENTROPY_LOSS:
+            return true;
 #endif // defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)
         default:
             return false;
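
With the dedicated kernel from cross-entropy-loss.cuh, GGML_OP_CROSS_ENTROPY_LOSS now runs on the GPU instead of falling back to the CPU (note the new supports_op case above). Graph construction is the existing C API; a minimal sketch, assuming F32 inputs of shape [n_classes, n_rows] and an existing ctx:

    struct ggml_tensor * logits = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_classes, n_rows);
    struct ggml_tensor * labels = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_classes, n_rows);
    struct ggml_tensor * loss   = ggml_cross_entropy_loss(ctx, logits, labels); // scalar result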

ggml/src/ggml-cuda/binbcast.cu

Lines changed: 8 additions & 0 deletions
@@ -16,6 +16,10 @@ static __device__ __forceinline__ float op_add(const float a, const float b) {
     return a + b;
 }

+static __device__ __forceinline__ float op_sub(const float a, const float b) {
+    return a - b;
+}
+
 static __device__ __forceinline__ float op_mul(const float a, const float b) {
     return a * b;
 }
@@ -317,6 +321,10 @@ void ggml_cuda_op_add(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
     ggml_cuda_op_bin_bcast<bin_bcast_cuda<op_add>>(dst->src[0], dst->src[1], dst, dst->src[0]->data, dst->src[1]->data, dst->data, ctx.stream());
 }

+void ggml_cuda_op_sub(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
+    ggml_cuda_op_bin_bcast<bin_bcast_cuda<op_sub>>(dst->src[0], dst->src[1], dst, dst->src[0]->data, dst->src[1]->data, dst->data, ctx.stream());
+}
+
 static __global__ void scale_f32_l(const float * x, float * dst, const void * data, const int k) {
     const int i = blockDim.x*blockIdx.x + threadIdx.x;

ggml/src/ggml-cuda/binbcast.cuh

Lines changed: 1 addition & 0 deletions
@@ -2,5 +2,6 @@

 void ggml_cuda_op_repeat(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
 void ggml_cuda_op_add(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
+void ggml_cuda_op_sub(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
 void ggml_cuda_op_mul(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
 void ggml_cuda_op_div(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
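
Taken together, the GGML_OP_SUB changes show the full recipe for wiring a binary broadcast op into the CUDA backend: a device functor and host wrapper in binbcast.cu, this header declaration, plus the compute_forward and supports_op cases in ggml-cuda.cu. Purely as an illustration (op_max and ggml_cuda_op_max are hypothetical names, not part of this commit), an element-wise maximum would follow the same pattern:

    // hypothetical functor, binbcast.cu
    static __device__ __forceinline__ float op_max(const float a, const float b) {
        return a > b ? a : b;
    }

    // hypothetical host wrapper, binbcast.cu; bin_bcast_cuda handles the broadcasting
    void ggml_cuda_op_max(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
        ggml_cuda_op_bin_bcast<bin_bcast_cuda<op_max>>(dst->src[0], dst->src[1], dst,
                dst->src[0]->data, dst->src[1]->data, dst->data, ctx.stream());
    }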
