Skip to content

Commit 8a43e94

Browse files
JohannesGaesslerggerganov
authored andcommitted
ggml: new optimization interface (ggml/988)
1 parent 5c9a8b2 commit 8a43e94

File tree

15 files changed

+2667
-1637
lines changed

15 files changed

+2667
-1637
lines changed

ggml/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,7 @@ set(GGML_PUBLIC_HEADERS
228228
include/ggml-cann.h
229229
include/ggml-cuda.h
230230
include/ggml-kompute.h
231+
include/ggml-opt.h
231232
include/ggml-metal.h
232233
include/ggml-rpc.h
233234
include/ggml-sycl.h

ggml/include/ggml-backend.h

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ extern "C" {
8686
GGML_API void ggml_backend_tensor_set_async(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
8787
GGML_API void ggml_backend_tensor_get_async(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
8888

89-
// "offset" refers to the offset of the tensor data for setting/getting data
89+
// "offset" refers to the offset in tensor->data for setting/getting data
9090
GGML_API void ggml_backend_tensor_set( struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
9191
GGML_API void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
9292
GGML_API void ggml_backend_tensor_memset( struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size);
@@ -242,14 +242,20 @@ extern "C" {
242242
ggml_backend_sched_reserve(sched, reserve_graph);
243243
244244
// compute
245-
graph = build_graph(sched);
246-
ggml_backend_sched_graph_compute(sched, graph);
245+
graph = build_graph(sched); // the graph and its tensors are single-use in terms of allocation, multi-use in terms of computation
246+
for (int i = 0; i < 10; ++i) {
247+
ggml_backend_sched_graph_compute(sched, graph); // on the first iteration the graph is allocated automatically
248+
}
247249
248250
// if there are graph inputs:
249-
ggml_backend_sched_reset(sched);
250-
ggml_backend_sched_alloc_graph(sched, graph);
251-
ggml_backend_tensor_set(input_tensor, ...);
252-
ggml_backend_sched_graph_compute(sched, graph);
251+
graph = build_graph(sched); // get a new graph that is not allocated (the metadata for the old graph is freed once ggml_free is called)
252+
ggml_backend_sched_reset(sched); // clear the allocation of the previous graph
253+
ggml_backend_sched_alloc_graph(sched, graph); // explicitly allocate the new graph but do not execute it
254+
ggml_backend_tensor_set(input_tensor, ...); // copy data to the newly allocated graph tensors
255+
ggml_backend_sched_graph_compute(sched, graph); // execute the graph
256+
257+
// as an alternative to the above it is also possible to assign the inputs to a dedicated context and
258+
// allocate them statically via ggml_backend_alloc_ctx_tensors
253259
}
254260
*/
255261

@@ -264,7 +270,7 @@ extern "C" {
264270
//
265271
typedef bool (*ggml_backend_sched_eval_callback)(struct ggml_tensor * t, bool ask, void * user_data);
266272

267-
// Initialize a backend scheduler
273+
// Initialize a backend scheduler, backends with low index are given priority over backends with high index
268274
GGML_API ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size, bool parallel);
269275
GGML_API void ggml_backend_sched_free(ggml_backend_sched_t sched);
270276

@@ -289,7 +295,9 @@ extern "C" {
289295
GGML_API enum ggml_status ggml_backend_sched_graph_compute_async(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
290296
GGML_API void ggml_backend_sched_synchronize(ggml_backend_sched_t sched);
291297

292-
// Reset all assignments and allocators - must be called before changing the node backends
298+
// Reset all assignments and allocators - must be called before changing the node backends or allocating a new graph.
299+
// This in effect deallocates all tensors that were previously allocated and leaves them with dangling pointers.
300+
// The correct way to use this API is to discard the deallocated tensors and create new ones.
293301
GGML_API void ggml_backend_sched_reset(ggml_backend_sched_t sched);
294302

295303
// Set a callback to be called for each resulting node during graph compute

ggml/include/ggml-opt.h

Lines changed: 216 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,216 @@
1+
// This file contains functionality for training models using GGML.
2+
// It is not strictly needed vs. just vanilla GGML but it provides a more high-level interface for common needs such as datasets.
3+
// At the bottom of this file especially there are relatively high-level functions that are suitable for use or adaptation in user code.
4+
//
5+
// Module maintainer: Johannes Gäßler (@JohannesGaessler, [email protected])
6+
7+
#pragma once
8+
9+
#include "ggml.h"
10+
#include "ggml-backend.h"
11+
12+
#include <stdint.h>
13+
14+
#ifdef __cplusplus
15+
extern "C" {
16+
#endif
17+
18+
struct ggml_opt_dataset;
19+
struct ggml_opt_context;
20+
struct ggml_opt_result;
21+
22+
typedef struct ggml_opt_dataset * ggml_opt_dataset_t;
23+
typedef struct ggml_opt_context * ggml_opt_context_t;
24+
typedef struct ggml_opt_result * ggml_opt_result_t;
25+
26+
// ====== Loss ======
27+
28+
// built-in loss types, i.e. the built-in quantities minimized by the optimizer
29+
// custom loss types can be defined via mean or sum which simply reduce the outputs for all datapoints to a single value
30+
enum ggml_opt_loss_type {
31+
GGML_OPT_LOSS_TYPE_MEAN,
32+
GGML_OPT_LOSS_TYPE_SUM,
33+
GGML_OPT_LOSS_TYPE_CROSS_ENTROPY,
34+
GGML_OPT_LOSS_TYPE_MEAN_SQUARED_ERROR,
35+
};
36+
37+
// ====== Dataset ======
38+
39+
GGML_API ggml_opt_dataset_t ggml_opt_dataset_init(
40+
int64_t ne_datapoint, // number of elements per datapoint
41+
int64_t ne_label, // number of elements per label
42+
int64_t ndata, // total number of datapoints/labels
43+
int64_t ndata_shard); // number of datapoints/labels per shard (unit at which the dataset is shuffled/copied)
44+
GGML_API void ggml_opt_dataset_free(ggml_opt_dataset_t dataset);
45+
46+
// get underlying tensors that store the data
47+
GGML_API struct ggml_tensor * ggml_opt_dataset_data (ggml_opt_dataset_t dataset); // shape = [ne_datapoint, ndata]
48+
GGML_API struct ggml_tensor * ggml_opt_dataset_labels(ggml_opt_dataset_t dataset); // shape = [ne_label, ndata]
49+
50+
// shuffle idata first datapoints from dataset with RNG from opt_ctx, shuffle all datapoints if idata is negative
51+
GGML_API void ggml_opt_dataset_shuffle(ggml_opt_context_t opt_ctx, ggml_opt_dataset_t dataset, int64_t idata);
52+
53+
// get batch at position ibatch from dataset and copy the data to data_batch and labels_batch
54+
GGML_API void ggml_opt_dataset_get_batch(
55+
ggml_opt_dataset_t dataset,
56+
struct ggml_tensor * data_batch, // shape = [ne_datapoint, ndata_batch]
57+
struct ggml_tensor * labels_batch, // shape = [ne_label, ndata_batch]
58+
int64_t ibatch);
59+
60+
// ====== Model / Context ======
61+
62+
enum ggml_opt_build_type {
63+
GGML_OPT_BUILD_TYPE_FORWARD,
64+
GGML_OPT_BUILD_TYPE_GRAD,
65+
GGML_OPT_BUILD_TYPE_OPT,
66+
};
67+
68+
// parameters that control which optimizer is used and how said optimizer tries to find the minimal loss
69+
struct ggml_opt_optimizer_params {
70+
// AdamW optimizer parameters
71+
struct {
72+
float alpha; // learning rate
73+
float beta1;
74+
float beta2;
75+
float eps; // epsilon for numerical stability
76+
float wd; // weight decay for AdamW, use 0.0f to disable
77+
} adamw;
78+
};
79+
80+
// callback to calculate optimizer parameters prior to a backward pass
81+
// userdata can be used to pass arbitrary data
82+
typedef struct ggml_opt_optimizer_params (*ggml_opt_get_optimizer_params)(void * userdata);
83+
84+
// returns the default optimizer params (constant)
85+
// userdata is not used
86+
GGML_API struct ggml_opt_optimizer_params ggml_opt_get_default_optimizer_params(void * userdata);
87+
88+
// parameters for initializing a new optimization context
89+
struct ggml_opt_params {
90+
ggml_backend_sched_t backend_sched; // defines which backends are used to construct the compute graphs
91+
92+
struct ggml_context * ctx_compute; // created in user code, holds non-static tensors
93+
94+
// the forward graph is defined by inputs and outputs
95+
// those tensors and all tensors in between are not intended to be reusable between multiple optimization contexts
96+
struct ggml_tensor * inputs;
97+
struct ggml_tensor * outputs;
98+
99+
enum ggml_opt_loss_type loss_type;
100+
enum ggml_opt_build_type build_type;
101+
102+
int32_t opt_period; // after how many gradient accumulation steps an optimizer step should be done
103+
104+
ggml_opt_get_optimizer_params get_opt_pars; // callback for calculating optimizer parameters
105+
void * get_opt_pars_ud; // userdata for calculating optimizer parameters
106+
};
107+
108+
// get parameters for an optimization context with defaults set where possible
109+
// parameters for which no sensible defaults exist are supplied as arguments to this function
110+
GGML_API ggml_opt_params ggml_opt_default_params(
111+
ggml_backend_sched_t backend_sched,
112+
struct ggml_context * ctx_compute,
113+
struct ggml_tensor * inputs,
114+
struct ggml_tensor * outputs,
115+
enum ggml_opt_loss_type loss_type);
116+
117+
GGML_API ggml_opt_context_t ggml_opt_init(struct ggml_opt_params params);
118+
GGML_API void ggml_opt_free(ggml_opt_context_t opt_ctx);
119+
120+
// set gradients to zero, initialize loss, and optionally reset the optimizer
121+
GGML_API void ggml_opt_reset(ggml_opt_context_t opt_ctx, bool optimizer);
122+
123+
// get underlying tensors that store data
124+
GGML_API struct ggml_tensor * ggml_opt_inputs( ggml_opt_context_t opt_ctx); // forward graph input tensor
125+
GGML_API struct ggml_tensor * ggml_opt_outputs( ggml_opt_context_t opt_ctx); // forward graph output tensor
126+
GGML_API struct ggml_tensor * ggml_opt_labels( ggml_opt_context_t opt_ctx); // labels to compare outputs against
127+
GGML_API struct ggml_tensor * ggml_opt_loss( ggml_opt_context_t opt_ctx); // scalar tensor that contains the loss
128+
GGML_API struct ggml_tensor * ggml_opt_pred( ggml_opt_context_t opt_ctx); // predictions made by outputs
129+
GGML_API struct ggml_tensor * ggml_opt_ncorrect(ggml_opt_context_t opt_ctx); // number of matching predictions between outputs and labels
130+
131+
GGML_API struct ggml_tensor * ggml_opt_grad_acc(ggml_opt_context_t opt_ctx, struct ggml_tensor * node);
132+
133+
// ====== Optimization Result ======
134+
135+
// (void) makes this a real prototype in C; an empty parameter list would leave the arguments unspecified
GGML_API ggml_opt_result_t ggml_opt_result_init(void);
136+
GGML_API void ggml_opt_result_free(ggml_opt_result_t result);
137+
GGML_API void ggml_opt_result_reset(ggml_opt_result_t result);
138+
139+
// get data from result, uncertainties are optional and can be ignored by passing NULL
140+
GGML_API void ggml_opt_result_ndata( ggml_opt_result_t result, int64_t * ndata); // writes 1 value, number of datapoints
141+
GGML_API void ggml_opt_result_loss( ggml_opt_result_t result, double * loss, double * unc); // writes 1 value
142+
GGML_API void ggml_opt_result_pred( ggml_opt_result_t result, int32_t * pred); // writes ndata values
143+
GGML_API void ggml_opt_result_accuracy(ggml_opt_result_t result, double * accuracy, double * unc); // writes 1 value
144+
145+
// ====== Computation ======
146+
147+
// do forward pass, increment result if not NULL
148+
GGML_API void ggml_opt_forward(ggml_opt_context_t opt_ctx, ggml_opt_result_t result);
149+
150+
// do forward pass, increment result if not NULL, do backward pass
151+
GGML_API void ggml_opt_forward_backward(ggml_opt_context_t opt_ctx, ggml_opt_result_t result);
152+
153+
// ############################################################################
154+
// ## The high-level functions start here. They do not depend on any private ##
155+
// ## functions or structs and can be copied to and adapted for user code. ##
156+
// ############################################################################
157+
158+
// ====== Intended Usage ======
159+
//
160+
// 1. Select the appropriate loss for your problem.
161+
// 2. Create a dataset and set the data for the "data" tensor. Also set the "labels" tensor if your loss needs them.
162+
// Setting the shard size to 1 will be fine, it's the granularity with which data is shuffled/loaded (bigger values are faster).
163+
// 3. Create a GGML graph for your model with no_alloc == true. Use two separate contexts for the tensors.
164+
// The first context should contain the model parameters and inputs and be allocated statically in user code.
165+
// The second context should contain all other tensors and will be (re)allocated automatically.
166+
// Due to this automated allocation the data of the second context is not defined when accessed in user code.
167+
// Note that the second dimension of the inputs/outputs is interpreted as the number of datapoints in those tensors.
168+
// 4. Call ggml_opt_fit. If you need more control you can use ggml_opt_epoch instead.
169+
170+
// signature for a callback while evaluating opt_ctx on dataset, called after an evaluation
171+
typedef void (*ggml_opt_epoch_callback)(
172+
bool train, // true after training evaluation, false after validation evaluation
173+
ggml_opt_context_t opt_ctx,
174+
ggml_opt_dataset_t dataset,
175+
ggml_opt_result_t result, // result associated with the dataset subsection
176+
int64_t ibatch, // number of batches that have been evaluated so far
177+
int64_t ibatch_max, // total number of batches in this dataset subsection
178+
int64_t t_start_us); // time at which the evaluation on the dataset subsection was started
179+
180+
// do training on front of dataset, do evaluation only on back of dataset
181+
GGML_API void ggml_opt_epoch(
182+
ggml_opt_context_t opt_ctx,
183+
ggml_opt_dataset_t dataset,
184+
ggml_opt_result_t result_train, // result to increment during training, ignored if NULL
185+
ggml_opt_result_t result_eval, // result to increment during evaluation, ignored if NULL
186+
int64_t idata_split, // data index at which to split training and evaluation
187+
ggml_opt_epoch_callback callback_train,
188+
ggml_opt_epoch_callback callback_eval);
189+
190+
// callback that prints a progress bar on stderr
191+
GGML_API void ggml_opt_epoch_callback_progress_bar(
192+
bool train,
193+
ggml_opt_context_t opt_ctx,
194+
ggml_opt_dataset_t dataset,
195+
ggml_opt_result_t result,
196+
int64_t ibatch,
197+
int64_t ibatch_max,
198+
int64_t t_start_us);
199+
200+
// fit model defined by inputs and outputs to dataset
201+
GGML_API void ggml_opt_fit(
202+
ggml_backend_sched_t backend_sched, // backend scheduler for constructing the compute graphs
203+
ggml_context * ctx_compute, // context with temporarily allocated tensors to calculate the outputs
204+
ggml_tensor * inputs, // input tensor with shape [ne_datapoint, ndata_batch]
205+
ggml_tensor * outputs, // output tensor, must have shape [ne_label, ndata_batch] if labels are used
206+
ggml_opt_dataset_t dataset, // dataset with data and optionally also labels
207+
enum ggml_opt_loss_type loss_type, // loss to minimize
208+
ggml_opt_get_optimizer_params get_opt_pars, // callback to get optimizer params, userdata is pointer to epoch (of type int64_t)
209+
int64_t nepoch, // how many times the dataset should be iterated over
210+
int64_t nbatch_logical, // datapoints optimizer step, must be a multiple of ndata_batch in inputs/outputs
211+
float val_split, // fraction of the dataset to use for validation, must be in [0.0f, 1.0f)
212+
bool silent); // whether or not info prints to stderr should be suppressed
213+
214+
#ifdef __cplusplus
215+
}
216+
#endif

0 commit comments

Comments
 (0)