
Commit aaa93bc

Author: katsu560
Message: read file data from tensor
Parent: 3234fa1

File tree

3 files changed: 1144 additions, 1220 deletions

examples/yolo/yolov3-tiny.cpp

Lines changed: 27 additions & 18 deletions
@@ -30,7 +30,7 @@ struct yolo_model {
     int height = 416;
     std::vector<conv2d_layer> conv2d_layers;
     struct ggml_context * ctx;
-    struct gguf_context * ggufctx;
+    struct gguf_context * ctx_gguf;
 };
 
 struct yolo_layer {
@@ -72,7 +72,7 @@ static bool load_model(const std::string & fname, yolo_model & model) {
         fprintf(stderr, "%s: gguf_init_from_file() failed\n", __func__);
         return false;
     }
-    model.ggufctx = ctx;
+    model.ctx_gguf = ctx;
     model.width = 416;
     model.height = 416;
     model.conv2d_layers.resize(13);
@@ -157,15 +157,19 @@ static bool load_labels(const char * filename, std::vector<std::string> & labels
     return true;
 }
 
-static bool load_labels_kv(const struct gguf_context * ctx, const char * filename, std::vector<std::string> & labels)
+static bool load_labels_gguf(const struct gguf_context * ctx, const char * filename, std::vector<std::string> & labels)
 {
-    int key_id = gguf_find_key(ctx, filename);
+    int key_id = gguf_find_key_array(ctx, "embedded_files", filename);
     if (key_id == -1) {
         return false;
     }
-    const char * data = gguf_get_val_str(ctx, key_id);
-    uint64_t n = gguf_get_val_str_len(ctx, key_id);
-    membuf buf(data, data + n);
+    char *data = NULL;
+    size_t size = 0;
+    int tensor = gguf_find_and_get_tensor(ctx, filename, &data, &size);
+    if (tensor == -1) {
+        return false;
+    }
+    membuf buf(data, data + size);
     std::istream file_in(&buf);
     if (!file_in) {
         return false;
@@ -194,21 +198,26 @@ static bool load_alphabet(std::vector<yolo_image> & alphabet)
     return true;
 }
 
-static bool load_alphabet_kv(const struct gguf_context * ctx, std::vector<yolo_image> & alphabet)
+static bool load_alphabet_gguf(const struct gguf_context * ctx, std::vector<yolo_image> & alphabet)
 {
     alphabet.resize(8 * 128);
     for (int j = 0; j < 8; j++) {
         for (int i = 32; i < 127; i++) {
             char fname[256];
-            sprintf(fname, "/data/labels/%d_%d.png", i, j);
-            int key_id = gguf_find_key(ctx, fname);
+            sprintf(fname, "data/labels/%d_%d.png", i, j);
+            int key_id = gguf_find_key_array(ctx, "embedded_files", fname);
             if (key_id == -1) {
-                fprintf(stderr, "Cannot find '%s'\n", fname);
+                fprintf(stderr, "Cannot find '%s' in embedded_files\n", fname);
+                return false;
+            }
+            char *data = NULL;
+            size_t size = 0;
+            int tensor = gguf_find_and_get_tensor(ctx, fname, &data, &size);
+            if (tensor == -1) {
+                fprintf(stderr, "Cannot find '%s' in tensor\n", fname);
                 return false;
             }
-            const char * data = gguf_get_val_str(ctx, key_id);
-            uint64_t n = gguf_get_val_str_len(ctx, key_id);
-            if (!load_image_from_memory(data, n, alphabet[j*128 + i])) {
+            if (!load_image_from_memory(data, size, alphabet[j*128 + i])) {
                 fprintf(stderr, "Cannot load '%s'\n", fname);
                 return false;
             }
@@ -499,7 +508,7 @@ void detect(yolo_image & img, const yolo_model & model, float thresh, const std:
     print_shape(18, result);
    result = ggml_upscale(ctx0, result, 2);
     print_shape(19, result);
-    result = ggml_concat(ctx0, result, layer_8);
+    result = ggml_concat(ctx0, result, layer_8, 2);
     print_shape(20, result);
     result = apply_conv2d(ctx0, result, model.conv2d_layers[11]);
     print_shape(21, result);
@@ -590,15 +599,15 @@ int main(int argc, char *argv[])
         return 1;
     }
     std::vector<std::string> labels;
-    if (!load_labels_kv(model.ggufctx, "/data/coco.names", labels)) {
-        fprintf(stderr, "%s: failed to load labels from '/data/coco.names' in model\n", __func__);
+    if (!load_labels_gguf(model.ctx_gguf, "data/coco.names", labels)) {
+        fprintf(stderr, "%s: failed to load labels from 'data/coco.names' in model\n", __func__);
         if (!load_labels("data/coco.names", labels)) {
            fprintf(stderr, "%s: failed to load labels from 'data/coco.names'\n", __func__);
             return 1;
         }
     }
     std::vector<yolo_image> alphabet;
-    if (!load_alphabet_kv(model.ggufctx, alphabet)) {
+    if (!load_alphabet_gguf(model.ctx_gguf, alphabet)) {
         fprintf(stderr, "%s: failed to load alphabet from model\n", __func__);
         if (!load_alphabet(alphabet)) {
             fprintf(stderr, "%s: failed to load alphabet\n", __func__);
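
Taken together, the yolo changes replace the old KV-string lookup with a two-step read: the file name is checked against the "embedded_files" array key, and the file bytes are then pulled from a tensor of the same name. Below is a self-contained sketch of that read path; membuf and read_embedded_lines are illustrative helpers written for this note, and only the two gguf_* calls come from this commit.

    // Minimal sketch of the new lookup path, written against the declarations
    // added in this commit (gguf_find_key_array, gguf_find_and_get_tensor).
    #include <cstddef>
    #include <istream>
    #include <streambuf>
    #include <string>
    #include <vector>

    #include "ggml.h"

    // Wrap a raw byte range in a std::streambuf so it can be read through std::istream.
    struct membuf : std::streambuf {
        membuf(char * begin, char * end) { this->setg(begin, begin, end); }
    };

    // Hypothetical helper: read one embedded text file (e.g. "data/coco.names") line by line.
    static bool read_embedded_lines(const struct gguf_context * ctx, const char * fname,
                                    std::vector<std::string> & lines) {
        // 1) check that fname is listed in the "embedded_files" array key
        if (gguf_find_key_array(ctx, "embedded_files", fname) == -1) {
            return false;
        }
        // 2) fetch the raw bytes of the tensor that carries the file data
        char * data = NULL;
        size_t size = 0;
        if (gguf_find_and_get_tensor(ctx, fname, &data, &size) == -1) {
            return false;
        }
        // 3) stream the bytes as if they were a file on disk
        membuf buf(data, data + size);
        std::istream in(&buf);
        std::string line;
        while (std::getline(in, line)) {
            lines.push_back(line);
        }
        return true;
    }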

include/ggml/ggml.h

Lines changed: 78 additions & 54 deletions
@@ -481,9 +481,7 @@ extern "C" {
         GGML_OP_ARGSORT,
         GGML_OP_LEAKY_RELU,
 
-        GGML_OP_FLASH_ATTN,
         GGML_OP_FLASH_ATTN_EXT,
-        GGML_OP_FLASH_FF,
         GGML_OP_FLASH_ATTN_BACK,
         GGML_OP_SSM_CONV,
         GGML_OP_SSM_SCAN,
@@ -565,7 +563,8 @@ extern "C" {
     // n-dimensional tensor
     struct ggml_tensor {
         enum ggml_type type;
-        enum ggml_backend_type backend;
+
+        GGML_DEPRECATED(enum ggml_backend_type backend, "use the buffer type to find the storage location of the tensor");
 
         struct ggml_backend_buffer * buffer;
 
@@ -1008,12 +1007,13 @@ extern "C" {
             struct ggml_tensor * a,
             struct ggml_tensor * b);
 
-    // concat a and b on dim 2
+    // concat a and b along dim
     // used in stable-diffusion
     GGML_API struct ggml_tensor * ggml_concat(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
-            struct ggml_tensor * b);
+            struct ggml_tensor * b,
+            int dim);
 
     GGML_API struct ggml_tensor * ggml_abs(
             struct ggml_context * ctx,
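
Note on the ggml_concat change: the concatenation dimension is now an explicit argument instead of being hard-coded to 2, which is why the yolo call site above passes 2 to keep its previous behaviour. A minimal sketch, with made-up tensor shapes:

    // Hedged sketch: concatenating two F32 feature maps along dimension 2 with the updated API.
    #include "ggml.h"

    static struct ggml_tensor * concat_channels(struct ggml_context * ctx,
                                                struct ggml_tensor * a,   // e.g. [W, H, C0, N]
                                                struct ggml_tensor * b) { // e.g. [W, H, C1, N]
        // dim = 2 reproduces the old hard-coded behaviour (channel concat for conv feature maps);
        // other dims in [0, GGML_MAX_DIMS) are accepted as long as the remaining dims match.
        return ggml_concat(ctx, a, b, 2);
    }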
@@ -1459,11 +1459,12 @@ extern "C" {
             struct ggml_tensor * b);
 
     // rotary position embedding
-    // if mode & 1 == 1, skip n_past elements (DEPRECATED)
+    // if mode & 1 == 1, skip n_past elements (NOT SUPPORTED)
     // if mode & 2 == 1, GPT-NeoX style
     // if mode & 4 == 1, ChatGLM style
     //
     // b is an int32 vector with size a->ne[2], it contains the positions
+    // c is freq factors (e.g. phi3-128k), (optional)
     GGML_API struct ggml_tensor * ggml_rope(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
@@ -1482,10 +1483,11 @@ extern "C" {
             int n_ctx);
 
     // custom RoPE
-    GGML_API struct ggml_tensor * ggml_rope_custom(
+    GGML_API struct ggml_tensor * ggml_rope_ext(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
             struct ggml_tensor * b,
+            struct ggml_tensor * c,
             int n_dims,
             int mode,
             int n_ctx,
@@ -1498,10 +1500,11 @@ extern "C" {
             float beta_slow);
 
     // in-place, returns view(a)
-    GGML_API struct ggml_tensor * ggml_rope_custom_inplace(
+    GGML_API struct ggml_tensor * ggml_rope_ext_inplace(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
             struct ggml_tensor * b,
+            struct ggml_tensor * c,
             int n_dims,
             int mode,
             int n_ctx,
@@ -1513,25 +1516,57 @@ extern "C" {
             float beta_fast,
             float beta_slow);
 
-    // compute correction dims for YaRN RoPE scaling
-    GGML_CALL void ggml_rope_yarn_corr_dims(
-        int n_dims, int n_orig_ctx, float freq_base, float beta_fast, float beta_slow, float dims[2]);
+    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_rope_custom(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * b,
+            int n_dims,
+            int mode,
+            int n_ctx,
+            int n_orig_ctx,
+            float freq_base,
+            float freq_scale,
+            float ext_factor,
+            float attn_factor,
+            float beta_fast,
+            float beta_slow),
+        "use ggml_rope_ext instead");
 
-    // xPos RoPE, in-place, returns view(a)
-    GGML_API struct ggml_tensor * ggml_rope_xpos_inplace(
+    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_rope_custom_inplace(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
             struct ggml_tensor * b,
             int n_dims,
-            float base,
-            bool down);
+            int mode,
+            int n_ctx,
+            int n_orig_ctx,
+            float freq_base,
+            float freq_scale,
+            float ext_factor,
+            float attn_factor,
+            float beta_fast,
+            float beta_slow),
+        "use ggml_rope_ext_inplace instead");
+
+    struct ggml_tensor * ggml_rope_xpos_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * b,
+            int n_dims,
+            float base,
+            bool down);
+
+    // compute correction dims for YaRN RoPE scaling
+    GGML_CALL void ggml_rope_yarn_corr_dims(
+        int n_dims, int n_orig_ctx, float freq_base, float beta_fast, float beta_slow, float dims[2]);
 
     // rotary position embedding backward, i.e compute dx from dy
     // a - dy
     GGML_API struct ggml_tensor * ggml_rope_back(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
             struct ggml_tensor * b,
+            struct ggml_tensor * c,
             int n_dims,
             int mode,
             int n_ctx,
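
Migration note for the RoPE renames above: ggml_rope_ext takes the same arguments as the now-deprecated ggml_rope_custom plus the new frequency-factors tensor c, and passing NULL for c keeps the previous behaviour. A hedged sketch with placeholder hyperparameters (not values from this commit):

    // Hedged migration sketch: ggml_rope_custom(...) becomes ggml_rope_ext(..., c, ...).
    #include <cstddef>
    #include "ggml.h"

    static struct ggml_tensor * apply_rope(struct ggml_context * ctx,
                                           struct ggml_tensor * cur,   // activations
                                           struct ggml_tensor * pos) { // int32 positions, size cur->ne[2]
        // placeholder hyperparameters for illustration only
        const int n_dims     = 128;
        const int mode       = 0;
        const int n_ctx      = 4096;
        const int n_orig_ctx = 4096;
        return ggml_rope_ext(ctx, cur, pos, /*c =*/ NULL,
                             n_dims, mode, n_ctx, n_orig_ctx,
                             /*freq_base  =*/ 10000.0f, /*freq_scale  =*/ 1.0f,
                             /*ext_factor =*/ 0.0f,     /*attn_factor =*/ 1.0f,
                             /*beta_fast  =*/ 32.0f,    /*beta_slow   =*/ 1.0f);
    }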
@@ -1733,13 +1768,6 @@ extern "C" {
             struct ggml_tensor * a,
             int k);
 
-    GGML_API struct ggml_tensor * ggml_flash_attn(
-            struct ggml_context * ctx,
-            struct ggml_tensor * q,
-            struct ggml_tensor * k,
-            struct ggml_tensor * v,
-            bool masked);
-
 #define GGML_KQ_MASK_PAD 32
 
     // q: [n_embd, n_batch, n_head, 1]
@@ -1760,6 +1788,7 @@ extern "C" {
             struct ggml_tensor * a,
             enum ggml_prec prec);
 
+    // TODO: needs to be adapted to ggml_flash_attn_ext
     GGML_API struct ggml_tensor * ggml_flash_attn_back(
             struct ggml_context * ctx,
             struct ggml_tensor * q,
@@ -1768,14 +1797,6 @@ extern "C" {
             struct ggml_tensor * d,
             bool masked);
 
-    GGML_API struct ggml_tensor * ggml_flash_ff(
-            struct ggml_context * ctx,
-            struct ggml_tensor * a,
-            struct ggml_tensor * b0,
-            struct ggml_tensor * b1,
-            struct ggml_tensor * c0,
-            struct ggml_tensor * c1);
-
     GGML_API struct ggml_tensor * ggml_ssm_conv(
             struct ggml_context * ctx,
             struct ggml_tensor * s,
@@ -2298,35 +2319,37 @@ extern "C" {
 
     GGML_API int gguf_get_n_kv(const struct gguf_context * ctx);
     GGML_API int gguf_find_key(const struct gguf_context * ctx, const char * key);
+    GGML_API int gguf_find_key_array(const struct gguf_context * ctx, const char * key, const char * val);
     GGML_API const char * gguf_get_key (const struct gguf_context * ctx, int key_id);
 
     GGML_API enum gguf_type gguf_get_kv_type (const struct gguf_context * ctx, int key_id);
     GGML_API enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int key_id);
 
     // will abort if the wrong type is used for the key
-    GGML_API uint8_t  gguf_get_val_u8  (const struct gguf_context * ctx, int key_id);
-    GGML_API int8_t   gguf_get_val_i8  (const struct gguf_context * ctx, int key_id);
-    GGML_API uint16_t gguf_get_val_u16 (const struct gguf_context * ctx, int key_id);
-    GGML_API int16_t  gguf_get_val_i16 (const struct gguf_context * ctx, int key_id);
-    GGML_API uint32_t gguf_get_val_u32 (const struct gguf_context * ctx, int key_id);
-    GGML_API int32_t  gguf_get_val_i32 (const struct gguf_context * ctx, int key_id);
-    GGML_API float    gguf_get_val_f32 (const struct gguf_context * ctx, int key_id);
-    GGML_API uint64_t gguf_get_val_u64 (const struct gguf_context * ctx, int key_id);
-    GGML_API int64_t  gguf_get_val_i64 (const struct gguf_context * ctx, int key_id);
-    GGML_API double   gguf_get_val_f64 (const struct gguf_context * ctx, int key_id);
-    GGML_API bool     gguf_get_val_bool   (const struct gguf_context * ctx, int key_id);
-    GGML_API const char * gguf_get_val_str    (const struct gguf_context * ctx, int key_id);
-    GGML_API uint64_t     gguf_get_val_str_len(const struct gguf_context * ctx, int key_id);
-    GGML_API const void * gguf_get_val_data   (const struct gguf_context * ctx, int key_id);
-    GGML_API int          gguf_get_arr_n      (const struct gguf_context * ctx, int key_id);
-    GGML_API const void * gguf_get_arr_data   (const struct gguf_context * ctx, int key_id);
-    GGML_API const char * gguf_get_arr_str    (const struct gguf_context * ctx, int key_id, int i);
-
-    GGML_API int            gguf_get_n_tensors    (const struct gguf_context * ctx);
-    GGML_API int            gguf_find_tensor      (const struct gguf_context * ctx, const char * name);
-    GGML_API size_t         gguf_get_tensor_offset(const struct gguf_context * ctx, int i);
-    GGML_API char *         gguf_get_tensor_name  (const struct gguf_context * ctx, int i);
-    GGML_API enum ggml_type gguf_get_tensor_type  (const struct gguf_context * ctx, int i);
+    GGML_API uint8_t  gguf_get_val_u8  (const struct gguf_context * ctx, int key_id);
+    GGML_API int8_t   gguf_get_val_i8  (const struct gguf_context * ctx, int key_id);
+    GGML_API uint16_t gguf_get_val_u16 (const struct gguf_context * ctx, int key_id);
+    GGML_API int16_t  gguf_get_val_i16 (const struct gguf_context * ctx, int key_id);
+    GGML_API uint32_t gguf_get_val_u32 (const struct gguf_context * ctx, int key_id);
+    GGML_API int32_t  gguf_get_val_i32 (const struct gguf_context * ctx, int key_id);
+    GGML_API float    gguf_get_val_f32 (const struct gguf_context * ctx, int key_id);
+    GGML_API uint64_t gguf_get_val_u64 (const struct gguf_context * ctx, int key_id);
+    GGML_API int64_t  gguf_get_val_i64 (const struct gguf_context * ctx, int key_id);
+    GGML_API double   gguf_get_val_f64 (const struct gguf_context * ctx, int key_id);
+    GGML_API bool     gguf_get_val_bool(const struct gguf_context * ctx, int key_id);
+    GGML_API const char * gguf_get_val_str (const struct gguf_context * ctx, int key_id);
+    GGML_API const void * gguf_get_val_data(const struct gguf_context * ctx, int key_id);
+    GGML_API int          gguf_get_arr_n   (const struct gguf_context * ctx, int key_id);
+    GGML_API const void * gguf_get_arr_data(const struct gguf_context * ctx, int key_id);
+    GGML_API const char * gguf_get_arr_str (const struct gguf_context * ctx, int key_id, int i);
+
+    GGML_API int            gguf_get_n_tensors    (const struct gguf_context * ctx);
+    GGML_API int            gguf_find_tensor      (const struct gguf_context * ctx, const char * name);
+    GGML_API size_t         gguf_get_tensor_offset(const struct gguf_context * ctx, int i);
+    GGML_API char *         gguf_get_tensor_name  (const struct gguf_context * ctx, int i);
+    GGML_API enum ggml_type gguf_get_tensor_type  (const struct gguf_context * ctx, int i);
+    GGML_API size_t         gguf_get_tensor_size  (const struct gguf_context * ctx, int i);
+    GGML_API int            gguf_find_and_get_tensor(const struct gguf_context * ctx, const char * name, char ** data, size_t * size);
 
     // removes key if it exists
     GGML_API void gguf_remove_key(struct gguf_context * ctx, const char * key);
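
The reader-side pair added above (gguf_find_key_array plus gguf_find_and_get_tensor) implies a writer-side convention: each embedded file is stored as a tensor named after its path and listed under an "embedded_files" string-array key. The converter is not part of this diff, so the following is only a plausible sketch built from long-standing gguf writer calls; the 1-D I8 tensor layout is an assumption inferred from the reader code, not an API guarantee.

    // Hedged writer-side sketch: embed raw files into a GGUF so the new reader API can find them.
    // Assumes ctx was created with no_alloc = false and enough mem_size to hold all file bytes.
    #include <cstring>
    #include <string>
    #include <utility>
    #include <vector>

    #include "ggml.h"

    static void embed_files(struct gguf_context * gguf, struct ggml_context * ctx,
                            const std::vector<std::pair<std::string, std::vector<char>>> & files) {
        std::vector<const char *> names;
        for (const auto & f : files) {
            // one I8 tensor per embedded file, named after its path (assumed layout)
            struct ggml_tensor * t = ggml_new_tensor_1d(ctx, GGML_TYPE_I8, (int64_t) f.second.size());
            ggml_set_name(t, f.first.c_str());
            std::memcpy(t->data, f.second.data(), f.second.size());
            gguf_add_tensor(gguf, t);
            names.push_back(f.first.c_str());
        }
        // single array key listing every embedded path; readers check it with gguf_find_key_array()
        gguf_set_arr_str(gguf, "embedded_files", names.data(), (int) names.size());
    }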
@@ -2344,7 +2367,6 @@ extern "C" {
     GGML_API void gguf_set_val_f64 (struct gguf_context * ctx, const char * key, double val);
     GGML_API void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val);
     GGML_API void gguf_set_val_str (struct gguf_context * ctx, const char * key, const char * val);
-    GGML_API void gguf_set_val_data(struct gguf_context * ctx, const char * key, const char * val, int n);
     GGML_API void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, int n);
     GGML_API void gguf_set_arr_str (struct gguf_context * ctx, const char * key, const char ** data, int n);
 
@@ -2391,8 +2413,10 @@ extern "C" {
     GGML_API int ggml_cpu_has_avx512     (void);
     GGML_API int ggml_cpu_has_avx512_vbmi(void);
     GGML_API int ggml_cpu_has_avx512_vnni(void);
+    GGML_API int ggml_cpu_has_avx512_bf16(void);
     GGML_API int ggml_cpu_has_fma        (void);
     GGML_API int ggml_cpu_has_neon       (void);
+    GGML_API int ggml_cpu_has_sve        (void);
     GGML_API int ggml_cpu_has_arm_fma    (void);
     GGML_API int ggml_cpu_has_metal      (void);
     GGML_API int ggml_cpu_has_f16c       (void);
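
The two new CPU feature probes follow the existing ggml_cpu_has_* pattern; a trivial usage sketch (the results depend on the build flags and the host CPU):

    // Print the two feature flags added in this commit; other ggml_cpu_has_* calls work the same way.
    #include <cstdio>
    #include "ggml.h"

    int main() {
        std::printf("AVX512-BF16: %d\n", ggml_cpu_has_avx512_bf16());
        std::printf("ARM SVE:     %d\n", ggml_cpu_has_sve());
        return 0;
    }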
