Skip to content

Commit 6002bd8

Browse files
committed
llama : pimpl llama_model
ggml-ci
1 parent 4d27597 commit 6002bd8

File tree

6 files changed

+358
-327
lines changed

6 files changed

+358
-327
lines changed

src/llama-adapter.cpp

Lines changed: 1 addition & 0 deletions
@@ -1,5 +1,6 @@
 #include "llama-adapter.h"

+#include "llama-mmap.h"
 #include "llama-model.h"

 #include <algorithm>

src/llama-context.cpp

Lines changed: 3 additions & 1 deletion
@@ -1,5 +1,7 @@
 #include "llama-context.h"

+#include "llama-mmap.h"
+
 #include <cassert>
 #include <cmath>
 #include <cstring>
@@ -504,7 +506,7 @@ size_t llama_output_reserve(struct llama_context & lctx, size_t n_outputs) {

     auto * buft = ggml_backend_cpu_buffer_type();
     // try to use the host buffer of the device where the output tensor is allocated for faster transfer to system memory
-    auto * output_dev = lctx.model.dev_output.dev;
+    auto * output_dev = lctx.model.dev_output();
     auto * output_dev_host_buft = output_dev ? ggml_backend_dev_host_buffer_type(output_dev) : nullptr;
     if (output_dev_host_buft) {
         buft = output_dev_host_buft;

src/llama-kv-cache.cpp

Lines changed: 1 addition & 1 deletion
@@ -79,7 +79,7 @@ bool llama_kv_cache_init(

     ggml_backend_buffer_type_t buft;
     if (offload) {
-        auto * dev = model.dev_layer.at(i).dev;
+        auto * dev = model.dev_layer(i);
         buft = ggml_backend_dev_buffer_type(dev);
     } else {
         buft = ggml_backend_cpu_buffer_type();

0 commit comments

Comments (0)