Skip to content

Commit a17e36f

Browse files
author
Dinesh Reddy
committed
-Fixed issue for llama-cli. Tested on posix & FPGA
Signed-off-by: Dinesh Reddy <[email protected]>
1 parent d853890 commit a17e36f

File tree

2 files changed

+32
-25
lines changed

2 files changed

+32
-25
lines changed

ggml/src/ggml-tsavorite/ggml-tsavorite.cpp

Lines changed: 24 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,18 @@ typedef struct _txe_device_t *txe_device_s;
3333
typedef struct _txe_compute_pipeline_state_t *txe_compute_pipeline_state_s;
3434
FILE *tsi_op_log_file;
3535
uint64_t num_of_op;
36-
36+
// Centralized TSI runtime initialization - called once globally
37+
static void ensure_tsi_runtime_initialized() {
38+
static bool runtime_initialized = false;
39+
if (!runtime_initialized) {
40+
std::string mainProfilerName = "OPU ";
41+
tsirt::utils::TSIProfiler::initialize();
42+
// TSI runtime initialization
43+
tsi_initialize(NUM_OF_TXES, NULL);
44+
runtime_initialized = true;
45+
GGML_TSAVORITE_LOG_INFO("Profiler and TSI runtime initialized early in registration\n");
46+
}
47+
}
3748
#ifdef USE_COMMAND_BUFFERS
3849
typedef struct _txe_command_queue_t *txe_command_queue_s;
3950
typedef struct _txe_dispatch_queue_t *txe_dispatch_queue_s;
@@ -384,6 +395,7 @@ static txe_compute_pipeline_state_s tsi_kernel_setup(enum ggml_tsavorite_kernel_
384395
}
385396
GGML_TSAVORITE_LOG_INFO("Start %s\n", __func__);
386397

398+
387399
switch (kernel_type) {
388400
case GGML_TSAVORITE_KERNEL_TYPE_ADD:
389401
if (ggml_tsavorite_kernel_mode_flag == GGML_TSAVORITE_KERNEL_MODE_CPU)
@@ -475,7 +487,6 @@ static txe_device_s
475487
ggml_backend_tsavorite_device_acq(struct ggml_backend_tsavorite_device_context *ctx) {
476488
assert(ctx != NULL);
477489
GGML_TSAVORITE_LOG_INFO("Start %s\n", __func__);
478-
479490
if (ctx->device == tsi_nil) {
480491
ctx->device = tsi_system_default_device_create();
481492
snprintf(ctx->name, sizeof("txe"), "txe");
@@ -492,7 +503,6 @@ static void ggml_backend_tsavorite_device_rel(struct ggml_backend_tsavorite_devi
492503
assert(ctx != NULL);
493504
assert(ctx->ref_count > 0);
494505
GGML_TSAVORITE_LOG_INFO("Start %s\n", __func__);
495-
496506
ctx->ref_count--;
497507

498508
// Need to define function txe_device_free
@@ -507,7 +517,6 @@ static void ggml_backend_tsavorite_device_rel(struct ggml_backend_tsavorite_devi
507517
static void *ggml_tsavorite_host_malloc(size_t n) {
508518
void *data = NULL;
509519
GGML_TSAVORITE_LOG_INFO("Start %s\n", __func__);
510-
511520
GGML_TSAVORITE_LOG_INFO("\n Allocating memory from tsi_alloc with size %ld \n", n);
512521
data = tsi_alloc(n);
513522
GGML_TSAVORITE_LOG_CONT("\n Allocating memory from tsi_alloc with size %ld starting memory %p\n",
@@ -526,12 +535,9 @@ static struct ggml_backend_tsavorite_context *ggml_tsavorite_init(ggml_backend_d
526535
if (tsi_log_setup() == false)
527536
return NULL;
528537

538+
529539
std::string mainProfilerName = "OPU ";
530-
tsirt::utils::TSIProfiler::initialize();
531540
tsirt::utils::TSIScopedProfiler mainProfiler(mainProfilerName);
532-
533-
// TSI runtime initialization
534-
tsi_initialize(NUM_OF_TXES, NULL);
535541

536542
// init context
537543
struct ggml_backend_tsavorite_context *ctx = (struct ggml_backend_tsavorite_context *)calloc(
@@ -608,6 +614,7 @@ static struct ggml_backend_tsavorite_context *ggml_tsavorite_init(ggml_backend_d
608614
static void ggml_tsavorite_free(struct ggml_backend_tsavorite_context *ctx) {
609615
GGML_TSAVORITE_LOG_INFO("Start %s\n", __func__);
610616

617+
611618
for (int i = 0; i < GGML_TSAVORITE_KERNEL_TYPE_COUNT; ++i) {
612619
if (ctx->kernels[i].pipeline) {
613620
tsi_kernel_release(ctx->kernels[i].pipeline);
@@ -649,6 +656,7 @@ static ggml_backend_tsavorite_buffer_s ggml_tsavorite_get_buffer(struct ggml_ten
649656
// GGML_TSAVORITE_LOG_INFO("%s: data tensor '%16s', offs_data = %8ld, offs_eval = %8ld, offs_cach = %8ld\n", __func__, t->name, offs_data, offs_eval, offs_cach);
650657
GGML_TSAVORITE_LOG_INFO("Start %s\n", __func__);
651658

659+
652660
const int64_t tsize = ggml_nbytes(t);
653661

654662
ggml_backend_buffer_t buffer = t->view_src ? t->view_src->buffer : t->buffer;
@@ -682,7 +690,6 @@ static bool ggml_tsavorite_supports_op(const struct ggml_backend_tsavorite_devic
682690
GGML_TSAVORITE_LOG_INFO("Start %s\n", __func__);
683691
if (!ctx_dev)
684692
return false;
685-
GGML_TSAVORITE_LOG_INFO("Start %s\n", __func__);
686693
for (size_t i = 0, n = 3; i < n; ++i) {
687694
if (op->src[i] != NULL && op->src[i]->type != GGML_TYPE_F32) {
688695
return false;
@@ -1127,6 +1134,7 @@ static enum ggml_status ggml_tsavorite_graph_compute(ggml_backend_t backend,
11271134
#if 0
11281135
static const char * ggml_backend_tsavorite_buffer_get_name(ggml_backend_buffer_t buffer) {
11291136
GGML_TSAVORITE_LOG_INFO("Start %s\n", __func__);
1137+
11301138
GGML_TSAVORITE_LOG_INFO("End %s\n", __func__);
11311139
return "tSavorite";
11321140

@@ -1217,7 +1225,6 @@ static bool ggml_backend_tsavorite_buffer_cpy_tensor(ggml_backend_buffer_t buffe
12171225
const struct ggml_tensor *src,
12181226
struct ggml_tensor *dst) {
12191227
GGML_TSAVORITE_LOG_INFO("Start %s\n", __func__);
1220-
12211228
if (ggml_backend_buffer_is_host(src->buffer)) {
12221229
memcpy(dst->data, src->data, (ggml_nbytes(src)));
12231230
return true;
@@ -1468,7 +1475,6 @@ static enum ggml_status ggml_backend_tsavorite_graph_compute(ggml_backend_t back
14681475
static void ggml_backend_tsavorite_set_n_cb(ggml_backend_t backend, int n_cb) {
14691476
// GGML_ASSERT(ggml_backend_is_tsavorite(backend));
14701477
GGML_TSAVORITE_LOG_INFO("Start %s\n", __func__);
1471-
14721478
struct ggml_backend_tsavorite_context *ctx =
14731479
(struct ggml_backend_tsavorite_context *)backend->context;
14741480

@@ -1508,6 +1514,7 @@ static struct ggml_backend_i ggml_backend_tsavorite_i = {
15081514

15091515
static ggml_guid_t ggml_backend_tsavorite_guid(void) {
15101516
GGML_TSAVORITE_LOG_INFO("Start %s\n", __func__);
1517+
15111518
static ggml_guid guid = {0x81, 0xa1, 0x8b, 0x1e, 0x71, 0xec, 0x79, 0xed,
15121519
0x2b, 0x85, 0xdc, 0x8a, 0x61, 0x98, 0x30, 0xe6};
15131520
GGML_TSAVORITE_LOG_INFO("End %s\n", __func__);
@@ -1518,13 +1525,11 @@ static ggml_guid_t ggml_backend_tsavorite_guid(void) {
15181525
ggml_backend_t ggml_backend_tsavorite_init(void) {
15191526
GGML_TSAVORITE_LOG_INFO("Start %s\n", __func__);
15201527
ggml_backend_dev_t dev = ggml_backend_reg_dev_get(ggml_backend_tsavorite_reg(), 0);
1521-
15221528
struct ggml_backend_tsavorite_context *ctx = ggml_tsavorite_init(dev);
15231529
if (ctx == NULL) {
15241530
GGML_TSAVORITE_LOG_ERROR("%s: error: failed to allocate context\n", __func__);
15251531
return NULL;
15261532
}
1527-
15281533
ggml_backend_t backend = (ggml_backend_t)malloc(sizeof(struct ggml_backend));
15291534
if (backend) {
15301535
backend->guid = ggml_backend_tsavorite_guid();
@@ -1550,7 +1555,6 @@ void ggml_backend_tsavorite_set_abort_callback(ggml_backend_t backend,
15501555
void *user_data) {
15511556
GGML_ASSERT(ggml_backend_is_tsavorite(backend));
15521557
GGML_TSAVORITE_LOG_INFO("Start %s\n", __func__);
1553-
15541558
struct ggml_backend_tsavorite_context *ctx =
15551559
(struct ggml_backend_tsavorite_context *)backend->context;
15561560

@@ -1562,7 +1566,6 @@ void ggml_backend_tsavorite_set_abort_callback(ggml_backend_t backend,
15621566
void ggml_backend_tsavorite_capture_next_compute(ggml_backend_t backend) {
15631567
GGML_ASSERT(ggml_backend_is_tsavorite(backend));
15641568
GGML_TSAVORITE_LOG_INFO("Start %s\n", __func__);
1565-
15661569
struct ggml_backend_tsavorite_context *ctx =
15671570
(struct ggml_backend_tsavorite_context *)backend->context;
15681571
ctx->capture_next_compute = true;
@@ -1582,6 +1585,7 @@ static const char *ggml_backend_tsavorite_device_get_name(ggml_backend_dev_t dev
15821585
static const char *ggml_backend_tsavorite_device_get_description(ggml_backend_dev_t dev) {
15831586
// acq/rel just to populate ctx->name in case it hasn't been done yet
15841587
GGML_TSAVORITE_LOG_INFO("Start %s\n", __func__);
1588+
15851589
struct ggml_backend_tsavorite_device_context *ctx_dev =
15861590
(struct ggml_backend_tsavorite_device_context *)dev->context;
15871591
ggml_backend_tsavorite_device_acq(ctx_dev);
@@ -1594,7 +1598,6 @@ static const char *ggml_backend_tsavorite_device_get_description(ggml_backend_de
15941598
static void ggml_backend_tsavorite_device_get_memory(ggml_backend_dev_t dev, size_t *free,
15951599
size_t *total) {
15961600
GGML_TSAVORITE_LOG_INFO("Start %s\n", __func__);
1597-
15981601
if (!dev || !free || !total) {
15991602
GGML_TSAVORITE_LOG_INFO("One of more pointers(dev, free, total) are NULL\n");
16001603
GGML_TSAVORITE_LOG_INFO("End %s\n", __func__);
@@ -1632,6 +1635,7 @@ static enum ggml_backend_dev_type ggml_backend_tsavorite_device_get_type(ggml_ba
16321635
static void ggml_backend_tsavorite_device_get_props(ggml_backend_dev_t dev,
16331636
struct ggml_backend_dev_props *props) {
16341637
GGML_TSAVORITE_LOG_INFO("Start %s\n", __func__);
1638+
16351639
props->name = ggml_backend_tsavorite_device_get_name(dev);
16361640
props->description = ggml_backend_tsavorite_device_get_description(dev);
16371641
props->type = ggml_backend_tsavorite_device_get_type(dev);
@@ -1650,6 +1654,7 @@ static void ggml_backend_tsavorite_device_get_props(ggml_backend_dev_t dev,
16501654
static ggml_backend_t ggml_backend_tsavorite_device_init(ggml_backend_dev_t dev,
16511655
const char *params) {
16521656
GGML_TSAVORITE_LOG_INFO("Start %s\n", __func__);
1657+
16531658
struct ggml_backend_tsavorite_context *ctx = ggml_tsavorite_init(dev);
16541659
if (ctx == NULL) {
16551660
GGML_TSAVORITE_LOG_ERROR("%s: error: failed to allocate context\n", __func__);
@@ -1763,6 +1768,7 @@ static ggml_backend_buffer_t ggml_backend_tsavorite_device_buffer_from_ptr(ggml_
17631768
static bool ggml_backend_tsavorite_device_supports_op(ggml_backend_dev_t dev,
17641769
const struct ggml_tensor *op) {
17651770
GGML_TSAVORITE_LOG_INFO("Start %s\n", __func__);
1771+
17661772
struct ggml_backend_tsavorite_device_context *ctx_dev =
17671773
(struct ggml_backend_tsavorite_device_context *)dev->context;
17681774

@@ -1789,7 +1795,7 @@ static bool ggml_backend_tsavorite_device_offload_op(ggml_backend_dev_t dev,
17891795
if (op->type != GGML_TYPE_F32)
17901796
return false;
17911797
switch (op->op) {
1792-
// case GGML_OP_NONE:
1798+
case GGML_OP_NONE:
17931799
case GGML_OP_ADD:
17941800
case GGML_OP_SUB:
17951801
case GGML_OP_DIV:
@@ -1885,14 +1891,13 @@ ggml_backend_reg_t ggml_backend_tsavorite_reg(void) {
18851891
ggml_tsavorite_log_type_val = GGML_TSAVORITE_LOG_NONE;
18861892
ggml_tsavorite_kernel_mode_flag = GGML_TSAVORITE_KERNEL_MODE_MLIR;
18871893
GGML_TSAVORITE_LOG_INFO("Start %s\n", __func__);
1894+
ensure_tsi_runtime_initialized();
18881895
g_ggml_backend_tsavorite_reg.iface = ggml_backend_tsavorite_reg_i;
18891896
g_ggml_backend_tsavorite_reg.context = NULL;
1890-
18911897
g_ggml_backend_tsavorite_device.iface = ggml_backend_tsavorite_device_i;
18921898
g_ggml_backend_tsavorite_device.reg = &g_ggml_backend_tsavorite_reg;
18931899
g_ggml_backend_tsavorite_device.context = &g_ggml_ctx_dev_main;
18941900
GGML_TSAVORITE_LOG_INFO("End %s\n", __func__);
1895-
18961901
return &g_ggml_backend_tsavorite_reg;
18971902
}
18981903

src/llama-kv-cache.cpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -109,12 +109,14 @@ llama_kv_cache::llama_kv_cache(
109109

110110
ggml_backend_buffer_type_t buft = ggml_backend_cpu_buffer_type();
111111

112-
if (offload) {
113-
auto * dev = model.dev_layer(il);
114-
buft = ggml_backend_dev_buffer_type(dev);
115-
116-
dev_name = ggml_backend_dev_name(dev);
117-
}
112+
// Always use CPU for KV cache tensors to avoid issues with operations
113+
// that are not supported by the offloaded backend (e.g., SET_ROWS)
114+
// if (offload) {
115+
// auto * dev = model.dev_layer(il);
116+
// buft = ggml_backend_dev_buffer_type(dev);
117+
//
118+
// dev_name = ggml_backend_dev_name(dev);
119+
// }
118120

119121
LLAMA_LOG_DEBUG("%s: layer %3d: dev = %s\n", __func__, il, dev_name);
120122

0 commit comments

Comments
 (0)