@@ -699,9 +699,9 @@ struct whisper_kv_cache {
699699 struct ggml_tensor * k;
700700 struct ggml_tensor * v;
701701
702- struct ggml_context * ctx = nullptr ;
703-
704702 ggml_backend_buffer_t buffer = nullptr ;
703+
704+ std::vector<uint8_t > ctx_buf;
705705};
706706
707707struct whisper_model {
@@ -941,9 +941,11 @@ static bool whisper_kv_cache_init(
941941 const int64_t n_mem = n_text_layer*n_ctx;
942942 const int64_t n_elements = n_text_state*n_mem;
943943
944+ cache.ctx_buf .resize (2 *ggml_tensor_overhead ());
945+
944946 struct ggml_init_params params = {
945- /* .mem_size =*/ 2 * ggml_tensor_overhead (),
946- /* .mem_buffer =*/ nullptr ,
947+ /* .mem_size =*/ cache. ctx_buf . size (),
948+ /* .mem_buffer =*/ cache. ctx_buf . data () ,
947949 /* .no_alloc =*/ true ,
948950 };
949951
@@ -953,31 +955,31 @@ static bool whisper_kv_cache_init(
953955 cache.cells .clear ();
954956 cache.cells .resize (n_ctx);
955957
956- cache. ctx = ggml_init (params);
958+ struct ggml_context * ctx = ggml_init (params);
957959
958- if (!cache. ctx ) {
960+ if (!ctx) {
959961 WHISPER_LOG_ERROR (" %s: failed to allocate memory for the kv cache context\n " , __func__);
960962 return false ;
961963 }
962964
963- cache.k = ggml_new_tensor_1d (cache. ctx , wtype, n_elements);
964- cache.v = ggml_new_tensor_1d (cache. ctx , wtype, n_elements);
965+ cache.k = ggml_new_tensor_1d (ctx, wtype, n_elements);
966+ cache.v = ggml_new_tensor_1d (ctx, wtype, n_elements);
965967
966- cache.buffer = ggml_backend_alloc_ctx_tensors (cache. ctx , backend);
968+ cache.buffer = ggml_backend_alloc_ctx_tensors (ctx, backend);
967969 if (!cache.buffer ) {
968970 WHISPER_LOG_ERROR (" %s: failed to allocate memory for the kv cache\n " , __func__);
969971 return false ;
970972 }
971973
972974 ggml_backend_buffer_clear (cache.buffer , 0 );
973975
976+ ggml_free (ctx);
977+
974978 return true ;
975979}
976980
977981static void whisper_kv_cache_free (struct whisper_kv_cache & cache) {
978- ggml_free (cache.ctx );
979982 ggml_backend_buffer_free (cache.buffer );
980- cache.ctx = nullptr ;
981983}
982984
983985static bool whisper_kv_cache_find_slot (
@@ -2002,7 +2004,7 @@ static struct ggml_cgraph * whisper_build_graph_encoder(
20022004
20032005 auto & kv_pad = wstate.kv_pad ;
20042006
2005- WHISPER_ASSERT (!!kv_pad.ctx );
2007+ WHISPER_ASSERT (!!kv_pad.buffer );
20062008
20072009 const int n_ctx_pad = GGML_PAD (n_ctx, 256 );
20082010
@@ -2416,7 +2418,7 @@ static struct ggml_cgraph * whisper_build_graph_decoder(
24162418
24172419 auto & kv_self = wstate.kv_self ;
24182420
2419- WHISPER_ASSERT (!!kv_self.ctx );
2421+ WHISPER_ASSERT (!!kv_self.buffer );
24202422
24212423 const int n_ctx = kv_self.size ;
24222424 const int n_state = hparams.n_text_state ;
0 commit comments