@@ -89,16 +89,17 @@ struct log_scope {
8989static void test_llama_kv_cache_unified_constructor () {
9090 auto model = _make_model ();
9191 llama_kv_cache_unified cache (
92- /* model */ *model,
93- /* filter */ nullptr ,
94- /* type_k */ GGML_TYPE_F32,
95- /* type_v */ GGML_TYPE_F16,
96- /* v_trans */ false ,
97- /* offload */ false ,
98- /* kv_size */ 10 ,
99- /* padding */ 10 ,
100- /* n_swa */ 0 ,
101- /* swa_type */ LLAMA_SWA_TYPE_NONE
92+ /* model */ *model,
93+ /* filter */ nullptr ,
94+ /* type_k */ GGML_TYPE_F32,
95+ /* type_v */ GGML_TYPE_F16,
96+ /* v_trans */ false ,
97+ /* offload */ false ,
98+ /* kv_size */ 10 ,
99+ /* n_seq_max */ 1 ,
100+ /* padding */ 10 ,
101+ /* n_swa */ 0 ,
102+ /* swa_type */ LLAMA_SWA_TYPE_NONE
102103 );
103104}
104105
@@ -113,11 +114,11 @@ static void test_llama_kv_cache_unified_single_seq() {
113114 /* v_trans */ false ,
114115 /* offload */ false ,
115116 /* kv_size */ 10 ,
117+ /* n_seq_max */ 1 ,
116118 /* padding */ 10 ,
117119 /* n_swa */ 0 ,
118120 /* swa_type */ LLAMA_SWA_TYPE_NONE
119121 );
120- GGML_ASSERT (cache.get_used_cells () == 0 );
121122
122123 // Create the micro batch with a single 3-token sequence
123124 //
@@ -155,12 +156,13 @@ static void test_llama_kv_cache_unified_single_seq() {
155156static void test_llama_kv_cache_recurrent_constructor () {
156157 auto model = _make_model (LLM_ARCH_MAMBA);
157158 llama_kv_cache_recurrent cache (
158- /* model */ *model,
159- /* filter */ nullptr ,
160- /* type_k */ GGML_TYPE_F32,
161- /* type_v */ GGML_TYPE_F16,
162- /* offload */ false ,
163- /* kv_size */ 10
159+ /* model */ *model,
160+ /* filter */ nullptr ,
161+ /* type_k */ GGML_TYPE_F32,
162+ /* type_v */ GGML_TYPE_F16,
163+ /* offload */ false ,
164+ /* kv_size */ 10 ,
165+ /* n_seq_max */ 1
164166 );
165167}
166168
@@ -195,27 +197,29 @@ static void test_llama_kv_cache_hybrid_constructor() {
195197
196198 std::unique_ptr<llama_kv_cache_unified> u_cache (
197199 new llama_kv_cache_unified (
198- /* model */ *model,
199- /* filter */ unified_filter,
200- /* type_k */ GGML_TYPE_F32,
201- /* type_v */ GGML_TYPE_F16,
202- /* v_trans */ false ,
203- /* offload */ false ,
204- /* kv_size */ 10 ,
205- /* padding */ 10 ,
206- /* n_swa */ 0 ,
207- /* swa_type */ LLAMA_SWA_TYPE_NONE
200+ /* model */ *model,
201+ /* filter */ unified_filter,
202+ /* type_k */ GGML_TYPE_F32,
203+ /* type_v */ GGML_TYPE_F16,
204+ /* v_trans */ false ,
205+ /* offload */ false ,
206+ /* kv_size */ 10 ,
207+ /* n_seq_max */ 1 ,
208+ /* padding */ 10 ,
209+ /* n_swa */ 0 ,
210+ /* swa_type */ LLAMA_SWA_TYPE_NONE
208211 )
209212 );
210213 auto * u_cache_ptr = u_cache.get ();
211214 std::unique_ptr<llama_kv_cache_recurrent> r_cache (
212215 new llama_kv_cache_recurrent (
213- /* model */ *model,
214- /* filter */ recurrent_filter,
215- /* type_k */ GGML_TYPE_F32,
216- /* type_v */ GGML_TYPE_F16,
217- /* offload */ false ,
218- /* kv_size */ 10
216+ /* model */ *model,
217+ /* filter */ recurrent_filter,
218+ /* type_k */ GGML_TYPE_F32,
219+ /* type_v */ GGML_TYPE_F16,
220+ /* offload */ false ,
221+ /* kv_size */ 10 ,
222+ /* n_seq_max */ 1
219223 )
220224 );
221225 auto * r_cache_ptr = r_cache.get ();
0 commit comments