11use super :: { CacheParts , Progress , model:: ModelExec , upos} ;
22use crate :: { batch:: Req , handle:: Handle , load:: load_weight, memory:: MemPages } ;
3+ use cuda:: { DevByte , DevMem , Stream , VirByte } ;
34use nn:: {
45 Distribution , Graph , GraphBuilder , LLaMA , NNGraph , Tensor , TensorMeta , digit_layout:: types, op,
56} ;
6- use operators:: {
7- attention_kv_cached:: cuda:: Operator as Attn ,
8- cuda:: { DevByte , DevMem , Stream , VirByte } ,
9- } ;
107use std:: {
118 collections:: BTreeMap ,
129 num:: { NonZero , NonZeroUsize } ,
@@ -16,7 +13,6 @@ use tokeneer::utok;
1613
1714pub ( crate ) struct ModelGroup < ' ctx > {
1815 internal : Internal < ' ctx > ,
19- attn : Attn ,
2016 pages : MemPages ,
2117 _weight : DevMem < ' ctx > ,
2218}
@@ -36,7 +32,6 @@ impl<'ctx> ModelGroup<'ctx> {
3632
3733 config : ModelGroupConfig < T > ,
3834
39- attn : Attn ,
4035 handle : & mut Handle < ' ctx > ,
4136 barrier : Option < & Barrier > ,
4237 ) -> Self {
@@ -82,7 +77,6 @@ impl<'ctx> ModelGroup<'ctx> {
8277 let models_with_one_dyn = Internal :: new ( graph, static_models, dyn_cache_size) ;
8378 Self {
8479 internal : models_with_one_dyn,
85- attn,
8680 pages,
8781 _weight,
8882 }
@@ -125,10 +119,7 @@ impl<'ctx> ModelGroup<'ctx> {
125119 stream : & Stream < ' ctx > ,
126120 ) -> Tensor < * const VirByte , 2 > {
127121 let Self {
128- internal,
129- attn,
130- pages,
131- ..
122+ internal, pages, ..
132123 } = self ;
133124
134125 let mut reqs = reqs
@@ -142,7 +133,8 @@ impl<'ctx> ModelGroup<'ctx> {
142133 let reqs = reqs
143134 . iter_mut ( )
144135 . map ( |req| {
145- req. cache . update ( req. pos + req. seq , pages) ;
136+ req. cache
137+ . update ( ( req. pos + req. seq ) . div_ceil ( 32 ) * 32 , pages) ;
146138 Req {
147139 cache : req. cache . as_tensor ( ) ,
148140 pos : req. pos ,
@@ -154,7 +146,7 @@ impl<'ctx> ModelGroup<'ctx> {
154146 internal
155147 . get_mut ( & key)
156148 . unwrap ( )
157- . launch ( attn , handle, & reqs, stream)
149+ . launch ( handle, & reqs, stream)
158150 }
159151}
160152
0 commit comments