We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 8572b8a, commit 19109e4 (Copy full SHA for 19109e4)
fastdeploy/model_executor/layers/attention/ops/append_attention.py
@@ -144,9 +144,9 @@ def append_attention(
144
raise NotImplementedError
145
146
147
-# TODO: merge w/o output append attention after finishing developing sub-graph cudagraph capture
148
-
149
+# TODO: (mengyuan) merge w/o output version append attention after
+# finishing developing sub-graph cudagraph capture to reduce
+# compilation volume
150
def append_attention_with_output(
151
qkv: paddle.Tensor,
152
key_cache: paddle.Tensor,
0 commit comments