We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 9673ff4 commit d25473aCopy full SHA for d25473a
src/models/decoder_only.cpp
@@ -26,7 +26,7 @@ DecoderOnly_State::DecoderOnly_State(const DecoderOnly_Model& model, DeviceSpan<
26
27
DeviceSpan<float> DecoderOnly_State::Run(int total_length, DeviceSpan<int32_t>& next_tokens, DeviceSpan<int32_t> next_indices) {
28
size_t num_tokens = next_tokens.size();
29
- const size_t chunk_size = 15;
+ const size_t chunk_size = 1024; // Experimental value
30
31
if (num_tokens > chunk_size) {
32
// Chunking logic for context phase - process in chunks of chunk_size tokens
0 commit comments