We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 3936fa9 commit 17d7549 · Copy full SHA for 17d7549
src/models/decoder_only.cpp
@@ -29,7 +29,7 @@ void DecoderOnly_State::SetExtraInputs(const std::vector<ExtraInput>& extra_inpu
29
30
DeviceSpan<float> DecoderOnly_State::Run(int total_length, DeviceSpan<int32_t>& next_tokens, DeviceSpan<int32_t> next_indices) {
31
size_t num_tokens = next_tokens.size();
32
- const size_t chunk_size = 15;
+ const size_t chunk_size = 1024; // Experimental value
33
34
if (num_tokens > chunk_size) {
35
// Chunking logic for context phase - process in chunks of chunk_size tokens
0 commit comments