|
13 | 13 | #include <vector>
14 | 14 | #include <ctime>
15 | 15 |
|
| 16 | +#define LLAMA_ASSERT(condition, ...) do { \
| 17 | + if (!(condition)) { \
| 18 | + LOG_ERR(__VA_ARGS__); \
| 19 | + return 1; \
| 20 | + } \
| 21 | +} while (0)
| 22 | + |
16 | 23 | // trim whitespace from the beginning and end of a string
17 | 24 | static std::string trim(const std::string & str) { |
18 | 25 | size_t start = 0; |
@@ -188,6 +195,9 @@ int main(int argc, char ** argv) { |
188 | 195 | { |
189 | 196 | LOG_INF("%s: Evaluating the system prompt ...\n", __func__); |
190 | 197 |
|
| 198 | + LLAMA_ASSERT((batch.n_tokens + n_tokens_system < n_ctx), |
| 199 | + "%s: Unable to add system tokens (%d tokens) to batch due to context overflow. " |
| 200 | + "Consider increasing context size (%d).\n" , __func__, n_tokens_system, n_ctx); |
191 | 201 | for (int32_t i = 0; i < n_tokens_system; ++i) { |
192 | 202 | llama_batch_add(batch, tokens_system[i], i, { 0 }, false); |
193 | 203 | } |
@@ -223,6 +233,9 @@ int main(int argc, char ** argv) { |
223 | 233 |
|
224 | 234 | client.i_batch = batch.n_tokens; |
225 | 235 |
|
| 236 | + LLAMA_ASSERT((batch.n_tokens + 1 < n_ctx), |
| 237 | + "%s: Unable to add client %d's sampled token to batch due to context overflow. " |
| 238 | + "Consider increasing context size (Found: %d).\n", __func__, client.id, n_ctx); |
226 | 239 | llama_batch_add(batch, client.sampled, n_tokens_system + client.n_prompt + client.n_decoded, { client.id + 1 }, true); |
227 | 240 |
|
228 | 241 | client.n_decoded += 1; |
@@ -258,7 +271,11 @@ int main(int argc, char ** argv) { |
258 | 271 | std::vector<llama_token> tokens_prompt; |
259 | 272 | tokens_prompt = ::llama_tokenize(ctx, client.prompt, false); |
260 | 273 |
|
261 | | - for (size_t i = 0; i < tokens_prompt.size(); ++i) { |
| 274 | + size_t n_tokens_prompt = tokens_prompt.size(); |
| 275 | + LLAMA_ASSERT((batch.n_tokens + n_tokens_prompt < n_ctx), |
| 276 | + "%s: Unable to add client %d's prompt tokens (%d tokens) to batch due to context overflow. " |
| 277 | + "Consider increasing context size (Found: %d).\n", __func__, client.id, n_tokens_prompt, n_ctx); |
| 278 | + for (size_t i = 0; i < n_tokens_prompt; ++i) { |
262 | 279 | llama_batch_add(batch, tokens_prompt[i], i + n_tokens_system, { client.id + 1 }, false); |
263 | 280 | } |
264 | 281 |
|
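Below is a minimal, self-contained sketch of the guard pattern these hunks introduce, for trying it outside of parallel.cpp. It is an illustration only: `LOG_ERR` is replaced with `fprintf(stderr, ...)` and the token counts are made-up values, not the ones used in the example.

```cpp
// Illustrative stand-in for the patch: LOG_ERR is replaced with fprintf,
// and the counts below are invented so that the overflow path triggers.
#include <cstdio>

#define LLAMA_ASSERT(condition, ...) do { \
    if (!(condition)) {                   \
        fprintf(stderr, __VA_ARGS__);     \
        return 1;                         \
    }                                     \
} while (0)

int main() {
    const int n_ctx          = 512; // hypothetical context size
    const int n_batch_tokens = 500; // tokens already queued in the batch
    const int n_tokens_new   = 64;  // tokens we would like to append

    // 500 + 64 >= 512, so the guard logs the error and returns 1,
    // mirroring the checks added before each llama_batch_add() loop.
    LLAMA_ASSERT((n_batch_tokens + n_tokens_new < n_ctx),
                 "%s: Unable to add %d tokens to batch due to context overflow. "
                 "Consider increasing context size (Found: %d).\n",
                 __func__, n_tokens_new, n_ctx);

    printf("tokens fit in context\n");
    return 0;
}
```

Note that because the macro returns from the enclosing function, it is only usable inside a function returning `int` (here, `main`), which is how the patch applies it.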
|