
Commit f104678

common, examples, llama : optimize using reserve if possible
1 parent 0d41771 commit f104678
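
The change is mechanical: wherever a container is filled in a loop whose element count is known (or can be bounded) up front, a reserve() call is added so the backing storage is allocated once instead of being regrown by repeated push_back/emplace_back. A minimal standalone sketch of the pattern, not code from the commit (struct item and n_items are illustrative names only):

```cpp
#include <cstdio>
#include <vector>

// Illustrative stand-in for the per-element payloads used in the commit
// (e.g. llama_token_data); not an identifier from the llama.cpp sources.
struct item {
    int   id;
    float score;
};

int main() {
    const int n_items = 100000;

    std::vector<item> items;
    items.clear();
    items.reserve(n_items); // one allocation up front; size() is still 0

    for (int i = 0; i < n_items; i++) {
        // without the reserve() above, the vector would go through several
        // geometric reallocations (plus element moves) as it grows
        items.emplace_back(item{i, 0.0f});
    }

    std::printf("size = %zu, capacity = %zu\n", items.size(), items.capacity());
    return 0;
}
```

reserve() only raises capacity(); it neither changes size() nor constructs elements, so the loops that follow it in the diffs below are untouched.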


8 files changed, +25 -2 lines changed


common/sampling.cpp

Lines changed: 2 additions & 1 deletion
@@ -200,7 +200,8 @@ static llama_token llama_sampling_sample_impl(
     }
 
     cur.clear();
-
+    cur.reserve(n_vocab);
+
     for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
         cur.emplace_back(llama_token_data{token_id, logits[token_id], 0.0f});
     }

common/train.cpp

Lines changed: 3 additions & 1 deletion
@@ -883,9 +883,11 @@ size_t tokenize_file(
 
         // generate sample starts at all token positions
         out_samples_begin.clear();
+        size_t end = (out_tokens.size() >= context_length) ? (out_tokens.size() - context_length) : 0;
+        out_samples_begin.reserve(end);
         out_samples_begin.push_back(0);
+        out_samples_size.reserve(end);
         out_samples_size.push_back(std::min((size_t) context_length, out_tokens.size()));
-        size_t end = (out_tokens.size() >= context_length) ? (out_tokens.size() - context_length) : 0;
         for (size_t sample_begin = 1; sample_begin < end; ++sample_begin) {
             out_samples_begin.push_back(sample_begin);
             out_samples_size.push_back(context_length);

examples/llava/clip.cpp

Lines changed: 2 additions & 0 deletions
@@ -1473,6 +1473,7 @@ static std::vector<clip_image_u8*> divide_to_patches_u8(const clip_image_u8 & im
     std::vector<clip_image_u8*> patches;
     int width = image.nx;
    int height = image.ny;
+    patches.reserve((height / patch_size) * (width / patch_size));
     for (int i = 0; i < height; i += patch_size) {
         for (int j = 0; j < width; j += patch_size) {
             clip_image_u8 *patch = clip_image_u8_init();
@@ -1542,6 +1543,7 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, cli
     if (params.image_grid_pinpoints[0] != 0) {
         // "spatial_unpad" with "anyres" processing for llava-1.6
         std::vector<std::pair<int, int>> possible_resolutions;
+        possible_resolutions.reserve(16);
         for (int i = 0; i < 32 && params.image_grid_pinpoints[i] != 0; i+=2) {
             possible_resolutions.push_back({params.image_grid_pinpoints[i], params.image_grid_pinpoints[i+1]});
         }

examples/llava/llava.cpp

Lines changed: 1 addition & 0 deletions
@@ -262,6 +262,7 @@ static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const cli
         const int32_t * image_grid = clip_image_grid(ctx_clip);
 
         std::vector<std::pair<int, int>> grid_pinpoints;
+        grid_pinpoints.reserve(16);
         for (int i = 0; i < 32 && image_grid[i] != 0; i += 2) {
             grid_pinpoints.push_back({image_grid[i], image_grid[i+1]});
         }

examples/lookup/lookup.cpp

Lines changed: 1 addition & 0 deletions
@@ -181,6 +181,7 @@ int main(int argc, char ** argv){
             const int startIdx = i + ngram_size;
             const int endIdx = startIdx + n_draft;
             if (endIdx < inp_size) {
+                draft.reserve(endIdx - startIdx);
                 for (int j = startIdx; j < endIdx; ++j) {
                     LOG(" - draft candidate %d: %d\n", j, inp[j]);
                     draft.push_back(inp[j]);

examples/perplexity/perplexity.cpp

Lines changed: 8 additions & 0 deletions
@@ -876,10 +876,12 @@ static void hellaswag_score(llama_context * ctx, const gpt_params & params) {
         // Compute log-probs in parallel
         // First we collect all tasks
         eval_pairs.clear();
+        eval_pairs.reserve((i1 - i0) * 4);
         for (size_t i = i0; i < i1; ++i) {
             auto & hs_cur = hs_data[i];
             size_t li = hs_cur.common_prefix;
             for (int s = 0; s < 4; ++s) {
+                eval_pairs.reserve((hs_cur.seq_tokens[s].size() - 1) - hs_cur.common_prefix);
                 for (size_t j = hs_cur.common_prefix; j < hs_cur.seq_tokens[s].size() - 1; j++) {
                     eval_pairs.emplace_back(hs_cur.i_batch + li++, hs_cur.seq_tokens[s][j + 1]);
                 }
@@ -1148,6 +1150,7 @@ static void winogrande_score(llama_context * ctx, const gpt_params & params) {
         }
 
         eval_pairs.clear();
+        eval_pairs.reserve((i1 - i0));
         for (size_t i = i0; i < i1; ++i) {
             auto & task = data[i];
 
@@ -1158,12 +1161,14 @@ static void winogrande_score(llama_context * ctx, const gpt_params & params) {
             const auto& n_base1 = skip_choice ? task.n_base1 : task.common_prefix;
             const int last_1st = task.seq_tokens[0].size() - n_base1 > 1 ? 1 : 0;
             size_t li = n_base1 - 1;
+            eval_pairs.reserve((task.seq_tokens[0].size() - 1 - last_1st) - (n_base1 - 1));
             for (size_t j = n_base1-1; j < task.seq_tokens[0].size()-1-last_1st; ++j) {
                 eval_pairs.emplace_back(task.i_batch + li++, task.seq_tokens[0][j+1]);
             }
             const auto& n_base2 = skip_choice ? task.n_base2 : task.common_prefix;
             const int last_2nd = task.seq_tokens[1].size() - n_base2 > 1 ? 1 : 0;
             li = task.seq_tokens[0].size() - task.common_prefix + n_base2 - 1;
+            eval_pairs.reserve((task.seq_tokens[1].size() - 1 - last_2nd) - (n_base2 - 1));
             for (size_t j = n_base2-1; j < task.seq_tokens[1].size()-1-last_2nd; ++j) {
                 eval_pairs.emplace_back(task.i_batch + li++, task.seq_tokens[1][j+1]);
             }
@@ -1519,10 +1524,13 @@ static void multiple_choice_score(llama_context * ctx, const gpt_params & params
         // Compute log-probs in parallel
         // First we collect all tasks
         eval_pairs.clear();
+        eval_pairs.reserve(i1 - i0);
         for (size_t i = i0; i < i1; ++i) {
             auto& cur_task = tasks[i];
             size_t li = cur_task.common_prefix;
+            eval_pairs.reserve(cur_task.seq_tokens.size());
             for (int s = 0; s < int(cur_task.seq_tokens.size()); ++s) {
+                eval_pairs.reserve((cur_task.seq_tokens[s].size() - 1) - cur_task.common_prefix);
                 for (size_t j = cur_task.common_prefix; j < cur_task.seq_tokens[s].size() - 1; j++) {
                     eval_pairs.emplace_back(cur_task.i_batch + li++, cur_task.seq_tokens[s][j + 1]);
                 }
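
One thing to keep in mind when reading the perplexity.cpp hunks above: std::vector::reserve() requests a total capacity, not extra headroom on top of the current contents, and a request smaller than the current capacity is a no-op that never discards already-appended elements. A small standalone check of that behaviour (plain C++, unrelated to the llama.cpp types):

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

int main() {
    std::vector<int> v;

    v.reserve(8); // capacity() is now at least 8
    const std::size_t cap = v.capacity();

    v.push_back(1);
    v.push_back(2);

    v.reserve(4); // no-op: the request is below the current capacity
    assert(v.capacity() == cap);
    assert(v.size() == 2); // reserve() never changes size() or the contents

    return 0;
}
```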

examples/quantize/quantize.cpp

Lines changed: 1 addition & 0 deletions
@@ -49,6 +49,7 @@ static const std::vector<struct quant_option> QUANT_OPTIONS = {
 static bool try_parse_ftype(const std::string & ftype_str_in, llama_ftype & ftype, std::string & ftype_str_out) {
     std::string ftype_str;
 
+    ftype_str.reserve(ftype_str_in.size());
     for (auto ch : ftype_str_in) {
         ftype_str.push_back(std::toupper(ch));
     }

llama.cpp

Lines changed: 7 additions & 0 deletions
@@ -1107,6 +1107,7 @@ struct llama_mmap {
 
         // update the list of mapped fragments to avoid unmapping the same range again in the destructor
         std::vector<std::pair<size_t, size_t>> new_mapped_fragments;
+        new_mapped_fragments.reserve(mapped_fragments.size());
         for (const auto & frag : mapped_fragments) {
             if (frag.first < first && frag.second > last) {
                 // the range is in the middle of the fragment, split it
@@ -7908,6 +7909,7 @@ struct llm_tokenizer_spm {
         // split string into utf8 chars
         int index = 0;
         size_t offs = 0;
+        symbols.reserve(text.size());
         while (offs < text.size()) {
             llm_symbol sym;
             size_t len = utf8_len(text[offs]);
@@ -8065,6 +8067,7 @@ struct llm_tokenizer_bpe {
             int index = 0;
             size_t offset = 0;
 
+            symbols.reserve(word.size());
             while (offset < word.size()) {
                 llm_symbol sym;
                 size_t char_len = std::min(word.size() - offset, (size_t) ::utf8_len(word[offset]));
@@ -8138,6 +8141,7 @@ struct llm_tokenizer_bpe {
             const auto token = vocab.token_to_id.find(str);
 
             if (token == vocab.token_to_id.end()) {
+                output.reserve(str.end() - str.begin());
                 for (auto j = str.begin(); j != str.end(); ++j) {
                     std::string byte_str(1, *j);
                     auto token_multibyte = vocab.token_to_id.find(byte_str);
@@ -8309,6 +8313,7 @@ struct llm_tokenizer_bpe {
             }
         }
 
+        bpe_encoded_words.reserve(bpe_words.size());
         for (std::string & word : bpe_words) {
             std::string encoded_token = "";
             for (char & c : word) {
@@ -10194,6 +10199,7 @@ static void llama_convert_tensor_internal(
     size_t in_buff_offs = 0;
     size_t out_buff_offs = 0;
 
+    workers.reserve(nthread);
     for (int tnum = 0; tnum < nthread; tnum++) {
         size_t thr_blocks = blocks_per_thread + (tnum == nthread - 1 ? spare_blocks : 0); // num blocks for this thread
         size_t thr_elems = thr_blocks * block_size; // number of elements for this thread
@@ -10697,6 +10703,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
                         first_row * n_per_row, this_nrow, n_per_row, local_hist.data(), imatrix);
                 }
             };
+            workers.reserve(nthread_use - 1);
             for (int it = 0; it < nthread_use - 1; ++it) {
                 workers.emplace_back(compute);
             }
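
The two workers.reserve(...) calls in the conversion and quantization paths pre-size a std::vector<std::thread>; besides skipping reallocations, this keeps the already-launched std::thread handles from being moved while later workers are still being created. A minimal sketch of the same pattern, independent of the llama.cpp code (nthread and the lambda body are placeholders):

```cpp
#include <thread>
#include <vector>

int main() {
    const int nthread = 4;

    std::vector<std::thread> workers;
    workers.reserve(nthread); // allocate space for all thread handles before launching any

    for (int t = 0; t < nthread; t++) {
        workers.emplace_back([t]() {
            // per-thread work for worker t would go here
            (void) t; // silence unused-capture warnings in this stub
        });
    }

    for (auto & w : workers) {
        w.join();
    }
    return 0;
}
```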
