Skip to content

Commit cbee690

Browse files
committed
内存优化 (memory optimization)
1 parent c966c4f commit cbee690

File tree

4 files changed

+279
-432
lines changed

4 files changed

+279
-432
lines changed

src/runner/LLM.hpp

Lines changed: 3 additions & 106 deletions
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ class LLM
184184

185185
std::vector<int> rets(attr.axmodel_num);
186186
std::atomic<int> process_idx = 2;
187-
#pragma omp parallel for
187+
#pragma omp parallel for if (_attr.dev_ids.size() > 1)
188188
for (int i = 0; i < attr.axmodel_num; i++)
189189
{
190190
char axmodel_path[1024];
@@ -273,9 +273,9 @@ class LLM
273273
{
274274
for (int i = 0; i < _attr.axmodel_num; i++)
275275
{
276-
llama_layers[i].layer.release();
276+
llama_layers[i].layer.deinit();
277277
}
278-
llama_post.release();
278+
llama_post.deinit();
279279

280280
embed_selector.Deinit();
281281

@@ -710,109 +710,6 @@ class LLM
710710
return 0;
711711
}
712712

713-
// int Encode(cv::Mat src, std::vector<unsigned short> &out_embed)
714-
// {
715-
// std::vector<float> mean = {0.485, 0.456, 0.406};
716-
// std::vector<float> scale = {0.229, 0.224, 0.225};
717-
// timer t;
718-
// t.start();
719-
// cv::Mat dst;
720-
// cv::resize(src, dst, cv::Size(_attr.image_encoder_width, _attr.image_encoder_height));
721-
// cv::cvtColor(dst, dst, cv::COLOR_BGR2RGB);
722-
723-
// // std::vector<float> input_data(dst.rows * dst.cols * 3);
724-
725-
// float *input_data = (float *)image_encoder.get_input(0).pVirAddr;
726-
727-
// unsigned char *img_data = dst.data;
728-
// int letterbox_rows = dst.rows;
729-
// int letterbox_cols = dst.cols;
730-
731-
// for (int h = 0; h < letterbox_rows; h++)
732-
// {
733-
// for (int w = 0; w < letterbox_cols; w++)
734-
// {
735-
// for (int c = 0; c < 3; c++)
736-
// {
737-
// int in_index = h * letterbox_cols * 3 + w * 3 + c;
738-
// int out_index = c * letterbox_rows * letterbox_cols + h * letterbox_cols + w;
739-
// input_data[out_index] = (float(img_data[in_index]) / 255.0 - mean[c]) / scale[c];
740-
// }
741-
// }
742-
// }
743-
744-
// // void *data = image_encoder.get_input("input").pVirAddr;
745-
// // memcpy(data, dst.data, dst.rows * dst.cols * 3);
746-
747-
// // std::vector<char> vit_in;
748-
// // if (!read_file("/home/axera/internvl2_5-8b-mpo_ax-infer/img.bin", vit_in))
749-
// // {
750-
// // ALOGE("read img.bin failed");
751-
// // return -1;
752-
// // }
753-
// // memcpy(input_data, vit_in.data(), image_encoder.get_input(0).nSize);
754-
755-
// image_encoder.inference();
756-
// int size = 1;
757-
// for (size_t i = 0; i < image_encoder.get_output(0).vShape.size(); i++)
758-
// {
759-
// size *= image_encoder.get_output(0).vShape[i];
760-
// }
761-
762-
// out_embed.resize(size);
763-
764-
// float *out_data = (float *)image_encoder.get_output(0).pVirAddr;
765-
766-
// for (size_t i = 0; i < size; i++)
767-
// {
768-
// out_embed[i] = bfloat16(out_data[i]).data;
769-
// }
770-
771-
// // memcpy(out_embed.data(), image_encoder.get_output(0).pVirAddr, image_encoder.get_output(0).nSize);
772-
// ALOGI("image encode time : %0.2f ms, size : %ld", t.cost(), out_embed.size());
773-
// return 0;
774-
// }
775-
776-
// int Encode(std::vector<unsigned short> &img_embed, std::vector<unsigned short> &out_embed, std::string prompt = "What is in the image?")
777-
// {
778-
// std::vector<int> input_ids = tokenizer->Encode(prompt, true);
779-
780-
// // constexpr int IMG_CONTEXT = 151648; // InternVL2
781-
// // constexpr int IMG_CONTEXT = 151667; // InternVL2.5
782-
// constexpr int IMG_CONTEXT = 92546; // InternVL2.5-8B-MPO
783-
// int offset = 0;
784-
785-
// for (size_t i = 0; i < input_ids.size(); i++)
786-
// {
787-
// if (input_ids[i] == IMG_CONTEXT)
788-
// {
789-
// offset = i;
790-
// break;
791-
// }
792-
// }
793-
794-
// // for (size_t i = 0; i < input_ids.size(); i++)
795-
// // {
796-
// // printf("%d ", input_ids[i]);
797-
// // }
798-
// // printf("\n");
799-
800-
// if (input_ids.size() > _attr.prefill_token_num)
801-
// {
802-
// ALOGE("input_ids(%ld) > prefill_token_num(%d)", input_ids.size(), _attr.prefill_token_num);
803-
// return -1;
804-
// }
805-
// out_embed.resize(input_ids.size() * _attr.tokens_embed_size);
806-
807-
// for (size_t i = 0; i < input_ids.size(); i++)
808-
// {
809-
// embed_selector.getByIndex(input_ids[i], out_embed.data() + i * _attr.tokens_embed_size);
810-
// }
811-
// memcpy(out_embed.data() + offset * _attr.tokens_embed_size, img_embed.data(), img_embed.size() * sizeof(unsigned short));
812-
// ALOGI("offset : %d out_embed.size() : %ld", offset, out_embed.size());
813-
// return 0;
814-
// }
815-
816713
int Encode(std::vector<unsigned short> &out_embed, std::string prompt, std::string last_reply, std::vector<int> &tokens_ids, std::vector<int> &tokens_diff)
817714
{
818715
if (!tokenizer->Encode(prompt, last_reply, tokens_ids, tokens_diff))

src/runner/ax_model_runner/ax_model_runner.hpp

Lines changed: 53 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -58,9 +58,35 @@ class ax_runner_base
5858

5959
int dev_id = 0;
6060

61+
// 辅助函数:初始化完成后构建映射表,提高后续查找速度
62+
void build_tensor_maps()
63+
{
64+
map_input_tensors.clear();
65+
for (const auto &t : minput_tensors)
66+
map_input_tensors[t.sName] = t;
67+
68+
map_output_tensors.clear();
69+
for (const auto &t : moutput_tensors)
70+
map_output_tensors[t.sName] = t;
71+
72+
map_group_input_tensors.clear();
73+
for (const auto &grp : mgroup_input_tensors)
74+
{
75+
for (const auto &t : grp)
76+
map_group_input_tensors[t.sName].push_back(t);
77+
}
78+
79+
map_group_output_tensors.clear();
80+
for (const auto &grp : mgroup_output_tensors)
81+
{
82+
for (const auto &t : grp)
83+
map_group_output_tensors[t.sName].push_back(t);
84+
}
85+
}
86+
6187
public:
6288
virtual int init(const char *model_file, int devid) = 0;
63-
virtual int init(char *model_buffer, size_t model_size) = 0;
89+
virtual int init(char *model_buffer, size_t model_size, int devid) = 0;
6490

6591
virtual void deinit() = 0;
6692

@@ -74,83 +100,51 @@ class ax_runner_base
74100

75101
const ax_runner_tensor_t &get_input(int idx) { return minput_tensors[idx]; }
76102
const ax_runner_tensor_t *get_inputs_ptr() { return minput_tensors.data(); }
77-
const ax_runner_tensor_t &get_input(std::string name)
103+
104+
const ax_runner_tensor_t &get_input(const std::string &name)
78105
{
79-
if (map_input_tensors.size() == 0)
80-
{
81-
for (size_t i = 0; i < minput_tensors.size(); i++)
82-
{
83-
map_input_tensors[minput_tensors[i].sName] = minput_tensors[i];
84-
}
85-
}
86-
if (map_input_tensors.find(name) == map_input_tensors.end())
87-
{
106+
auto it = map_input_tensors.find(name);
107+
if (it == map_input_tensors.end())
88108
throw std::runtime_error("input tensor not found: " + name);
89-
}
90-
91-
return map_input_tensors[name];
109+
return it->second;
92110
}
93111

94112
const ax_runner_tensor_t &get_input(int grpid, int idx) { return mgroup_input_tensors[grpid][idx]; }
95113
const ax_runner_tensor_t *get_inputs_ptr(int grpid) { return mgroup_input_tensors[grpid].data(); }
96-
const ax_runner_tensor_t &get_input(int grpid, std::string name)
114+
115+
const ax_runner_tensor_t &get_input(int grpid, const std::string &name)
97116
{
98-
if (map_group_input_tensors.size() == 0)
99-
{
100-
for (size_t i = 0; i < mgroup_input_tensors.size(); i++)
101-
{
102-
for (size_t j = 0; j < mgroup_input_tensors[i].size(); j++)
103-
{
104-
map_group_input_tensors[mgroup_input_tensors[i][j].sName].push_back(mgroup_input_tensors[i][j]);
105-
}
106-
}
107-
}
108-
if (map_group_input_tensors.find(name) == map_group_input_tensors.end())
109-
{
117+
auto it = map_group_input_tensors.find(name);
118+
if (it == map_group_input_tensors.end())
110119
throw std::runtime_error("input tensor not found: " + name);
111-
}
112-
return map_group_input_tensors[name][grpid];
113-
// return map_input_tensors[name];
120+
// 简单的越界检查
121+
if (grpid < 0 || grpid >= (int)it->second.size())
122+
throw std::runtime_error("group id out of range for: " + name);
123+
return it->second[grpid];
114124
}
115125

116126
const ax_runner_tensor_t &get_output(int idx) { return moutput_tensors[idx]; }
117127
const ax_runner_tensor_t *get_outputs_ptr() { return moutput_tensors.data(); }
118-
const ax_runner_tensor_t &get_output(std::string name)
128+
129+
const ax_runner_tensor_t &get_output(const std::string &name)
119130
{
120-
if (map_output_tensors.size() == 0)
121-
{
122-
for (size_t i = 0; i < moutput_tensors.size(); i++)
123-
{
124-
map_output_tensors[moutput_tensors[i].sName] = moutput_tensors[i];
125-
}
126-
}
127-
if (map_output_tensors.find(name) == map_output_tensors.end())
128-
{
131+
auto it = map_output_tensors.find(name);
132+
if (it == map_output_tensors.end())
129133
throw std::runtime_error("output tensor not found: " + name);
130-
}
131-
132-
return map_output_tensors[name];
134+
return it->second;
133135
}
134136

135137
const ax_runner_tensor_t &get_output(int grpid, int idx) { return mgroup_output_tensors[grpid][idx]; }
136138
const ax_runner_tensor_t *get_outputs_ptr(int grpid) { return mgroup_output_tensors[grpid].data(); }
137-
const ax_runner_tensor_t &get_output(int grpid, std::string name)
139+
140+
const ax_runner_tensor_t &get_output(int grpid, const std::string &name)
138141
{
139-
if (map_group_output_tensors.size() == 0)
140-
{
141-
for (size_t i = 0; i < mgroup_output_tensors.size(); i++)
142-
{
143-
for (size_t j = 0; j < mgroup_output_tensors[i].size(); j++)
144-
{
145-
map_group_output_tensors[mgroup_output_tensors[i][j].sName].push_back(mgroup_output_tensors[i][j]);
146-
}
147-
}
148-
}
149-
if (map_group_output_tensors.find(name) == map_group_output_tensors.end())
150-
{
151-
throw std::runtime_error("input tensor not found: " + name);
152-
}
153-
return map_group_output_tensors[name][grpid];
142+
auto it = map_group_output_tensors.find(name);
143+
if (it == map_group_output_tensors.end())
144+
throw std::runtime_error("output tensor not found: " + name);
145+
if (grpid < 0 || grpid >= (int)it->second.size())
146+
throw std::runtime_error("group id out of range for: " + name);
147+
return it->second[grpid];
154148
}
155149

156150
virtual int get_algo_width() = 0;

0 commit comments

Comments
 (0)