44
55#include < numeric>
66
7+ #include " mmdeploy/archive/json_archive.h"
78#include " mmdeploy/archive/value_archive.h"
89#include " mmdeploy/codebase/mmaction/mmaction.h"
910#include " mmdeploy/codebase/mmcls/mmcls.h"
@@ -67,15 +68,42 @@ void ConvertDetections(const Value& item, std::vector<Tensor>& tensors) {
6768 " labels" });
6869 auto bboxes_data = bboxes.data <float >();
6970 auto labels_data = labels.data <int32_t >();
71+ int64_t sum_byte_size = 0 ;
7072 for (const auto & det : detections) {
7173 for (const auto & x : det.bbox ) {
7274 *bboxes_data++ = x;
7375 }
7476 *bboxes_data++ = det.score ;
7577 *labels_data++ = det.label_id ;
78+ sum_byte_size += det.mask .byte_size ();
7679 }
7780 tensors.push_back (std::move (bboxes));
7881 tensors.push_back (std::move (labels));
82+ if (sum_byte_size > 0 ) {
83+ // return mask
84+ Tensor masks (TensorDesc{bboxes.device (),
85+ ::mmdeploy::DataType::kINT8 ,
86+ {static_cast <int64_t >(sum_byte_size)},
87+ " masks" });
88+ Tensor offs (TensorDesc{bboxes.device (),
89+ ::mmdeploy::DataType::kINT32 ,
90+ {static_cast <int64_t >(detections.size ()), 3 },
91+ " mask_offs" }); // [(off, w, h), ... ]
92+
93+ auto masks_data = masks.data <int8_t >();
94+ auto offs_data = offs.data <int32_t >();
95+ int sum_offs = 0 ;
96+ for (const auto & det : detections) {
97+ memcpy (masks_data, det.mask .data <int8_t >(), det.mask .byte_size ());
98+ masks_data += det.mask .byte_size ();
99+ *offs_data++ = sum_offs;
100+ *offs_data++ = det.mask .width ();
101+ *offs_data++ = det.mask .height ();
102+ sum_offs += det.mask .byte_size ();
103+ }
104+ tensors.push_back (std::move (masks));
105+ tensors.push_back (std::move (offs));
106+ }
79107}
80108
81109void ConvertSegmentation (const Value& item, std::vector<Tensor>& tensors) {
@@ -105,39 +133,75 @@ void ConvertTextDetections(const Value& item, std::vector<Tensor>& tensors) {
105133 ::mmdeploy::from_value (item, detections);
106134 Tensor bboxes (TensorDesc{::mmdeploy::Device (0 ),
107135 ::mmdeploy::DataType::kFLOAT ,
108- {static_cast <int64_t >(detections.size ()), 9 },
109- " dets" });
136+ {static_cast <int64_t >(detections.size ()), 8 },
137+ " bboxes" });
138+ Tensor scores (TensorDesc{::mmdeploy::Device (0 ),
139+ ::mmdeploy::DataType::kFLOAT ,
140+ {static_cast <int64_t >(detections.size ()), 1 },
141+ " scores" });
110142 auto bboxes_data = bboxes.data <float >();
143+ auto scores_data = scores.data <float >();
111144 for (const auto & det : detections) {
112145 bboxes_data = std::copy (det.bbox .begin (), det.bbox .end (), bboxes_data);
113- *bboxes_data ++ = det.score ;
146+ *scores_data ++ = det.score ;
114147 }
115148 tensors.push_back (std::move (bboxes));
149+ tensors.push_back (std::move (scores));
150+ }
151+
// Batched text-recognition conversion: splits one flat list of recognitions
// into per-request output tensors.
//
// item              - value decoded (via from_value) into a vector of
//                     mmocr::TextRecognition.
// request_count     - number of requests; one tensor list per request is filled.
// batch_per_request - batch_per_request[i] is how many recognitions request i consumes.
// tensors           - tensors[i] receives two INT32 tensors of shape {num} for request i.
// strings           - string pool; the tensors store indices into this vector.
//
// NOTE(review): recognitions[k++], batch_per_request[i] and tensors[i] are all
// indexed without bounds checks — presumably the caller guarantees that
// sum(batch_per_request) == recognitions.size() and that both containers hold
// at least request_count entries; confirm against the call site.
152+ void ConvertTextRecognitions (const Value& item, int request_count,
153+ const std::vector<int >& batch_per_request,
154+ std::vector<std::vector<Tensor>>& tensors,
155+ std::vector<std::string>& strings) {
156+ std::vector<::mmdeploy::mmocr::TextRecognition> recognitions;
157+ ::mmdeploy::from_value (item, recognitions);
158+
// k is the running index into `recognitions`; it is NOT reset between
// requests, so consecutive requests consume consecutive slices of the list.
159+ int k = 0 ;
160+ for (int i = 0 ; i < request_count; i++) {
161+ int num = batch_per_request[i];
// Both tensors are INT32: they hold positions in `strings`, not the text or
// score values themselves.
162+ Tensor texts (TensorDesc{
163+ ::mmdeploy::Device (0 ), ::mmdeploy::DataType::kINT32, {static_cast <int64_t >(num)}, " texts" });
164+ Tensor score (TensorDesc{::mmdeploy::Device (0 ),
165+ ::mmdeploy::DataType::kINT32 ,
166+ {static_cast <int64_t >(num)},
167+ " scores" });
168+ auto text_data = texts.data <int32_t >();
169+ auto score_data = score.data <int32_t >();
170+
171+ for (int j = 0 ; j < num; j++) {
172+ auto & recognition = recognitions[k++];
// Record the index the text is about to occupy in `strings`, then append it.
173+ text_data[j] = static_cast <int32_t >(strings.size ());
174+ strings.push_back (recognition.text );
// Same scheme for the score: it is converted to a value, serialized with
// to_json(...).dump(), and the resulting string is appended to `strings`.
175+ score_data[j] = static_cast <int32_t >(strings.size ());
176+ strings.push_back (::mmdeploy::to_json (::mmdeploy::to_value (recognition.score )).dump ());
177+ }
178+ tensors[i].push_back (std::move (texts));
179+ tensors[i].push_back (std::move (score));
180+ }
116181}
117182
118183void ConvertTextRecognitions (const Value& item, std::vector<Tensor>& tensors,
119184 std::vector<std::string>& strings) {
120185 std::vector<::mmdeploy::mmocr::TextRecognition> recognitions;
121186 ::mmdeploy::from_value (item, recognitions);
187+
122188 Tensor texts (TensorDesc{::mmdeploy::Device (0 ),
123189 ::mmdeploy::DataType::kINT32 ,
124190 {static_cast <int64_t >(recognitions.size ())},
125- " text " });
191+ " rec_texts " });
126192 Tensor score (TensorDesc{::mmdeploy::Device (0 ),
127- ::mmdeploy::DataType::kFLOAT ,
193+ ::mmdeploy::DataType::kINT32 ,
128194 {static_cast <int64_t >(recognitions.size ())},
129- " text_score " });
195+ " rec_scores " });
130196 auto text_data = texts.data <int32_t >();
131- auto score_data = score.data <float >();
132- for (size_t text_id = 0 ; text_id < recognitions.size (); ++text_id) {
133- text_data[text_id] = static_cast <int32_t >(strings.size ());
134- strings.push_back (recognitions[text_id].text );
135- auto & s = recognitions[text_id].score ;
136- if (!s.empty ()) {
137- score_data[text_id] = std::accumulate (s.begin (), s.end (), 0 .f ) / static_cast <float >(s.size ());
138- } else {
139- score_data[text_id] = 0 ;
140- }
197+ auto score_data = score.data <int32_t >();
198+
199+ for (size_t j = 0 ; j < recognitions.size (); j++) {
200+ auto & recognition = recognitions[j];
201+ text_data[j] = static_cast <int32_t >(strings.size ());
202+ strings.push_back (recognition.text );
203+ score_data[j] = static_cast <int32_t >(strings.size ());
204+ strings.push_back (::mmdeploy::to_json (::mmdeploy::to_value (recognition.score )).dump ());
141205 }
142206 tensors.push_back (std::move (texts));
143207 tensors.push_back (std::move (score));
@@ -164,28 +228,47 @@ void ConvertPreprocess(const Value& item, std::vector<Tensor>& tensors,
164228 tensors.push_back (std::move (img_meta_tensor));
165229}
166230
167- void ConvertPoseDetections (const Value& item, std::vector<Tensor>& tensors) {
168- ::mmdeploy::mmpose::PoseDetectorOutput detections;
// Converts a keyed collection of tensors into a flat list of output tensors,
// naming each output after its key.
//
// item    - value iterated with begin()/end(); each entry is read as a Tensor
//           and its iterator key supplies the output name.
// tensors - destination list; one tensor is appended per entry of `item`.
231+ void ConvertInference (const Value& item, std::vector<Tensor>& tensors) {
// An explicit iterator loop is used because the key is read from the iterator
// itself (it.key()), not from the dereferenced element.
232+ for (auto it = item.begin (); it != item.end (); ++it) {
233+ auto tensor = it->get <Tensor>();
// `auto` makes `desc` a local copy, so renaming it does not touch `tensor`.
234+ auto desc = tensor.desc ();
235+ desc.name = it.key ();
// The appended tensor is constructed from the renamed descriptor and the
// source tensor's buffer().
236+ tensors.emplace_back (desc, tensor.buffer ());
237+ }
238+ }
239+
// Batched pose-detection conversion: splits one flat list of pose outputs into
// one "keypoints" tensor per request.
//
// item              - value decoded (via from_value) into a vector of
//                     mmpose::PoseDetectorOutput.
// request_count     - number of requests; tensors[i] is filled for each i.
// batch_per_request - batch_per_request[i] is how many detections request i consumes.
// tensors           - tensors[i] receives one FLOAT tensor of shape
//                     {num, K, 3}, where K is detections[0].key_points.size()
//                     and the 3 values per keypoint are bbox[0], bbox[1], score.
//
// NOTE(review): detections[0] is read for every request, including when
// `detections` is empty — std::vector::operator[] is unchecked, so that case is
// out of range. Confirm the caller never passes an empty result, or guard it.
// NOTE(review): the tensor is sized from detections[0] only; this assumes every
// detection has the same number of key points, otherwise the writes through
// pts_data will not match the allocated size — confirm.
// NOTE(review): detections[k++], batch_per_request[i] and tensors[i] are not
// bounds-checked; presumably sum(batch_per_request) == detections.size().
240+ void ConvertPoseDetections (const Value& item, int request_count,
241+ const std::vector<int >& batch_per_request,
242+ std::vector<std::vector<Tensor>>& tensors) {
243+ std::vector<::mmdeploy::mmpose::PoseDetectorOutput> detections;
169244 ::mmdeploy::from_value (item, detections);
// NOTE(review): the lines below carrying a '-' marker look like the previous
// single-output body shown by a diff view (removed side) — confirm before
// treating them as live code.
170- Tensor pts (TensorDesc{::mmdeploy::Device (0 ),
171- ::mmdeploy::DataType::kFLOAT ,
172- {static_cast <int64_t >(detections.key_points .size ()), 3 },
173- " keypoints" });
174- auto pts_data = pts.data <float >();
175- for (const auto & p : detections.key_points ) {
176- *pts_data++ = p.bbox [0 ];
177- *pts_data++ = p.bbox [1 ];
178- *pts_data++ = p.score ;
245+
// k is the running index into `detections`; it carries over between requests
// so each request consumes the next `num` detections.
246+ int k = 0 ;
247+ for (int i = 0 ; i < request_count; i++) {
248+ int num = batch_per_request[i];
249+ Tensor pts (TensorDesc{::mmdeploy::Device (0 ),
250+ ::mmdeploy::DataType::kFLOAT ,
251+ {num, static_cast <int64_t >(detections[0 ].key_points .size ()), 3 },
252+ " keypoints" });
253+ auto pts_data = pts.data <float >();
254+ for (int j = 0 ; j < num; j++) {
255+ auto & detection = detections[k++];
// Each key point contributes three consecutive floats: bbox[0], bbox[1], score.
256+ for (const auto & p : detection.key_points ) {
257+ *pts_data++ = p.bbox [0 ];
258+ *pts_data++ = p.bbox [1 ];
259+ *pts_data++ = p.score ;
260+ }
261+ }
262+ tensors[i].push_back (std::move (pts));
179263 }
180- tensors.push_back ({std::move (pts)});
181264}
182265
183266void ConvertRotatedDetections (const Value& item, std::vector<Tensor>& tensors) {
184267 ::mmdeploy::mmrotate::RotatedDetectorOutput detections;
185268 ::mmdeploy::from_value (item, detections);
186269 Tensor bboxes (TensorDesc{::mmdeploy::Device (0 ),
187270 ::mmdeploy::DataType::kFLOAT ,
188- {static_cast <int64_t >(detections.detections .size ()), 5 },
271+ {static_cast <int64_t >(detections.detections .size ()), 6 },
189272 " bboxes" });
190273 Tensor labels (TensorDesc{::mmdeploy::Device (0 ),
191274 ::mmdeploy::DataType::kINT32 ,
@@ -203,13 +286,19 @@ void ConvertRotatedDetections(const Value& item, std::vector<Tensor>& tensors) {
203286}
204287
205288std::vector<std::vector<Tensor>> ConvertOutputToTensors (const std::string& type,
206- int32_t request_count, const Value& output,
289+ int32_t request_count,
290+ const std::vector<int >& batch_per_request,
291+ const Value& output,
207292 std::vector<std::string>& strings) {
208293 std::vector<std::vector<Tensor>> tensors (request_count);
209294 if (type == " Preprocess" ) {
210295 for (int i = 0 ; i < request_count; ++i) {
211296 ConvertPreprocess (output.front ()[i], tensors[i], strings);
212297 }
298+ } else if (type == " Inference" ) {
299+ for (int i = 0 ; i < request_count; ++i) {
300+ ConvertInference (output.front ()[i], tensors[i]);
301+ }
213302 } else if (type == " Classifier" ) {
214303 for (int i = 0 ; i < request_count; ++i) {
215304 ConvertClassifications (output.front ()[i], tensors[i]);
@@ -231,13 +320,9 @@ std::vector<std::vector<Tensor>> ConvertOutputToTensors(const std::string& type,
231320 ConvertTextDetections (output.front ()[i], tensors[i]);
232321 }
233322 } else if (type == " TextRecognizer" ) {
234- for (int i = 0 ; i < request_count; ++i) {
235- ConvertTextRecognitions (output.front (), tensors[i], strings);
236- }
323+ ConvertTextRecognitions (output.front (), request_count, batch_per_request, tensors, strings);
237324 } else if (type == " PoseDetector" ) {
238- for (int i = 0 ; i < request_count; ++i) {
239- ConvertPoseDetections (output.front ()[i], tensors[i]);
240- }
325+ ConvertPoseDetections (output.front (), request_count, batch_per_request, tensors);
241326 } else if (type == " RotatedDetector" ) {
242327 for (int i = 0 ; i < request_count; ++i) {
243328 ConvertRotatedDetections (output.front ()[i], tensors[i]);
0 commit comments