@@ -67,12 +67,7 @@ bool Segmentation::initialize()
6767 _ortSession = std::make_unique<Ort::Session>(*_ortEnvironment, _parameters.modelWeights .c_str (), ortSessionOptions);
6868 #endif
6969
70- Ort::MemoryInfo memInfoCuda (" Cuda" , OrtAllocatorType::OrtArenaAllocator, 0 , OrtMemType::OrtMemTypeDefault);
71- Ort::Allocator cudaAllocator (*_ortSession, memInfoCuda);
72-
7370 _output.resize (_parameters.classes .size () * _parameters.modelHeight * _parameters.modelWidth );
74- _cudaInput = cudaAllocator.Alloc (_output.size () * sizeof (float ));
75- _cudaOutput = cudaAllocator.Alloc (_output.size () * sizeof (float ));
7671#endif
7772 }
7873 else
@@ -88,18 +83,6 @@ bool Segmentation::initialize()
8883 return true ;
8984}
9085
// Releases the buffers held in _cudaInput and _cudaOutput.
// The release only happens when ALICEVISION_HAVE_ONNX_GPU is defined; otherwise
// the function does nothing. It returns true in every case.
91- bool Segmentation::terminate ()
92- {
93- #if ALICEVISION_IS_DEFINED(ALICEVISION_HAVE_ONNX_GPU)
// Build an allocator on the current session from an arena-type memory-info
// descriptor, then hand both buffers back to it.
// NOTE(review): presumably this must describe the same allocator that produced
// the two pointers -- confirm against the allocation site.
94- Ort::MemoryInfo mem_info_cuda (" Cuda" , OrtAllocatorType::OrtArenaAllocator, 0 , OrtMemType::OrtMemTypeDefault);
95- Ort::Allocator cudaAllocator (*_ortSession, mem_info_cuda);
96- cudaAllocator.Free (_cudaInput);
97- cudaAllocator.Free (_cudaOutput);
98- #endif
99-
100- return true ;
101- }
102-
10386bool Segmentation::processImage (image::Image<IndexT>& labels, const image::Image<image::RGBfColor>& source)
10487{
10588 // Todo : handle orientation and small images smaller than model input
@@ -244,7 +227,7 @@ bool Segmentation::mergeLabels(image::Image<ScoredLabel>& labels, image::Image<S
244227 return true ;
245228}
246229
247- bool Segmentation::labelsFromModelOutput (image::Image<ScoredLabel>& labels, const std::vector< float > & modelOutput)
230+ bool Segmentation::labelsFromOutputTensor (image::Image<ScoredLabel>& labels, Ort::Value & modelOutput)
248231{
249232 for (int outputY = 0 ; outputY < _parameters.modelHeight ; outputY++)
250233 {
@@ -255,10 +238,8 @@ bool Segmentation::labelsFromModelOutput(image::Image<ScoredLabel>& labels, cons
255238
256239 for (int classe = 0 ; classe < _parameters.classes .size (); classe++)
257240 {
258- int classPos = classe * _parameters.modelWidth * _parameters.modelHeight ;
259- int pos = classPos + outputY * _parameters.modelWidth + outputX;
260-
261- float val = modelOutput[pos];
241+ const std::vector<int64_t > coords = {0 ,classe,outputY,outputX};
242+ const float val = modelOutput.At <float >(coords);
262243 if (val > maxVal)
263244 {
264245 maxVal = val;
@@ -281,76 +262,77 @@ bool Segmentation::processTile(image::Image<ScoredLabel>& labels, const image::I
281262 std::vector<const char *> inputNames{" input" };
282263 std::vector<const char *> outputNames{" output" };
283264 std::vector<int64_t > inputDimensions = {1 , 3 , _parameters.modelHeight , _parameters.modelWidth };
284- std::vector<int64_t > outputDimensions = {1 , static_cast <int64_t >(_parameters.classes .size ()), _parameters.modelHeight , _parameters.modelWidth };
285-
286- std::vector<float > output (_parameters.classes .size () * _parameters.modelHeight * _parameters.modelWidth );
287- Ort::Value outputTensors =
288- Ort::Value::CreateTensor<float >(memInfo, output.data (), output.size (), outputDimensions.data (), outputDimensions.size ());
289265
290266 std::vector<float > transformedInput;
291267 imageToPlanes (transformedInput, source);
292268
293269 Ort::Value inputTensors =
294270 Ort::Value::CreateTensor<float >(memInfo, transformedInput.data (), transformedInput.size (), inputDimensions.data (), inputDimensions.size ());
295271
272+ std::vector<Ort::Value> outTensor;
273+
296274 try
297275 {
298- _ortSession->Run (Ort::RunOptions{nullptr }, inputNames.data (), &inputTensors, 1 , outputNames.data (), &outputTensors , 1 );
276+ outTensor = _ortSession->Run (Ort::RunOptions{nullptr }, inputNames.data (), &inputTensors, 1 , outputNames.data (), 1 );
299277 }
300278 catch (const Ort::Exception& exception)
301279 {
302280 ALICEVISION_LOG_ERROR (" ERROR running model inference: " << exception.what ());
303281 return false ;
304282 }
305283
306- if (!labelsFromModelOutput (labels, output ))
284+ if (!labelsFromOutputTensor (labels, outTensor[ 0 ] ))
307285 {
308286 return false ;
309287 }
310288
289+ std::vector<float > output (_parameters.classes .size () * _parameters.modelHeight * _parameters.modelWidth );
290+ auto *outTData = outTensor.front ().GetTensorMutableData <float >();
291+ output.assign (outTData, outTData + _parameters.classes .size () * _parameters.modelHeight * _parameters.modelWidth );
292+
311293 return true ;
312294}
313295
314296bool Segmentation::processTileGPU (image::Image<ScoredLabel>& labels, const image::Image<image::RGBfColor>::Base& source)
315297{
316298 ALICEVISION_LOG_TRACE (" Process tile using gpu" );
317299#if ALICEVISION_IS_DEFINED(ALICEVISION_HAVE_CUDA)
318- Ort::MemoryInfo mem_info_cuda (" Cuda" , OrtAllocatorType::OrtArenaAllocator, 0 , OrtMemType::OrtMemTypeDefault);
319- Ort::Allocator cudaAllocator (*_ortSession, mem_info_cuda);
300+ Ort::MemoryInfo memInfo = Ort::MemoryInfo::CreateCpu (OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault);
320301
321302 std::vector<const char *> inputNames{" input" };
322303 std::vector<const char *> outputNames{" output" };
323304 std::vector<int64_t > inputDimensions = {1 , 3 , _parameters.modelHeight , _parameters.modelWidth };
324- std::vector<int64_t > outputDimensions = {1 , static_cast <int64_t >(_parameters.classes .size ()), _parameters.modelHeight , _parameters.modelWidth };
325-
326- Ort::Value outputTensors = Ort::Value::CreateTensor<float >(
327- mem_info_cuda, reinterpret_cast <float *>(_cudaOutput), _output.size (), outputDimensions.data (), outputDimensions.size ());
328305
329306 std::vector<float > transformedInput;
330307 imageToPlanes (transformedInput, source);
331308
332- cudaMemcpy (_cudaInput, transformedInput.data (), sizeof (float ) * transformedInput.size (), cudaMemcpyHostToDevice);
309+ std::vector<Ort::Value> inputTensors;
310+ inputTensors.emplace_back (Ort::Value::CreateTensor<float >(memInfo,
311+ transformedInput.data (),
312+ transformedInput.size (),
313+ inputDimensions.data (),
314+ inputDimensions.size ()));
333315
334- Ort::Value inputTensors = Ort::Value::CreateTensor<float >(
335- mem_info_cuda, reinterpret_cast <float *>(_cudaInput), transformedInput.size (), inputDimensions.data (), inputDimensions.size ());
316+ std::vector<Ort::Value> outTensor;
336317
337318 try
338319 {
339- _ortSession->Run (Ort::RunOptions{nullptr }, inputNames.data (), & inputTensors, 1 , outputNames.data (), &outputTensors , 1 );
320+ outTensor = _ortSession->Run (Ort::RunOptions{nullptr }, inputNames.data (), inputTensors. data () , 1 , outputNames.data (), 1 );
340321 }
341322 catch (const Ort::Exception& exception)
342323 {
343324 ALICEVISION_LOG_ERROR (" ERROR running model inference: " << exception.what ());
344325 return false ;
345326 }
346327
347- cudaMemcpy (_output.data (), _cudaOutput, sizeof (float ) * _output.size (), cudaMemcpyDeviceToHost);
348-
349- if (!labelsFromModelOutput (labels, _output))
328+ if (!labelsFromOutputTensor (labels, outTensor[0 ]))
350329 {
351330 return false ;
352331 }
353332
333+ auto *outTData = outTensor.front ().GetTensorMutableData <float >();
334+ _output.assign (outTData, outTData + _parameters.classes .size () * _parameters.modelHeight * _parameters.modelWidth );
335+
354336#endif
355337
356338 return true ;
0 commit comments