@@ -11,42 +11,53 @@ void TRTFaceFusionFaceRestoration::detect(cv::Mat &face_swap_image, std::vector<
1111
1212 cv::Mat crop_image;
1313 cv::Mat affine_matrix;
14- std::tie (crop_image,affine_matrix) = face_utils::warp_face_by_face_landmark_5 (face_swap_image,target_landmarks_5,face_utils::FFHQ_512);
14+ // 记录时间
15+ auto start_warp = std::chrono::high_resolution_clock::now ();
16+ std::tie (crop_image,affine_matrix) = face_utils::warp_face_by_face_landmark_5 (face_swap_image,target_landmarks_5,
17+ face_utils::FFHQ_512);
1518
1619 std::vector<float > crop_size = {512 ,512 };
1720 cv::Mat box_mask = face_utils::create_static_box_mask (crop_size);
1821 std::vector<cv::Mat> crop_mask_list;
1922 crop_mask_list.emplace_back (box_mask);
2023
21- cv::cvtColor (crop_image,crop_image,cv::COLOR_BGR2RGB);
22- crop_image.convertTo (crop_image,CV_32FC3,1 .f / 255 .f );
23- crop_image.convertTo (crop_image,CV_32FC3,2 .0f ,-1 .f );
24+ cv::Mat crop_image_rgb;
25+ launch_bgr2rgb (crop_image,crop_image_rgb);
26+ crop_image_rgb.convertTo (crop_image_rgb,CV_32FC3,1 .f / 255 .f );
27+ crop_image_rgb.convertTo (crop_image_rgb,CV_32FC3,2 .0f ,-1 .f );
2428
2529 std::vector<float > input_vector;
26- trtcv::utils::transform::create_tensor (crop_image ,input_vector,input_node_dims,trtcv::utils::transform::CHW);
30+ trtcv::utils::transform::create_tensor (crop_image_rgb ,input_vector,input_node_dims,trtcv::utils::transform::CHW);
2731
28- // 拷贝
32+ auto end_warp = std::chrono::high_resolution_clock::now ();
33+ std::chrono::duration<double , std::milli> fp_ms_warp = end_warp - start_warp;
34+ std::cout << " FaceRestoration preprocess time: " << fp_ms_warp.count () << " ms" << std::endl;
2935
36+
37+ // 记录时间
38+ auto start = std::chrono::high_resolution_clock::now ();
3039 // 先不用拷贝了 处理完成再拷贝出来 类似于整个后处理放在GPU上完成
3140 cudaMemcpyAsync (buffers[0 ],input_vector.data (),1 * 3 * 512 * 512 * sizeof (float ),cudaMemcpyHostToDevice,stream);
32-
3341 // 同步
3442 cudaStreamSynchronize (stream);
35-
3643 // 推理
3744 bool status = trt_context->enqueueV3 (stream);
45+
3846 if (!status) {
3947 std::cerr << " Failed to inference" << std::endl;
4048 return ;
4149 }
42-
43-
4450 // 同步
4551 cudaStreamSynchronize (stream);
52+ auto end = std::chrono::high_resolution_clock::now ();
53+ std::chrono::duration<double , std::milli> fp_ms = end - start;
54+ std::cout << " FaceRestoration Inference time: " << fp_ms.count () << " ms" << std::endl;
4655 std::vector<unsigned char > transposed_data (1 * 3 * 512 * 512 );
4756
4857// std::vector<float> transposed_data(1 * 3 * 512 * 512);
4958
59+ // 记录时间
60+ auto start_postprocess = std::chrono::high_resolution_clock::now ();
5061 // 这里buffer1就是输出了
5162 launch_face_restoration_postprocess (
5263 static_cast <float *>(buffers[1 ]),
@@ -64,47 +75,31 @@ void TRTFaceFusionFaceRestoration::detect(cv::Mat &face_swap_image, std::vector<
6475 std::vector<float > output_vector (1 * 3 * 512 * 512 );
6576// cudaMemcpyAsync(output_vector.data(),buffers[1],1 * 3 * 512 * 512 * sizeof(float),cudaMemcpyDeviceToHost,stream);
6677 cudaStreamSynchronize (stream);
67- //
6878 // 后处理
6979 int channel = 3 ;
7080 int height = 512 ;
7181 int width = 512 ;
72- // std::vector<float> output(channel * height * width);
73- // output.assign(output_vector.begin(),output_vector.end());
74- //
75- // std::transform(output.begin(),output.end(),output.begin(),
76- // [](double x){return std::max(-1.0,std::max(-1.0,std::min(1.0,x)));});
77- //
78- // std::transform(output.begin(),output.end(),output.begin(),
79- // [](double x){return (x + 1.f) /2.f;});
80- //
81- // // CHW2HWC
82- // for (int c = 0; c < channel; ++c){
83- // for (int h = 0 ; h < height; ++h){
84- // for (int w = 0; w < width ; ++w){
85- // int src_index = c * (height * width) + h * width + w;
86- // int dst_index = h * (width * channel) + w * channel + c;
87- // transposed_data[dst_index] = output[src_index];
88- // }
89- // }
90- // }
91- //
92- // std::transform(transposed_data.begin(),transposed_data.end(),transposed_data.begin(),
93- // [](float x){return std::round(x * 255.f);});
94- //
95- // std::transform(transposed_data.begin(), transposed_data.end(), transposed_data.begin(),
96- // [](float x) { return static_cast<uint8_t>(x); });
9782
9883
9984 cv::Mat mat (height, width, CV_32FC3, transposed_data_float.data ());
100- // cv::imwrite("/home/lite.ai.toolkit/mid_process.jpg",mat);
10185 cv::cvtColor (mat, mat, cv::COLOR_RGB2BGR);
86+ // 到这里为止基本不耗时
10287
10388
10489 auto crop_mask = crop_mask_list[0 ];
105- cv::Mat paste_frame = face_utils::paste_back (ori_image,mat,crop_mask,affine_matrix);
106-
90+ // 这里的paste_back 40ms左右
91+ cv::Mat paste_frame = launch_paste_back (ori_image,mat,crop_mask,affine_matrix);
92+ // cv::Mat paste_frame = face_utils::paste_back(ori_image,mat,crop_mask,affine_matrix);
10793 cv::Mat dst_image = face_utils::blend_frame (ori_image,paste_frame);
94+ auto end_postprocess = std::chrono::high_resolution_clock::now ();
95+ std::chrono::duration<double , std::milli> fp_ms_postprocess = end_postprocess - start_postprocess;
96+ std::cout << " FaceRestoration postprocess time: " << fp_ms_postprocess.count () << " ms" << std::endl;
10897
98+ // 记录时间
99+ auto start_save = std::chrono::high_resolution_clock::now ();
109100 cv::imwrite (face_enchaner_path,dst_image);
101+ auto end_save = std::chrono::high_resolution_clock::now ();
102+ std::chrono::duration<double , std::milli> fp_ms_save = end_save - start_save;
103+ std::cout << " FaceRestoration save time: " << fp_ms_save.count () << " ms" << std::endl;
104+
110105}
0 commit comments