Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions deploy/cpp_infer/cli.cc
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ std::tuple<PaddleOCRParams, DocPreprocessorParams,
DocImgOrientationClassificationParams, TextImageUnwarpingParams,
TextDetectionParams, TextLineOrientationClassificationParams,
TextRecognitionParams>
GetPipelineMoudleParams() {
GetPipelineModuleParams() {
PaddleOCRParams ocr_params;
DocPreprocessorParams doc_pre_params;
DocImgOrientationClassificationParams doc_orient_params;
Expand Down Expand Up @@ -181,6 +181,10 @@ GetPipelineMoudleParams() {
if (!FLAGS_text_rec_score_thresh.empty()) {
ocr_params.text_rec_score_thresh = std::stof(FLAGS_text_rec_score_thresh);
}
if (!FLAGS_return_word_box.empty()) {
ocr_params.return_word_box = Utility::StringToBool(FLAGS_return_word_box);
rec_params.return_word_box = Utility::StringToBool(FLAGS_return_word_box);
}
if (!FLAGS_text_rec_input_shape.empty()) {
ocr_params.text_rec_input_shape =
YamlConfig::SmartParseVector(FLAGS_text_rec_input_shape).vec_int;
Expand Down Expand Up @@ -280,7 +284,7 @@ int main(int argc, char *argv[]) {
"<pipeline_or_module> [--param1] [--param2] [...]");
exit(-1);
}
auto params = GetPipelineMoudleParams();
auto params = GetPipelineModuleParams();
using PredFunc = std::function<std::vector<std::unique_ptr<BaseCVResult>>(
const std::string &)>;
std::unordered_map<std::string, PredFunc> pred_map = {
Expand Down
1 change: 1 addition & 0 deletions deploy/cpp_infer/src/api/models/text_recognition.cc
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ TextRecPredictorParams TextRecognition::ToTextRecognitionModelParams(
COPY_PARAMS(model_dir)
COPY_PARAMS(batch_size)
COPY_PARAMS(input_shape)
COPY_PARAMS(return_word_box)
COPY_PARAMS(vis_font_dir)
COPY_PARAMS(device)
COPY_PARAMS(enable_mkldnn)
Expand Down
1 change: 1 addition & 0 deletions deploy/cpp_infer/src/api/models/text_recognition.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ struct TextRecognitionParams {
int cpu_threads = 8;
int batch_size = 1;
absl::optional<std::vector<int>> input_shape = absl::nullopt;
absl::optional<bool> return_word_box = absl::nullopt;
};

class TextRecognition {
Expand Down
1 change: 1 addition & 0 deletions deploy/cpp_infer/src/api/pipelines/ocr.cc
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ OCRPipelineParams PaddleOCR::ToOCRPipelineParams(const PaddleOCRParams &from) {
COPY_PARAMS(text_det_input_shape)
COPY_PARAMS(text_rec_score_thresh)
COPY_PARAMS(text_rec_input_shape)
COPY_PARAMS(return_word_box)
COPY_PARAMS(lang)
COPY_PARAMS(ocr_version)
COPY_PARAMS(vis_font_dir)
Expand Down
1 change: 1 addition & 0 deletions deploy/cpp_infer/src/api/pipelines/ocr.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ struct PaddleOCRParams {
absl::optional<std::vector<int>> text_det_input_shape = absl::nullopt;
absl::optional<float> text_rec_score_thresh = absl::nullopt;
absl::optional<std::vector<int>> text_rec_input_shape = absl::nullopt;
absl::optional<bool> return_word_box = absl::nullopt;
absl::optional<std::string> lang = absl::nullopt;
absl::optional<std::string> ocr_version = absl::nullopt;
absl::optional<std::string> vis_font_dir = absl::nullopt;
Expand Down
137 changes: 130 additions & 7 deletions deploy/cpp_infer/src/common/processors.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

#include <algorithm>
#include <cmath>
#include <codecvt>
#include <numeric>
#include <stdexcept>
#include <unordered_map>
Expand Down Expand Up @@ -636,9 +637,9 @@ std::vector<std::vector<cv::Point2f>> ComponentsProcessor::SortPolyBoxes(
return dt_polys_rank;
}

std::vector<std::array<float, 4>> ComponentsProcessor::ConvertPointsToBoxes(
std::vector<std::array<int, 4>> ComponentsProcessor::ConvertPointsToBoxes(
const std::vector<std::vector<cv::Point2f>> &dt_polys) {
std::vector<std::array<float, 4>> dt_boxes;
std::vector<std::array<int, 4>> dt_boxes;
for (const auto &poly : dt_polys) {
if (poly.empty()) {
continue;
Expand All @@ -658,7 +659,8 @@ std::vector<std::array<float, 4>> ComponentsProcessor::ConvertPointsToBoxes(
if (pt.y > bottom)
bottom = pt.y;
}
dt_boxes.push_back({left, top, right, bottom});
dt_boxes.push_back({static_cast<int>(left), static_cast<int>(top),
static_cast<int>(right), static_cast<int>(bottom)});
}
return dt_boxes;
}
Expand Down Expand Up @@ -773,17 +775,14 @@ CropByPolys::GetPolyRectCrop(const cv::Mat &img,
if (poly.size() < 4)
return absl::InvalidArgumentError(
"Less than 4 points for GetPolyRectCrop.");
// 对Poly和最小外接矩形做IoU判断
std::vector<cv::Point2f> minrect = GetMinAreaRectPoints(poly);
if (minrect.size() != 4)
return absl::InternalError("Failed to get minarea rect.");
double iou = IoU(poly, minrect);
// 若IoU>0.7则返回直接crop,否则可做更复杂处理,如透视矫正,可进一步实现自定义变形矫正
auto crop_result = GetRotateCropImage(img, minrect);
if (!crop_result.ok())
return crop_result.status();
// 测试下如果IoU很高就用直接的最小外接矩形crop,否则复杂矫正(本实现只用直接crop)
// 若需更强几何修复,可集成TPS、ThinPlateSpline或AutoRectifier

return *crop_result;
}

Expand Down Expand Up @@ -824,3 +823,127 @@ double CropByPolys::IoU(const std::vector<cv::Point2f> &poly1,
return 0.0;
return area_inter / area_union;
}

std::vector<ComponentsProcessor::Box>
ComponentsProcessor::SortBoxes(const std::vector<Box> &boxes, float y_thresh) {
struct BoxWithCenter {
Box box;
Point center;
};
std::vector<BoxWithCenter> items;
for (const Box &box : boxes) {
double x = 0, y = 0;
for (const auto &p : box) {
x += p.x;
y += p.y;
}
x /= box.size();
y /= box.size();
items.push_back({box, Point(x, y)});
}
std::sort(items.begin(), items.end(),
[](const BoxWithCenter &a, const BoxWithCenter &b) {
return a.center.y < b.center.y;
});

std::vector<std::vector<BoxWithCenter>> lines;
std::vector<BoxWithCenter> current_line;
double last_y = NAN;
for (const auto &item : items) {
if (std::isnan(last_y) || std::fabs(item.center.y - last_y) < y_thresh) {
current_line.push_back(item);
} else {
lines.push_back(current_line);
current_line.clear();
current_line.push_back(item);
}
last_y = item.center.y;
}
if (!current_line.empty())
lines.push_back(current_line);

std::vector<Box> final_boxes;
for (auto &line : lines) {
std::sort(line.begin(), line.end(),
[](const BoxWithCenter &a, const BoxWithCenter &b) {
return a.center.x < b.center.x;
});
for (const auto &item : line) {
final_boxes.push_back(item.box);
}
}
return final_boxes;
}

/// Derives word-level / character-level boxes for one recognised text line.
///
/// The horizontal extent of `box` is divided into `col_num` equal-width
/// columns. For each entry of `word_list`:
///  - state "cn": every column index of the word yields one box centred on
///    that column, whose width is the average character width estimated from
///    the multi-column "cn" words (or, when there are none, the line width
///    divided by the number of characters in `rec_str`);
///  - any other state: one box spanning the word's first to last column.
/// The boxes are finally ordered with SortBoxes (y threshold 12).
///
/// @param rec_str       Recognised text of the line, UTF-8 encoded.
/// @param box           Line box; box[0].x / box[1].x are used as the left /
///                      right edge and box[0].y / box[2].y as top / bottom.
/// @param col_num       Number of columns the line is divided into.
/// @param word_list     Words of the line.
/// @param word_col_list Column indices occupied by each word.
/// @param state_list    Per-word state string ("cn" selects the per-column
///                      path).
/// @return `word_list` unchanged, paired with the sorted boxes. If `box` has
///         fewer than 3 points or `col_num` is not positive the box list is
///         empty.
/// @throws std::range_error if `rec_str` cannot be decoded as UTF-8.
std::pair<std::vector<std::wstring>, std::vector<ComponentsProcessor::Box>>
ComponentsProcessor::CalOCRWordBox(
    const std::string &rec_str, const Box &box, int col_num,
    const std::vector<std::wstring> &word_list,
    const std::vector<std::vector<int>> &word_col_list,
    const std::vector<std::string> &state_list) {
  // Decode once so the text length can be measured in characters. The UTF-8
  // byte count would make multi-byte (e.g. CJK) characters look several times
  // narrower than they are.
  std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
  const std::wstring text = converter.from_bytes(rec_str);

  // Without three corner points or a positive column count there is no
  // geometry to work with; indexing `box` or dividing would be invalid.
  if (box.size() < 3 || col_num <= 0) {
    return {word_list, std::vector<Box>{}};
  }

  const double bbox_x_start = box[0].x;
  const double bbox_x_end = box[1].x;
  const double bbox_y_start = box[0].y;
  const double bbox_y_end = box[2].y;
  const double bbox_width = bbox_x_end - bbox_x_start;
  const double cell_width = bbox_width / col_num;

  // Builds a rectangle with the line's full height between two x positions,
  // corners in the order top-left, top-right, bottom-right, bottom-left.
  const auto make_cell = [&](double x_start, double x_end) {
    return Box{Point(static_cast<float>(x_start),
                     static_cast<float>(bbox_y_start)),
               Point(static_cast<float>(x_end),
                     static_cast<float>(bbox_y_start)),
               Point(static_cast<float>(x_end),
                     static_cast<float>(bbox_y_end)),
               Point(static_cast<float>(x_start),
                     static_cast<float>(bbox_y_end))};
  };

  std::vector<Box> word_box_list;
  std::vector<double> cn_width_list;
  std::vector<int> cn_col_list;

  // The three lists are parallel; only walk the part they all cover.
  const size_t word_count = std::min(
      {word_list.size(), word_col_list.size(), state_list.size()});
  for (size_t idx = 0; idx < word_count; ++idx) {
    const std::vector<int> &word_col = word_col_list[idx];
    if (word_col.empty()) {
      continue; // no columns -> nothing to locate
    }
    if (state_list[idx] == "cn") {
      if (word_col.size() != 1) {
        const double char_seq_length =
            (word_col.back() - word_col.front() + 1) * cell_width;
        cn_width_list.push_back(char_seq_length / (word_col.size() - 1));
      }
      cn_col_list.insert(cn_col_list.end(), word_col.begin(), word_col.end());
    } else {
      const double cell_x_start =
          bbox_x_start + word_col.front() * cell_width;
      const double cell_x_end =
          bbox_x_start + (word_col.back() + 1) * cell_width;
      word_box_list.push_back(make_cell(cell_x_start, cell_x_end));
    }
  }

  if (!cn_col_list.empty()) {
    double avg_char_width = 0.0;
    if (!cn_width_list.empty()) {
      avg_char_width =
          std::accumulate(cn_width_list.begin(), cn_width_list.end(), 0.0) /
          cn_width_list.size();
    } else {
      // No multi-column word to measure from: spread the line width evenly
      // over its characters (at least one, to avoid dividing by zero).
      const size_t char_count = std::max<size_t>(text.size(), 1);
      avg_char_width = bbox_width / char_count;
    }
    const double half_width = avg_char_width / 2;
    for (const int center_idx : cn_col_list) {
      const double center_x = (center_idx + 0.5) * cell_width;
      // Clamp to the line box before shifting into image coordinates.
      const double cell_x_start =
          std::max(center_x - half_width, 0.0) + bbox_x_start;
      const double cell_x_end =
          std::min(center_x + half_width, bbox_width) + bbox_x_start;
      word_box_list.push_back(make_cell(cell_x_start, cell_x_end));
    }
  }

  // NOTE(review): the returned contents are the input `word_list`, so they
  // are not necessarily one-to-one with the (sorted) boxes when "cn" words
  // span several columns — confirm what callers expect.
  return {word_list, SortBoxes(word_box_list, 12.0)};
}
16 changes: 15 additions & 1 deletion deploy/cpp_infer/src/common/processors.h
Original file line number Diff line number Diff line change
Expand Up @@ -135,13 +135,27 @@ class ToBatch : public BaseProcessor {

// Stateless collection of static helpers for working with detected text
// polygons/boxes (rotation, ordering, bounding boxes, word-level boxes).
class ComponentsProcessor {
public:
// Alternative fixed-size aliases, currently disabled:
// using Point = std::array<double, 2>;
// using Box = std::array<Point, 4> ;
// A 2-D point with float coordinates.
using Point = cv::Point2f;
// A box is a list of points. CalOCRWordBox builds boxes as four corners in
// the order top-left, top-right, bottom-right, bottom-left.
using Box = std::vector<Point>;

// NOTE(review): definition not visible here; presumably rotates `image` by
// `angle` and reports failure through the returned status — confirm.
static absl::StatusOr<cv::Mat> RotateImage(const cv::Mat &image, int angle);
// NOTE(review): definition not visible here; presumably returns the
// quadrilateral polygons reordered — confirm the ordering rule.
static std::vector<std::vector<cv::Point2f>>
SortQuadBoxes(const std::vector<std::vector<cv::Point2f>> &dt_polys);
// Returns the polygons in ranked order (the ranking rule is defined in the
// implementation file).
static std::vector<std::vector<cv::Point2f>>
SortPolyBoxes(const std::vector<std::vector<cv::Point2f>> &dt_polys);
// Converts each non-empty polygon to an axis-aligned {left, top, right,
// bottom} box taken from the extreme coordinates of its points; empty
// polygons are skipped.
static std::vector<std::array<float, 4>>
static std::vector<std::array<int, 4>>
ConvertPointsToBoxes(const std::vector<std::vector<cv::Point2f>> &dt_polys);

// Returns a copy of `boxes` ordered by text line (center y), then by center
// x within each line. Consecutive boxes whose center-y gap is below
// `y_thresh` are treated as the same line.
static std::vector<Box> SortBoxes(const std::vector<Box> &boxes,
float y_thresh = 10.0);

// Splits the text line `box` into `col_num` equal-width columns and builds
// boxes for the entries of `word_list` from their column indices
// (`word_col_list`) and state (`state_list`; "cn" entries get one box per
// column). Returns `word_list` together with the boxes sorted by SortBoxes.
static std::pair<std::vector<std::wstring>, std::vector<Box>>
CalOCRWordBox(const std::string &rec_str, const Box &box, int col_num,
const std::vector<std::wstring> &word_list,
const std::vector<std::vector<int>> &word_col_list,
const std::vector<std::string> &state_list);
};

class CropByPolys {
Expand Down
46 changes: 41 additions & 5 deletions deploy/cpp_infer/src/modules/text_recognition/predictor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,11 @@ TextRecPredictor::TextRecPredictor(const TextRecPredictorParams &params)
absl::Status TextRecPredictor::Build() {
const auto &pre_params = config_.PreProcessOpInfo();
Register<ReadImage>("Read", "BGR"); //******
Register<OCRReisizeNormImg>("ReisizeNorm", params_.input_shape);
rec_image_shape_ =
YamlConfig::SmartParseVector(pre_params.at("RecResizeImg.image_shape"))
.vec_int;
Register<OCRResizeNormImg>("ReisizeNorm", params_.input_shape,
rec_image_shape_);
Register<ToBatchUniform>("ToBatch");
infer_ptr_ = CreateStaticInfer();
const auto &post_params = config_.PostProcessOpInfo();
Expand All @@ -60,6 +64,19 @@ TextRecPredictor::Process(std::vector<cv::Mat> &batch_data) {
exit(-1);
}

std::vector<float> width_list;
for (const auto &img : batch_read.value()) {
double ratio = static_cast<float>(img.cols) / static_cast<float>(img.rows);
width_list.push_back(ratio);
}

std::vector<int> indices(width_list.size());
for (int i = 0; i < indices.size(); ++i)
indices[i] = i;

std::sort(indices.begin(), indices.end(),
[&](int a, int b) { return width_list[a] < width_list[b]; });

auto batch_resize_norm = pre_op_.at("ReisizeNorm")->Apply(batch_read.value());
if (!batch_resize_norm.ok()) {
INFOE(batch_resize_norm.status().ToString().c_str());
Expand All @@ -77,8 +94,26 @@ TextRecPredictor::Process(std::vector<cv::Mat> &batch_data) {
exit(-1);
}

auto ctc_result =
post_op_.at("CTCLabelDecode")->Apply(batch_infer.value()[0]);
int batch_num = batch_sampler_ptr_->BatchSize();
int img_num = batch_data.size();

int imgC = rec_image_shape_[0];
int imgH = rec_image_shape_[1];
int imgW = rec_image_shape_[2];
float max_wh_ratio = static_cast<float>(imgW) / static_cast<float>(imgH);
int end_img_no = std::min(img_num, batch_num);
std::vector<float> wh_ratio_list = {};
for (int ino = 0; ino < end_img_no; ino++) {
int h = batch_read.value()[indices[ino]].size[0];
int w = batch_read.value()[indices[ino]].size[1];
float wh_ratio = static_cast<float>(w) / static_cast<float>(h);
max_wh_ratio = std::max(max_wh_ratio, wh_ratio);
wh_ratio_list.push_back(wh_ratio);
}
auto ctc_result = post_op_.at("CTCLabelDecode")
->Apply(batch_infer.value()[0],
params_.return_word_box.value_or(false),
wh_ratio_list, max_wh_ratio);

if (!ctc_result.ok()) {
INFOE(ctc_result.status().ToString().c_str());
Expand All @@ -94,8 +129,9 @@ TextRecPredictor::Process(std::vector<cv::Mat> &batch_data) {
predictor_result.input_path = input_path_[input_index_];
}
predictor_result.input_image = origin_image[i];
predictor_result.rec_text = ctc_result.value()[i].first;
predictor_result.rec_score = ctc_result.value()[i].second;
predictor_result.rec_text = ctc_result.value()[i].sentence.first;
predictor_result.rec_score = ctc_result.value()[i].sentence.second;
predictor_result.ctc_result = ctc_result.value()[i];
predictor_result.vis_font = params_.vis_font_dir.value_or("");
predictor_result_vec_.push_back(predictor_result);
base_cv_result_ptr_vec.push_back(
Expand Down
3 changes: 3 additions & 0 deletions deploy/cpp_infer/src/modules/text_recognition/predictor.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ struct TextRecPredictorResult {
std::string rec_text = "";
float rec_score = 0.0;
std::string vis_font = "";
CTCLabelDecodeResult ctc_result;
};

struct TextRecPredictorParams {
Expand All @@ -41,6 +42,7 @@ struct TextRecPredictorParams {
int cpu_threads = 8;
int batch_size = 1;
absl::optional<std::vector<int>> input_shape = absl::nullopt;
absl::optional<bool> return_word_box = absl::nullopt;
};

class TextRecPredictor : public BasePredictor {
Expand All @@ -66,4 +68,5 @@ class TextRecPredictor : public BasePredictor {
std::unique_ptr<PaddleInfer> infer_ptr_;
TextRecPredictorParams params_;
int input_index_ = 0;
std::vector<int> rec_image_shape_ = {};
};
Loading
Loading