Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions .github/pull_request_template.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
**Issues:**

- https://github.com/openmpf/openmpf/issues/1 < Example
- https://github.com/openmpf/openmpf/issues/2 < Example

If a related issue doesn't exist, then create one first and assign it to yourself.

**Related PRs:**

- https://github.com/openmpf/openmpf/pull/22 < Example
- https://github.com/openmpf/openmpf/pull/53 < Example

Please review our [Contributor Guide](https://openmpf.github.io/docs/site/Contributor-Guide/index.html).
7 changes: 5 additions & 2 deletions cpp/TesseractOCRTextDetection/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,11 @@
This repository contains source code and model data for the OpenMPF Tesseract
OCR text detection component.

The component extracts text found in an image, reported as a single track
detection. PDF documents can also be processed with one track detection per
The component extracts text found in an image, video, or generic document.
Image results are reported as a single track
detection [per specified language setting](#detecting-multiple-languages).
Video results are reported as single track detections per frame and language setting.
PDF documents can also be processed with one track detection per
page. The first page corresponds to the detection property `PAGE_NUM=1`. For
debugging purposes, images converted from documents are stored in a temporary
job directory under `plugin/TesseractOCR/tmp-[job-id]-[random tag]`. This
Expand Down
552 changes: 310 additions & 242 deletions cpp/TesseractOCRTextDetection/TesseractOCRTextDetection.cpp

Large diffs are not rendered by default.

32 changes: 20 additions & 12 deletions cpp/TesseractOCRTextDetection/TesseractOCRTextDetection.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ namespace MPF {

class TessApiWrapper;

class TesseractOCRTextDetection : public MPFImageDetectionComponentAdapter {
class TesseractOCRTextDetection : public MPFDetectionComponent {

public:
bool Init() override;
Expand All @@ -71,6 +71,10 @@ namespace MPF {

std::vector<MPFGenericTrack> GetDetections(const MPFGenericJob &job) override;

std::vector<MPFVideoTrack> GetDetections(const MPFVideoJob &job) override;

std::vector<MPFAudioTrack> GetDetections(const MPFAudioJob &job) override;

std::string GetDetectionType() override;

bool Supports(MPFDetectionDataType data_type) override;
Expand Down Expand Up @@ -158,13 +162,11 @@ namespace MPF {

struct Image_results{
std::vector<OCR_output> detections_by_lang;
MPFDetectionError job_status;
};

struct OCR_results {
std::string text_result;
std::string lang;
MPFDetectionError job_status;
double confidence;
};

Expand All @@ -180,7 +182,6 @@ namespace MPF {

struct PDF_page_results {
std::set<std::string> all_missing_languages;
MPFDetectionError job_status;
std::vector<MPFGenericTrack> *tracks;
};

Expand All @@ -201,7 +202,12 @@ namespace MPF {
}
};

bool process_ocr_text(Properties &detection_properties, const MPFImageJob &job, const OCR_output &ocr_out,
std::vector<MPFImageLocation> process_image_job(const MPFJob &job,
TesseractOCRTextDetection::OCR_filter_settings &ocr_fset,
cv::Mat &image_data,
const std::string &run_dir);

bool process_ocr_text(Properties &detection_properties, const MPFJob &job, const OCR_output &ocr_out,
const TesseractOCRTextDetection::OCR_filter_settings &ocr_fset,
int page_num = -1);

Expand All @@ -222,29 +228,32 @@ namespace MPF {
static void process_parallel_image_runs(OCR_job_inputs &inputs, Image_results &results);
static void process_serial_image_runs(OCR_job_inputs &inputs, Image_results &results);

void preprocess_image(const MPFImageJob &job, cv::Mat &input_image, const OCR_filter_settings &ocr_fset);
void rescale_image(const MPFImageJob &job, cv::Mat &input_image, const OCR_filter_settings &ocr_fset);
void preprocess_image(const MPFJob &job, cv::Mat &input_image, const OCR_filter_settings &ocr_fset);
void rescale_image(const MPFJob &job, cv::Mat &input_image, const OCR_filter_settings &ocr_fset);

static void process_tesseract_lang_model(OCR_job_inputs &input, OCR_results &result);

void set_default_parameters();

void set_read_config_parameters();

void load_settings(const MPFJob &job, OCR_filter_settings &ocr_fset, const Text_type &text_type = Unknown);
void load_settings(const MPFJob &job, OCR_filter_settings &ocr_fset);
void load_image_preprocessing_settings(const MPFJob &job,
OCR_filter_settings &ocr_fset,
const Text_type &text_type = Unknown);

void sharpen(cv::Mat &image, double weight);

static std::string process_osd_lang(const std::string &script_type,
const OCR_filter_settings &ocr_fset);

void get_OSD(OSBestResult &best_result, cv::Mat &imi, const MPFImageJob &job,
void get_OSD(OSBestResult &best_result, cv::Mat &imi, const MPFJob &job,
OCR_filter_settings &ocr_fset,
Properties &detection_properties,
std::string &tessdata_script_dir, std::set<std::string> &missing_languages);

bool get_OSD_rotation(OSResults *results, cv::Mat &imi_scaled, cv::Mat &imi_original,
int &rotation, const MPFImageJob &job, OCR_filter_settings &ocr_fset);
int &rotation, const MPFJob &job, OCR_filter_settings &ocr_fset);

static std::string return_valid_tessdir(const std::string &job_name,
const std::string &lang_str,
Expand All @@ -265,8 +274,7 @@ namespace MPF {

void check_default_languages(const OCR_filter_settings &ocr_fset,
const std::string &job_name,
const std::string &run_dir,
MPFDetectionError &job_status);
const std::string &run_dir);
};

// The primary reason this class exists is that tesseract::TessBaseAPI segfaults when copying or moving.
Expand Down
44 changes: 36 additions & 8 deletions cpp/TesseractOCRTextDetection/sample_tesseract_ocr_detector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,10 @@ using std::to_string;
void print_usage(char *argv[]) {

std::cout << "Usage: " << argv[0] <<
" <-i | -g> [--osd] [--oem TESSERACT_OEM] <IMAGE_URI | GENERIC_URI> [TESSERACT_LANGUAGE]" <<
" <-i | -v | -g> [--osd] [--oem TESSERACT_OEM] <IMAGE_URI | VIDEO_URI <START_FRAME> <END_FRAME> | GENERIC_URI> [TESSERACT_LANGUAGE]" <<
std::endl << std::endl;
std::cout << "Notes: " << std::endl << std::endl;
std::cout << " -i | -g : Specifies whether to process an image (-i <IMAGE_URI>) or generic document (-g <GENERIC_URI>)." <<
std::cout << " <-i | -v | -g> : Specifies whether to process an image (-i <IMAGE_URI>), video (-v <VIDEO_URI> <START_FRAME> <END_FRAME>), or generic document (-g <GENERIC_URI>)." <<
std::endl << std::endl;
std::cout << " --osd : When provided, runs the job with automatic orientation and script detection (OSD). " <<
std::endl;
Expand Down Expand Up @@ -102,8 +102,8 @@ bool check_options(const std::string &next_option, const int &argc, char *argv[
if (next_option == "--osd") {
algorithm_properties["ENABLE_OSD_AUTOMATION"] = "true";
uri_index++;
} else if (next_option == "--oem" || argc - uri_index > 2) {
std::cout << "Updating OEM MODE " << argv[uri_index + 1];
} else if (next_option == "--oem" && argc - uri_index > 2) {
std::cout << "Updating OEM MODE " << argv[uri_index + 1] << std::endl;
algorithm_properties["TESSERACT_OEM"] = argv[uri_index + 1];
uri_index += 2;
} else {
Expand Down Expand Up @@ -131,18 +131,30 @@ int main(int argc, char *argv[]) {
algorithm_properties["SHARPEN"] = "1.0";
algorithm_properties["ENABLE_OSD_AUTOMATION"] = "false";

int uri_index = 2;
int uri_index = 2, video_params = 0, start_frame = 0, end_frame = 1;

std::string next_option = std::string(argv[uri_index]);
if (check_options(next_option, argc, argv, algorithm_properties, uri_index)) {
next_option = std::string(argv[uri_index]);
check_options(next_option, argc, argv, algorithm_properties, uri_index);
}

if (argc - uri_index == 1) {
if (media_option == "-v") {
video_params = 2;
if (argc - uri_index < 3) {
print_usage(argv);
return 0;

}
start_frame = std::stoi(argv[uri_index+1]);
end_frame = std::stoi(argv[uri_index+2]);
}

if (argc - uri_index - video_params == 1) {
uri = argv[uri_index];
} else if (argc - uri_index == 2) {
} else if (argc - uri_index - video_params == 2) {
uri = argv[uri_index];
algorithm_properties["TESSERACT_LANGUAGE"] = argv[uri_index + 1];
algorithm_properties["TESSERACT_LANGUAGE"] = argv[uri_index + video_params + 1];
} else {
print_usage(argv);
return 0;
Expand Down Expand Up @@ -176,6 +188,22 @@ int main(int argc, char *argv[]) {
print_detection_properties(locations[i].detection_properties, locations[i].confidence);
}
}
else if (media_option == "-v") {
// Run uri as a video data file.
std::cout << "Running job on video data uri: " << uri << std::endl;
MPFVideoJob job(job_name, uri, start_frame, end_frame, algorithm_properties, media_properties);
int count = 0;
for (auto track: im.GetDetections(job)) {
std::cout << "Track number: " << count << std::endl;
std::map<int, MPFImageLocation> locations = track.frame_locations;
std::cout << "Number of image locations: " << locations.size() << std::endl << std::endl;
for (const auto &location: locations) {
std::cout << "Frame number: " << location.first << std::endl;
print_detection_properties(location.second.detection_properties, location.second.confidence);
}
count ++;
}
}
else {
print_usage(argv);
}
Expand Down
13 changes: 13 additions & 0 deletions cpp/TesseractOCRTextDetection/test/data/NOTICE
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,19 @@ Custom generated pdf for testing document text extraction.
# test-backslash.png
Custom generated image for testing escaped backslash tagging.

# test-video-detection.avi
Short clip of three separate image frames for testing video detection capability.
Contains public domain text from the following sources:

https://en.wikipedia.org/wiki/Diazepam
(Japanese Translation)
Public Domain

http://www.un.org/en/universal-declaration-human-rights/
English text from the Universal
Declaration of Human Rights.
Public Domain

# text-demo.png
Text extracted from open source project https://github.com/tesseract-ocr/tesseract.

Expand Down
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,25 @@ MPFGenericJob createPDFJob(const std::string &uri, const std::map<std::string, s
return job;
}

/**
 * Builds an MPF video job for use in the tests.
 *
 * The custom properties are forwarded to setAlgorithmProperties() to populate the job's
 * algorithm properties; the media properties are left default-constructed.
 *
 * @param uri - Path to existing input media.
 * @param start - Video start frame.
 * @param end - Video end frame.
 * @param custom - Custom job algorithm properties.
 *
 * @return - An MPF video job named "OCR_test" with the specified algorithm properties.
 */
MPFVideoJob createVideoJob(const std::string &uri, const int &start, const int &end,
                           const std::map<std::string, std::string> &custom = {}) {
    Properties media_props;
    Properties algo_props;
    setAlgorithmProperties(algo_props, custom);

    std::string test_job_name("OCR_test");
    return MPFVideoJob(test_job_name, uri, start, end, algo_props, media_props);
}

/**
* Helper function for running a given image job. Checks that the job results are not empty.
*
Expand Down Expand Up @@ -136,6 +155,27 @@ void runDocumentDetection(const std::string &doc_path, TesseractOCRTextDetection
ASSERT_FALSE(generic_tracks.empty());
}


/**
* Helper function for running given video job. Checks if job results is not empty.
*
* @param vid_path - Path of given video.
* @param ocr - TesseractOCRTextDetection component for running given job.
* @param video_tracks - Output vector of video detection tracks for given job.
* @param start - Video start frame.
* @param end - Video end frame.
* @param custom - Mapping of input job properties.
*/
void runVideoDetection(const std::string &vid_path, TesseractOCRTextDetection &ocr,
std::vector<MPFVideoTrack> &video_tracks,
const int &start, const int &end,
const std::map<std::string, std::string> &custom = {}) {
MPFVideoJob job = createVideoJob(vid_path, start, end, custom);
video_tracks = ocr.GetDetections(job);
ASSERT_FALSE(video_tracks.empty());
}


/**
* Helper function for checking if running given image job will return no results.
*
Expand Down Expand Up @@ -485,6 +525,40 @@ TEST(TESSERACTOCR, CustomModelTest) {
ASSERT_TRUE(ocr.Close());
}

/**
 * Ensures video processing works as expected.
 *
 * Runs an English job with OSD automation enabled over frames 0-2 of the test video,
 * flattens every frame location from every returned track into a single vector, and
 * then checks the expected detection properties at indices 0, 1 and 2 of that vector.
 */
TEST(TESSERACTOCR, VideoProcessingTest) {

    TesseractOCRTextDetection ocr;
    ocr.SetRunDirectory("../plugin");
    std::vector<MPFVideoTrack> track_results;
    std::vector<MPFImageLocation> results;
    ASSERT_TRUE(ocr.Init());

    std::map<std::string,std::string> custom_properties = {{"TESSERACT_LANGUAGE", "eng"},
                                                           {"ENABLE_OSD_AUTOMATION","TRUE"}};

    ASSERT_NO_FATAL_FAILURE(runVideoDetection("data/test-video-detection.avi", ocr, track_results, 0, 2, custom_properties));

    // Collect the image locations of all tracks in iteration order.
    // Iterate by const reference so tracks and map entries are not copied on every pass;
    // only the location pushed into `results` is copied.
    for (const auto &track_result : track_results) {
        for (const auto &frame_entry : track_result.frame_locations) {
            results.push_back(frame_entry.second);
        }
    }

    // First collected location: English text.
    assertInImage("data/test-video-detection.avi", "Testing Text Detection", results, "TEXT", 0);
    assertInImage("data/test-video-detection.avi", "eng", results, "TEXT_LANGUAGE", 0);

    // Second collected location: Japanese script detected by OSD, reported as a missing language model.
    assertInImage("data/test-video-detection.avi", "Japanese", results, "OSD_PRIMARY_SCRIPT", 1);
    assertInImage("data/test-video-detection.avi", "Japanese", results, "MISSING_LANGUAGE_MODELS", 1);

    // Third collected location: English text reported under the Latin script.
    assertInImage("data/test-video-detection.avi", "All human beings", results, "TEXT", 2);
    assertInImage("data/test-video-detection.avi", "Latin", results, "TEXT_LANGUAGE", 2);

    ASSERT_TRUE(ocr.Close());
}

TEST(TESSERACTOCR, ImageProcessingTest) {

// Ensure contrast and unstructured image processing settings are enabled.
Expand Down Expand Up @@ -546,8 +620,6 @@ TEST(TESSERACTOCR, ImageProcessingTest) {
ASSERT_TRUE(ocr.Close());
}



TEST(TESSERACTOCR, ModelTest) {

// Ensure user can specify custom model directory locations.
Expand Down
28 changes: 22 additions & 6 deletions cpp/TrtisDetection/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,16 @@ set(PACKAGE_DIR ${CMAKE_CURRENT_BINARY_DIR}/plugin/${PROJECT_NAME})
message("Package in ${PACKAGE_DIR}")

find_package(OpenCV 4.5.0 EXACT REQUIRED PATHS /opt/opencv-4.5.0
COMPONENTS opencv_core)
COMPONENTS opencv_core)
find_package(mpfComponentInterface REQUIRED)
find_package(mpfDetectionComponentApi REQUIRED)
find_package(mpfComponentUtils REQUIRED)
find_package(Qt4 REQUIRED)
find_package(request REQUIRED)

set(CMAKE_PREFIX_PATH /opt/triton)
find_package(CURL REQUIRED)
find_package(TritonCommon PATH_SUFFIXES 64 REQUIRED)
find_package(TRITON REQUIRED)

set(BUILD_SHARED_LIBS ON) # make AWS use shared linking
find_package(AWSSDK REQUIRED COMPONENTS core s3)
Expand All @@ -57,13 +60,26 @@ set(TRTIS_DETECTION_SOURCE_FILES
S3FeatureStorage.cpp S3FeatureStorage.h S3StorageUtil.cpp S3StorageUtil.h base64.h uri.h)

add_library(mpfTrtisDetection SHARED ${TRTIS_DETECTION_SOURCE_FILES})
target_link_libraries(mpfTrtisDetection request mpfComponentInterface mpfDetectionComponentApi mpfComponentUtils
${OpenCV_LIBS} ${PROTOBUF_LIBRARY} ${CURL_LIBRARIES} ${AWSSDK_LINK_LIBRARIES})
target_link_libraries(mpfTrtisDetection
mpfComponentInterface
mpfDetectionComponentApi
mpfComponentUtils
TRITON::grpcclient_static
${OpenCV_LIBS}
${PROTOBUF_LIBRARY}
${CURL_LIBRARIES}
${AWSSDK_LINK_LIBRARIES})
# add separate dynamic libcurl for aws
target_link_libraries(mpfTrtisDetection /usr/local/lib64/libcurl.so)


configure_mpf_component(TrtisDetection TARGETS mpfTrtisDetection)

add_subdirectory(test)

# Build sample executable
add_executable(sample_trtis_detector sample_trtis_detector.cpp)
target_link_libraries(sample_trtis_detector mpfTrtisDetection)
include_directories(/home/mpf/mpf-sdk-install/include /opt/triton/include)
add_executable(sample_trtis_detector
sample_trtis_detector.cpp)
target_link_libraries(sample_trtis_detector
mpfTrtisDetection)
Loading