Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 7 additions & 12 deletions src/ccstruct/imagedata.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@

#include <cinttypes> // for PRId64
#include <fstream> // for std::ifstream
#include <thread> // for std::this_thread

namespace tesseract {

Expand Down Expand Up @@ -389,9 +390,6 @@ DocumentData::DocumentData(const std::string &name)
reader_(nullptr) {}

DocumentData::~DocumentData() {
if (thread.joinable()) {
thread.join();
}
std::lock_guard<std::mutex> lock_p(pages_mutex_);
std::lock_guard<std::mutex> lock_g(general_mutex_);
for (auto data : pages_) {
Expand Down Expand Up @@ -438,9 +436,8 @@ void DocumentData::AddPageToDocument(ImageData *page) {
set_memory_used(memory_used() + page->MemoryUsed());
}

// If the given index is not currently loaded, loads it using a separate
// thread.
void DocumentData::LoadPageInBackground(int index) {
// If the given index is not currently loaded, loads it.
void DocumentData::LoadPage(const int index) {
ImageData *page = nullptr;
if (IsPageAvailable(index, &page)) {
return;
Expand All @@ -456,9 +453,7 @@ void DocumentData::LoadPageInBackground(int index) {
}
pages_.clear();
}
if (thread.joinable()) {
thread.join();
}

// Don't run next statement asynchronously because that would
// create too many threads on Linux (see issue #3111).
ReCachePages();
Expand All @@ -474,7 +469,7 @@ const ImageData *DocumentData::GetPage(int index) {
bool needs_loading = pages_offset_ != index;
pages_mutex_.unlock();
if (needs_loading) {
LoadPageInBackground(index);
LoadPage(index);
}
// We can't directly load the page, or the background load will delete it
// while the caller is using it, so give it a chance to work.
Expand Down Expand Up @@ -713,7 +708,7 @@ const ImageData *DocumentCache::GetPageRoundRobin(int serial) {
for (int offset = 1; offset <= kMaxReadAhead && offset < num_docs; ++offset) {
doc_index = (serial + offset) % num_docs;
int page = (serial + offset) / num_docs;
documents_[doc_index]->LoadPageInBackground(page);
documents_[doc_index]->LoadPage(page);
}
return doc;
}
Expand Down Expand Up @@ -770,7 +765,7 @@ const ImageData *DocumentCache::GetPageSequential(int serial) {
}
int next_index = (doc_index + 1) % num_docs;
if (!documents_[next_index]->IsCached() && total_memory < max_memory_) {
documents_[next_index]->LoadPageInBackground(0);
documents_[next_index]->LoadPage(0);
}
return doc;
}
Expand Down
30 changes: 14 additions & 16 deletions src/ccstruct/imagedata.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
#include "points.h" // for FCOORD

#include <mutex> // for std::mutex
#include <thread> // for std::thread

struct Pix;

Expand Down Expand Up @@ -202,18 +201,20 @@ class DocumentData {
std::lock_guard<std::mutex> lock(general_mutex_);
return memory_used_;
}
// If the given index is not currently loaded, loads it using a separate
// thread. Note: there are 4 cases:
// Document uncached: IsCached() returns false, total_pages_ < 0.
// Required page is available: IsPageAvailable returns true. In this case,
// total_pages_ > 0 and
// pages_offset_ <= index%total_pages_ <= pages_offset_+pages_.size()
// Pages are loaded, but the required one is not.
// The requested page is being loaded by LoadPageInBackground. In this case,
// index == pages_offset_. Once the loading starts, the pages lock is held
// until it completes, at which point IsPageAvailable will unblock and return
// true.
void LoadPageInBackground(int index);

// If the given index is not currently loaded, loads it.
// Note: there are 4 cases:
// - Document uncached: IsCached() returns false, total_pages_ < 0.
// - Required page is available: IsPageAvailable returns true. In this case,
// total_pages_ > 0 and
// pages_offset_ <= index%total_pages_ < pages_offset_+pages_.size()
// - Pages are loaded, but the required one is not.
// - The requested page is being loaded by LoadPage. In this case,
// index == pages_offset_. Once the loading starts, the pages lock is
// held until it completes, at which point IsPageAvailable will unblock
// and return true.
void LoadPage(const int index);

// Returns a pointer to the page with the given index, modulo the total
// number of pages. Blocks until the page load is completed.
TESS_API
Expand Down Expand Up @@ -275,9 +276,6 @@ class DocumentData {
// Mutex that protects other data members that callers want to access without
// waiting for a load operation.
mutable std::mutex general_mutex_;

// Thread which loads document.
std::thread thread;
};

// A collection of DocumentData that knows roughly how much memory it is using.
Expand Down
Loading