Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,14 @@ jobs:
- name: Copy files to directory for installer
run: mkdir installer; cp ./x64/Release-Full/ccextractorwinfull.exe ./installer; cp ./x64/Release-Full/*.dll ./installer
working-directory: ./windows
# Fetch the Tesseract traineddata files (eng, osd) into ./windows/installer/tessdata,
# which is where windows/installer.wxs reads them from when packaging.
# NOTE(review): both URLs follow the tessdata_fast `main` branch and the downloads are
# not checksum-verified; consider pinning to a tag or commit. TODO confirm.
- name: Download tessdata for OCR support
run: |
mkdir -p ./installer/tessdata
# Download English traineddata from tessdata_fast (smaller, faster, good for most use cases)
Invoke-WebRequest -Uri "https://github.com/tesseract-ocr/tessdata_fast/raw/main/eng.traineddata" -OutFile "./installer/tessdata/eng.traineddata"
# Download OSD (Orientation and Script Detection) for automatic script detection
Invoke-WebRequest -Uri "https://github.com/tesseract-ocr/tessdata_fast/raw/main/osd.traineddata" -OutFile "./installer/tessdata/osd.traineddata"
working-directory: ./windows
- name: install WiX
run: dotnet tool uninstall --global wix; dotnet tool install --global wix --version 6.0.2 && wix extension add -g WixToolset.UI.wixext/6.0.2
- name: Make sure WiX works
Expand Down
74 changes: 72 additions & 2 deletions src/lib_ccx/ocr.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@
#include <dirent.h>
#include "ccx_encoders_helpers.h"
#include "ccx_encoders_spupng.h"
#ifdef _WIN32
#include <windows.h>
#elif defined(__APPLE__)
#include <mach-o/dyld.h>
#endif
#include "ocr.h"

struct ocrCtx
Expand Down Expand Up @@ -100,22 +105,87 @@ void delete_ocr(void **arg)
freep(arg);
}

/**
 * get_executable_directory
 *
 * Returns the directory that contains the running executable, with its
 * trailing path separator kept (for example "/opt/ccextractor/" or
 * "C:\Program Files\CCExtractor\").
 *
 * Keeping the separator gives the result the same shape as the literal search
 * roots used by probe_tessdata_location ("./", "/usr/share/", ...). It also
 * means an executable located directly in a filesystem root yields "/" or
 * "C:\" instead of an empty string or the drive-relative "C:".
 *
 * The lookup runs at most once. Its outcome, including failure, is cached in a
 * static buffer and handed back on every later call. The caching is not
 * synchronized.
 *
 * Returns a pointer to that static buffer (never to be freed by the caller),
 * or NULL when the path cannot be determined, may have been truncated, or the
 * platform is not one of Windows, Linux or macOS.
 */
static const char *get_executable_directory(void)
{
	static char exe_dir[1024] = {0};
	static int initialized = 0;

	// An empty buffer after initialization means the lookup failed earlier.
	if (initialized)
		return exe_dir[0] ? exe_dir : NULL;

	initialized = 1;

#if defined(_WIN32) || defined(__linux__) || defined(__APPLE__)
#ifdef _WIN32
	const char sep = '\\';
	char exe_path[MAX_PATH];
	DWORD len = GetModuleFileNameA(NULL, exe_path, MAX_PATH);
	// 0 signals failure; a length that fills the buffer means truncation.
	if (len == 0 || len >= MAX_PATH)
		return NULL;
#elif defined(__linux__)
	const char sep = '/';
	char exe_path[1024];
	ssize_t len = readlink("/proc/self/exe", exe_path, sizeof(exe_path) - 1);
	// readlink() truncates silently, so a completely filled buffer cannot be
	// trusted to hold the whole path.
	if (len <= 0 || (size_t)len >= sizeof(exe_path) - 1)
		return NULL;
	exe_path[len] = '\0'; // readlink() does not NUL-terminate its output
#else /* __APPLE__ */
	const char sep = '/';
	char exe_path[1024];
	uint32_t size = sizeof(exe_path);
	if (_NSGetExecutablePath(exe_path, &size) != 0)
		return NULL;
#endif

	// Cut right AFTER the last separator so the directory keeps it.
	char *last_sep = strrchr(exe_path, sep);
	if (last_sep)
	{
		last_sep[1] = '\0';
		size_t dir_len = (size_t)(last_sep - exe_path) + 1;
		// Publish the result only if it fits, terminator included.
		if (dir_len < sizeof(exe_dir))
			memcpy(exe_dir, exe_path, dir_len + 1);
	}
#endif

	return exe_dir[0] ? exe_dir : NULL;
}

/**
* probe_tessdata_location
*
* This function probes for the Tesseract data location
*
* Priority of Tesseract traineddata file search paths:
* 1. tessdata in TESSDATA_PREFIX, if it is specified. Overrides others
* 2. tessdata in executable directory (for bundled tessdata)
* 3. tessdata in current working directory
* 4. tessdata in system locations (/usr/share, etc.)
* 5. tessdata in default Tesseract install location (Windows)
*/
char *probe_tessdata_location(const char *lang)
{
int ret = 0;

const char *paths[] = {
getenv("TESSDATA_PREFIX"),
get_executable_directory(),
"./",
"/usr/share/",
"/usr/local/share/",
Expand Down
11 changes: 11 additions & 0 deletions windows/installer.wxs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
<MediaTemplate EmbedCab="yes"/>
<Feature Id="CCX" Title="CCExtractor Setup" Level="1">
<ComponentGroupRef Id="CCX_Components_MainFolder"/>
<ComponentGroupRef Id="CCX_Components_tessdata"/>
<ComponentGroupRef Id="CCX_Components_MainFolder_data"/>
<ComponentGroupRef Id="CCX_Components_MainFolder_data_flutter_assets"/>
<ComponentGroupRef Id="CCX_Components_MainFolder_data_flutter_assets_assets"/>
Expand Down Expand Up @@ -40,6 +41,7 @@
</StandardDirectory>
<StandardDirectory Id="ProgramFiles6432Folder">
<Directory Id="INSTALLFOLDER" Name="CCExtractor">
<Directory Id="CCX_tessdata" Name="tessdata"/>
<Directory Id="CCX_data" Name="data">
<Directory Id="CCX_data_flutter_assets" Name="flutter_assets">
<Directory Id="CCX_data_flutter_assets_assets" Name="assets"/>
Expand Down Expand Up @@ -194,6 +196,15 @@
<File Id="filWmUnmaximize" KeyPath="yes" Source="./installer/data/flutter_assets/packages/window_manager/images/ic_chrome_unmaximize.png"/>
</Component>
</ComponentGroup>
<!-- Tesseract OCR data files for HardSubx feature.
     Each file is its own component and is installed into the CCX_tessdata
     directory (the "tessdata" folder under INSTALLFOLDER). The sources are
     expected under ./installer/tessdata at packaging time.
     NOTE(review): the component GUIDs below look hand-written rather than
     generated; confirm they are unique before relying on them. -->
<ComponentGroup Id="CCX_Components_tessdata" Directory="CCX_tessdata">
<Component Id="cmpTessdataEng" Guid="{A1234567-8901-2345-6789-0123456789AB}">
<File Id="filEngTraineddata" KeyPath="yes" Source="./installer/tessdata/eng.traineddata"/>
</Component>
<Component Id="cmpTessdataOsd" Guid="{B2345678-9012-3456-7890-123456789ABC}">
<File Id="filOsdTraineddata" KeyPath="yes" Source="./installer/tessdata/osd.traineddata"/>
</Component>
</ComponentGroup>
</Fragment>
</Wix>

Loading