diff --git a/.github/workflows/build_mac.yml b/.github/workflows/build_mac.yml index c81af5327..c654aceac 100644 --- a/.github/workflows/build_mac.yml +++ b/.github/workflows/build_mac.yml @@ -74,6 +74,22 @@ jobs: working-directory: build - name: Display version information run: ./build/ccextractor --version + cmake_ocr_hardsubx: + runs-on: macos-latest + steps: + - uses: actions/checkout@v4 + - name: Install dependencies + run: brew install pkg-config autoconf automake libtool tesseract leptonica gpac ffmpeg + - name: cmake + run: | + mkdir build && cd build + cmake -DWITH_OCR=ON -DWITH_HARDSUBX=ON ../src + - name: build + run: | + make -j$(nproc) + working-directory: build + - name: Display version information + run: ./build/ccextractor --version build_rust: runs-on: macos-latest steps: diff --git a/docs/COMPILATION.MD b/docs/COMPILATION.MD index 67441040b..c51781fe1 100644 --- a/docs/COMPILATION.MD +++ b/docs/COMPILATION.MD @@ -10,6 +10,16 @@ Clone the latest repository from Github git clone https://github.com/CCExtractor/ccextractor.git ``` +### Hardsubx (Burned-in Subtitles) and FFmpeg Versions + +CCExtractor's hardsubx feature extracts burned-in subtitles from videos using OCR. It requires FFmpeg libraries. The build system automatically selects appropriate FFmpeg versions for each platform: + +- **Linux**: FFmpeg 6.x (default) +- **Windows**: FFmpeg 7.x (default) +- **macOS**: FFmpeg 8.x (default) + +You can override the default by setting the `FFMPEG_VERSION` environment variable to `ffmpeg6`, `ffmpeg7`, or `ffmpeg8` before building. This flexibility ensures compatibility with different FFmpeg installations across platforms. + ## Docker You can now use docker image to build latest source of CCExtractor without any environmental hustle. Follow these [instructions](https://github.com/CCExtractor/ccextractor/tree/master/docker/README.md) for building docker image & usage of it. @@ -62,12 +72,22 @@ cd ccextractor/linux # compile with debug info ./build -debug # same as ./builddebug -# compile with hardsubx -[Optional] You need to set these environment variables correctly according to your machine, - FFMPEG_INCLUDE_DIR=/usr/include - FFMPEG_PKG_CONFIG_PATH=/usr/lib/pkgconfig +# compile with hardsubx (burned-in subtitle extraction) +# Hardsubx requires FFmpeg libraries. Different FFmpeg versions are used by default: +# - Linux: FFmpeg 6.x (automatic) +# - Windows: FFmpeg 7.x (automatic) +# - macOS: FFmpeg 8.x (automatic) + +./build -hardsubx # uses platform-specific FFmpeg version -./build -hardsubx # same as ./build_hardsubx +# To override the default FFmpeg version, set FFMPEG_VERSION: +FFMPEG_VERSION=ffmpeg8 ./build -hardsubx # force FFmpeg 8 on any platform +FFMPEG_VERSION=ffmpeg6 ./build -hardsubx # force FFmpeg 6 on any platform +FFMPEG_VERSION=ffmpeg7 ./build -hardsubx # force FFmpeg 7 on any platform + +# [Optional] For custom FFmpeg installations, set these environment variables: +FFMPEG_INCLUDE_DIR=/usr/include +FFMPEG_PKG_CONFIG_PATH=/usr/lib/pkgconfig # compile in debug mode without rust ./build -debug -without-rust @@ -113,9 +133,15 @@ sudo make install `cmake` also accepts the options: `-DWITH_OCR=ON` to enable OCR - `-DWITH_HARDSUBX=ON` to enable burned-in subtitles + `-DWITH_HARDSUBX=ON` to enable burned-in subtitles (requires FFmpeg) -([OPTIONAL] For hardsubx, you also need to set these environment variables correctly according to your machine) +For hardsubx with specific FFmpeg versions: + Set `FFMPEG_VERSION=ffmpeg6` for FFmpeg 6.x + Set `FFMPEG_VERSION=ffmpeg7` for FFmpeg 7.x + Set `FFMPEG_VERSION=ffmpeg8` for FFmpeg 8.x + (Defaults: Linux=FFmpeg 6, Windows=FFmpeg 7, macOS=FFmpeg 8) + +([OPTIONAL] For custom FFmpeg installations, set these environment variables) FFMPEG_INCLUDE_DIR=/usr/include FFMPEG_PKG_CONFIG_PATH=/usr/lib/pkgconfig @@ -136,6 +162,8 @@ brew install cmake gpac # optional if you want OCR: brew install tesseract brew install leptonica +# optional if you want hardsubx (burned-in subtitle extraction): +brew install ffmpeg ``` If configuring OCR, use pkg-config to verify tesseract and leptonica dependencies, e.g. @@ -151,7 +179,12 @@ pkg-config --exists --print-errors lept ```bash cd ccextractor/mac -./build.command # OR ./build.command OCR +./build.command # basic build +./build.command -ocr # build with OCR support +./build.command -hardsubx # build with hardsubx (uses FFmpeg 8 by default on macOS) + +# Override FFmpeg version if needed: +FFMPEG_VERSION=ffmpeg7 ./build.command -hardsubx # test your build ./ccextractor @@ -220,6 +253,12 @@ Other dependencies are required through vcpkg, so you can follow below steps: ``` vcpkg install ffmpeg leptonica tesseract --triplet x64-windows-static ``` + Note: Windows builds use FFmpeg 7 by default. To override: + ``` + set FFMPEG_VERSION=ffmpeg8 + msbuild ccextractor.sln /p:Configuration=Debug-Full /p:Platform=x64 + ``` + otherwise if you have Debug, Release ``` vcpkg install libpng --triplet x64-windows-static diff --git a/linux/build b/linux/build index 022572952..51fb7932f 100755 --- a/linux/build +++ b/linux/build @@ -13,7 +13,12 @@ while [[ $# -gt 0 ]]; do ;; -hardsubx) HARDSUBX=true - RUST_FEATURES="--features hardsubx_ocr" + # Allow overriding FFmpeg version via environment variable + if [ -n "$FFMPEG_VERSION" ]; then + RUST_FEATURES="--features hardsubx_ocr,$FFMPEG_VERSION" + else + RUST_FEATURES="--features hardsubx_ocr" + fi BLD_FLAGS="$BLD_FLAGS -DENABLE_HARDSUBX" BLD_LINKER="$BLD_LINKER -lswscale -lavutil -pthread -lavformat -lavcodec -lavfilter -lxcb-shm -lxcb -lX11 -llzma -lswresample" shift diff --git a/mac/build.command b/mac/build.command index 77013a5e6..e0aab469a 100755 --- a/mac/build.command +++ b/mac/build.command @@ -20,7 +20,13 @@ while [[ $# -gt 0 ]]; do ;; -hardsubx) HARDSUBX=true - RUST_FEATURES="--features hardsubx_ocr" + ENABLE_OCR=true + # Allow overriding FFmpeg version via environment variable + if [ -n "$FFMPEG_VERSION" ]; then + RUST_FEATURES="--features hardsubx_ocr,$FFMPEG_VERSION" + else + RUST_FEATURES="--features hardsubx_ocr" + fi shift ;; -*) @@ -49,6 +55,53 @@ fi BLD_INCLUDE="-I../src/ -I../src/lib_ccx -I../src/lib_hash -I../src/thirdparty/libpng -I../src/thirdparty -I../src/thirdparty/zlib -I../src/thirdparty/freetype/include `pkg-config --cflags --silence-errors gpac`" +# Add FFmpeg include path for Mac +if [[ -d "/opt/homebrew/Cellar/ffmpeg" ]]; then + FFMPEG_VERSION=$(ls -1 /opt/homebrew/Cellar/ffmpeg | head -1) + if [[ -n "$FFMPEG_VERSION" ]]; then + BLD_INCLUDE="$BLD_INCLUDE -I/opt/homebrew/Cellar/ffmpeg/$FFMPEG_VERSION/include" + fi +elif [[ -d "/usr/local/Cellar/ffmpeg" ]]; then + FFMPEG_VERSION=$(ls -1 /usr/local/Cellar/ffmpeg | head -1) + if [[ -n "$FFMPEG_VERSION" ]]; then + BLD_INCLUDE="$BLD_INCLUDE -I/usr/local/Cellar/ffmpeg/$FFMPEG_VERSION/include" + fi +fi + +# Add Leptonica include path for Mac +if [[ -d "/opt/homebrew/Cellar/leptonica" ]]; then + LEPT_VERSION=$(ls -1 /opt/homebrew/Cellar/leptonica | head -1) + if [[ -n "$LEPT_VERSION" ]]; then + BLD_INCLUDE="$BLD_INCLUDE -I/opt/homebrew/Cellar/leptonica/$LEPT_VERSION/include" + fi +elif [[ -d "/usr/local/Cellar/leptonica" ]]; then + LEPT_VERSION=$(ls -1 /usr/local/Cellar/leptonica | head -1) + if [[ -n "$LEPT_VERSION" ]]; then + BLD_INCLUDE="$BLD_INCLUDE -I/usr/local/Cellar/leptonica/$LEPT_VERSION/include" + fi +elif [[ -d "/opt/homebrew/include/leptonica" ]]; then + BLD_INCLUDE="$BLD_INCLUDE -I/opt/homebrew/include" +elif [[ -d "/usr/local/include/leptonica" ]]; then + BLD_INCLUDE="$BLD_INCLUDE -I/usr/local/include" +fi + +# Add Tesseract include path for Mac +if [[ -d "/opt/homebrew/Cellar/tesseract" ]]; then + TESS_VERSION=$(ls -1 /opt/homebrew/Cellar/tesseract | head -1) + if [[ -n "$TESS_VERSION" ]]; then + BLD_INCLUDE="$BLD_INCLUDE -I/opt/homebrew/Cellar/tesseract/$TESS_VERSION/include" + fi +elif [[ -d "/usr/local/Cellar/tesseract" ]]; then + TESS_VERSION=$(ls -1 /usr/local/Cellar/tesseract | head -1) + if [[ -n "$TESS_VERSION" ]]; then + BLD_INCLUDE="$BLD_INCLUDE -I/usr/local/Cellar/tesseract/$TESS_VERSION/include" + fi +elif [[ -d "/opt/homebrew/include/tesseract" ]]; then + BLD_INCLUDE="$BLD_INCLUDE -I/opt/homebrew/include" +elif [[ -d "/usr/local/include/tesseract" ]]; then + BLD_INCLUDE="$BLD_INCLUDE -I/usr/local/include" +fi + if [[ "$ENABLE_OCR" == "true" ]]; then BLD_INCLUDE="$BLD_INCLUDE `pkg-config --cflags --silence-errors tesseract`" fi @@ -109,7 +162,42 @@ if [[ "$ENABLE_OCR" == "true" ]]; then fi if [[ "$HARDSUBX" == "true" ]]; then - BLD_LINKER="$BLD_LINKER -lswscale -lavutil -pthread -lavformat -lavcodec -lavfilter" + # Add FFmpeg library path for Mac + if [[ -d "/opt/homebrew/Cellar/ffmpeg" ]]; then + FFMPEG_VERSION=$(ls -1 /opt/homebrew/Cellar/ffmpeg | head -1) + if [[ -n "$FFMPEG_VERSION" ]]; then + BLD_LINKER="$BLD_LINKER -L/opt/homebrew/Cellar/ffmpeg/$FFMPEG_VERSION/lib" + fi + elif [[ -d "/usr/local/Cellar/ffmpeg" ]]; then + FFMPEG_VERSION=$(ls -1 /usr/local/Cellar/ffmpeg | head -1) + if [[ -n "$FFMPEG_VERSION" ]]; then + BLD_LINKER="$BLD_LINKER -L/usr/local/Cellar/ffmpeg/$FFMPEG_VERSION/lib" + fi + fi + + # Add library paths for Leptonica and Tesseract from Cellar + if [[ -d "/opt/homebrew/Cellar/leptonica" ]]; then + LEPT_VERSION=$(ls -1 /opt/homebrew/Cellar/leptonica | head -1) + if [[ -n "$LEPT_VERSION" ]]; then + BLD_LINKER="$BLD_LINKER -L/opt/homebrew/Cellar/leptonica/$LEPT_VERSION/lib" + fi + fi + + if [[ -d "/opt/homebrew/Cellar/tesseract" ]]; then + TESS_VERSION=$(ls -1 /opt/homebrew/Cellar/tesseract | head -1) + if [[ -n "$TESS_VERSION" ]]; then + BLD_LINKER="$BLD_LINKER -L/opt/homebrew/Cellar/tesseract/$TESS_VERSION/lib" + fi + fi + + # Also add homebrew lib path as fallback + if [[ -d "/opt/homebrew/lib" ]]; then + BLD_LINKER="$BLD_LINKER -L/opt/homebrew/lib" + elif [[ -d "/usr/local/lib" ]]; then + BLD_LINKER="$BLD_LINKER -L/usr/local/lib" + fi + + BLD_LINKER="$BLD_LINKER -lswscale -lavutil -pthread -lavformat -lavcodec -lavfilter -lleptonica -ltesseract" fi echo "Running pre-build script..." diff --git a/src/lib_ccx/ocr.c b/src/lib_ccx/ocr.c index 22c1716e2..263932cf9 100644 --- a/src/lib_ccx/ocr.c +++ b/src/lib_ccx/ocr.c @@ -103,6 +103,7 @@ char *probe_tessdata_location(const char *lang) "./", "/usr/share/", "/usr/local/share/", + "/opt/homebrew/share/", "/usr/share/tesseract-ocr/", "/usr/share/tesseract-ocr/4.00/", "/usr/share/tesseract-ocr/5/", diff --git a/src/rust/Cargo.lock b/src/rust/Cargo.lock index 57ea477ab..659ebfc8e 100644 --- a/src/rust/Cargo.lock +++ b/src/rust/Cargo.lock @@ -13,9 +13,9 @@ dependencies = [ [[package]] name = "anstream" -version = "0.6.19" +version = "0.6.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "301af1932e46185686725e0fad2f8f2aa7da69dd70bf6ecc44d6b703844a3933" +checksum = "3ae563653d1938f79b1ab1b5e668c87c76a9930414574a6583a7b7e11a8e6192" dependencies = [ "anstyle", "anstyle-parse", @@ -43,22 +43,22 @@ dependencies = [ [[package]] name = "anstyle-query" -version = "1.1.3" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c8bdeb6047d8983be085bab0ba1472e6dc604e7041dbf6fcd5e71523014fae9" +checksum = "9e231f6134f61b71076a3eab506c379d4f36122f2af15a9ff04415ea4c3339e2" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.60.2", ] [[package]] name = "anstyle-wincon" -version = "3.0.9" +version = "3.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "403f75924867bb1033c59fbf0797484329750cfbe3c4325cd33127941fabc882" +checksum = "3e0633414522a32ffaac8ac6cc8f748e090c5717661fddeea04219e2344f5f2a" dependencies = [ "anstyle", "once_cell_polyfill", - "windows-sys 0.59.0", + "windows-sys 0.60.2", ] [[package]] @@ -103,7 +103,7 @@ dependencies = [ "proc-macro2", "quote", "regex", - "rustc-hash", + "rustc-hash 1.1.0", "shlex", "syn 1.0.109", "which", @@ -115,10 +115,10 @@ version = "0.69.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088" dependencies = [ - "bitflags 2.9.1", + "bitflags 2.9.4", "cexpr", "clang-sys", - "itertools", + "itertools 0.12.1", "lazy_static", "lazycell", "log", @@ -126,12 +126,32 @@ dependencies = [ "proc-macro2", "quote", "regex", - "rustc-hash", + "rustc-hash 1.1.0", "shlex", - "syn 2.0.104", + "syn 2.0.106", "which", ] +[[package]] +name = "bindgen" +version = "0.71.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f58bf3d7db68cfbac37cfc485a8d711e87e064c3d0fe0435b92f7a407f9d6b3" +dependencies = [ + "bitflags 2.9.4", + "cexpr", + "clang-sys", + "itertools 0.13.0", + "log", + "prettyplease", + "proc-macro2", + "quote", + "regex", + "rustc-hash 2.1.1", + "shlex", + "syn 2.0.106", +] + [[package]] name = "bitflags" version = "1.3.2" @@ -140,15 +160,40 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.9.1" +version = "2.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394" + +[[package]] +name = "bon" +version = "3.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" +checksum = "c2529c31017402be841eb45892278a6c21a000c0a17643af326c73a73f83f0fb" +dependencies = [ + "bon-macros", + "rustversion", +] + +[[package]] +name = "bon-macros" +version = "3.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d82020dadcb845a345591863adb65d74fa8dc5c18a0b6d408470e13b7adc7005" +dependencies = [ + "darling", + "ident_case", + "prettyplease", + "proc-macro2", + "quote", + "rustversion", + "syn 2.0.106", +] [[package]] name = "camino" -version = "1.1.10" +version = "1.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0da45bc31171d8d6960122e222a67740df867c1dd53b4d51caa297084c185cab" +checksum = "dd0b03af37dad7a14518b7691d81acb0f8222604ad3d1b02f6b4bed5188c0cd5" [[package]] name = "ccx_rust" @@ -165,7 +210,8 @@ dependencies = [ "num-integer", "palette", "pkg-config", - "rsmpeg", + "rsmpeg 0.14.2+ffmpeg.6.1", + "rsmpeg 0.18.0+ffmpeg.8.0", "strum 0.25.0", "strum_macros 0.25.3", "tesseract-sys", @@ -184,9 +230,9 @@ dependencies = [ [[package]] name = "cfg-if" -version = "1.0.1" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" +checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" [[package]] name = "clang-sys" @@ -201,9 +247,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.42" +version = "4.5.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed87a9d530bb41a67537289bafcac159cb3ee28460e0a4571123d2a778a6a882" +checksum = "7eac00902d9d136acd712710d71823fb8ac8004ca445a89e73a41d45aa712931" dependencies = [ "clap_builder", "clap_derive", @@ -211,9 +257,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.42" +version = "4.5.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64f4f3f3c77c94aff3c7e9aac9a2ca1974a5adf392a8bb751e827d6d127ab966" +checksum = "2ad9bbf750e73b5884fb8a211a9424a1906c1e156724260fdae972f31d70e1d6" dependencies = [ "anstream", "anstyle", @@ -223,14 +269,14 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.5.41" +version = "4.5.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef4f52386a59ca4c860f7393bcf8abd8dfd91ecccc0f774635ff68e92eeef491" +checksum = "bbfd7eae0b0f1a6e63d4b13c9c478de77c2eb546fba158ad50b4203dc24b9f9c" dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -260,11 +306,46 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "darling" +version = "0.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9cdf337090841a411e2a7f3deb9187445851f91b309c0c0a29e05f74a00a48c0" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1247195ecd7e3c85f83c8d2a366e4210d588e802133e1e355180a9870b517ea4" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn 2.0.106", +] + +[[package]] +name = "darling_macro" +version = "0.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81" +dependencies = [ + "darling_core", + "quote", + "syn 2.0.106", +] + [[package]] name = "deranged" -version = "0.4.0" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c9e6a11ca8224451684bc0d7d5a7adbf8f2fd6887261a1cfc3c0432f9d4068e" +checksum = "d630bccd429a5bb5a64b5e94f693bfc48c9f8566418fda4c494cc94f911f87cc" dependencies = [ "powerfmt", ] @@ -279,7 +360,7 @@ dependencies = [ "proc-macro2", "quote", "rustc_version", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -290,7 +371,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -346,26 +427,32 @@ dependencies = [ "toml", ] +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + [[package]] name = "form_urlencoded" -version = "1.2.1" +version = "1.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" +checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf" dependencies = [ "percent-encoding", ] [[package]] name = "glob" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2" +checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" [[package]] name = "hashbrown" -version = "0.15.4" +version = "0.15.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" [[package]] name = "heck" @@ -489,11 +576,17 @@ dependencies = [ "zerovec", ] +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + [[package]] name = "idna" -version = "1.0.3" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" +checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" dependencies = [ "idna_adapter", "smallvec", @@ -512,9 +605,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.10.0" +version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe4cd85333e22411419a0bcae1297d25e58c9443848b11dc6a86fefe8c78a661" +checksum = "f2481980430f9f78649238835720ddccc57e52df14ffce1c6f37391d61b563e9" dependencies = [ "equivalent", "hashbrown", @@ -536,10 +629,13 @@ dependencies = [ ] [[package]] -name = "itoa" -version = "1.0.15" +name = "itertools" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] [[package]] name = "lazy_static" @@ -568,14 +664,14 @@ dependencies = [ name = "lib_ccxr" version = "0.1.0" dependencies = [ - "bitflags 2.9.1", + "bitflags 2.9.4", "crc32fast", "derive_more", "num_enum", "socket2", "strum 0.26.3", "strum_macros 0.26.4", - "thiserror", + "thiserror 1.0.69", "time", "url", ] @@ -610,9 +706,9 @@ checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956" [[package]] name = "log" -version = "0.4.27" +version = "0.4.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" +checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" [[package]] name = "memchr" @@ -679,7 +775,7 @@ dependencies = [ "proc-macro-crate", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -732,9 +828,9 @@ checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" [[package]] name = "percent-encoding" -version = "2.3.1" +version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" +checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" [[package]] name = "phf" @@ -766,7 +862,7 @@ dependencies = [ "phf_shared", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -786,9 +882,9 @@ checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" [[package]] name = "potential_utf" -version = "0.1.2" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5a7c30837279ca13e7c867e9e40053bc68740f988cb07f7ca6df43cc734b585" +checksum = "84df19adbe5b5a0782edcab45899906947ab039ccf4573713735ee7de1e6b08a" dependencies = [ "zerovec", ] @@ -801,12 +897,12 @@ checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" [[package]] name = "prettyplease" -version = "0.2.36" +version = "0.2.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff24dfcda44452b9816fff4cd4227e1bb73ff5a2f1bc1105aa92fb8565ce44d2" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" dependencies = [ "proc-macro2", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -820,9 +916,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.95" +version = "1.0.101" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" +checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" dependencies = [ "unicode-ident", ] @@ -853,9 +949,9 @@ checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" [[package]] name = "regex" -version = "1.11.1" +version = "1.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" +checksum = "23d7fd106d8c02486a8d64e778353d1cffe08ce79ac2e82f540c86d0facf6912" dependencies = [ "aho-corasick", "memchr", @@ -865,9 +961,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.9" +version = "0.4.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" +checksum = "6b9458fa0bfeeac22b5ca447c63aaf45f28439a709ccd244698632f9aa6394d6" dependencies = [ "aho-corasick", "memchr", @@ -876,9 +972,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.8.5" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" +checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001" [[package]] name = "rsmpeg" @@ -888,8 +984,20 @@ checksum = "927012cd6ae43519f519741f4a69602ce3a47cf84750784da124dffd03527cc0" dependencies = [ "libc", "paste", - "rusty_ffmpeg", - "thiserror", + "rusty_ffmpeg 0.13.3+ffmpeg.6.1", + "thiserror 1.0.69", +] + +[[package]] +name = "rsmpeg" +version = "0.18.0+ffmpeg.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "523351495c9ff0bf4b99ed1f42f1415fc709526ddb63526cff85022b387c5811" +dependencies = [ + "bon", + "paste", + "rusty_ffmpeg 0.16.7+ffmpeg.8", + "thiserror 2.0.16", ] [[package]] @@ -898,6 +1006,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" +[[package]] +name = "rustc-hash" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" + [[package]] name = "rustc_version" version = "0.4.1" @@ -913,7 +1027,7 @@ version = "0.38.44" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" dependencies = [ - "bitflags 2.9.1", + "bitflags 2.9.4", "errno", "libc", "linux-raw-sys", @@ -922,9 +1036,9 @@ dependencies = [ [[package]] name = "rustversion" -version = "1.0.21" +version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" [[package]] name = "rusty_ffmpeg" @@ -940,6 +1054,18 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "rusty_ffmpeg" +version = "0.16.7+ffmpeg.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f25d40a46450059278c9f9f2616018910b647877a66a2093a83f115f59763967" +dependencies = [ + "bindgen 0.71.1", + "camino", + "once_cell", + "pkg-config", +] + [[package]] name = "semver" version = "1.0.26" @@ -963,7 +1089,7 @@ checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -1028,7 +1154,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -1041,7 +1167,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -1057,9 +1183,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.104" +version = "2.0.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40" +checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" dependencies = [ "proc-macro2", "quote", @@ -1074,7 +1200,7 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -1104,7 +1230,16 @@ version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" dependencies = [ - "thiserror-impl", + "thiserror-impl 1.0.69", +] + +[[package]] +name = "thiserror" +version = "2.0.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3467d614147380f2e4e374161426ff399c91084acd2363eaf549172b3d5e60c0" +dependencies = [ + "thiserror-impl 2.0.16", ] [[package]] @@ -1115,17 +1250,27 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c5e1be1c48b9172ee610da68fd9cd2770e7a4056cb3fc98710ee6906f0c7960" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.106", ] [[package]] name = "time" -version = "0.3.41" +version = "0.3.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a7619e19bc266e0f9c5e6686659d394bc57973859340060a69221e57dbc0c40" +checksum = "83bde6f1ec10e72d583d91623c939f623002284ef622b87de38cfd546cbf2031" dependencies = [ "deranged", - "itoa", "num-conv", "powerfmt", "serde", @@ -1135,15 +1280,15 @@ dependencies = [ [[package]] name = "time-core" -version = "0.1.4" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c9e9a38711f559d9e3ce1cdb06dd7c5b8ea546bc90052da6d06bb76da74bb07c" +checksum = "40868e7c1d2f0b8d73e4a8c7f0ff63af4f6d19be117e90bd73eb1d62cf831c6b" [[package]] name = "time-macros" -version = "0.2.22" +version = "0.2.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3526739392ec93fd8b359c8e98514cb3e8e021beb4e5f597b00a0221f8ed8a49" +checksum = "30cfb0125f12d9c277f35663a0a33f8c30190f4e4574868a330595412d34ebf3" dependencies = [ "num-conv", "time-core", @@ -1193,13 +1338,14 @@ checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" [[package]] name = "url" -version = "2.5.4" +version = "2.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60" +checksum = "08bc136a29a3d1758e07a9cca267be308aeebf5cfd5a10f3f67ab2097683ef5b" dependencies = [ "form_urlencoded", "idna", "percent-encoding", + "serde", ] [[package]] @@ -1250,11 +1396,11 @@ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-util" -version = "0.1.9" +version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" +checksum = "0978bf7171b3d90bac376700cb56d606feb40f251a475a5d6634613564460b22" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.60.2", ] [[package]] @@ -1427,9 +1573,9 @@ checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" [[package]] name = "winnow" -version = "0.7.12" +version = "0.7.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3edebf492c8125044983378ecb5766203ad3b4c2f7a922bd7dd207f6d443e95" +checksum = "21a0236b59786fed61e2a80582dd500fe61f18b5dca67a4a067d0bc9039339cf" dependencies = [ "memchr", ] @@ -1460,7 +1606,7 @@ checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", "synstructure", ] @@ -1481,7 +1627,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", "synstructure", ] @@ -1498,9 +1644,9 @@ dependencies = [ [[package]] name = "zerovec" -version = "0.11.3" +version = "0.11.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bdbb9122ea75b11bf96e7492afb723e8a7fbe12c67417aa95e7e3d18144d37cd" +checksum = "e7aa2bd55086f1ab526693ecbe444205da57e25f4489879da80635a46d90e73b" dependencies = [ "yoke", "zerofrom", @@ -1515,5 +1661,5 @@ checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] diff --git a/src/rust/Cargo.toml b/src/rust/Cargo.toml index af2b53740..65ba96216 100644 --- a/src/rust/Cargo.toml +++ b/src/rust/Cargo.toml @@ -14,9 +14,6 @@ crate-type = ["staticlib"] log = "0.4.26" env_logger = "0.8.4" palette = "0.6.1" -rsmpeg = { version = "0.14.2", optional = true, features = [ - "link_system_ffmpeg", -] } tesseract-sys = { version = "0.5.15", optional = true, default-features = false } leptonica-sys = { version = "= 0.4.6", optional = true, default-features = false } clap = { version = "4.5.31", features = ["derive"] } @@ -29,6 +26,17 @@ lib_ccxr = { path = "lib_ccxr" } url = "2.5.4" encoding_rs = "0.8.5" +# Use rsmpeg with platform-specific FFmpeg features and versions +[target.'cfg(target_os = "linux")'.dependencies] +rsmpeg = { version = "0.14.2", default-features = false, features = ["ffmpeg6", "link_system_ffmpeg"], optional = true } + +[target.'cfg(target_os = "windows")'.dependencies] +rsmpeg = { version = "0.14.2", default-features = false, features = ["ffmpeg6", "link_system_ffmpeg"], optional = true } + +# Fallback for other platforms (FreeBSD, etc.) +[target.'cfg(not(any(target_os = "linux", target_os = "windows")))'.dependencies] +rsmpeg = { version = "0.18.0", default-features = false, features = ["ffmpeg8", "link_system_ffmpeg"], optional = true } + [build-dependencies] bindgen = "0.64.0" pkg-config = "0.3.32" @@ -37,6 +45,8 @@ pkg-config = "0.3.32" wtv_debug = [] enable_ffmpeg = [] with_libcurl = [] + +# hardsubx_ocr enables OCR and the platform-appropriate rsmpeg hardsubx_ocr = ["rsmpeg", "tesseract-sys", "leptonica-sys"] [profile.release-with-debug] diff --git a/src/rust/build.rs b/src/rust/build.rs index 2b647e13a..157c351d1 100644 --- a/src/rust/build.rs +++ b/src/rust/build.rs @@ -65,6 +65,39 @@ fn main() { #[cfg(feature = "hardsubx_ocr")] { builder = builder.clang_arg("-DENABLE_HARDSUBX"); + + // Add FFmpeg include paths for Mac + if cfg!(target_os = "macos") { + // Try common Homebrew paths + if std::path::Path::new("/opt/homebrew/include").exists() { + builder = builder.clang_arg("-I/opt/homebrew/include"); + } else if std::path::Path::new("/usr/local/include").exists() { + builder = builder.clang_arg("-I/usr/local/include"); + } + + // Check Homebrew Cellar for FFmpeg + let cellar_ffmpeg = "/opt/homebrew/Cellar/ffmpeg"; + if std::path::Path::new(cellar_ffmpeg).exists() { + // Find the FFmpeg version directory + if let Ok(entries) = std::fs::read_dir(cellar_ffmpeg) { + for entry in entries { + if let Ok(entry) = entry { + let include_path = entry.path().join("include"); + if include_path.exists() { + builder = + builder.clang_arg(format!("-I{}", include_path.display())); + break; + } + } + } + } + } + + // Also check environment variable + if let Ok(ffmpeg_include) = env::var("FFMPEG_INCLUDE_DIR") { + builder = builder.clang_arg(format!("-I{}", ffmpeg_include)); + } + } } // Tell cargo to invalidate the built crate whenever any of the diff --git a/src/rust/src/args.rs b/src/rust/src/args.rs index bdc6bcca5..6da5fc411 100644 --- a/src/rust/src/args.rs +++ b/src/rust/src/args.rs @@ -47,7 +47,7 @@ http://www.ccextractor.org #[command( help_template = "{name} {version}, {author}.\n{about}\n {all-args} {tab}\n An example command for burned-in subtitle extraction is as follows: -ccextractor video.mp4 --hardsubx --subcolor white --detect_italics --whiteness_thresh 90 --conf_thresh 60 +ccextractor video.mp4 --hardsubx --subcolor white --detect-italics --whiteness-thresh 90 --conf-thresh 60 Notes on File name related options: You can pass as many input files as you need. They will be processed in order. @@ -876,7 +876,7 @@ pub struct Args { /// or letter wise. /// e.g. --ocr-mode frame (default), --ocr-mode word, /// --ocr-mode letter - #[arg(long, verbatim_doc_comment, value_name="mode", help_heading=BURNEDIN_SUBTITLE_EXTRACTION)] + #[arg(long = "ocr-mode", verbatim_doc_comment, value_name="mode", help_heading=BURNEDIN_SUBTITLE_EXTRACTION)] pub ocr_mode: Option, /// Specify the color of the subtitles /// Possible values are in the set @@ -893,21 +893,21 @@ pub struct Args { /// A lower value gives better results, but takes more /// processing time. /// The recommended value is 0.5 (default). - /// e.g. --min_sub_duration 1.0 (for a duration of 1 second) - #[arg(long, verbatim_doc_comment, value_name="duration", help_heading=BURNEDIN_SUBTITLE_EXTRACTION)] + /// e.g. --min-sub-duration 1.0 (for a duration of 1 second) + #[arg(long = "min-sub-duration", verbatim_doc_comment, value_name="duration", help_heading=BURNEDIN_SUBTITLE_EXTRACTION)] pub min_sub_duration: Option, /// Specify whether italics are to be detected from the /// OCR text. /// Italic detection automatically enforces the OCR mode /// to be word-wise - #[arg(long, verbatim_doc_comment, help_heading=BURNEDIN_SUBTITLE_EXTRACTION)] + #[arg(long = "detect-italics", verbatim_doc_comment, help_heading=BURNEDIN_SUBTITLE_EXTRACTION)] pub detect_italics: bool, /// Specify the classifier confidence threshold between /// 1 and 100. /// Try and use a threshold which works for you if you get /// a lot of garbage text. - /// e.g. --conf_thresh 50 - #[arg(long, verbatim_doc_comment, help_heading=BURNEDIN_SUBTITLE_EXTRACTION)] + /// e.g. --conf-thresh 50 + #[arg(long = "conf-thresh", verbatim_doc_comment, help_heading=BURNEDIN_SUBTITLE_EXTRACTION)] pub conf_thresh: Option, /// For white subtitles only, specify the luminance /// threshold between 1 and 100 @@ -915,7 +915,7 @@ pub struct Args { /// values may give you better results /// Recommended values are in the range 80 to 100. /// The default value is 95 - #[arg(long, verbatim_doc_comment, value_name="threshold", help_heading=BURNEDIN_SUBTITLE_EXTRACTION)] + #[arg(long = "whiteness-thresh", verbatim_doc_comment, value_name="threshold", help_heading=BURNEDIN_SUBTITLE_EXTRACTION)] pub whiteness_thresh: Option, /// This option will be used if the file should have both /// closed captions and burned in subtitles diff --git a/src/rust/src/avc/nal.rs b/src/rust/src/avc/nal.rs index 36517190e..612990ce9 100644 --- a/src/rust/src/avc/nal.rs +++ b/src/rust/src/avc/nal.rs @@ -596,7 +596,7 @@ pub unsafe fn slice_header( pic_order_cnt_lsb, (*dec_ctx.timing).current_tref, current_index, (*dec_ctx.avc_ctx).currref, (*dec_ctx.avc_ctx).lastmaxidx, (*dec_ctx.avc_ctx).maxtref); - let mut buf = [c_char::from(0i8); 64]; + let mut buf = [0 as c_char; 64]; debug!( msg_type = DebugMessageFlag::TIME; " sync_pts:{} ({:8})", diff --git a/src/rust/src/hardsubx/classifier.rs b/src/rust/src/hardsubx/classifier.rs index d8572832a..13525ed46 100644 --- a/src/rust/src/hardsubx/classifier.rs +++ b/src/rust/src/hardsubx/classifier.rs @@ -179,7 +179,11 @@ pub unsafe extern "C" fn get_ocr_text_wordwise_threshold( } } - text_out = format!("{} {}", text_out, word); + if text_out.is_empty() { + text_out = word; + } else { + text_out = format!("{} {}", text_out, word); + } } } @@ -234,7 +238,7 @@ pub unsafe extern "C" fn get_ocr_text_letterwise_threshold( let mut total_conf: std::os::raw::c_float = 0.0; let mut num_characters: std::os::raw::c_int = 0; - let mut first_iter: bool = false; + let mut first_iter: bool = true; if it != null::() as *mut TessResultIterator { loop { @@ -245,17 +249,25 @@ pub unsafe extern "C" fn get_ocr_text_letterwise_threshold( } let letter = _tess_string_helper(it, level); - text_out = format!("{}{}", text_out, letter); + if letter.is_empty() { + continue; + } + // Check confidence BEFORE adding to output + let mut should_add = true; if threshold > 0.0 { // we don't even want to bother with this call if threshold is 0 or less let conf: std::os::raw::c_float = TessResultIteratorConfidence(it, level); if conf < threshold { - continue; + should_add = false; + } else { + total_conf += conf; + num_characters += 1; } + } - total_conf += conf; - num_characters += 1; + if should_add { + text_out = format!("{}{}", text_out, letter); } } } diff --git a/src/rust/src/hardsubx/decoder.rs b/src/rust/src/hardsubx/decoder.rs index 472ca76bf..c7247a27d 100644 --- a/src/rust/src/hardsubx/decoder.rs +++ b/src/rust/src/hardsubx/decoder.rs @@ -41,10 +41,14 @@ pub unsafe fn dispatch_classifier_functions(ctx: *mut lib_hardsubx_ctx, im: *mut match (*ctx).ocr_mode { 0 => { let ret_char_arr = get_ocr_text_simple_threshold(ctx, im, (*ctx).conf_thresh); - let text_out_result = ffi::CString::from_raw(ret_char_arr).into_string(); - match text_out_result { - Ok(T) => T, - Err(_E) => "".to_string(), + if ret_char_arr.is_null() { + "".to_string() + } else { + let text_out_result = ffi::CString::from_raw(ret_char_arr).into_string(); + match text_out_result { + Ok(T) => T, + Err(_E) => "".to_string(), + } } } 1 => { @@ -59,10 +63,14 @@ pub unsafe fn dispatch_classifier_functions(ctx: *mut lib_hardsubx_ctx, im: *mut } 2 => { let ret_char_arr = get_ocr_text_letterwise_threshold(ctx, im, (*ctx).conf_thresh); - let text_out_result = ffi::CString::from_raw(ret_char_arr).into_string(); - match text_out_result { - Ok(T) => T, - Err(_E) => "".to_string(), + if ret_char_arr.is_null() { + "".to_string() + } else { + let text_out_result = ffi::CString::from_raw(ret_char_arr).into_string(); + match text_out_result { + Ok(T) => T, + Err(_E) => "".to_string(), + } } } diff --git a/src/rust/src/lib.rs b/src/rust/src/lib.rs index f5f38bf4c..67f92c79e 100644 --- a/src/rust/src/lib.rs +++ b/src/rust/src/lib.rs @@ -19,6 +19,7 @@ pub mod common; pub mod decoder; pub mod encoder; pub mod es; + #[cfg(feature = "hardsubx_ocr")] pub mod hardsubx; pub mod libccxr_exports; diff --git a/windows/rust.bat b/windows/rust.bat index dcb59b763..d82e53cfb 100644 --- a/windows/rust.bat +++ b/windows/rust.bat @@ -1,7 +1,12 @@ for /f "delims=" %%i in ('cd') do set output=%%i set CARGO_TARGET_DIR=%output% cd ..\src\rust -cargo build %1 --features "hardsubx_ocr" --target x86_64-pc-windows-msvc +REM Allow overriding FFmpeg version via environment variable +IF "%FFMPEG_VERSION%"=="" ( + cargo build %1 --features "hardsubx_ocr" --target x86_64-pc-windows-msvc +) ELSE ( + cargo build %1 --features "hardsubx_ocr,%FFMPEG_VERSION%" --target x86_64-pc-windows-msvc +) cd ..\..\windows IF "%~1"=="-r" ( copy x86_64-pc-windows-msvc\release\ccx_rust.lib .\ccx_rust.lib