# CCExtractor Docker Build
#
# Build variants via BUILD_TYPE argument:
#   - minimal:  Basic CCExtractor without OCR
#   - ocr:      CCExtractor with OCR support (default)
#   - hardsubx: CCExtractor with burned-in subtitle extraction (requires FFmpeg)
#
# Source options via USE_LOCAL_SOURCE argument:
#   - 0 (default): Clone from GitHub (standalone Dockerfile usage)
#   - 1:           Use local source (when building from cloned repo)
#
# Build examples:
#
#   # Standalone (just the Dockerfile, clones from GitHub):
#   docker build -t ccextractor docker/
#   docker build --build-arg BUILD_TYPE=hardsubx -t ccextractor docker/
#
#   # From cloned repository (faster, uses local source):
#   docker build --build-arg USE_LOCAL_SOURCE=1 -f docker/Dockerfile -t ccextractor .
#   docker build --build-arg USE_LOCAL_SOURCE=1 --build-arg BUILD_TYPE=minimal -f docker/Dockerfile -t ccextractor .

# Debian release/variant shared by every stage. Declared before the first FROM
# so it can be used in the FROM line below; override with
# --build-arg DEBIAN_VERSION=<tag>.
ARG DEBIAN_VERSION=bookworm-slim

# Common parent image for both the builder and the final runtime stage.
FROM debian:$DEBIAN_VERSION AS base

FROM base AS builder

# Build arguments
#   BUILD_TYPE:       minimal, ocr, hardsubx
#   USE_LOCAL_SOURCE: 0 = git clone, 1 = copy local source
ARG BUILD_TYPE=ocr
ARG USE_LOCAL_SOURCE=0

# Avoid interactive prompts during package installation. Declared as ARG
# (not ENV) because it is only needed while building; RUN instructions in this
# stage still see it as an environment variable.
ARG DEBIAN_FRONTEND=noninteractive

# Install base build dependencies (kept sorted for easier diffs)
RUN apt-get update && apt-get install -y --no-install-recommends \
        bash \
        ca-certificates \
        clang \
        cmake \
        curl \
        g++ \
        gcc \
        git \
        libclang-dev \
        libcurl4-gnutls-dev \
        libfreetype-dev \
        libjpeg-dev \
        libpng-dev \
        libssl-dev \
        libxml2-dev \
        make \
        pkg-config \
        zlib1g-dev \
    && rm -rf /var/lib/apt/lists/*

# Install Rust toolchain.
# The installer is downloaded to a file first and only then executed: piping
# curl straight into sh under the default /bin/sh (no pipefail) would hide a
# failed or truncated download behind the exit status of sh.
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs -o /tmp/rustup-init.sh \
    && sh /tmp/rustup-init.sh -y --default-toolchain stable \
    && rm -f /tmp/rustup-init.sh
# rustup installs cargo/rustc under the invoking user's home (root here).
ENV PATH="/root/.cargo/bin:${PATH}"

# OCR development packages: needed by the "ocr" and "hardsubx" variants only.
RUN case "$BUILD_TYPE" in \
        ocr|hardsubx) \
            apt-get update && apt-get install -y --no-install-recommends \
                tesseract-ocr \
                libtesseract-dev \
                libleptonica-dev \
            && rm -rf /var/lib/apt/lists/* \
            ;; \
    esac

# FFmpeg development packages: needed by the "hardsubx" variant only.
RUN case "$BUILD_TYPE" in \
        hardsubx) \
            apt-get update && apt-get install -y --no-install-recommends \
                libavcodec-dev \
                libavformat-dev \
                libavutil-dev \
                libswscale-dev \
                libswresample-dev \
                libavfilter-dev \
                libavdevice-dev \
            && rm -rf /var/lib/apt/lists/* \
            ;; \
    esac

# Build and install the GPAC library.
# GPAC_VERSION is the git tag/branch to check out; the default matches the
# previously hard-coded release.
ARG GPAC_VERSION=v2.4.0

WORKDIR /root
# Clone, build, install and clean up in a single layer: deleting the source
# tree in a later RUN would leave it stored in the layer that created it.
RUN git clone -b "${GPAC_VERSION}" --depth 1 https://github.com/gpac/gpac /root/gpac \
    && cd /root/gpac \
    && ./configure \
    && make -j"$(nproc)" lib \
    && make install-lib \
    && ldconfig \
    && cd /root \
    && rm -rf /root/gpac

# Obtain the CCExtractor sources, either from the build context or from GitHub,
# depending on USE_LOCAL_SOURCE.
WORKDIR /root

# Stage the build context unconditionally. When the context is the repository
# root this is the full source tree; when it is only the docker/ directory
# (standalone usage) it does not contain the sources and is discarded below.
COPY . /root/ccextractor-local/

# Fall back to a fresh clone unless local source was requested AND the staged
# context really contains the CCExtractor sources.
RUN if [ "$USE_LOCAL_SOURCE" != "1" ] || [ ! -f /root/ccextractor-local/src/ccextractor.c ]; then \
        echo "Cloning from GitHub"; \
        rm -rf /root/ccextractor-local; \
        git clone --depth 1 https://github.com/CCExtractor/ccextractor.git /root/ccextractor; \
    else \
        echo "Using local source"; \
        mv /root/ccextractor-local /root/ccextractor; \
    fi

# All remaining build steps run from the linux/ directory of the source tree.
WORKDIR /root/ccextractor/linux

# Generate build info
RUN ./pre-build.sh

# Build the Rust static library. Only the hardsubx variant enables an extra
# cargo feature; every other variant builds with the default feature set.
# $CARGO_FEATURES is left unquoted on purpose so that an empty value expands
# to no arguments at all.
RUN CARGO_FEATURES=""; \
    if [ "$BUILD_TYPE" = "hardsubx" ]; then \
        CARGO_FEATURES="--features hardsubx_ocr"; \
    fi; \
    cd ../src/rust && \
    CARGO_TARGET_DIR=../../linux/rust cargo build --release $CARGO_FEATURES

# Place the archive next to the C build so the link step can reference it
# by a short relative path.
RUN cp rust/release/libccx_rust.a ./libccx_rust.a

# Compile CCExtractor.
# The three variants share almost all compiler/linker settings, so only the
# variant-specific fragments are selected here and spliced into the common
# strings at the same positions they occupied before:
#   - minimal:  no OCR defines, include paths or libraries
#   - hardsubx: OCR + HardSubX defines, OCR libraries, FFmpeg libraries
#   - anything else (default "ocr"): OCR defines, include paths and libraries
# The BLD_* variables are expanded unquoted in the gcc call on purpose so the
# shell splits them into individual arguments.
RUN OCR_DEFS=""; OCR_INCLUDE=""; OCR_LIBS=""; FFMPEG_LIBS=""; \
    if [ "$BUILD_TYPE" != "minimal" ]; then \
        OCR_DEFS="-DENABLE_OCR"; \
        OCR_INCLUDE="-I /usr/include/leptonica/ -I /usr/include/tesseract/"; \
        OCR_LIBS="-ltesseract -lleptonica"; \
    fi; \
    if [ "$BUILD_TYPE" = "hardsubx" ]; then \
        OCR_DEFS="-DENABLE_OCR -DENABLE_HARDSUBX"; \
        FFMPEG_LIBS="-lswscale -lavutil -lavformat -lavcodec -lavfilter -lswresample"; \
    fi; \
    BLD_FLAGS="-std=gnu99 -Wno-write-strings -Wno-pointer-sign -D_FILE_OFFSET_BITS=64 -DVERSION_FILE_PRESENT $OCR_DEFS -DFT2_BUILD_LIBRARY -DGPAC_DISABLE_VTT -DGPAC_DISABLE_OD_DUMP -DGPAC_DISABLE_REMOTERY -DNO_GZIP -DGPAC_64_BITS" && \
    BLD_INCLUDE="-I../src $OCR_INCLUDE -I../src/lib_ccx/ -I /usr/include/gpac/ -I../src/thirdparty/libpng -I../src/thirdparty/zlib -I../src/lib_ccx/zvbi -I../src/thirdparty/lib_hash -I../src/thirdparty -I../src/thirdparty/freetype/include" && \
    BLD_LINKER="-lm -Wl,--allow-multiple-definition $OCR_LIBS -lpthread -ldl -lgpac $FFMPEG_LIBS ./libccx_rust.a" && \
    SRC_LIBPNG="$(find ../src/thirdparty/libpng/ -name '*.c')" && \
    SRC_ZLIB="$(find ../src/thirdparty/zlib/ -name '*.c')" && \
    SRC_CCX="$(find ../src/lib_ccx/ -name '*.c')" && \
    SRC_GPAC="$(find /usr/include/gpac/ -name '*.c' 2>/dev/null || true)" && \
    SRC_HASH="$(find ../src/thirdparty/lib_hash/ -name '*.c')" && \
    SRC_UTF8PROC="../src/thirdparty/utf8proc/utf8proc.c" && \
    SRC_FREETYPE="../src/thirdparty/freetype/autofit/autofit.c \
        ../src/thirdparty/freetype/base/ftbase.c \
        ../src/thirdparty/freetype/base/ftbbox.c \
        ../src/thirdparty/freetype/base/ftbdf.c \
        ../src/thirdparty/freetype/base/ftbitmap.c \
        ../src/thirdparty/freetype/base/ftcid.c \
        ../src/thirdparty/freetype/base/ftfntfmt.c \
        ../src/thirdparty/freetype/base/ftfstype.c \
        ../src/thirdparty/freetype/base/ftgasp.c \
        ../src/thirdparty/freetype/base/ftglyph.c \
        ../src/thirdparty/freetype/base/ftgxval.c \
        ../src/thirdparty/freetype/base/ftinit.c \
        ../src/thirdparty/freetype/base/ftlcdfil.c \
        ../src/thirdparty/freetype/base/ftmm.c \
        ../src/thirdparty/freetype/base/ftotval.c \
        ../src/thirdparty/freetype/base/ftpatent.c \
        ../src/thirdparty/freetype/base/ftpfr.c \
        ../src/thirdparty/freetype/base/ftstroke.c \
        ../src/thirdparty/freetype/base/ftsynth.c \
        ../src/thirdparty/freetype/base/ftsystem.c \
        ../src/thirdparty/freetype/base/fttype1.c \
        ../src/thirdparty/freetype/base/ftwinfnt.c \
        ../src/thirdparty/freetype/bdf/bdf.c \
        ../src/thirdparty/freetype/bzip2/ftbzip2.c \
        ../src/thirdparty/freetype/cache/ftcache.c \
        ../src/thirdparty/freetype/cff/cff.c \
        ../src/thirdparty/freetype/cid/type1cid.c \
        ../src/thirdparty/freetype/gzip/ftgzip.c \
        ../src/thirdparty/freetype/lzw/ftlzw.c \
        ../src/thirdparty/freetype/pcf/pcf.c \
        ../src/thirdparty/freetype/pfr/pfr.c \
        ../src/thirdparty/freetype/psaux/psaux.c \
        ../src/thirdparty/freetype/pshinter/pshinter.c \
        ../src/thirdparty/freetype/psnames/psnames.c \
        ../src/thirdparty/freetype/raster/raster.c \
        ../src/thirdparty/freetype/sfnt/sfnt.c \
        ../src/thirdparty/freetype/smooth/smooth.c \
        ../src/thirdparty/freetype/truetype/truetype.c \
        ../src/thirdparty/freetype/type1/type1.c \
        ../src/thirdparty/freetype/type42/type42.c \
        ../src/thirdparty/freetype/winfonts/winfnt.c" && \
    BLD_SOURCES="../src/ccextractor.c $SRC_CCX $SRC_GPAC $SRC_ZLIB $SRC_LIBPNG $SRC_HASH $SRC_UTF8PROC $SRC_FREETYPE" && \
    gcc $BLD_FLAGS $BLD_INCLUDE -o ccextractor $BLD_SOURCES $BLD_LINKER

# Put the binary at a fixed path so the final stage can copy it without
# knowing the layout of the source tree.
RUN cp /root/ccextractor/linux/ccextractor /ccextractor

# Final minimal image: runtime libraries plus the artifacts copied out of the
# builder stage; no compilers or development headers.
FROM base AS final

# Must match the BUILD_TYPE used for the builder stage so that the runtime
# libraries installed here correspond to what the binary was linked against.
ARG BUILD_TYPE=ocr

# Avoid interactive prompts during package installation. ARG rather than ENV
# so the setting is not baked into the environment of running containers.
ARG DEBIAN_FRONTEND=noninteractive

# Install runtime dependencies based on build type, in a single layer with a
# single package-index refresh:
#   - always:          image/compression/TLS/HTTP libraries
#   - ocr + hardsubx:  Tesseract and Leptonica
#   - hardsubx:        FFmpeg shared libraries
# $PKGS is expanded unquoted on purpose so each package is a separate argument.
RUN set -e; \
    PKGS="libcurl4 libjpeg62-turbo libpng16-16 libssl3 zlib1g"; \
    case "$BUILD_TYPE" in \
        ocr|hardsubx) PKGS="$PKGS liblept5 tesseract-ocr" ;; \
    esac; \
    if [ "$BUILD_TYPE" = "hardsubx" ]; then \
        PKGS="$PKGS libavcodec59 libavdevice59 libavfilter8 libavformat59 libavutil57 libswresample4 libswscale6"; \
    fi; \
    apt-get update; \
    apt-get install -y --no-install-recommends $PKGS; \
    rm -rf /var/lib/apt/lists/*

# Copy GPAC library from builder
COPY --from=builder /usr/local/lib/libgpac.so* /usr/local/lib/

# Update library cache so the dynamic linker finds the GPAC library copied above
RUN ldconfig

# Copy CCExtractor binary
COPY --from=builder /ccextractor /ccextractor

# NOTE(review): no USER is set, so the container runs as root. Presumably this
# is kept so output can be written to bind-mounted directories; callers who
# want to drop privileges can pass `--user` to `docker run`. Confirm before
# switching to a non-root default.
ENTRYPOINT ["/ccextractor"]