Skip to content

Commit 436ef70

Browse files
committed
chore: ci fixes
1 parent 27564aa commit 436ef70

File tree

19 files changed

+2148
-834
lines changed

19 files changed

+2148
-834
lines changed

.github/actions/setup-paddle-ocr-models/action.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -148,21 +148,21 @@ runs:
148148
echo "Checking for PaddleOCR models in $CACHE_DIR"
149149
150150
if [ -f "$CACHE_DIR/det/model.onnx" ]; then
151-
SIZE=$(du -b "$CACHE_DIR/det/model.onnx" | cut -f1)
151+
SIZE=$(wc -c < "$CACHE_DIR/det/model.onnx" | tr -d ' ')
152152
AVAILABLE_MODELS+=("det")
153153
TOTAL_SIZE=$((TOTAL_SIZE + SIZE))
154154
echo " ✓ Detection model: $(numfmt --to=iec-i --suffix=B $SIZE 2>/dev/null || echo $SIZE bytes)"
155155
fi
156156
157157
if [ -f "$CACHE_DIR/cls/model.onnx" ]; then
158-
SIZE=$(du -b "$CACHE_DIR/cls/model.onnx" | cut -f1)
158+
SIZE=$(wc -c < "$CACHE_DIR/cls/model.onnx" | tr -d ' ')
159159
AVAILABLE_MODELS+=("cls")
160160
TOTAL_SIZE=$((TOTAL_SIZE + SIZE))
161161
echo " ✓ Classification model: $(numfmt --to=iec-i --suffix=B $SIZE 2>/dev/null || echo $SIZE bytes)"
162162
fi
163163
164164
if [ -f "$CACHE_DIR/rec/model.onnx" ]; then
165-
SIZE=$(du -b "$CACHE_DIR/rec/model.onnx" | cut -f1)
165+
SIZE=$(wc -c < "$CACHE_DIR/rec/model.onnx" | tr -d ' ')
166166
AVAILABLE_MODELS+=("rec")
167167
TOTAL_SIZE=$((TOTAL_SIZE + SIZE))
168168
echo " ✓ Recognition model: $(numfmt --to=iec-i --suffix=B $SIZE 2>/dev/null || echo $SIZE bytes)"

.task/languages/rust.yml

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -79,9 +79,10 @@ tasks:
7979
PDFIUM_VERSION: '{{.PDFIUM_VERSION}}'
8080
cmds:
8181
# Note: exclude benchmark-harness on Windows as jemalloc doesn't build with MSVC
82-
- cmd: cargo build --workspace --all-features --exclude kreuzberg-php --exclude kreuzberg-node --exclude kreuzberg-wasm
82+
# Note: exclude kreuzberg-pdfium-render because --all-features enables mutually exclusive static/dynamic linking
83+
- cmd: cargo build --workspace --all-features --exclude kreuzberg-php --exclude kreuzberg-node --exclude kreuzberg-wasm --exclude kreuzberg-pdfium-render
8384
platforms: [linux, darwin]
84-
- cmd: cargo build --workspace --all-features --exclude kreuzberg-php --exclude kreuzberg-node --exclude kreuzberg-wasm --exclude benchmark-harness
85+
- cmd: cargo build --workspace --all-features --exclude kreuzberg-php --exclude kreuzberg-node --exclude kreuzberg-wasm --exclude benchmark-harness --exclude kreuzberg-pdfium-render
8586
platforms: [windows]
8687

8788
build:release:
@@ -91,9 +92,9 @@ tasks:
9192
CMAKE: '{{.CMAKE_PATH}}'
9293
PDFIUM_VERSION: '{{.PDFIUM_VERSION}}'
9394
cmds:
94-
- cmd: cargo build --release --workspace --all-features --exclude kreuzberg-php --exclude kreuzberg-node --exclude kreuzberg-wasm
95+
- cmd: cargo build --release --workspace --all-features --exclude kreuzberg-php --exclude kreuzberg-node --exclude kreuzberg-wasm --exclude kreuzberg-pdfium-render
9596
platforms: [linux, darwin]
96-
- cmd: cargo build --release --workspace --all-features --exclude kreuzberg-php --exclude kreuzberg-node --exclude kreuzberg-wasm --exclude benchmark-harness
97+
- cmd: cargo build --release --workspace --all-features --exclude kreuzberg-php --exclude kreuzberg-node --exclude kreuzberg-wasm --exclude benchmark-harness --exclude kreuzberg-pdfium-render
9798
platforms: [windows]
9899

99100
build:ci:
@@ -104,9 +105,9 @@ tasks:
104105
PDFIUM_VERSION: '{{.PDFIUM_VERSION}}'
105106
RUSTFLAGS: '-C debuginfo=2'
106107
cmds:
107-
- cmd: cargo build --release --workspace --all-features --exclude kreuzberg-php --exclude kreuzberg-node --exclude kreuzberg-wasm
108+
- cmd: cargo build --release --workspace --all-features --exclude kreuzberg-php --exclude kreuzberg-node --exclude kreuzberg-wasm --exclude kreuzberg-pdfium-render
108109
platforms: [linux, darwin]
109-
- cmd: cargo build --release --workspace --all-features --exclude kreuzberg-php --exclude kreuzberg-node --exclude kreuzberg-wasm --exclude benchmark-harness
110+
- cmd: cargo build --release --workspace --all-features --exclude kreuzberg-php --exclude kreuzberg-node --exclude kreuzberg-wasm --exclude benchmark-harness --exclude kreuzberg-pdfium-render
110111
platforms: [windows]
111112

112113
build:profiling:

crates/kreuzberg-cli/src/commands/extract.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -104,13 +104,16 @@ pub fn apply_extraction_overrides(
104104
Some("easyocr") => ("easyocr", "en"),
105105
_ => ("tesseract", "eng"),
106106
};
107+
// Preserve existing paddle_ocr_config and element_config from config file/inline JSON
108+
let existing_paddle_config = config.ocr.as_ref().and_then(|o| o.paddle_ocr_config.clone());
109+
let existing_element_config = config.ocr.as_ref().and_then(|o| o.element_config.clone());
107110
config.ocr = Some(OcrConfig {
108111
backend: backend.to_string(),
109112
language: language.to_string(),
110113
tesseract_config: None,
111114
output_format: None,
112-
paddle_ocr_config: None,
113-
element_config: None,
115+
paddle_ocr_config: existing_paddle_config,
116+
element_config: existing_element_config,
114117
});
115118
} else {
116119
config.ocr = None;

0 commit comments

Comments
 (0)