Skip to content

Commit 25e7448

Browse files
committed
fix(cli,fixtures): support .yml config extension and fix fixture document paths
- Add .yml as an accepted YAML config file extension (alongside .yaml)
- Update 63 fixture JSON files with correct test_documents paths after directory reorganization (pdfs/ -> pdf/, web/ -> html/, etc.)
- Fix the test_invalid_config_values test, which used a nonexistent config field
1 parent 0936ff1 commit 25e7448

File tree

66 files changed

+80
-68
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

66 files changed

+80
-68
lines changed

CHANGELOG.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
---
99

10+
## [Unreleased]
11+
12+
### Fixed
13+
14+
#### CLI
15+
- **`.yml` config files rejected by `--config` flag**: The CLI only accepted `.yaml` for YAML config files, rejecting the common `.yml` extension with a "must have .toml, .yaml, or .json extension" error. The config loader now accepts both `.yml` and `.yaml` (case-insensitive).
16+
17+
#### CI
18+
- **Fixture document paths broken after test_documents reorganization**: All 63 fixture JSON files under `fixtures/` still referenced old directory names (`pdfs/`, `pdfs_with_tables/`, `web/`, `documents/`, `legacy_office/`, `presentations/`, `spreadsheets/`, `data_formats/`, `office/`) after the test_documents directory was restructured into per-format directories (`pdf/`, `html/`, `docx/`, `doc/`, `ppt/`, `pptx/`, `xlsx/`, `xls/`, `json/`, `yaml/`). This caused all Deno E2E tests and PDFium system tests to fail with "No such file or directory" errors.
19+
20+
---
21+
1022
## [4.2.12] - 2026-02-06
1123

1224
### Fixed

crates/kreuzberg-cli/src/commands/config.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,13 @@ use std::path::PathBuf;
1818
///
1919
/// Supports three formats, determined by file extension:
2020
/// - `.toml`: TOML format (recommended for humans)
21-
/// - `.yaml`: YAML format
21+
/// - `.yaml` / `.yml`: YAML format
2222
/// - `.json`: JSON format
2323
///
2424
/// # Errors
2525
///
2626
/// Returns an error if:
27-
/// - Explicit config file has unsupported extension (must be .toml, .yaml, or .json)
27+
/// - Explicit config file has unsupported extension (must be .toml, .yaml, .yml, or .json)
2828
/// - Config file cannot be read or parsed
2929
/// - Config file contains invalid extraction settings
3030
pub fn load_config(config_path: Option<PathBuf>) -> Result<ExtractionConfig> {
@@ -33,12 +33,12 @@ pub fn load_config(config_path: Option<PathBuf>) -> Result<ExtractionConfig> {
3333
let path_lower = path_str.to_lowercase();
3434
let config = if path_lower.ends_with(".toml") {
3535
ExtractionConfig::from_toml_file(&path)
36-
} else if path_lower.ends_with(".yaml") {
36+
} else if path_lower.ends_with(".yaml") || path_lower.ends_with(".yml") {
3737
ExtractionConfig::from_yaml_file(&path)
3838
} else if path_lower.ends_with(".json") {
3939
ExtractionConfig::from_json_file(&path)
4040
} else {
41-
anyhow::bail!("Config file must have .toml, .yaml, or .json extension (case-insensitive)");
41+
anyhow::bail!("Config file must have .toml, .yaml, .yml, or .json extension (case-insensitive)");
4242
};
4343
config.with_context(|| format!("Failed to load configuration from '{}'. Ensure the file exists, is readable, and contains valid configuration.", path.display()))
4444
} else {

crates/kreuzberg-cli/tests/config_discovery_test.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -595,7 +595,7 @@ fn test_invalid_config_values() {
595595
fs::write(
596596
&config_path,
597597
r#"
598-
max_pages = -1
598+
use_cache = "not_a_bool"
599599
"#,
600600
)
601601
.unwrap();

fixtures/contract/api_batch_bytes_async.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
"description": "Tests async batch bytes extraction API (batch_extract_bytes)",
44
"tags": ["contract", "api", "batch"],
55
"document": {
6-
"path": "pdfs/fake_memo.pdf"
6+
"path": "pdf/fake_memo.pdf"
77
},
88
"extraction": {
99
"method": "batch_async",

fixtures/contract/api_batch_bytes_sync.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
"description": "Tests sync batch bytes extraction API (batch_extract_bytes_sync)",
44
"tags": ["contract", "api", "batch"],
55
"document": {
6-
"path": "pdfs/fake_memo.pdf"
6+
"path": "pdf/fake_memo.pdf"
77
},
88
"extraction": {
99
"method": "batch_sync",

fixtures/contract/api_batch_file_async.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
"description": "Tests async batch file extraction API (batch_extract_file)",
44
"tags": ["contract", "api", "batch"],
55
"document": {
6-
"path": "pdfs/fake_memo.pdf"
6+
"path": "pdf/fake_memo.pdf"
77
},
88
"extraction": {
99
"method": "batch_async",

fixtures/contract/api_batch_file_sync.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
"description": "Tests sync batch file extraction API (batch_extract_file_sync)",
44
"tags": ["contract", "api", "batch"],
55
"document": {
6-
"path": "pdfs/fake_memo.pdf"
6+
"path": "pdf/fake_memo.pdf"
77
},
88
"extraction": {
99
"method": "batch_sync",

fixtures/contract/api_extract_bytes_async.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
"description": "Tests async bytes extraction API (extract_bytes)",
44
"tags": ["contract", "api"],
55
"document": {
6-
"path": "pdfs/fake_memo.pdf"
6+
"path": "pdf/fake_memo.pdf"
77
},
88
"extraction": {
99
"method": "async",

fixtures/contract/api_extract_bytes_sync.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
"description": "Tests sync bytes extraction API (extract_bytes_sync)",
44
"tags": ["contract", "api"],
55
"document": {
6-
"path": "pdfs/fake_memo.pdf"
6+
"path": "pdf/fake_memo.pdf"
77
},
88
"extraction": {
99
"method": "sync",

fixtures/contract/api_extract_file_async.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
"description": "Tests async file extraction API (extract_file)",
44
"tags": ["contract", "api"],
55
"document": {
6-
"path": "pdfs/fake_memo.pdf"
6+
"path": "pdf/fake_memo.pdf"
77
},
88
"extraction": {
99
"method": "async",

0 commit comments

Comments
 (0)