Skip to content

Commit 2c8b21b

Browse files
committed
fix(tests): update test document paths after directory refactoring
Update test file paths across all language bindings to reflect the reorganized test_documents directory structure:

- pdfs_with_tables/ → pdf/
- pdfs/ → pdf/
- tables/ → images/
- legacy_office/ → doc/
- office/ → docx/
- orgmode/ → org/
- web/ → html/
- pandoc/ → markdown/
- misc/ → org/, epub/
- data_formats/ → json/, yaml/
- documents/ → docx/

Fix broken symlinks in test_documents/ directory. Add validate_ground_truth.py script for benchmark validation.

Affected bindings: Rust, TypeScript, C#, PHP, Ruby, Python, Go, Java, Elixir, WASM
1 parent 6afdc2b commit 2c8b21b

File tree

85 files changed

+655
-502
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

85 files changed

+655
-502
lines changed

crates/kreuzberg-node/tests/binding/batch-operations.spec.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ let sampleTxtBytes: Uint8Array;
3434

3535
beforeAll(() => {
3636
// Get test documents
37-
samplePdfPath = getTestDocumentPath("pdfs/tiny.pdf");
37+
samplePdfPath = getTestDocumentPath("pdf/tiny.pdf");
3838
sampleDocxPath = getTestDocumentPath("documents/sample.docx");
3939
sampleTxtPath = getTestDocumentPath("text/sample.txt");
4040

crates/kreuzberg-node/tests/binding/extraction-functions.spec.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ beforeAll(() => {
1919
// Resolve symlinks to get the actual file path (important for Windows compatibility)
2020
samplePdfBytes = new Uint8Array(readFileSync(realpathSync(samplePdfPath)));
2121

22-
sampleTextPath = getTestDocumentPath("pandoc/simple_metadata.md");
22+
sampleTextPath = getTestDocumentPath("markdown/simple_metadata.md");
2323
sampleTextBytes = new Uint8Array(readFileSync(realpathSync(sampleTextPath)));
2424
});
2525

crates/kreuzberg-node/tests/binding/helpers.spec.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ describe("Helper Functions and Edge Cases", () => {
168168

169169
describe("Edge cases", () => {
170170
it("should handle very small files", () => {
171-
const textPath = getTestDocumentPath("pandoc/simple_metadata.md");
171+
const textPath = getTestDocumentPath("markdown/simple_metadata.md");
172172
const result = extractFileSync(textPath, null, null);
173173

174174
expect(result.content).toBeTruthy();

crates/kreuzberg-node/tests/binding/images.spec.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ let pptxPath: string;
2626
let docxPath: string;
2727

2828
beforeAll(() => {
29-
samplePdfPath = getTestDocumentPath("pdfs/embedded_images_tables.pdf");
29+
samplePdfPath = getTestDocumentPath("pdf/embedded_images_tables.pdf");
3030
// Resolve symlinks to get the actual file path (important for Windows compatibility)
3131
samplePdfBytes = new Uint8Array(readFileSync(realpathSync(samplePdfPath)));
3232

crates/kreuzberg-node/tests/binding/metadata-types.spec.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -392,8 +392,8 @@ describe("Metadata Types - Type Compatibility Tests", () => {
392392
});
393393

394394
it("test_extract_html_file_integration", () => {
395-
const htmlPath = getTestDocumentPath("web/taylor_swift.html");
396-
const buffer = loadTestDocument("web/taylor_swift.html");
395+
const htmlPath = getTestDocumentPath("html/taylor_swift.html");
396+
const buffer = loadTestDocument("html/taylor_swift.html");
397397

398398
const result = extractBytesSync(buffer, "text/html", null);
399399

crates/kreuzberg-node/tests/binding/sync-async-coverage.spec.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ beforeAll(() => {
3737
const resolvedPdfPath = realpathSync(samplePdfPath);
3838
samplePdfBytes = new Uint8Array(readFileSync(resolvedPdfPath));
3939

40-
sampleTextPath = getTestDocumentPath("pandoc/simple_metadata.md");
40+
sampleTextPath = getTestDocumentPath("markdown/simple_metadata.md");
4141
const resolvedTextPath = realpathSync(sampleTextPath);
4242
sampleTextBytes = new Uint8Array(readFileSync(resolvedTextPath));
4343
});

crates/kreuzberg-node/tests/binding/tables.spec.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,9 @@ let tinyPdfBytes: Uint8Array;
2929

3030
beforeAll(() => {
3131
// Use PDFs with tables for testing
32-
tinyPdfPath = getTestDocumentPath("pdfs_with_tables/tiny.pdf");
33-
mediumPdfPath = getTestDocumentPath("pdfs_with_tables/medium.pdf");
34-
largePdfPath = getTestDocumentPath("pdfs_with_tables/large.pdf");
32+
tinyPdfPath = getTestDocumentPath("pdf/tiny.pdf");
33+
mediumPdfPath = getTestDocumentPath("pdf/medium.pdf");
34+
largePdfPath = getTestDocumentPath("pdf/large.pdf");
3535

3636
try {
3737
// Resolve symlinks to get the actual file path (important for Windows compatibility)

crates/kreuzberg-tesseract/tests/integration_test.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ fn test_ocr_on_table_image() {
108108
api.set_variable("tessedit_pageseg_mode", "1")
109109
.expect("Failed to set PSM");
110110

111-
let (image_data, width, height) = load_test_image("tables/simple_table.png").expect("Failed to load test image");
111+
let (image_data, width, height) = load_test_image("images/simple_table.png").expect("Failed to load test image");
112112
api.set_image(&image_data, width as i32, height as i32, 3, 3 * width as i32)
113113
.expect("Failed to set image");
114114

crates/kreuzberg-wasm/typescript/pages.spec.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ beforeAll(async () => {
2222
await initWasm();
2323

2424
// Load test PDF file (path relative to crates/kreuzberg-wasm)
25-
const pdfPath = join(process.cwd(), "../../test_documents/pdfs/embedded_images_tables.pdf");
25+
const pdfPath = join(process.cwd(), "../../test_documents/pdf/embedded_images_tables.pdf");
2626
try {
2727
samplePdfBytes = new Uint8Array(readFileSync(pdfPath));
2828
} catch {

crates/kreuzberg-wasm/typescript/tables.spec.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ let samplePdfBytes: Uint8Array;
1515
beforeAll(async () => {
1616
await initWasm();
1717
// Load test PDF file (path relative to crates/kreuzberg-wasm)
18-
const pdfPath = join(process.cwd(), "../../test_documents/pdfs/embedded_images_tables.pdf");
18+
const pdfPath = join(process.cwd(), "../../test_documents/pdf/embedded_images_tables.pdf");
1919
try {
2020
samplePdfBytes = new Uint8Array(readFileSync(pdfPath));
2121
} catch {

0 commit comments

Comments
 (0)