Skip to content

Commit e71f29b

Browse files
committed
fix(node): add PdfAnnotation type and annotations mapping to TypeScript wrapper
The Node binding's TypeScript layer was missing the `PdfAnnotation` interface and the `annotations` field on `ExtractionResult`, causing the `pdf_annotations` e2e test to fail with "expected undefined to be defined".
1 parent 317da88 commit e71f29b

File tree

2 files changed

+23
-0
lines changed

2 files changed

+23
-0
lines changed

crates/kreuzberg-node/typescript/core/type-converters.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -330,6 +330,12 @@ function convertResult(rawResult: unknown): ExtractionResult {
330330
returnObj.processingWarnings = processingWarningsData as Array<{ source: string; message: string }>;
331331
}
332332

333+
// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
334+
const annotationsData = result["annotations"];
335+
if (Array.isArray(annotationsData)) {
336+
returnObj.annotations = annotationsData as import("../types.js").PdfAnnotation[];
337+
}
338+
333339
return returnObj;
334340
}
335341

crates/kreuzberg-node/typescript/types.ts

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1124,6 +1124,20 @@ export interface BoundingBox {
11241124
y1: number;
11251125
}
11261126

1127+
/**
1128+
* A PDF annotation extracted from a document page.
1129+
*/
1130+
export interface PdfAnnotation {
1131+
/** Type of annotation (e.g., "text", "highlight", "link", "underline") */
1132+
annotationType: string;
1133+
/** Text content of the annotation, if available */
1134+
content?: string | null;
1135+
/** Page number (1-indexed) where the annotation appears */
1136+
pageNumber: number;
1137+
/** Bounding box of the annotation on the page */
1138+
boundingBox?: BoundingBox | null;
1139+
}
1140+
11271141
/**
11281142
* Metadata for a semantic element.
11291143
*
@@ -1207,6 +1221,9 @@ export interface ExtractionResult {
12071221

12081222
/** Structured document tree when include_document_structure is enabled, null otherwise */
12091223
document?: Record<string, unknown> | null;
1224+
1225+
/** PDF annotations when extract_annotations is enabled; null or absent otherwise */
1226+
annotations?: PdfAnnotation[] | null;
12101227
}
12111228

12121229
/** Post-processor execution stage in the extraction pipeline. */

0 commit comments

Comments
 (0)