@@ -23,13 +23,13 @@ use crate::plugins::json_value_to_py;
2323///
2424/// Example:
2525/// >>> from kreuzberg import extract_file_sync, ExtractionConfig
26- /// >>> result = extract_file_sync("document.pdf", None, ExtractionConfig())
27- /// >>> print(result.content)
28- /// >>> print(result.metadata)
29- /// >>> print(len(result.tables))
30- /// >>> if result.detected_languages:
26+ /// >>> result = extract_file_sync("document.pdf", None, ExtractionConfig()) # doctest: +SKIP
27+ /// >>> print(result.content) # doctest: +SKIP
28+ /// >>> print(result.metadata) # doctest: +SKIP
29+ /// >>> print(len(result.tables)) # doctest: +SKIP
30+ /// >>> if result.detected_languages: # doctest: +SKIP
3131/// ... print(result.detected_languages)
32- /// >>> if result.document:
32+ /// >>> if result.document: # doctest: +SKIP
3333/// ... print(f"Document has {len(result.document['nodes'])} nodes")
3434#[ pyclass( name = "ExtractionResult" , module = "kreuzberg" ) ]
3535pub struct ExtractionResult {
@@ -167,9 +167,9 @@ impl ExtractionResult {
167167 /// int: Total page count
168168 ///
169169 /// Example:
170- /// >>> result = extract_file_sync("document.pdf", None, ExtractionConfig())
171- /// >>> page_count = result.get_page_count()
172- /// >>> print(f"Document has {page_count} pages")
170+ /// >>> result = extract_file_sync("document.pdf", None, ExtractionConfig()) # doctest: +SKIP
171+ /// >>> page_count = result.get_page_count() # doctest: +SKIP
172+ /// >>> print(f"Document has {page_count} pages") # doctest: +SKIP
173173 #[ pyo3( name = "get_page_count" ) ]
174174 fn get_page_count ( & self ) -> usize {
175175 Python :: attach ( |py| self . pages . as_ref ( ) . map ( |pages_py| pages_py. bind ( py) . len ( ) ) . unwrap_or ( 0 ) )
@@ -186,9 +186,9 @@ impl ExtractionResult {
186186 /// Example:
187187 /// >>> from kreuzberg import ChunkingConfig, ExtractionConfig
188188 /// >>> config = ExtractionConfig(chunking=ChunkingConfig(max_chars=500))
189- /// >>> result = extract_file_sync("document.pdf", None, config)
190- /// >>> chunk_count = result.get_chunk_count()
191- /// >>> print(f"Document has {chunk_count} chunks")
189+ /// >>> result = extract_file_sync("document.pdf", None, config) # doctest: +SKIP
190+ /// >>> chunk_count = result.get_chunk_count() # doctest: +SKIP
191+ /// >>> print(f"Document has {chunk_count} chunks") # doctest: +SKIP
192192 #[ pyo3( name = "get_chunk_count" ) ]
193193 fn get_chunk_count ( & self ) -> usize {
194194 Python :: attach ( |py| {
@@ -212,9 +212,9 @@ impl ExtractionResult {
212212 /// >>> config = ExtractionConfig(
213213 /// ... language_detection=LanguageDetectionConfig(enabled=True)
214214 /// ... )
215- /// >>> result = extract_file_sync("document.pdf", None, config)
216- /// >>> lang = result.get_detected_language()
217- /// >>> if lang:
215+ /// >>> result = extract_file_sync("document.pdf", None, config) # doctest: +SKIP
216+ /// >>> lang = result.get_detected_language() # doctest: +SKIP
217+ /// >>> if lang: # doctest: +SKIP
218218 /// ... print(f"Document language: {lang}")
219219 #[ pyo3( name = "get_detected_language" ) ]
220220 fn get_detected_language ( & self ) -> Option < String > {
@@ -242,12 +242,12 @@ impl ExtractionResult {
242242 /// Any | None: Field value (type depends on field), or None if not found
243243 ///
244244 /// Example:
245- /// >>> result = extract_file_sync("document.pdf", None, ExtractionConfig())
246- /// >>> title = result.get_metadata_field("title")
247- /// >>> if title:
245+ /// >>> result = extract_file_sync("document.pdf", None, ExtractionConfig()) # doctest: +SKIP
246+ /// >>> title = result.get_metadata_field("title") # doctest: +SKIP
247+ /// >>> if title: # doctest: +SKIP
248248 /// ... print(f"Title: {title}")
249- /// >>> authors = result.get_metadata_field("authors")
250- /// >>> if authors:
249+ /// >>> authors = result.get_metadata_field("authors") # doctest: +SKIP
250+ /// >>> if authors: # doctest: +SKIP
251251 /// ... print(f"Authors: {authors}")
252252 #[ pyo3( name = "get_metadata_field" ) ]
253253 fn get_metadata_field ( & self , field_name : & str ) -> PyResult < Option < Py < PyAny > > > {
@@ -786,8 +786,8 @@ mod tests {
786786/// Example:
787787/// >>> from kreuzberg import ChunkingConfig, ExtractionConfig
788788/// >>> config = ExtractionConfig(chunking=ChunkingConfig(max_chars=500))
789- /// >>> result = extract_file_sync("document.pdf", None, config)
790- /// >>> for chunk in result.chunks:
789+ /// >>> result = extract_file_sync("document.pdf", None, config) # doctest: +SKIP
790+ /// >>> for chunk in result.chunks: # doctest: +SKIP
791791/// ... print(f"Chunk: {chunk.content[:50]}...")
792792/// ... print(f"Metadata: {chunk.metadata}")
793793#[ pyclass( name = "Chunk" , module = "kreuzberg" ) ]
@@ -842,8 +842,8 @@ impl PyChunk {
842842/// page_number (int): Page number where table was found
843843///
844844/// Example:
845- /// >>> result = extract_file_sync("document.pdf", None, ExtractionConfig())
846- /// >>> for table in result.tables:
845+ /// >>> result = extract_file_sync("document.pdf", None, ExtractionConfig()) # doctest: +SKIP
846+ /// >>> for table in result.tables: # doctest: +SKIP
847847/// ... print(f"Table on page {table.page_number}:")
848848/// ... print(table.markdown)
849849/// ... print(f"Dimensions: {len(table.cells)} rows x {len(table.cells[0])} cols")
0 commit comments