11package ai .docling .serve .api ;
22
3- import java .util .concurrent .CompletableFuture ;
3+ import java .nio .file .Path ;
4+ import java .util .Optional ;
5+ import java .util .concurrent .CompletionStage ;
6+
7+ import org .jspecify .annotations .Nullable ;
48
59import ai .docling .serve .api .chunk .request .HierarchicalChunkDocumentRequest ;
610import ai .docling .serve .api .chunk .request .HybridChunkDocumentRequest ;
711import ai .docling .serve .api .chunk .response .ChunkDocumentResponse ;
12+ import ai .docling .serve .api .util .FileUtils ;
13+ import ai .docling .serve .api .util .ValidationUtils ;
814
915/**
1016 * Represents the Docling Serve Chunk API, providing methods for processing document sources
@@ -19,21 +25,120 @@ public interface DoclingServeChunkApi {
1925 ChunkDocumentResponse chunkSourceWithHierarchicalChunker (HierarchicalChunkDocumentRequest request );
2026
2127 /**
22- * Converts and chunks the provided document source(s) into a processed document based on the specified options
23- * and using a hybrid chunker for splitting the document into smaller chunks.
28+ * Processes and chunks the specified files into smaller, structured pieces
29+ * using a hierarchical chunker. This method internally delegates the processing
30+ * to another overloaded method with default options for the hierarchical chunker.
31+ *
32+ * @param files the files to be processed and chunked using the hierarchical chunker
33+ * @return a {@link ChunkDocumentResponse} containing the processed chunks, optionally the
34+ * converted documents, and associated metadata
35+ */
36+ default ChunkDocumentResponse chunkFilesWithHierarchicalChunker (Path ... files ) {
37+ return chunkFilesWithHierarchicalChunker (null , files );
38+ }
39+
40+ /**
41+ * Processes and chunks the specified files into smaller, structured pieces
42+ * using a hierarchical chunker. This method utilizes a provided hierarchical
43+ * chunk request, applying additional configurations if needed during the
44+ * creation of the chunking request.
45+ *
46+ * @param request the request containing configurations and options for hierarchical
47+ * chunking. It may include settings for conversion, chunking parameters,
48+ * and optional output specifications. Can be null to use default options.
49+ * @param files the files to be processed and chunked using the hierarchical chunker.
50+ * @return a {@link ChunkDocumentResponse} containing the processed chunks, optionally the
51+ * converted documents, and associated metadata.
52+ */
53+ default ChunkDocumentResponse chunkFilesWithHierarchicalChunker (@ Nullable HierarchicalChunkDocumentRequest request , Path ... files ) {
54+ return chunkSourceWithHierarchicalChunker (createHierarchicalChunkRequest (request , files ));
55+ }
56+
57+ /**
58+ * Processes and chunks the provided document source(s) into smaller documents
59+ * using a hybrid chunking strategy. The method utilizes the specified hybrid
60+ * chunker options to split and process the input request.
61+ *
62+ * @param request the request containing the document source(s), conversion options, hybrid
63+ * chunker configurations, and optional specifications for output targets
64+ * @return a {@link ChunkDocumentResponse} containing the processed chunks, optionally the
65+ * converted document, and other relevant metadata
2466 */
2567 ChunkDocumentResponse chunkSourceWithHybridChunker (HybridChunkDocumentRequest request );
2668
69+ /**
70+ * Processes and chunks the specified files into smaller, structured pieces
71+ * using a hybrid chunking strategy. This method delegates the processing
72+ * to another overloaded method with default options for the hybrid chunker.
73+ *
74+ * @param files the files to be processed and chunked using the hybrid chunker
75+ * @return a {@link ChunkDocumentResponse} containing the processed chunks,
76+ * optionally the converted documents, and associated metadata
77+ */
78+ default ChunkDocumentResponse chunkFilesWithHybridChunker (Path ... files ) {
79+ return chunkFilesWithHybridChunker (null , files );
80+ }
81+
82+ /**
83+ * Processes and chunks the specified files into smaller, structured pieces
84+ * using a hybrid chunking strategy. The method converts the input files into
85+ * a hybrid chunk request and processes them to generate a structured representation
86+ * of the content.
87+ *
88+ * @param request the request containing configurations for processing, including
89+ * conversion options, hybrid chunking parameters, and optional
90+ * specifications for output targets. Can be null to use default options.
91+ * @param files the files to be processed and chunked using the hybrid chunking strategy.
92+ * @return a {@code ChunkDocumentResponse} containing the processed chunks, optionally the
93+ * converted documents, and associated metadata.
94+ */
95+ default ChunkDocumentResponse chunkFilesWithHybridChunker (@ Nullable HybridChunkDocumentRequest request , Path ... files ) {
96+ return chunkSourceWithHybridChunker (createHybridChunkRequest (request , files ));
97+ }
98+
2799 /**
28100 * Asynchronously processes the provided document source(s) by converting and chunking them
29101 * into smaller documents using the hierarchical chunker. This operation allows for handling
30102 * large document processing tasks without blocking the caller thread.
31103 *
32104 * @param request the request containing the document source(s) and options for hierarchical chunking
33- * @return a CompletableFuture that resolves to a {@link ChunkDocumentResponse}, which contains
105+ * @return a {@link CompletionStage} that resolves to a {@link ChunkDocumentResponse}, which contains
34106 * the processed chunks, optionally the converted document, and processing metadata
35107 */
36- CompletableFuture <ChunkDocumentResponse > chunkSourceWithHierarchicalChunkerAsync (HierarchicalChunkDocumentRequest request );
108+ CompletionStage <ChunkDocumentResponse > chunkSourceWithHierarchicalChunkerAsync (HierarchicalChunkDocumentRequest request );
109+
110+ /**
111+ * Asynchronously processes and chunks the specified files into smaller, structured pieces
112+ * using a hierarchical chunker. This method delegates the processing to another overloaded
113+ * method with default options for the hierarchical chunker, leveraging non-blocking
114+ * asynchronous execution.
115+ *
116+ * @param files the files to be processed and chunked using the hierarchical chunker
117+ * @return a {@link CompletionStage} resolving to a {@link ChunkDocumentResponse}, which
118+ * includes the processed chunks, optionally the converted documents, and associated
119+ * metadata
120+ */
121+ default CompletionStage <ChunkDocumentResponse > chunkFilesWithHierarchicalChunkerAsync (Path ... files ) {
122+ return chunkFilesWithHierarchicalChunkerAsync (null , files );
123+ }
124+
125+ /**
126+ * Asynchronously processes and chunks the specified files into smaller, structured pieces
127+ * using a hierarchical chunker. This method allows for non-blocking execution by delegating
128+ * the processing to an underlying method that handles hierarchical chunking configurations
129+ * and file chunking.
130+ *
131+ * @param request the request object containing configurations, options for hierarchical
132+ * chunking, and optional specifications for output targets. Can be null
133+ * to use default options for processing.
134+ * @param files the files to be processed and chunked using the hierarchical chunker.
135+ * @return a {@link CompletionStage} that resolves to a {@link ChunkDocumentResponse},
136+ * which includes the processed chunks, optionally the converted documents,
137+ * and associated metadata.
138+ */
139+ default CompletionStage <ChunkDocumentResponse > chunkFilesWithHierarchicalChunkerAsync (@ Nullable HierarchicalChunkDocumentRequest request , Path ... files ) {
140+ return chunkSourceWithHierarchicalChunkerAsync (createHierarchicalChunkRequest (request , files ));
141+ }
37142
38143 /**
39144 * Asynchronously processes the provided document source(s) by converting and chunking them
@@ -42,8 +147,60 @@ public interface DoclingServeChunkApi {
42147 *
43148 * @param request the request containing the document source(s), options for conversion,
44149 * hybrid chunking parameters, and optional specifications for output targets
45- * @return a CompletableFuture that resolves to a {@link ChunkDocumentResponse}, which includes
150+ * @return a {@link CompletionStage} that resolves to a {@link ChunkDocumentResponse}, which includes
46151 * the processed chunks, optionally the converted document, and relevant processing metadata
47152 */
48- CompletableFuture <ChunkDocumentResponse > chunkSourceWithHybridChunkerAsync (HybridChunkDocumentRequest request );
153+ CompletionStage <ChunkDocumentResponse > chunkSourceWithHybridChunkerAsync (HybridChunkDocumentRequest request );
154+
155+ /**
156+ * Asynchronously processes and chunks the provided files using a hybrid chunking strategy.
157+ *
158+ * @param files An array of file paths to be processed and chunked. Each path should represent
159+ * a valid file location.
160+ * @return A CompletionStage that, when completed, holds a ChunkDocumentResponse containing
161+ * the results of the chunking operation.
162+ */
163+ default CompletionStage <ChunkDocumentResponse > chunkFilesWithHybridChunkerAsync (Path ... files ) {
164+ return chunkFilesWithHybridChunkerAsync (null , files );
165+ }
166+
167+ /**
168+ * Asynchronously processes and chunks the given files using a hybrid chunking mechanism.
169+ *
170+ * @param request An optional {@code HybridChunkDocumentRequest} containing configuration details for chunking.
171+ * If {@code null}, default settings will be applied.
172+ * @param files A varargs array of {@code Path} objects representing the files to be chunked.
173+ * Must not be null or empty.
174+ * @return A {@code CompletionStage<ChunkDocumentResponse>} that completes with the resulting
175+ * {@code ChunkDocumentResponse} once the chunking operation is finished.
176+ */
177+ default CompletionStage <ChunkDocumentResponse > chunkFilesWithHybridChunkerAsync (@ Nullable HybridChunkDocumentRequest request , Path ... files ) {
178+ return chunkSourceWithHybridChunkerAsync (createHybridChunkRequest (request , files ));
179+ }
180+
181+ private HierarchicalChunkDocumentRequest createHierarchicalChunkRequest (@ Nullable HierarchicalChunkDocumentRequest request , Path ... files ) {
182+ ValidationUtils .ensureNotEmpty (files , "files" );
183+
184+ var builder = Optional .ofNullable (request )
185+ .map (HierarchicalChunkDocumentRequest ::toBuilder )
186+ .orElseGet (HierarchicalChunkDocumentRequest ::builder );
187+
188+ FileUtils .createFileSources (files )
189+ .forEach (builder ::source );
190+
191+ return builder .build ();
192+ }
193+
194+ private HybridChunkDocumentRequest createHybridChunkRequest (@ Nullable HybridChunkDocumentRequest request , Path ... files ) {
195+ ValidationUtils .ensureNotEmpty (files , "files" );
196+
197+ var builder = Optional .ofNullable (request )
198+ .map (HybridChunkDocumentRequest ::toBuilder )
199+ .orElseGet (HybridChunkDocumentRequest ::builder );
200+
201+ FileUtils .createFileSources (files )
202+ .forEach (builder ::source );
203+
204+ return builder .build ();
205+ }
49206}
0 commit comments