|
15 | 15 | * Generic implementation of FileLoader. Delegates to a DocumentFileReader for reading from a set of file paths, and |
16 | 16 | * delegates to a BatchWriter for writing to MarkLogic (where that BatchWriter could use XCC, the REST API, or the |
17 | 17 | * Data Movement SDK in ML9). |
| 18 | + * |
| 19 | + * The batchSize property defaults to null, which means all files are written in one call via the BatchWriter. Setting |
| 20 | + * this means that the List of DocumentFile objects read from the DocumentFileReader will be written in batches, each |
| 21 | + * the size of the batchSize property, except for the final one that may be less than this size. |
18 | 22 | */ |
19 | 23 | public class GenericFileLoader extends LoggingObject implements FileLoader { |
20 | 24 |
|
21 | 25 | private DocumentFileReader documentFileReader; |
22 | 26 | private BatchWriter batchWriter; |
23 | 27 | private boolean waitForCompletion = true; |
24 | 28 | private boolean logFileUris = true; |
| 29 | + private Integer batchSize; |
25 | 30 |
|
26 | 31 | // These are passed on to the DefaultDocumentFileReader that is created if one isn't set |
27 | 32 | private List<FileFilter> fileFilters; |
@@ -69,20 +74,54 @@ public List<DocumentFile> loadFiles(String... paths) { |
69 | 74 |
|
70 | 75 | List<DocumentFile> documentFiles = documentFileReader.readDocumentFiles(paths); |
71 | 76 | if (documentFiles != null && !documentFiles.isEmpty()) { |
| 77 | + writeBatchOfDocuments(documentFiles, 0); |
| 78 | + if (waitForCompletion) { |
| 79 | + batchWriter.waitForCompletion(); |
| 80 | + } |
| 81 | + } |
| 82 | + return documentFiles; |
| 83 | + } |
| 84 | + |
| 85 | + /** |
| 86 | + * If batchSize is not set, then this method will load all the documents in one call to the BatchWriter. Otherwise, |
| 87 | + * this will divide up the list of documentFiles into batches matching the value of batchSize, with the last batch |
| 88 | + * possibly being less than batchSize. |
| 89 | + * |
| 90 | + * @param documentFiles |
| 91 | + * @param startPosition |
| 92 | + */ |
| 93 | + protected void writeBatchOfDocuments(List<DocumentFile> documentFiles, final int startPosition) { |
| 94 | + final int documentFilesSize = documentFiles.size(); |
| 95 | + if (startPosition >= documentFilesSize) { |
| 96 | + return; |
| 97 | + } |
| 98 | + |
| 99 | + if (batchSize != null && batchSize < 1) { |
| 100 | + batchSize = null; |
| 101 | + } |
| 102 | + |
| 103 | + // The "end" param to subList below is exclusive, so the highest valid value is the list size |
| 104 | + int endPosition = batchSize == null ? documentFilesSize : startPosition + batchSize; |
| 105 | + if (endPosition > documentFilesSize) { |
| 106 | + endPosition = documentFilesSize; |
| 107 | + } |
| 108 | + |
| 109 | + List<DocumentFile> batch = documentFiles.subList(startPosition, endPosition); |
| 110 | + if (!batch.isEmpty()) { |
72 | 111 | if (logger.isInfoEnabled()) { |
73 | | - logger.info(format("Writing %d files", documentFiles.size())); |
74 | | - if (logFileUris ) { |
75 | | - for (DocumentFile df : documentFiles) { |
| 112 | + logger.info(format("Writing %d files", batch.size())); |
| 113 | + if (logFileUris) { |
| 114 | + for (DocumentFile df : batch) { |
76 | 115 | logger.info("Writing: " + df.getUri()); |
77 | 116 | } |
78 | 117 | } |
79 | 118 | } |
80 | | - batchWriter.write(documentFiles); |
81 | | - if (waitForCompletion) { |
82 | | - batchWriter.waitForCompletion(); |
83 | | - } |
| 119 | + batchWriter.write(batch); |
| 120 | + } |
| 121 | + |
| 122 | + if (endPosition < documentFilesSize) { |
| 123 | + writeBatchOfDocuments(documentFiles, endPosition); |
84 | 124 | } |
85 | | - return documentFiles; |
86 | 125 | } |
87 | 126 |
|
88 | 127 | /** |
@@ -228,4 +267,8 @@ public List<FileFilter> getFileFilters() { |
228 | 267 | public List<DocumentFileProcessor> getDocumentFileProcessors() { |
229 | 268 | return documentFileProcessors; |
230 | 269 | } |
| 270 | + |
| 271 | + public void setBatchSize(Integer batchSize) { |
| 272 | + this.batchSize = batchSize; |
| 273 | + } |
231 | 274 | } |
0 commit comments