Skip to content
This repository was archived by the owner on Sep 16, 2024. It is now read-only.

Commit e7f6bff

Browse files
committed
#88 AssetFileLoader now supports a batch size
1 parent 00ccc23 commit e7f6bff

File tree

2 files changed

+74
-8
lines changed

2 files changed

+74
-8
lines changed

src/main/java/com/marklogic/client/ext/file/GenericFileLoader.java

Lines changed: 51 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,18 @@
1515
* Generic implementation of FileLoader. Delegates to a DocumentFileReader for reading from a set of file paths, and
1616
* delegates to a BatchWriter for writing to MarkLogic (where that BatchWriter could use XCC, the REST API, or the
1717
* Data Movement SDK in ML9).
18+
*
19+
* The batchSize property defaults to null, which means all files are written in one call via the BatchWriter. Setting
20+
* this means that the List of DocumentFile objects read from the DocumentFileReader will be written in batches, each
21+
* the size of the batchSize property, except for the final one, which may be smaller. A batchSize less than 1 is treated as if it were not set.
1822
*/
1923
public class GenericFileLoader extends LoggingObject implements FileLoader {
2024

2125
private DocumentFileReader documentFileReader;
2226
private BatchWriter batchWriter;
2327
private boolean waitForCompletion = true;
2428
private boolean logFileUris = true;
29+
private Integer batchSize;
2530

2631
// These are passed on to the DefaultDocumentFileReader that is created if one isn't set
2732
private List<FileFilter> fileFilters;
@@ -69,20 +74,54 @@ public List<DocumentFile> loadFiles(String... paths) {
6974

7075
List<DocumentFile> documentFiles = documentFileReader.readDocumentFiles(paths);
7176
if (documentFiles != null && !documentFiles.isEmpty()) {
77+
writeBatchOfDocuments(documentFiles, 0);
78+
if (waitForCompletion) {
79+
batchWriter.waitForCompletion();
80+
}
81+
}
82+
return documentFiles;
83+
}
84+
85+
/**
86+
* If batchSize is not set, then this method will load all the documents in one call to the BatchWriter. Otherwise,
87+
* this will divide up the list of documentFiles into batches matching the value of batchSize, with the last batch
88+
* possibly being less than batchSize.
89+
*
90+
* @param documentFiles the complete list of files to write; each batch is a subList of this list
91+
* @param startPosition the index within documentFiles at which the next batch begins; the initial call passes 0
92+
*/
93+
protected void writeBatchOfDocuments(List<DocumentFile> documentFiles, final int startPosition) {
94+
final int documentFilesSize = documentFiles.size();
95+
if (startPosition >= documentFilesSize) {
96+
return;
97+
}
98+
99+
if (batchSize != null && batchSize < 1) {
100+
batchSize = null;
101+
}
102+
103+
// The "end" param to subList below is exclusive, so the highest valid value is the list size
104+
int endPosition = batchSize == null ? documentFilesSize : startPosition + batchSize;
105+
if (endPosition > documentFilesSize) {
106+
endPosition = documentFilesSize;
107+
}
108+
109+
List<DocumentFile> batch = documentFiles.subList(startPosition, endPosition);
110+
if (!batch.isEmpty()) {
72111
if (logger.isInfoEnabled()) {
73-
logger.info(format("Writing %d files", documentFiles.size()));
74-
if (logFileUris ) {
75-
for (DocumentFile df : documentFiles) {
112+
logger.info(format("Writing %d files", batch.size()));
113+
if (logFileUris) {
114+
for (DocumentFile df : batch) {
76115
logger.info("Writing: " + df.getUri());
77116
}
78117
}
79118
}
80-
batchWriter.write(documentFiles);
81-
if (waitForCompletion) {
82-
batchWriter.waitForCompletion();
83-
}
119+
batchWriter.write(batch);
120+
}
121+
122+
if (endPosition < documentFilesSize) {
123+
writeBatchOfDocuments(documentFiles, endPosition);
84124
}
85-
return documentFiles;
86125
}
87126

88127
/**
@@ -228,4 +267,8 @@ public List<FileFilter> getFileFilters() {
228267
public List<DocumentFileProcessor> getDocumentFileProcessors() {
229268
return documentFileProcessors;
230269
}
270+
271+
public void setBatchSize(Integer batchSize) {
272+
this.batchSize = batchSize;
273+
}
231274
}

src/test/java/com/marklogic/client/ext/modulesloader/impl/LoadModulesTest.java

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,21 @@ public void replaceTokens() {
8585
assertTrue(transformText.contains("xdmp:log(\"hello-world\")"));
8686
}
8787

88+
@Test
89+
public void customBatchSize() {
90+
initializeModulesLoaderWithAssetBatchSize(2);
91+
verifyModuleCountWithPattern(".*/ext/.*", "Should load every file", 7);
92+
}
93+
94+
/**
95+
* Verifies that an invalid batch size (anything less than 1) is ignored, so that every file is still loaded.
96+
*/
97+
@Test
98+
public void invalidBatchSize() {
99+
initializeModulesLoaderWithAssetBatchSize(-1);
100+
verifyModuleCountWithPattern(".*/ext/.*", "Should load every file", 7);
101+
}
102+
88103
@Test
89104
public void withFilenamePattern() {
90105
verifyModuleCountWithPattern(".*options.*(xml)", "Should only load the single XML options file", 1);
@@ -160,4 +175,12 @@ private void assertModuleExists(String uri) {
160175
modulesClient.newServerEval().xquery(String.format("fn:doc-available('%s')", uri)).evalAs(String.class)
161176
);
162177
}
178+
179+
private void initializeModulesLoaderWithAssetBatchSize(int batchSize) {
180+
AssetFileLoader assetFileLoader = new AssetFileLoader(modulesClient);
181+
assetFileLoader.setBatchSize(batchSize);
182+
modulesLoader = new DefaultModulesLoader(assetFileLoader);
183+
modulesLoader.setModulesManager(null);
184+
}
185+
163186
}

0 commit comments

Comments
 (0)