Skip to content

Commit f1d54f7

Browse files
authored
Parallel processing (#132)
* Added an explicit Flush Task to flush data at Thread level once it completes the processing * Included explicit flush per Thread level * Done changes for parallel processing * Removed extra brace * Removed unused variable * Removed unused variable initialization * Did the required formatting * Refactored the code and added required comments & checks
1 parent db26215 commit f1d54f7

File tree

4 files changed

+61
-18
lines changed

4 files changed

+61
-18
lines changed

lucene/benchmark/src/java/org/apache/lucene/benchmark/Constants.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,4 +26,6 @@ public class Constants {
2626
public static Boolean[] BOOLEANS = new Boolean[] {Boolean.FALSE, Boolean.TRUE};
2727

2828
public static final int DEFAULT_MAXIMUM_DOCUMENTS = Integer.MAX_VALUE;
29+
30+
public static final String PARALLEL_TASK_THREAD_NAME_PREFIX = "ParallelTaskThread";
2931
}

lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersContentSource.java

Lines changed: 37 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import java.util.ArrayList;
2929
import java.util.Date;
3030
import java.util.Locale;
31+
import org.apache.lucene.benchmark.Constants;
3132
import org.apache.lucene.benchmark.byTask.utils.Config;
3233

3334
/**
@@ -50,8 +51,8 @@ private static final class DateFormatInfo {
5051
private ThreadLocal<DateFormatInfo> dateFormat = new ThreadLocal<>();
5152
private Path dataDir = null;
5253
private ArrayList<Path> inputFiles = new ArrayList<>();
53-
private int nextFile = 0;
54-
private int iteration = 0;
54+
private int[] docCountArr;
55+
private volatile boolean docCountArrCreated;
5556

5657
@Override
5758
public void setConfig(Config config) {
@@ -100,21 +101,35 @@ public void close() throws IOException {
100101

101102
@Override
102103
public DocData getNextDocData(DocData docData) throws NoMoreDataException, IOException {
103-
Path f = null;
104-
String name = null;
105-
synchronized (this) {
106-
if (nextFile >= inputFiles.size()) {
107-
// exhausted files, start a new round, unless forever set to false.
108-
if (!forever) {
109-
throw new NoMoreDataException();
110-
}
111-
nextFile = 0;
112-
iteration++;
113-
}
114-
f = inputFiles.get(nextFile++);
115-
name = f.toRealPath() + "_" + iteration;
104+
if (docCountArrCreated == false) {
105+
docCountArrInit();
116106
}
117107

108+
int threadIndexSize = Thread.currentThread().getName().length();
109+
int parallelTaskThreadSize = Constants.PARALLEL_TASK_THREAD_NAME_PREFIX.length();
110+
111+
// Extract ThreadIndex from unique ThreadName which is set with '"ParallelTaskThread-"+index',
112+
// in TaskSequence.java's doParallelTasks()
113+
int threadIndex =
114+
Integer.parseInt(
115+
Thread.currentThread()
116+
.getName()
117+
.substring(parallelTaskThreadSize + 1, threadIndexSize));
118+
119+
assert (threadIndex >= 0 && threadIndex < docCountArr.length)
120+
: "Please check threadIndex or docCountArr length";
121+
int stride = threadIndex + docCountArr[threadIndex] * docCountArr.length;
122+
int inFileSize = inputFiles.size();
123+
124+
// Modulo Operator covers all three possible scenarios i.e. 1. If inputFiles.size() < Num Of
125+
// Threads 2. inputFiles.size() == Num Of Threads 3. inputFiles.size() > Num Of Threads
126+
int fileIndex = stride % inFileSize;
127+
int iteration = stride / inFileSize;
128+
docCountArr[threadIndex]++;
129+
130+
Path f = inputFiles.get(fileIndex);
131+
String name = f.toRealPath() + "_" + iteration;
132+
118133
try (BufferedReader reader = Files.newBufferedReader(f, StandardCharsets.UTF_8)) {
119134
// First line is the date, 3rd is the title, rest is body
120135
String dateStr = reader.readLine();
@@ -143,7 +158,12 @@ public DocData getNextDocData(DocData docData) throws NoMoreDataException, IOExc
143158
@Override
144159
public synchronized void resetInputs() throws IOException {
145160
super.resetInputs();
146-
nextFile = 0;
147-
iteration = 0;
161+
}
162+
163+
private synchronized void docCountArrInit() {
164+
if (docCountArrCreated == false) {
165+
docCountArr = new int[getConfig().getNumThreads()];
166+
docCountArrCreated = true;
167+
}
148168
}
149169
}

lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/TaskSequence.java

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import java.util.ArrayList;
2121
import java.util.List;
2222
import java.util.Locale;
23+
import org.apache.lucene.benchmark.Constants;
2324
import org.apache.lucene.benchmark.byTask.PerfRunData;
2425
import org.apache.lucene.benchmark.byTask.feeds.NoMoreDataException;
2526
import org.apache.lucene.benchmark.byTask.stats.TaskStats;
@@ -340,12 +341,23 @@ private int doParallelTasks() throws Exception {
340341

341342
initTasksArray();
342343
ParallelTask t[] = runningParallelTasks = new ParallelTask[repetitions * tasks.size()];
344+
// Get number of parallel threads from algo file and set it to use in ReutersContentSource.java's
345+
// docCountArrInit()
346+
this.getRunData().getConfig().setNumThreads(t.length);
343347
// prepare threads
344348
int index = 0;
345349
for (int k = 0; k < repetitions; k++) {
346350
for (int i = 0; i < tasksArray.length; i++) {
347351
final PerfTask task = tasksArray[i].clone();
348-
t[index++] = new ParallelTask(task);
352+
t[index] = new ParallelTask(task);
353+
// Setting unique ThreadName with index value which is used in ReutersContentSource.java's
354+
// getNextDocData(). Please make changes
355+
// in ReutersContentSource.java's getNextDocData() for
356+
// Integer.parseInt(Thread.currentThread().getName().substring(parallelTaskThreadSize + 1,
357+
// threadIndexSize))
358+
// before making any modifications here
359+
t[index].setName(Constants.PARALLEL_TASK_THREAD_NAME_PREFIX + "-" + index);
360+
index++;
349361
}
350362
}
351363
// run threads

lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Config.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ public class Config {
5454
private HashMap<String, Object> valByRound = new HashMap<>();
5555
private HashMap<String, String> colForValByRound = new HashMap<>();
5656
private String algorithmText;
57+
private int numThreads = 1;
5758

5859
/**
5960
* Read both algorithm and config properties.
@@ -113,6 +114,14 @@ public Config(Properties props) {
113114
}
114115
}
115116

117+
public void setNumThreads(int numThreads) {
118+
this.numThreads = numThreads;
119+
}
120+
121+
public int getNumThreads() {
122+
return numThreads;
123+
}
124+
116125
@SuppressWarnings({"unchecked", "rawtypes"})
117126
private void printProps() {
118127
System.out.println("------------> config properties:");

0 commit comments

Comments
 (0)