28
28
import java .util .ArrayList ;
29
29
import java .util .Date ;
30
30
import java .util .Locale ;
31
+ import org .apache .lucene .benchmark .Constants ;
31
32
import org .apache .lucene .benchmark .byTask .utils .Config ;
32
33
33
34
/**
@@ -50,8 +51,8 @@ private static final class DateFormatInfo {
50
51
// Holds one DateFormatInfo per thread.
private ThreadLocal <DateFormatInfo > dateFormat = new ThreadLocal <>();
51
52
// NOTE(review): presumably the directory the input files are collected from; its use is not
// visible in this section -- confirm against setConfig.
private Path dataDir = null ;
52
53
// Files handed out by getNextDocData, which picks one by index on every call.
private ArrayList <Path > inputFiles = new ArrayList <>();
53
- private int nextFile = 0 ;
54
- private int iteration = 0 ;
54
// Number of documents handed out so far, one slot per thread index. Allocated lazily by
// docCountArrInit() with getConfig().getNumThreads() slots; getNextDocData reads and
// increments the slot of the calling thread.
+ private int [] docCountArr ;
55
// Set to true once docCountArr has been allocated. getNextDocData checks it without holding
// the lock, hence volatile.
+ private volatile boolean docCountArrCreated ;
55
56
56
57
@ Override
57
58
public void setConfig (Config config ) {
@@ -100,21 +101,35 @@ public void close() throws IOException {
100
101
101
102
@ Override
102
103
public DocData getNextDocData (DocData docData ) throws NoMoreDataException , IOException {
103
- Path f = null ;
104
- String name = null ;
105
- synchronized (this ) {
106
- if (nextFile >= inputFiles .size ()) {
107
- // exhausted files, start a new round, unless forever set to false.
108
- if (!forever ) {
109
- throw new NoMoreDataException ();
110
- }
111
- nextFile = 0 ;
112
- iteration ++;
113
- }
114
- f = inputFiles .get (nextFile ++);
115
- name = f .toRealPath () + "_" + iteration ;
104
+ if (docCountArrCreated == false ) {
105
+ docCountArrInit ();
116
106
}
117
107
108
+ int threadIndexSize = Thread .currentThread ().getName ().length ();
109
+ int parallelTaskThreadSize = Constants .PARALLEL_TASK_THREAD_NAME_PREFIX .length ();
110
+
111
+ // Extract ThreadIndex from unique ThreadName which is set with '"ParallelTaskThread-"+index',
112
+ // in TaskSequence.java's doParallelTasks()
113
+ int threadIndex =
114
+ Integer .parseInt (
115
+ Thread .currentThread ()
116
+ .getName ()
117
+ .substring (parallelTaskThreadSize + 1 , threadIndexSize ));
118
+
119
+ assert (threadIndex >= 0 && threadIndex < docCountArr .length )
120
+ : "Please check threadIndex or docCountArr length" ;
121
+ int stride = threadIndex + docCountArr [threadIndex ] * docCountArr .length ;
122
+ int inFileSize = inputFiles .size ();
123
+
124
+ // Modulo Operator covers all three possible scenarios i.e. 1. If inputFiles.size() < Num Of
125
+ // Threads 2.inputFiles.size() == Num Of Threads 3.inputFiles.size() > Num Of Threads
126
+ int fileIndex = stride % inFileSize ;
127
+ int iteration = stride / inFileSize ;
128
+ docCountArr [threadIndex ]++;
129
+
130
+ Path f = inputFiles .get (fileIndex );
131
+ String name = f .toRealPath () + "_" + iteration ;
132
+
118
133
try (BufferedReader reader = Files .newBufferedReader (f , StandardCharsets .UTF_8 )) {
119
134
// First line is the date, 3rd is the title, rest is body
120
135
String dateStr = reader .readLine ();
@@ -143,7 +158,12 @@ public DocData getNextDocData(DocData docData) throws NoMoreDataException, IOExc
143
158
@ Override
144
159
public synchronized void resetInputs () throws IOException {
145
160
super .resetInputs ();
146
- nextFile = 0 ;
147
- iteration = 0 ;
161
+ }
162
+
163
+ private synchronized void docCountArrInit () {
164
+ if (docCountArrCreated == false ) {
165
+ docCountArr = new int [getConfig ().getNumThreads ()];
166
+ docCountArrCreated = true ;
167
+ }
148
168
}
149
169
}
0 commit comments