3939import java .util .ArrayList ;
4040import java .util .Arrays ;
4141import java .util .List ;
42+ import java .util .Locale ;
4243import java .util .Optional ;
4344import java .util .Random ;
4445import java .util .function .Function ;
@@ -138,7 +139,11 @@ public ThresholdedRandomCutForest(Builder<?> builder) {
138139 lastAnomalyDescriptor = new RCFComputeDescriptor (null , 0 , builder .forestMode , builder .transformMethod ,
139140 builder .imputationMethod );
140141
141- predictorCorrector .setAbsoluteThreshold (builder .lowerThreshold .orElse (DEFAULT_ABSOLUTE_THRESHOLD ));
142+ // when autoAdjust is true, the lowerThreshold is dynamically calculated
143+ if (!builder .autoAdjust ) {
144+ predictorCorrector .setAbsoluteThreshold (builder .lowerThreshold .orElse (DEFAULT_ABSOLUTE_THRESHOLD ));
145+ }
146+
142147 predictorCorrector .setZfactor (builder .zFactor );
143148
144149 predictorCorrector .setScoreDifferencing (builder .scoreDifferencing .orElse (DEFAULT_SCORE_DIFFERENCING ));
@@ -279,8 +284,7 @@ public AnomalyDescriptor process(double[] inputPoint, long timestamp, int[] miss
279284 * of the word batch -- the entire goal of this procedure is to provide
280285 * sequential processing and not standard batch processing). The procedure
281286 * avoids transfer of ephemeral transient objects for non-anomalies and thereby
282- * can have additional benefits. At the moment the operation does not support
283- * external timestamps.
287+ * can have additional benefits.
284288 *
285289 * @param data a vector of vectors (each of which has to have the same
286290 * inputLength)
@@ -289,6 +293,66 @@ public AnomalyDescriptor process(double[] inputPoint, long timestamp, int[] miss
289293 * @return collection of descriptors of the anomalies filtered by the condition
290294 */
291295 public List <AnomalyDescriptor > processSequentially (double [][] data , Function <AnomalyDescriptor , Boolean > filter ) {
296+ if (data == null || data .length == 0 ) {
297+ return new ArrayList <>();
298+ }
299+
300+ long timestamp = preprocessor .getInternalTimeStamp ();
301+ long [] timestamps = new long [data .length ];
302+ for (int i = 0 ; i < data .length ; i ++) {
303+ timestamps [i ] = ++timestamp ;
304+ }
305+
306+ return processSequentially (data , timestamps , filter );
307+ }
308+
309+ /**
310+ * the following function processes a list of vectors sequentially; the main
311+ * benefit of this invocation is the caching is persisted from one data point to
312+ * another and thus the execution is efficient. Moreover in many scenarios where
313+ * serialization deserialization is expensive then it may be of benefit of
314+ * invoking sequential process on a contiguous chunk of input (we avoid the use
315+ * of the word batch -- the entire goal of this procedure is to provide
316+ * sequential processing and not standard batch processing). The procedure
317+ * avoids transfer of ephemeral transient objects for non-anomalies and thereby
318+ * can have additional benefits. This overload uses the externally supplied
319+ * timestamps, one per data point.
320+ *
321+ * @param data a vector of vectors (each of which has to have the same
322+ * inputLength)
323+ * @param timestamps a vector of timestamps (in the same order as the data, has
324+ * to be same length as data, and strictly ascending)
325+ * @param filter a condition to drop descriptor (recommended filter:
326+ * anomalyGrade positive)
327+ * @return collection of descriptors of the anomalies filtered by the condition
328+ * @throws IllegalArgumentException if
329+ * <ul>
330+ * <li>filter is null</li>
331+ * <li>data is non-empty but timestamps is null</li>
332+ * <li>timestamps.length != data.length</li>
333+ * <li>timestamps is not strictly
334+ * ascending</li>
335+ * <li>any data[i].length !=
336+ * preprocessor.getInputLength()</li>
337+ * </ul>
338+ */
339+ public List <AnomalyDescriptor > processSequentially (double [][] data , long [] timestamps ,
340+ Function <AnomalyDescriptor , Boolean > filter ) {
341+ // Precondition checks
342+ checkArgument (filter != null , "filter must not be null" );
343+ if (data != null && data .length > 0 ) {
344+ checkArgument (timestamps != null , "timestamps must not be null when data is non-empty" );
345+ checkArgument (timestamps .length == data .length , String .format (Locale .ROOT ,
346+ "timestamps length (%s) must equal data length (%s)" , timestamps .length , data .length ));
347+ for (int i = 1 ; i < timestamps .length ; i ++) {
348+ checkArgument (timestamps [i ] > timestamps [i - 1 ],
349+ String .format (Locale .ROOT ,
350+ "timestamps must be strictly ascending: "
351+ + "timestamps[%s]=%s is not > timestamps[%s]=%s" ,
352+ i , timestamps [i ], i - 1 , timestamps [i - 1 ]));
353+ }
354+ }
355+
292356 ArrayList <AnomalyDescriptor > answer = new ArrayList <>();
293357
294358 if (data != null && data .length > 0 ) {
@@ -297,11 +361,13 @@ public List<AnomalyDescriptor> processSequentially(double[][] data, Function<Ano
297361 if (cacheDisabled ) { // turn caching on temporarily
298362 forest .setBoundingBoxCacheFraction (1.0 );
299363 }
300- long timestamp = preprocessor .getInternalTimeStamp ();
301364 int length = preprocessor .getInputLength ();
302- for (double [] point : data ) {
365+ for (int i = 0 ; i < data .length ; i ++) {
366+ double [] point = data [i ];
367+ long timestamp = timestamps [i ];
368+ checkArgument (point != null , " data should not be null " );
303369 checkArgument (point .length == length , " nonuniform lengths " );
304- AnomalyDescriptor description = new AnomalyDescriptor (point , timestamp ++ );
370+ AnomalyDescriptor description = new AnomalyDescriptor (point , timestamp );
305371 augment (description );
306372 if (saveDescriptor (description )) {
307373 lastAnomalyDescriptor = description .copyOf ();
@@ -519,7 +585,11 @@ <P extends AnomalyDescriptor> void postProcess(P result) {
519585 reference = preprocessor .getShingledInput (shingleSize + index );
520586 result .setPastTimeStamp (preprocessor .getTimeStamp (shingleSize + index ));
521587 }
588+
589+ // relative index is the source of truth. Past values always have value:
590+ // either current input or previous input.
522591 result .setPastValues (reference );
592+
523593 if (newPoint != null ) {
524594 double [] values = preprocessor .getExpectedValue (index , reference , point , newPoint );
525595 if (forestMode == ForestMode .TIME_AUGMENTED ) {
0 commit comments