Skip to content

Commit 1cb5fd8

Browse files
authored
Allow processSequentially to accept explicit timestamps etc (#413)
* Allow processSequentially to accept explicit timestamps and fix threshold auto-adjust behavior Updated ThresholdedRandomCutForest.processSequentially to accept explicit timestamps, enabling its use in STREAMING_IMPUTE mode. Additionally, corrected threshold initialization in ThresholdedRandomCutForest constructor: the lower RCF score threshold is now set only when autoAdjust is disabled. Previously, setting this threshold regardless of autoAdjust inadvertently disabled automatic adjustments in BasicThresholder. Testing: * added UT Signed-off-by: Kaituo Li <[email protected]>
1 parent 35f4cf6 commit 1cb5fd8

File tree

13 files changed

+271
-27
lines changed

13 files changed

+271
-27
lines changed

Java/README.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ vector data point, scores the data point, and then updates the model with this
157157
point. The program output appends a column of anomaly scores to the input:
158158

159159
```text
160-
$ java -cp core/target/randomcutforest-core-4.2.0.jar com.amazon.randomcutforest.runner.AnomalyScoreRunner < ../example-data/rcf-paper.csv > example_output.csv
160+
$ java -cp core/target/randomcutforest-core-4.3.0.jar com.amazon.randomcutforest.runner.AnomalyScoreRunner < ../example-data/rcf-paper.csv > example_output.csv
161161
$ tail example_output.csv
162162
-5.0029,0.0170,-0.0057,0.8129401629464965
163163
-4.9975,-0.0102,-0.0065,0.6591046054520615
@@ -176,8 +176,8 @@ read additional usage instructions, including options for setting model
176176
hyperparameters, using the `--help` flag:
177177

178178
```text
179-
$ java -cp core/target/randomcutforest-core-4.2.0.jar com.amazon.randomcutforest.runner.AnomalyScoreRunner --help
180-
Usage: java -cp target/random-cut-forest-4.2.0.jar com.amazon.randomcutforest.runner.AnomalyScoreRunner [options] < input_file > output_file
179+
$ java -cp core/target/randomcutforest-core-4.3.0.jar com.amazon.randomcutforest.runner.AnomalyScoreRunner --help
180+
Usage: java -cp target/random-cut-forest-4.3.0.jar com.amazon.randomcutforest.runner.AnomalyScoreRunner [options] < input_file > output_file
181181
182182
Compute scalar anomaly scores from the input rows and append them to the output rows.
183183
@@ -239,14 +239,14 @@ framework. Build an executable jar containing the benchmark code by running
239239
To invoke the full benchmark suite:
240240

241241
```text
242-
% java -jar benchmark/target/randomcutforest-benchmark-4.2.0-jar-with-dependencies.jar
242+
% java -jar benchmark/target/randomcutforest-benchmark-4.3.0-jar-with-dependencies.jar
243243
```
244244

245245
The full benchmark suite takes a long time to run. You can also pass a regex at the command-line, then only matching
246246
benchmark methods will be executed.
247247

248248
```text
249-
% java -jar benchmark/target/randomcutforest-benchmark-4.2.0-jar-with-dependencies.jar RandomCutForestBenchmark\.updateAndGetAnomalyScore
249+
% java -jar benchmark/target/randomcutforest-benchmark-4.3.0-jar-with-dependencies.jar RandomCutForestBenchmark\.updateAndGetAnomalyScore
250250
```
251251

252252
[rcf-paper]: http://proceedings.mlr.press/v48/guha16.pdf

Java/benchmark/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
<parent>
77
<groupId>software.amazon.randomcutforest</groupId>
88
<artifactId>randomcutforest-parent</artifactId>
9-
<version>4.2.0</version>
9+
<version>4.3.0</version>
1010
</parent>
1111

1212
<artifactId>randomcutforest-benchmark</artifactId>

Java/core/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
<parent>
77
<groupId>software.amazon.randomcutforest</groupId>
88
<artifactId>randomcutforest-parent</artifactId>
9-
<version>4.2.0</version>
9+
<version>4.3.0</version>
1010
</parent>
1111

1212
<artifactId>randomcutforest-core</artifactId>

Java/core/src/test/java/com/amazon/randomcutforest/SampleSummaryTest.java

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -345,10 +345,8 @@ public void ParallelTest(BiFunction<float[], float[], Double> distance) {
345345
assertEquals(summary2.summaryPoints.length, summary1.summaryPoints.length,
346346
" incorrect length of typical points");
347347
// due to randomization, they might not equal
348-
assertTrue(
349-
Math.abs(clusters.size() - summary1.summaryPoints.length) <= 1,
350-
"The difference between clusters.size() and summary1.summaryPoints.length should be at most 1"
351-
);
348+
assertTrue(Math.abs(clusters.size() - summary1.summaryPoints.length) <= 1,
349+
"The difference between clusters.size() and summary1.summaryPoints.length should be at most 1");
352350
double total = clusters.stream().map(ICluster::getWeight).reduce(0.0, Double::sum);
353351
assertEquals(total, summary1.weightOfSamples, 1e-3);
354352
// parallelization can produce reordering of merges

Java/examples/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
<parent>
88
<groupId>software.amazon.randomcutforest</groupId>
99
<artifactId>randomcutforest-parent</artifactId>
10-
<version>4.2.0</version>
10+
<version>4.3.0</version>
1111
</parent>
1212

1313
<artifactId>randomcutforest-examples</artifactId>

Java/parkservices/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
<parent>
77
<groupId>software.amazon.randomcutforest</groupId>
88
<artifactId>randomcutforest-parent</artifactId>
9-
<version>4.2.0</version>
9+
<version>4.3.0</version>
1010
</parent>
1111

1212
<artifactId>randomcutforest-parkservices</artifactId>

Java/parkservices/src/main/java/com/amazon/randomcutforest/parkservices/RCFCaster.java

Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
import java.util.ArrayList;
2222
import java.util.List;
23+
import java.util.Locale;
2324
import java.util.Optional;
2425
import java.util.function.Function;
2526

@@ -246,6 +247,36 @@ public TimedRangeVector extrapolate(Calibration calibration, int horizon, boolea
246247

247248
@Override
248249
public List<AnomalyDescriptor> processSequentially(double[][] data, Function<AnomalyDescriptor, Boolean> filter) {
250+
if (data == null || data.length == 0) {
251+
return new ArrayList<>();
252+
}
253+
254+
long timestamp = preprocessor.getInternalTimeStamp();
255+
long[] timestamps = new long[data.length];
256+
for (int i = 0; i < data.length; i++) {
257+
timestamps[i] = ++timestamp;
258+
}
259+
260+
return processSequentially(data, timestamps, filter);
261+
}
262+
263+
public List<AnomalyDescriptor> processSequentially(double[][] data, long[] timestamps,
264+
Function<AnomalyDescriptor, Boolean> filter) {
265+
// Precondition checks
266+
checkArgument(filter != null, "filter must not be null");
267+
if (data != null && data.length > 0) {
268+
checkArgument(timestamps != null, "timestamps must not be null when data is non-empty");
269+
checkArgument(timestamps.length == data.length, String.format(Locale.ROOT,
270+
"timestamps length (%s) must equal data length (%s)", timestamps.length, data.length));
271+
for (int i = 1; i < timestamps.length; i++) {
272+
checkArgument(timestamps[i] > timestamps[i - 1],
273+
String.format(Locale.ROOT,
274+
"timestamps must be strictly ascending: "
275+
+ "timestamps[%s]=%s is not > timestamps[%s]=%s",
276+
i, timestamps[i], i - 1, timestamps[i - 1]));
277+
}
278+
}
279+
249280
ArrayList<AnomalyDescriptor> answer = new ArrayList<>();
250281
if (data != null) {
251282
if (data.length > 0) {
@@ -254,11 +285,12 @@ public List<AnomalyDescriptor> processSequentially(double[][] data, Function<Ano
254285
if (cacheDisabled) { // turn caching on temporarily
255286
forest.setBoundingBoxCacheFraction(1.0);
256287
}
257-
long timestamp = preprocessor.getInternalTimeStamp();
258288
int length = preprocessor.getInputLength();
259-
for (double[] point : data) {
289+
for (int i = 0; i < data.length; i++) {
290+
double[] point = data[i];
291+
checkArgument(point != null, " data should not be null ");
260292
checkArgument(point.length == length, " nonuniform lengths ");
261-
ForecastDescriptor description = new ForecastDescriptor(point, timestamp++, forecastHorizon);
293+
ForecastDescriptor description = new ForecastDescriptor(point, timestamps[i], forecastHorizon);
262294
augment(description);
263295
if (filter.apply(description)) {
264296
answer.add(description);

Java/parkservices/src/main/java/com/amazon/randomcutforest/parkservices/ThresholdedRandomCutForest.java

Lines changed: 76 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
import java.util.ArrayList;
4040
import java.util.Arrays;
4141
import java.util.List;
42+
import java.util.Locale;
4243
import java.util.Optional;
4344
import java.util.Random;
4445
import java.util.function.Function;
@@ -138,7 +139,11 @@ public ThresholdedRandomCutForest(Builder<?> builder) {
138139
lastAnomalyDescriptor = new RCFComputeDescriptor(null, 0, builder.forestMode, builder.transformMethod,
139140
builder.imputationMethod);
140141

141-
predictorCorrector.setAbsoluteThreshold(builder.lowerThreshold.orElse(DEFAULT_ABSOLUTE_THRESHOLD));
142+
// when autoAdjust is true, the lowerThreshold is dynamically calculated
143+
if (!builder.autoAdjust) {
144+
predictorCorrector.setAbsoluteThreshold(builder.lowerThreshold.orElse(DEFAULT_ABSOLUTE_THRESHOLD));
145+
}
146+
142147
predictorCorrector.setZfactor(builder.zFactor);
143148

144149
predictorCorrector.setScoreDifferencing(builder.scoreDifferencing.orElse(DEFAULT_SCORE_DIFFERENCING));
@@ -279,8 +284,7 @@ public AnomalyDescriptor process(double[] inputPoint, long timestamp, int[] miss
279284
* of the word batch -- the entire goal of this procedure is to provide
280285
* sequential processing and not standard batch processing). The procedure
281286
* avoids transfer of ephemeral transient objects for non-anomalies and thereby
282-
* can have additional benefits. At the moment the operation does not support
283-
* external timestamps.
287+
* can have additional benefits.
284288
*
285289
* @param data a vector of vectors (each of which has to have the same
286290
* inputLength)
@@ -289,6 +293,66 @@ public AnomalyDescriptor process(double[] inputPoint, long timestamp, int[] miss
289293
* @return collection of descriptors of the anomalies filtered by the condition
290294
*/
291295
public List<AnomalyDescriptor> processSequentially(double[][] data, Function<AnomalyDescriptor, Boolean> filter) {
296+
if (data == null || data.length == 0) {
297+
return new ArrayList<>();
298+
}
299+
300+
long timestamp = preprocessor.getInternalTimeStamp();
301+
long[] timestamps = new long[data.length];
302+
for (int i = 0; i < data.length; i++) {
303+
timestamps[i] = ++timestamp;
304+
}
305+
306+
return processSequentially(data, timestamps, filter);
307+
}
308+
309+
/**
310+
* the following function processes a list of vectors sequentially; the main
311+
* benefit of this invocation is the caching is persisted from one data point to
312+
* another and thus the execution is efficient. Moreover in many scenarios where
313+
* serialization deserialization is expensive then it may be of benefit of
314+
* invoking sequential process on a contiguous chunk of input (we avoid the use
315+
* of the word batch -- the entire goal of this procedure is to provide
316+
* sequential processing and not standard batch processing). The procedure
317+
* avoids transfer of ephemeral transient objects for non-anomalies and thereby
318+
* can have additional benefits. This variant accepts explicit external
319+
* timestamps supplied by the caller.
320+
*
321+
* @param data a vector of vectors (each of which has to have the same
322+
* inputLength)
323+
* @param timestamps a vector of timestamps (in the same order as the data, has
324+
* to be same length as data, and ascending)
325+
* @param filter a condition to drop descriptor (recommended filter:
326+
* anomalyGrade positive)
327+
* @return collection of descriptors of the anomalies filtered by the condition
328+
* @throws IllegalArgumentException if
329+
* <ul>
330+
* <li>data is non-null but timestamps is
331+
* null</li>
332+
* <li>timestamps.length != data.length</li>
333+
* <li>timestamps is not strictly
334+
* ascending</li>
335+
* <li>any data[i].length !=
336+
* preprocessor.getInputLength()</li>
337+
* </ul>
338+
*/
339+
public List<AnomalyDescriptor> processSequentially(double[][] data, long[] timestamps,
340+
Function<AnomalyDescriptor, Boolean> filter) {
341+
// Precondition checks
342+
checkArgument(filter != null, "filter must not be null");
343+
if (data != null && data.length > 0) {
344+
checkArgument(timestamps != null, "timestamps must not be null when data is non-empty");
345+
checkArgument(timestamps.length == data.length, String.format(Locale.ROOT,
346+
"timestamps length (%s) must equal data length (%s)", timestamps.length, data.length));
347+
for (int i = 1; i < timestamps.length; i++) {
348+
checkArgument(timestamps[i] > timestamps[i - 1],
349+
String.format(Locale.ROOT,
350+
"timestamps must be strictly ascending: "
351+
+ "timestamps[%s]=%s is not > timestamps[%s]=%s",
352+
i, timestamps[i], i - 1, timestamps[i - 1]));
353+
}
354+
}
355+
292356
ArrayList<AnomalyDescriptor> answer = new ArrayList<>();
293357

294358
if (data != null && data.length > 0) {
@@ -297,11 +361,13 @@ public List<AnomalyDescriptor> processSequentially(double[][] data, Function<Ano
297361
if (cacheDisabled) { // turn caching on temporarily
298362
forest.setBoundingBoxCacheFraction(1.0);
299363
}
300-
long timestamp = preprocessor.getInternalTimeStamp();
301364
int length = preprocessor.getInputLength();
302-
for (double[] point : data) {
365+
for (int i = 0; i < data.length; i++) {
366+
double[] point = data[i];
367+
long timestamp = timestamps[i];
368+
checkArgument(point != null, " data should not be null ");
303369
checkArgument(point.length == length, " nonuniform lengths ");
304-
AnomalyDescriptor description = new AnomalyDescriptor(point, timestamp++);
370+
AnomalyDescriptor description = new AnomalyDescriptor(point, timestamp);
305371
augment(description);
306372
if (saveDescriptor(description)) {
307373
lastAnomalyDescriptor = description.copyOf();
@@ -519,7 +585,11 @@ <P extends AnomalyDescriptor> void postProcess(P result) {
519585
reference = preprocessor.getShingledInput(shingleSize + index);
520586
result.setPastTimeStamp(preprocessor.getTimeStamp(shingleSize + index));
521587
}
588+
589+
// relative index is the source of truth. Past values always have value:
590+
// either current input or previous input.
522591
result.setPastValues(reference);
592+
523593
if (newPoint != null) {
524594
double[] values = preprocessor.getExpectedValue(index, reference, point, newPoint);
525595
if (forestMode == ForestMode.TIME_AUGMENTED) {

0 commit comments

Comments
 (0)