Skip to content

Commit e7bd548

Browse files
Memoization in Lex, in particular for the computation of find longest match (#149)
* new implementation of find longest match, ready for memoization, with memoization. TODO: add a lex version that uses this one * fix verification * fix verification and FIX REGEX SPEC which didn't use itself recursively properly * new maxPrefix and lex using v2 * fix compilation * move import out of specific to verification * fix benchmark commented out imports * fix bugs in the implementation, mainly calling old version of functions in recursive call positions * update, try to verify on the CI * fix 2 verif issues * fix verif * update * new versions with memoization only for derivatives for V2 * new hashing function + random json to test + profiling script example * add a and a*b lexer * less threads * implement and prove equivalent a tailrec version of longest match that will be memoized, called findLongestMatchZipperSequenceV3 * greater timeout * bigger heap * bigger heap * new tailrec version of find longest match with memoization, implemented up to the lexMem interface * changes to new caches for lexMem * remove useless import * use res instead of head.res which is ghost * allow to pass initial cache sizes * add heuristic memoizing only 1/10 if the input is larger than 1MB * use 512 as array length in the balance conc * new benchmark for a a*b * better script for profiling * parameterize the cache tradeoff (threshold and fraction k) * catch up on benchmark branch * catch up benchmark branch * ds store * catch up on benchmark branch * format * remove weird workaround of lazy val * remove commented out code * benchmark results and analysis from benchmark branch * remove smt files * from benchmarks * bring all as resource to this branch * change constants for caching to be computed within the cache class * add greater than 0 condition for cache rate * benchmark data to match the other branch * update benchmarks * newline * res files * data * new readme * remove raw data
1 parent 8864d30 commit e7bd548

File tree

452 files changed

+888389
-88126
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

452 files changed

+888389
-88126
lines changed

.github/workflows/bolts-CI-verification.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@ on:
66
- main
77
env:
88
# define Java options for both official sbt and sbt-extras
9-
JAVA_OPTS: -Dsbt.io.implicit.relative.glob.conversion=allow -Xss512M -Xms1024M -Xmx12G -XX:MaxMetaspaceSize=2G -XX:+UseCodeCacheFlushing -XX:ReservedCodeCacheSize=768M
10-
JVM_OPTS: -Dsbt.io.implicit.relative.glob.conversion=allow -Xss512M -Xms1024M -Xmx12G -XX:MaxMetaspaceSize=2G -XX:+UseCodeCacheFlushing -XX:ReservedCodeCacheSize=768M
9+
JAVA_OPTS: -Dsbt.io.implicit.relative.glob.conversion=allow -Xss512M -Xms1024M -Xmx32G -XX:MaxMetaspaceSize=2G -XX:+UseCodeCacheFlushing -XX:ReservedCodeCacheSize=768M
10+
JVM_OPTS: -Dsbt.io.implicit.relative.glob.conversion=allow -Xss512M -Xms1024M -Xmx32G -XX:MaxMetaspaceSize=2G -XX:+UseCodeCacheFlushing -XX:ReservedCodeCacheSize=768M
1111
JAVA_OPTS_TMP_DIR: /tmp/tmp_${{ github.run_id }}_${{ github.run_attempt }}
1212
jobs:
1313
bolts-verification:

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,4 @@ smt-sessions
2020
# bloop etc
2121
.bsp
2222
.scala-build
23+
lexers/regex/.DS_Store

data-structures/maps/mutablemaps/src/main/scala/com/mutablemaps/map/MutableHashMap.scala

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ import stainless.proof.check
1717
// END uncomment for verification --------------------------------------------
1818
// BEGIN imports for benchmarking -------------------------------------------
1919
// import stainless.lang.{ghost => _, decreases => _, unfold => _, _}
20-
// import com.mutablemaps.map.OptimisedChecks.*
20+
// import com.stainless.OptimisedChecks.*
2121
// import Predef.{assert => _, Ensuring => _, require => _}
2222

2323
// @tailrec
@@ -35,14 +35,13 @@ trait Hashable[K] {
3535
}
3636

3737
object MutableHashMap {
38-
3938
/** Helper method to create a new empty HashMap
4039
*
4140
* @param defaultValue
4241
* @return
4342
*/
44-
def getEmptyHashMap[K, V](defaultValue: K => V, hashF: Hashable[K]): HashMap[K, V] = {
45-
val initialSize = 16
43+
def getEmptyHashMap[K, V](defaultValue: K => V, hashF: Hashable[K], initialSize: Int = 16): HashMap[K, V] = {
44+
require(validMask(initialSize - 1))
4645
HashMap(Cell(MutableLongMap.getEmptyLongMap[List[(K, V)]]((l: Long) => Nil[(K, V)](), initialSize)), hashF, 0, defaultValue)
4746
}.ensuring (res => res.valid && res.size == 0)
4847

data-structures/maps/mutablemaps/src/main/scala/com/mutablemaps/map/MutableLongMap.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ import stainless.proof.check
1818
// END uncomment for verification --------------------------------------------
1919
// BEGIN imports for benchmarking -------------------------------------------
2020
// import stainless.lang.{ghost => _, decreases => _, unfold => _, _}
21-
// import com.mutablemaps.map.OptimisedChecks.*
21+
// import com.stainless.OptimisedChecks.*
2222
// import Predef.{assert => _, Ensuring => _, require => _}
2323

2424
// @tailrec

lexers/regex/verifiedlexer/README.md

Lines changed: 69 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,19 @@
22

33
## Setup
44

5+
### Verification
6+
7+
To verify the project, you need to install the Stainless verifier. You can find the installation instructions [on the official Github page](https://github.com/epfl-lara/stainless). Use the version 0.9.9.1.
8+
9+
The complete instructions can be found on [this page](https://epfl-lara.github.io/stainless/installation.html), but we recommend using the package manager way if your system is supported. Otherwise, you can download the release from Github.
10+
11+
To use Stainless, you need to download SMT solvers separately. For this project, we recommend using Z3 and cvc5 as the `verify.sh` script assumes these two solvers are available in your PATH.
12+
13+
### Running the project
14+
515
To run this project, you need to install the Stainless sbt plugin. To do so, follow these steps:
616

7-
1. Download the following archive: [Stainless SBT plugin download]("https://github.com/epfl-lara/stainless/releases/download/v0.9.9.1/sbt-stainless.zip")
17+
1. Download the following archive: [Stainless SBT plugin download](https://github.com/epfl-lara/stainless/releases/download/v0.9.9.1/sbt-stainless.zip)
818
2. Unzip the archive, it should contain a `project` folder and a `stainless` folder.
919
3. copy the `project/lib` folder into the `project` of this sbt project.
1020
4. copy the `stainless` folder into the root of this sbt project.
@@ -25,27 +35,76 @@ mutablemap
2535

2636
Now the project is ready to run, both the main class and the benchmarks.
2737

28-
If it does not work, please refer to [this manual]("https://epfl-lara.github.io/stainless/installation.html#usage-within-an-existing-project").
38+
If it does not work, please refer to [this manual](https://epfl-lara.github.io/stainless/installation.html#usage-within-an-existing-project).
39+
40+
## Verification
41+
42+
To verify the whole project, run the `verify.sh` script at the root of the project:
43+
44+
```bash
45+
./verify.sh
46+
```
47+
48+
This script assumes that `stainless` is available in your PATH, along with `z3` and `cvc5`; see Section Setup above for more information about how to install it.
49+
50+
### Generate report and SMT queries for analysis
51+
52+
To generate the Stainless JSON report and the SMT queries that are analyzed in the `Benchmark Data Analysis.ipynb` Jupyter notebook, run:
53+
54+
```bash
55+
./verify.sh "--json --debug=smt"
56+
```
57+
58+
### SMT Queries
59+
60+
We provide the SMT queries generated during verification in the `smt_queries` folder. These queries were generated using the command above, then filtered to keep only the query corresponding to the solver that verified the VC. Indeed, Stainless runs multiple solvers in parallel, and we are only interested in the one that actually verified the VC. Other queries are incomplete, as Stainless calls the solver multiple times for each VC, with different functions gradually unfolded, and stops as soon as one of the solvers manages to verify the VC.
2961

3062
## Run benchmarks
3163

3264
### Run all Scala benchmarks
3365

66+
#### Prepare scala files
67+
68+
Before running the benchmarks, some imports must be modified to import special versions of ghost functions that are properly erased to enable
69+
optimizations like "tailrec" to be applied by the Scala compiler. To do so, run the following command at the root of the project:
70+
71+
```bash
72+
git apply prepare_to_run_benchmark.patch
73+
```
74+
75+
This will modify the imports of all executed files. After running the benchmarks, you can revert the changes by running:
76+
77+
```bash
78+
git checkout -- .
79+
```
80+
81+
#### Execute benchmarks
82+
3483
To run all benchmarks, simply run the following command at the root of the project:
3584

3685
```bash
37-
./run-benchmarks.sh
86+
./run_benchmarks.sh
3887
```
3988

4089
This will create a `benchmark_results/raw/<current-date>` folder containing the results of the benchmarks.
4190

42-
### Run Coqlex benchmarks
91+
### Coqlex benchmarks
92+
93+
To run the Coqlex benchmarks, we use the original evaluation pipeline from the [Coqlex repository](https://gitlab.inria.fr/wouedrao/coqlex/-/tree/master).
4394

44-
To run the Coqlex benchmarks, you need to clone the Coqlex repository and run the benchmarks following their documentation. You can find the repository here: [Coqlex repository](https://gitlab.inria.fr/wouedrao/coqlex/-/tree/master).
95+
Because we added new grammars to the Coqlex benchmark suite, we provide a copy of the repository with the new lexers in `coqlex-fork/Comparison/AAStarB`. We added this `coqlex-fork/Comparison/AAStarB/` folder and modified the `Makefile` to run the benchmarks for the grammar `a` and `a*b`.
96+
97+
For the grammar `a` and `a*b`, we provide a new Coqlex lexer in `coqlex-fork/Comparison/AAStarB/Lexers/Coqlex`, and a new Verbatim++ lexer in `coqlex-fork/Comparison/AAStarB/Lexers/Verbatim`. The Coqlex lexer is built automatically when running the benchmark. The Verbatim++ lexer is generated using the Verbatim++ official repository in `Verbatim`. The lexer for the grammar `a` and `a*b` is located in `Verbatim/ExamuB/Lexer` and is compiled by running `make` in `Verbatim`. The generated `.ml` files must then be copied to `coqlex-fork/Comparison/AAStarB/Lexers/Verbatim` before running the benchmarks (we already compiled and copied the files).
98+
99+
To run the Coqlex benchmarks, follow these steps:
100+
101+
1. Install the dependencies listed in the Coqlex repository README.
102+
2. Navigate to the `coqlex-fork` folder and run `make`.
103+
3. Run the benchmarks by running `make compare_json` and `make compare_aastarb` in `./coqlex-fork/`.
45104

46105
You can also rely on the data already present in the `from_coqlex` folder, which contains the results of the Coqlex benchmarks run on our machine.
47106

48-
If you decide to run the benchmarks yourself, make sure to copy the results in the `from_coqlex/Comparison/JSON/results` folder.
107+
If you decide to run the benchmarks yourself, make sure to copy the results in the `from_coqlex/Comparison/JSON/results` and `from_coqlex/Comparison/AAStarB/results` folders.
49108

50109
### Prepare data for analysis
51110

@@ -55,8 +114,10 @@ To prepare the data for analysis, run the following command in the `benchmark_re
55114
./extract_data.sh benchmark_results/raw/<date-of-benchmark>
56115
```
57116

58-
This will process the raw logs in usable data files and move them in the `benchmark_results/latest` folder. This also write data in the correct format in the `from_coqlex/Comparison/JSON/results/ZipLex` folder to compare with the Coqlex benchmark suite. For the analysis script to work, you need to have the Coqlex results in the `from_coqlex/Comparison/JSON/results` folder for the other lexers, see previous section.
117+
This will process the raw logs into usable data files and move them into the `benchmark_results/latest` folder. This also writes data in the correct format into the `from_coqlex/Comparison/JSON/results/ZipLex` folder to compare with the Coqlex benchmark suite. For the analysis script to work, you need to have the Coqlex results in the `from_coqlex/Comparison/JSON/results` and `from_coqlex/Comparison/AAStarB/results` folders for the other lexers; see the previous section.
118+
119+
This will also extract the data from the `flex` benchmark results, which are located in `flex_benchmarks` and are run by the `run_benchmarks.sh` script.
59120

60121
### Analyze data
61122

62-
The analysis of the data is done in the `Benchmark Data Analysis.ipynb` notebook. Make sure to install the required dependencies listed in `benchmark_results/requirements.txt` using pip. `Benchmark Data Analysis.ipynb` loads the data from the `benchmark_results/latest` folder and the `from_coqlex/Comparison/JSON/results` folder to produce the analysis and plots.
123+
The analysis of the data is done in the `Benchmark Data Analysis.ipynb` notebook. Make sure to install the required dependencies listed in `benchmark_results/requirements.txt` using pip. `Benchmark Data Analysis.ipynb` loads the data from the `benchmark_results/latest` folder and the `from_coqlex` folder to produce the analysis and plots. It also analyzes the Stainless report and SMT queries generated using the `./verify.sh "--json --debug=smt"` command, if they are placed in the `benchmark_results/latest` folder.

lexers/regex/verifiedlexer/asprof-run.sh

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,15 @@
11
#!/usr/bin/env bash
22
# asprof-run.sh — minimal async-profiler wrapper (flamegraph by default)
3+
#
4+
# Example usage for the lexer:
5+
# ./asprof-run.sh --cmd "java -jar target/scala-3.7.2/ZipLex-assembly-0.1.0-SNAPSHOT.jar" -d 20 -o lexer_mem.html
36

47
set -euo pipefail
58

69
CMD=""
710
DUR="" # if empty → profile for lifetime
811
OUT="profile-$(date +%Y%m%d-%H%M%S).html"
12+
REVERSED=false # If true, pass --reverse to asprof for inverted flamegraph
913

1014
usage(){ echo "Usage: $0 --cmd \"<command>\" [-d <seconds>] [-o <out.html>]"; exit 1; }
1115

@@ -15,6 +19,7 @@ while [[ $# -gt 0 ]]; do
1519
--cmd) CMD="$2"; shift 2;;
1620
-d) DUR="$2"; shift 2;;
1721
-o) OUT="$2"; shift 2;;
22+
--reverse) REVERSED=true; shift;;
1823
-h|--help) usage;;
1924
*) echo "Unknown option: $1"; usage;;
2025
esac
@@ -42,15 +47,24 @@ trap 'asprof stop -f "$OUT" "$PID" >/dev/null 2>&1 || true' INT TERM
4247
if [[ -n "$DUR" ]]; then
4348
echo "Profiling for ${DUR}s → $OUT"
4449
if attach; then
45-
asprof -d "$DUR" -o flamegraph -f "$OUT" "$PID" || true
50+
if $REVERSED; then
51+
asprof start -o flamegraph --reverse -f "$OUT" "$PID"
52+
else
53+
asprof start -o flamegraph -f "$OUT" "$PID"
54+
fi
55+
sleep "$DUR"
4656
else
4757
echo "Target finished before attach; no profile written."
4858
fi
4959
wait "$PID" || true
5060
else
5161
echo "Profiling for process lifetime → $OUT"
5262
if attach; then
53-
asprof start -o flamegraph -f "$OUT" "$PID" || true
63+
if $REVERSED; then
64+
asprof start -o flamegraph --reverse -f "$OUT" "$PID" || true
65+
else
66+
asprof start -o flamegraph -f "$OUT" "$PID" || true
67+
fi
5468
wait "$PID" || true
5569
asprof stop -o flamegraph -f "$OUT" "$PID" >/dev/null 2>&1 || true
5670
else
@@ -59,4 +73,4 @@ else
5973
fi
6074
fi
6175

62-
echo "Done. Output: $OUT"
76+
echo "Done. Output: $OUT"

0 commit comments

Comments
 (0)