Skip to content

Commit e7bd548

Browse files
Memoization in Lex, in particular for the computation of find longest match (#149)
* new implementation of find longest match, ready for memoization, with memoization. TODO: add a lex version that uses this one * fix verification * fix verification and FIX REGEX SPEC which didn't use itself recursively properly * new maxPrefix and lex using v2 * fix compilation * move import out of specific to verification * fix benchmark commented out imports * fix bugs in the implementation, mainly calling old version of functions in recursive call positions * update, try to verify on the CI * fix 2 verif issues * fix verif * update * new versions with memoization only for derivatives for V2 * new hashing function + random json to test + profiling script example * add a and a*b lexer * less threads * implement and prove equivalent a tailrec version of longest match that will be memoized, called findLongestMatchZipperSequenceV3 * greater timeout * bigger heap * bigger heap * new tailrec version of find longest match with memoization, implemented up to the lexMem interface * changes to new caches for lexMem * remove useless import * use res instead of head.res which is ghost * allow to pass initial cache sizes * add heuristic memoizing only 1/10 if the input is larger than 1MB * use 512 as array length in the balance conc * new benchmark for a a*b * better script for profiling * parameterize the cache tradeoff (threshold and fraction k) * catch up on benchmark branch * catch up benchmark branch * ds store * catch up on benchmark branch * format * remove weird workaround of lazy val * remove commented out code * benchmark results and analysis from benchmark branch * remove smt files * from benchmarks * bring all as resource to this branch * change constants for caching to be computed within the cache class * add greater than 0 condition for cache rate * benchmark data to match the other branch * update benchmarks * newline * res files * data * new readme * remove raw data
1 parent 8864d30 commit e7bd548

File tree

452 files changed

+888389
-88126
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

452 files changed

+888389
-88126
lines changed

.github/workflows/bolts-CI-verification.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@ on:
66
- main
77
env:
88
# define Java options for both official sbt and sbt-extras
9-
JAVA_OPTS: -Dsbt.io.implicit.relative.glob.conversion=allow -Xss512M -Xms1024M -Xmx12G -XX:MaxMetaspaceSize=2G -XX:+UseCodeCacheFlushing -XX:ReservedCodeCacheSize=768M
10-
JVM_OPTS: -Dsbt.io.implicit.relative.glob.conversion=allow -Xss512M -Xms1024M -Xmx12G -XX:MaxMetaspaceSize=2G -XX:+UseCodeCacheFlushing -XX:ReservedCodeCacheSize=768M
9+
JAVA_OPTS: -Dsbt.io.implicit.relative.glob.conversion=allow -Xss512M -Xms1024M -Xmx32G -XX:MaxMetaspaceSize=2G -XX:+UseCodeCacheFlushing -XX:ReservedCodeCacheSize=768M
10+
JVM_OPTS: -Dsbt.io.implicit.relative.glob.conversion=allow -Xss512M -Xms1024M -Xmx32G -XX:MaxMetaspaceSize=2G -XX:+UseCodeCacheFlushing -XX:ReservedCodeCacheSize=768M
1111
JAVA_OPTS_TMP_DIR: /tmp/tmp_${{ github.run_id }}_${{ github.run_attempt }}
1212
jobs:
1313
bolts-verification:

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,4 @@ smt-sessions
2020
# bloop etc
2121
.bsp
2222
.scala-build
23+
lexers/regex/.DS_Store

data-structures/maps/mutablemaps/src/main/scala/com/mutablemaps/map/MutableHashMap.scala

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ import stainless.proof.check
1717
// END uncomment for verification --------------------------------------------
1818
// BEGIN imports for benchmarking -------------------------------------------
1919
// import stainless.lang.{ghost => _, decreases => _, unfold => _, _}
20-
// import com.mutablemaps.map.OptimisedChecks.*
20+
// import com.stainless.OptimisedChecks.*
2121
// import Predef.{assert => _, Ensuring => _, require => _}
2222

2323
// @tailrec
@@ -35,14 +35,13 @@ trait Hashable[K] {
3535
}
3636

3737
object MutableHashMap {
38-
3938
/** Helper method to create a new empty HashMap
4039
*
4140
* @param defaultValue
4241
* @return
4342
*/
44-
def getEmptyHashMap[K, V](defaultValue: K => V, hashF: Hashable[K]): HashMap[K, V] = {
45-
val initialSize = 16
43+
def getEmptyHashMap[K, V](defaultValue: K => V, hashF: Hashable[K], initialSize: Int = 16): HashMap[K, V] = {
44+
require(validMask(initialSize - 1))
4645
HashMap(Cell(MutableLongMap.getEmptyLongMap[List[(K, V)]]((l: Long) => Nil[(K, V)](), initialSize)), hashF, 0, defaultValue)
4746
}.ensuring (res => res.valid && res.size == 0)
4847

data-structures/maps/mutablemaps/src/main/scala/com/mutablemaps/map/MutableLongMap.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ import stainless.proof.check
1818
// END uncomment for verification --------------------------------------------
1919
// BEGIN imports for benchmarking -------------------------------------------
2020
// import stainless.lang.{ghost => _, decreases => _, unfold => _, _}
21-
// import com.mutablemaps.map.OptimisedChecks.*
21+
// import com.stainless.OptimisedChecks.*
2222
// import Predef.{assert => _, Ensuring => _, require => _}
2323

2424
// @tailrec

lexers/regex/verifiedlexer/README.md

Lines changed: 69 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,19 @@
22

33
## Setup
44

5+
### Verification
6+
7+
To verify the project, you need to install the Stainless verifier. You can find the installation instructions [on the official Github page](https://github.com/epfl-lara/stainless). Use the version 0.9.9.1.
8+
9+
The complete instructions can be found on [this page](https://epfl-lara.github.io/stainless/installation.html), but we recommend using the package manager way if your system is supported. Otherwise, you can download the release from Github.
10+
11+
To use Stainless, you need to download SMT solvers separately. For this project, we recommend using Z3 and cvc5 as the `verify.sh` script assumes these two solvers are available in your PATH.
12+
13+
### Running the project
14+
515
To run this project, you need to install the Stainless sbt plugin. To do so, follow these steps:
616

7-
1. Download the following archive: [Stainless SBT plugin download]("https://github.com/epfl-lara/stainless/releases/download/v0.9.9.1/sbt-stainless.zip")
17+
1. Download the following archive: [Stainless SBT plugin download](https://github.com/epfl-lara/stainless/releases/download/v0.9.9.1/sbt-stainless.zip)
818
2. Unzip the archive, it should contain a `project` folder and a `stainless` folder.
919
3. copy the `project/lib` folder into the `project` of this sbt project.
1020
4. copy the `stainless` folder into the root of this sbt project.
@@ -25,27 +35,76 @@ mutablemap
2535

2636
Now the project is ready to run, both the main class and the benchmarks.
2737

28-
If it does not work, please refer to [this manual]("https://epfl-lara.github.io/stainless/installation.html#usage-within-an-existing-project").
38+
If it does not work, please refer to [this manual](https://epfl-lara.github.io/stainless/installation.html#usage-within-an-existing-project).
39+
40+
## Verification
41+
42+
To verify the whole project, run the `verify.sh` script at the root of the project:
43+
44+
```bash
45+
./verify.sh
46+
```
47+
48+
This script assumes that `stainless` is available in your PATH, along with `z3` and `cvc5`; see Section Setup above for more information about how to install it.
49+
50+
### Generate report and SMT queries for analysis
51+
52+
To generate the Stainless JSON report and the SMT queries that are analyzed in the `Benchmark Data Analysis.ipynb` Jupyter notebook, run:
53+
54+
```bash
55+
./verify.sh "--json --debug=smt"
56+
```
57+
58+
### SMT Queries
59+
60+
We provide the SMT queries generated during verification in the `smt_queries` folder. These queries were generated using the command above, then filtered to keep only the query corresponding to the solver that verified the VC. Indeed, Stainless runs multiple solvers in parallel, and we are only interested in the one that actually verified the VC. Other queries are incomplete, as Stainless calls the solver multiple times for each VC, with different functions gradually unfolded, and stops as soon as one of the solvers manages to verify the VC.
2961

3062
## Run benchmarks
3163

3264
### Run all Scala benchmarks
3365

66+
#### Prepare scala files
67+
68+
Before running the benchmarks, some imports must be modified to import special versions of ghost functions that are properly erased to enable
69+
optimizations like "tailrec" to be applied by the Scala compiler. To do so, run the following command at the root of the project:
70+
71+
```bash
72+
git apply prepare_to_run_benchmark.patch
73+
```
74+
75+
This will modify the imports of all executed files. After running the benchmarks, you can revert the changes by running:
76+
77+
```bash
78+
git checkout -- .
79+
```
80+
81+
#### Execute benchmarks
82+
3483
To run all benchmarks, simply run the following command at the root of the project:
3584

3685
```bash
37-
./run-benchmarks.sh
86+
./run_benchmarks.sh
3887
```
3988

4089
This will create a `benchmark_results/raw/<current-date>` folder containing the results of the benchmarks.
4190

42-
### Run Coqlex benchmarks
91+
### Coqlex benchmarks
92+
93+
To run the Coqlex benchmarks, we use the original evaluation pipeline from the [Coqlex repository](https://gitlab.inria.fr/wouedrao/coqlex/-/tree/master).
4394

44-
To run the Coqlex benchmarks, you need to clone the Coqlex repository and run the benchmarks following their documentation. You can find the repository here: [Coqlex repository](https://gitlab.inria.fr/wouedrao/coqlex/-/tree/master).
95+
Because we added new grammars to the Coqlex benchmark suite, we provide a copy of the repository with the new lexers in `coqlex-fork/Comparison/AAStarB`. We added this `coqlex-fork/Comparison/AAStarB/` folder and modified the `Makefile` to run the benchmarks for the grammar `a` and `a*b`.
96+
97+
For the grammar `a` and `a*b`, we provide a new Coqlex lexer in `coqlex-fork/Comparison/AAStarB/Lexers/Coqlex`, and a new Verbatim++ lexer in `coqlex-fork/Comparison/AAStarB/Lexers/Verbatim`. The Coqlex lexer is built automatically when running the benchmark. The Verbatim++ lexer is generated using the Verbatim++ official repository in `Verbatim`. The lexer for the grammar `a` and `a*b` is located in `Verbatim/ExamuB/Lexer` and is compiled by running `make` in `Verbatim`. The generated `.ml` files must then be copied to `coqlex-fork/Comparison/AAStarB/Lexers/Verbatim` before running the benchmarks (we already compiled and copied the files).
98+
99+
To run the Coqlex benchmarks, follow these steps:
100+
101+
1. Install the dependencies listed in the Coqlex repository README.
102+
2. Navigate to the `coqlex-fork` folder and run `make`.
103+
3. Run the benchmarks by running `make compare_json` and `make compare_aastarb` in `./coqlex-fork/`.
45104

46105
You can also rely on the data already present in the `from_coqlex` folder, which contains the results of the Coqlex benchmarks run on our machine.
47106

48-
If you decide to run the benchmarks yourself, make sure to copy the results in the `from_coqlex/Comparison/JSON/results` folder.
107+
If you decide to run the benchmarks yourself, make sure to copy the results in the `from_coqlex/Comparison/JSON/results` and `from_coqlex/Comparison/AAStarB/results` folders.
49108

50109
### Prepare data for analysis
51110

@@ -55,8 +114,10 @@ To prepare the data for analysis, run the following command in the `benchmark_re
55114
./extract_data.sh benchmark_results/raw/<date-of-benchmark>
56115
```
57116

58-
This will process the raw logs in usable data files and move them in the `benchmark_results/latest` folder. This also write data in the correct format in the `from_coqlex/Comparison/JSON/results/ZipLex` folder to compare with the Coqlex benchmark suite. For the analysis script to work, you need to have the Coqlex results in the `from_coqlex/Comparison/JSON/results` folder for the other lexers, see previous section.
117+
This will process the raw logs into usable data files and move them into the `benchmark_results/latest` folder. This also writes data in the correct format into the `from_coqlex/Comparison/JSON/results/ZipLex` folder to compare with the Coqlex benchmark suite. For the analysis script to work, you need to have the Coqlex results in the `from_coqlex/Comparison/JSON/results` and `from_coqlex/Comparison/AAStarB/results` folders for the other lexers; see the previous section.
118+
119+
This will also extract the data from the `flex` benchmark results, which are located in `flex_benchmarks` and are run by the `run_benchmarks.sh` script.
59120

60121
### Analyze data
61122

62-
The analysis of the data is done in the `Benchmark Data Analysis.ipynb` notebook. Make sure to install the required dependencies listed in `benchmark_results/requirements.txt` using pip. `Benchmark Data Analysis.ipynb` loads the data from the `benchmark_results/latest` folder and the `from_coqlex/Comparison/JSON/results` folder to produce the analysis and plots.
123+
The analysis of the data is done in the `Benchmark Data Analysis.ipynb` notebook. Make sure to install the required dependencies listed in `benchmark_results/requirements.txt` using pip. `Benchmark Data Analysis.ipynb` loads the data from the `benchmark_results/latest` folder and the `from_coqlex` folder to produce the analysis and plots. It also analyzes the Stainless report and SMT queries generated using the `./verify.sh "--json --debug=smt"` command, if they are placed in the `benchmark_results/latest` folder.

lexers/regex/verifiedlexer/asprof-run.sh

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,15 @@
11
#!/usr/bin/env bash
22
# asprof-run.sh — minimal async-profiler wrapper (flamegraph by default)
3+
#
4+
# Example usage for the lexer:
5+
# ./asprof-run.sh --cmd "java -jar target/scala-3.7.2/ZipLex-assembly-0.1.0-SNAPSHOT.jar" -d 20 -o lexer_mem.html
36

47
set -euo pipefail
58

69
CMD=""
710
DUR="" # if empty → profile for lifetime
811
OUT="profile-$(date +%Y%m%d-%H%M%S).html"
12+
REVERSED=false # If true, pass --reverse to asprof for inverted flamegraph
913

1014
usage(){ echo "Usage: $0 --cmd \"<command>\" [-d <seconds>] [-o <out.html>]"; exit 1; }
1115

@@ -15,6 +19,7 @@ while [[ $# -gt 0 ]]; do
1519
--cmd) CMD="$2"; shift 2;;
1620
-d) DUR="$2"; shift 2;;
1721
-o) OUT="$2"; shift 2;;
22+
--reverse) REVERSED=true; shift;;
1823
-h|--help) usage;;
1924
*) echo "Unknown option: $1"; usage;;
2025
esac
@@ -42,15 +47,24 @@ trap 'asprof stop -f "$OUT" "$PID" >/dev/null 2>&1 || true' INT TERM
4247
if [[ -n "$DUR" ]]; then
4348
echo "Profiling for ${DUR}s → $OUT"
4449
if attach; then
45-
asprof -d "$DUR" -o flamegraph -f "$OUT" "$PID" || true
50+
if $REVERSED; then
51+
asprof start -o flamegraph --reverse -f "$OUT" "$PID"
52+
else
53+
asprof start -o flamegraph -f "$OUT" "$PID"
54+
fi
55+
sleep "$DUR"
4656
else
4757
echo "Target finished before attach; no profile written."
4858
fi
4959
wait "$PID" || true
5060
else
5161
echo "Profiling for process lifetime → $OUT"
5262
if attach; then
53-
asprof start -o flamegraph -f "$OUT" "$PID" || true
63+
if $REVERSED; then
64+
asprof start -o flamegraph --reverse -f "$OUT" "$PID" || true
65+
else
66+
asprof start -o flamegraph -f "$OUT" "$PID" || true
67+
fi
5468
wait "$PID" || true
5569
asprof stop -o flamegraph -f "$OUT" "$PID" >/dev/null 2>&1 || true
5670
else
@@ -59,4 +73,4 @@ else
5973
fi
6074
fi
6175

62-
echo "Done. Output: $OUT"
76+
echo "Done. Output: $OUT"

0 commit comments

Comments
 (0)