Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 56 additions & 49 deletions java/src/TestANTLR4.java
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,7 @@
import java.lang.reflect.Method;
import java.sql.Timestamp;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.*;

/** Parse a file or directory of files using the generated parser
* ANTLR builds from a grammar.
Expand Down Expand Up @@ -68,21 +66,18 @@ public Option(String fieldName, String name, OptionArgType argType, String descr

public String inputFilePattern = ".*\\.c";
public boolean showTokens, showFileNames, wipeDFA, wipeFileDFA, showDocTiming, buildTrees=false, nodfa=false;
public int ntimes = 1;
public static int skip = 3;


public List<Long> timings = new ArrayList<Long>();
public List<Long> liveHeapInstanceCounts = new ArrayList<Long>();
public List<Long> liveHeapSizes = new ArrayList<Long>();
public int nbatches = 5;
public int batchSize = 5;
public static int skip = 1;

public static Option[] optionDefs = {
new Option("inputFilePattern", "-files", OptionArgType.STRING, "input files; e.g., '.*\\\\.java'"),
new Option("showFileNames", "-showfiles", "show file names as they are parsed"),
new Option("showTokens", "-tokens", "show input tokens"),
new Option("showDocTiming", "-timing", "dump time in ms to parse each file"),
new Option("ntimes", "-n", OptionArgType.INT, "parse input n times w/o wiping DFA cache"),
new Option("skip", "-skip", OptionArgType.INT, "how many runs to skip to allow JIT to warm up"),
new Option("nbatches", "-nbatches", OptionArgType.INT, "run how many trials"),
new Option("batchSize", "-batchsize", OptionArgType.INT, "parse input n times w/o wiping DFA cache"),
new Option("skip", "-skip", OptionArgType.INT, "how many batches to skip to allow JIT to warm up"),
new Option("wipeDFA", "-wipedfa", "wipe DFA before each lex/parse pass"),
new Option("wipeFileDFA", "-wipefiledfa", "wipe DFA before each lex/parse of a file"),
new Option("nodfa", "-nodfa", "don't use DFA lookahead cache"),
Expand All @@ -95,8 +90,6 @@ public Option(String fieldName, String name, OptionArgType argType, String descr
Class<? extends Lexer> lexerClass;
Class<? extends Parser> parserClass;

List<InputDocument> documents;

public static void main(String[] args) throws Exception {
TestANTLR4 tester = new TestANTLR4();
if ( args.length == 0 ) { tester.help(); System.exit(0); }
Expand All @@ -116,55 +109,69 @@ public void go(String[] args) throws Exception {
}
loadLexerAndParser();
List<InputDocument> docs = load(allFiles);
long minTime = Long.MAX_VALUE;
for (int i=1; i<=ntimes; i++) {
if ( wipeDFA ) { // wipe DFA for lexer/parser before each pass?
wipe();

List<Double> batchAverages = new ArrayList<>();
for (int i = 0; i < nbatches; i++) {
System.out.printf("BATCH %d\n", i+1);
List<Long> timings = runBatch(docs, batchSize);
long minTime = Collections.min(timings);
// timing info will show results of last pass over documents
if ( showDocTiming ) {
for (InputDocument doc : docs) {
System.out.printf("%d %d %d %d %s\n",
doc.content.length, doc.time, doc.beforeDFASize, doc.afterDFASize, doc.fileName);
}
}
Triple<Long, Long, Long> stats = parseDocs(docs);
long time = stats.a;
long instances = stats.b;
long heapSize = stats.c;
minTime = Math.min(time, minTime);
timings.add((long)(time/1000.0/1000.0)); // get into ms
liveHeapInstanceCounts.add(instances);
liveHeapSizes.add(heapSize);

double mean = avg(timings);
double timingStd = std(mean, timings);
System.out.printf("average parse %.3fms, min %.3fms, stddev=%.3fms (First %d runs skipped for JIT warmup)\n", mean, (minTime / (1000.00 * 1000.00)), timingStd, skip);
batchAverages.add(mean);
}
// timing info will show results of last pass over documents
if ( showDocTiming ) {
for (InputDocument doc : docs) {
System.out.printf("%d %d %d %d %s\n",
doc.content.length, doc.time, doc.beforeDFASize, doc.afterDFASize, doc.fileName);
}

// skip first batch or a few for JIT compiler warmup
if ( batchAverages.size()> skip) {
batchAverages = batchAverages.subList(skip, batchAverages.size());
}

// skip first few for compiler
if ( timings.size()> skip) {
timings = timings.subList(skip, timings.size());
double overallAvg = avg(batchAverages);
double overallStd = std(overallAvg, batchAverages);
System.out.printf("Overall average parse %.3fms, stddev=%.3fms\n", overallAvg, overallStd);
}

/** Run a batch of trials and return the list of ms timings */
private List<Long> runBatch(List<InputDocument> docs, int batchSize) throws Exception {
List<Long> timings = new ArrayList<>();
for (int i=1; i<=batchSize; i++) {
if ( wipeDFA ) { // wipe DFA for lexer/parser before each pass?
wipe();
}
Triple<Long, Long, Long> timeStats = runTrial(docs);
long time = timeStats.a;
timings.add((long)(time/1000.0/1000.0)); // get into ms
}
double mean = avg(timings);
double timingStd = std(mean, timings);
System.out.printf("average parse %.3fms, min %.3fms, stddev=%.3fms (First %d runs skipped for JIT warmup)\n", mean, (minTime / (1000.00 * 1000.00)), timingStd, skip);
return timings;
}

public double avg(List<Long> values) {
public <T extends Number> double avg(List<T> values) {
double sum = 0.0;
for (Long v : values) {
sum += v;
for (T v : values) {
sum += v.doubleValue();
}
return sum / values.size();
}

public double std(double mean, List<Long> values) { // unbiased std dev
public <T extends Number> double std(double mean, List<T> values) { // unbiased std dev
double sum = 0.0;
for (Long v : values) {
sum += (v-mean)*(v-mean);
for (T v : values) {
double d = v.doubleValue() - mean;
sum += d * d;
}
return Math.sqrt(sum / (values.size() - 1));
}

// return num ns to parse all docs
public Triple<Long, Long, Long> parseDocs(List<InputDocument> docs) throws Exception {
/** return num ns to parse all docs */
public Triple<Long, Long, Long> runTrial(List<InputDocument> docs) throws Exception {
System.gc();
String heapFileName = dumpHeap("/tmp/test-heapdump", true);
Pair<Long, Long> beforeStats = heapStats(heapFileName);
Expand All @@ -187,11 +194,11 @@ public Triple<Long, Long, Long> parseDocs(List<InputDocument> docs) throws Excep
System.gc();
heapFileName = dumpHeap("/tmp/test-heapdump", true);
Pair<Long, Long> afterStats = heapStats(heapFileName);
Pair<Long, Long> stats = new Pair<>(afterStats.a - beforeStats.a, afterStats.b - beforeStats.b);
Pair<Long, Long> heapStats = new Pair<>(afterStats.a - beforeStats.a, afterStats.b - beforeStats.b);

System.out.printf("Parsed %d files %,d lines %,d bytes in %4dms at %,9d lines/sec %,10d chars/sec instances %,9d heap %,13d bytes\n",
docs.size(), nlines, nchar, tms, (int)(nlines / ts), (int)(nchar/ts), stats.a, stats.b);
return new Triple<>(stop - start, stats.a, stats.b);
docs.size(), nlines, nchar, tms, (int)(nlines / ts), (int)(nchar/ts), heapStats.a, heapStats.b);
return new Triple<>(stop - start, heapStats.a, heapStats.b);
}

/** Dump heap tracing (live or all) objects to a .hprof file.
Expand Down
17 changes: 9 additions & 8 deletions java/throughput.sh
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
ANTLR_JAR="`pwd`/lib/antlr-4.11.2-SNAPSHOT-complete.jar"
HEAP_JAR="`pwd`/lib/hprof-heap-0.16.jar"

N="20"
SKIP="5"
BATCHSIZE="5"
SKIP="1"
NBATCHES="20"

echo "SPARQL"
java -Xmx10G -cp "out:$ANTLR_JAR:$HEAP_JAR" TestANTLR4 Sparql query -files '.*\.txt' -n $N -skip $SKIP input/sparql
#echo "----------- SPARQL ------------"
#java -Xmx10G -cp "out:$ANTLR_JAR:$HEAP_JAR" TestANTLR4 Sparql query -files '.*\.txt' -batchsize $BATCHSIZE -skip $SKIP -nbatches $NBATCHES input/sparql

echo "Java"
java -Xmx10G -cp "out:$ANTLR_JAR:$HEAP_JAR" TestANTLR4 Java compilationUnit -files '.*\.java' -n $N -skip $SKIP grammars
echo "----------- Java ------------"
java -Xmx10G -cp "out:$ANTLR_JAR:$HEAP_JAR" TestANTLR4 Java compilationUnit -files '.*\.java' -batchsize $BATCHSIZE -skip $SKIP -nbatches $NBATCHES grammars

echo "postgresql"
java -Xmx10G -cp "out:$ANTLR_JAR:$HEAP_JAR" TestANTLR4 PostgreSQL root -files '.*\.sql' -n $N -skip $SKIP input/postgresql
#echo "----------- postgresql ------------"
#java -Xmx10G -cp "out:$ANTLR_JAR:$HEAP_JAR" TestANTLR4 PostgreSQL root -files '.*\.sql' -batchsize $BATCHSIZE -skip $SKIP -nbatches $NBATCHES input/postgresql