Skip to content

Commit 86dfce7

Browse files
committed
Halved preprocessor runtime.
Benchmark: on a local script of ~140,000 characters (3,305 lines), 3,000 timed runs of each implementation (preceded by one untimed run to absorb any initialization cost) averaged ~1.19 ms per run for the old code and ~0.60 ms per run for the new code.
1 parent fafc90f commit 86dfce7

File tree

1 file changed

+164
-134
lines changed

1 file changed

+164
-134
lines changed

src/main/java/com/laytonsmith/core/MethodScriptCompiler.java

Lines changed: 164 additions & 134 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
import java.util.EnumSet;
5353
import java.util.HashMap;
5454
import java.util.HashSet;
55+
import java.util.Iterator;
5556
import java.util.List;
5657
import java.util.Map;
5758
import java.util.Set;
@@ -855,136 +856,167 @@ public static TokenStream lex(String script, File file, boolean inPureMScript, b
855856
return new TokenStream(token_list, fileOptions.toString());
856857
}
857858

858-
/**
859-
* This function breaks the token stream into parts, separating the aliases/MethodScript from the command triggers
860-
*
861-
* @param tokenStream
862-
* @return
863-
* @throws ConfigCompileException
864-
*/
865-
public static List<Script> preprocess(TokenStream tokenStream) throws ConfigCompileException {
866-
if (tokenStream == null || tokenStream.isEmpty()) {
867-
return new ArrayList<>();
868-
}
869-
//First, pull out the duplicate newlines
870-
ArrayList<Token> temp = new ArrayList<>();
871-
for (int i = 0; i < tokenStream.size(); i++) {
872-
try {
873-
if (tokenStream.get(i).type.equals(TType.NEWLINE)) {
874-
temp.add(new Token(TType.NEWLINE, "\n", tokenStream.get(i).target));
875-
while (tokenStream.get(++i).type.equals(TType.NEWLINE)) {
876-
}
877-
}
878-
if (tokenStream.get(i).type != TType.WHITESPACE) {
879-
temp.add(tokenStream.get(i));
880-
}
881-
} catch (IndexOutOfBoundsException e) {
882-
}
883-
}
884-
885-
if (temp.size() > 0 && temp.get(0).type.equals(TType.NEWLINE)) {
886-
temp.remove(0);
887-
}
888-
889-
tokenStream.clear();
890-
tokenStream.addAll(temp);
891-
892-
//Handle multiline constructs
893-
ArrayList<Token> tokens1_1 = new ArrayList<>();
894-
boolean inside_multiline = false;
895-
Token thisToken = null;
896-
for (int i = 0; i < tokenStream.size(); i++) {
897-
Token prevToken = i - 1 >= tokenStream.size() ? tokenStream.get(i - 1) : new Token(TType.UNKNOWN, "", Target.UNKNOWN);
898-
thisToken = tokenStream.get(i);
899-
Token nextToken = i + 1 < tokenStream.size() ? tokenStream.get(i + 1) : new Token(TType.UNKNOWN, "", Target.UNKNOWN);
900-
//take out newlines between the = >>> and <<< tokens (also the tokens)
901-
if (thisToken.type.equals(TType.ALIAS_END) && nextToken.val().equals(">>>")) {
902-
inside_multiline = true;
903-
tokens1_1.add(thisToken);
904-
i++;
905-
continue;
906-
}
907-
if (thisToken.val().equals("<<<")) {
908-
if (!inside_multiline) {
909-
throw new ConfigCompileException("Found multiline end symbol, and no multiline start found",
910-
thisToken.target);
911-
}
912-
inside_multiline = false;
913-
continue;
914-
}
915-
if (thisToken.val().equals(">>>") && inside_multiline) {
916-
throw new ConfigCompileException("Did not expect a multiline start symbol here, are you missing a multiline end symbol above this line?", thisToken.target);
917-
}
918-
if (thisToken.val().equals(">>>") && !prevToken.type.equals(TType.ALIAS_END)) {
919-
throw new ConfigCompileException("Multiline symbol must follow the alias_end (=) symbol", thisToken.target);
920-
}
921-
922-
//If we're not in a multiline construct, or we are in it and it's not a newline, add
923-
//it
924-
if (!inside_multiline || !thisToken.type.equals(TType.NEWLINE)) {
925-
tokens1_1.add(thisToken);
926-
}
927-
}
928-
929-
assert thisToken != null;
930-
931-
if (inside_multiline) {
932-
throw new ConfigCompileException("Expecting a multiline end symbol, but your last multiline alias appears to be missing one.", thisToken.target);
933-
}
934-
935-
//take out newlines that are behind a \
936-
ArrayList<Token> tokens2 = new ArrayList<>();
937-
for (int i = 0; i < tokens1_1.size(); i++) {
938-
// For now, just remove comments
939-
if (tokens1_1.get(i).type.isComment()) {
940-
tokens1_1.remove(i);
941-
i--;
942-
continue;
943-
}
944-
if (!tokens1_1.get(i).type.equals(TType.STRING) && tokens1_1.get(i).val().equals("\\") && tokens1_1.size() > i
945-
&& tokens1_1.get(i + 1).type.equals(TType.NEWLINE)) {
946-
tokens2.add(tokens1_1.get(i));
947-
i++;
948-
continue;
949-
}
950-
tokens2.add(tokens1_1.get(i));
951-
}
952-
953-
//Now that we have all lines minified, we should be able to split
954-
//on newlines, and easily find the left and right sides
955-
List<Token> left = new ArrayList<>();
956-
List<Token> right = new ArrayList<>();
957-
List<Script> scripts = new ArrayList<>();
958-
boolean inLeft = true;
959-
for (Token t : tokens2) {
960-
if (inLeft) {
961-
if (t.type == TType.ALIAS_END) {
962-
inLeft = false;
963-
} else {
964-
left.add(t);
965-
}
966-
} else if (t.type == TType.NEWLINE) {
967-
inLeft = true;
968-
// Check for spurious symbols, which indicate an issue with the
969-
// script, but ignore any whitespace.
970-
for (int j = left.size() - 1; j >= 0; j--) {
971-
if (left.get(j).type == TType.NEWLINE) {
972-
if (j > 0 && left.get(j - 1).type != TType.WHITESPACE) {
973-
throw new ConfigCompileException("Unexpected token: " + left.get(j - 1).val(), left.get(j - 1).getTarget());
974-
}
975-
}
976-
}
977-
Script s = new Script(left, right, null, tokenStream.getFileOptions());
978-
scripts.add(s);
979-
left = new ArrayList<>();
980-
right = new ArrayList<>();
981-
} else {
982-
right.add(t);
983-
}
984-
}
985-
return scripts;
986-
}
987-
859+
/**
860+
* This function breaks the token stream into parts, separating the aliases/MethodScript from the command triggers
861+
*
862+
* @param tokenStream
863+
* @return
864+
* @throws ConfigCompileException
865+
*/
866+
public static List<Script> preprocess(TokenStream tokenStream) throws ConfigCompileException {
867+
if(tokenStream == null || tokenStream.isEmpty()) {
868+
return new ArrayList<>();
869+
}
870+
871+
// Remove leading and duplicate newlines.
872+
int index = 0;
873+
int startIndex = 0;
874+
while(startIndex < tokenStream.size() && tokenStream.get(startIndex).type == TType.NEWLINE) {
875+
startIndex++; // Skip leading newlines.
876+
}
877+
for(int i = startIndex; i < tokenStream.size(); i++) {
878+
Token token = tokenStream.get(i);
879+
if(token.type == TType.NEWLINE) {
880+
while(i + 1 < tokenStream.size() && tokenStream.get(i + 1).type == TType.NEWLINE) {
881+
i++; // Skip duplicate newlines.
882+
}
883+
tokenStream.set(index++, token);
884+
} else if(token.type != TType.WHITESPACE) {
885+
tokenStream.set(index++, token);
886+
}
887+
}
888+
for(int i = tokenStream.size() - 1; i >= index; i--) {
889+
tokenStream.remove(i); // Remove remaining handled tokens.
890+
}
891+
892+
// Handle multiline constructs.
893+
// Take out newlines between the '= >>>' and '<<<' tokens (also removing the '>>>' and '<<<' tokens).
894+
// Also remove comments and also remove newlines that are behind a '\'.
895+
boolean inside_multiline = false;
896+
Token token = null;
897+
for(int i = 0; i < tokenStream.size(); i++) {
898+
token = tokenStream.get(i);
899+
900+
switch(token.type) {
901+
case ALIAS_END: { // "=".
902+
if(i + 1 < tokenStream.size() && tokenStream.get(i + 1).type == TType.MULTILINE_START) { // "= >>>".
903+
inside_multiline = true;
904+
tokenStream.remove(i + 1); // Remove multiline start (>>>).
905+
}
906+
continue;
907+
}
908+
case MULTILINE_END: { // "<<<".
909+
910+
// Handle multiline end token (<<<) without start.
911+
if(!inside_multiline) {
912+
throw new ConfigCompileException(
913+
"Found multiline end symbol, and no multiline start found", token.target);
914+
}
915+
916+
inside_multiline = false;
917+
tokenStream.remove(i--); // Remove multiline end (<<<) and compensate for it in i.
918+
continue;
919+
}
920+
case MULTILINE_START: { // ">>>".
921+
922+
// Handle multiline start token (>>>) while already in multiline.
923+
if(inside_multiline) {
924+
throw new ConfigCompileException("Did not expect a multiline start symbol here,"
925+
+ " are you missing a multiline end symbol above this line?", token.target);
926+
}
927+
928+
// Handle multiline start token (>>>) without alias end (=) in front.
929+
if(i > 0 && tokenStream.get(i - 1).type != TType.ALIAS_END) {
930+
throw new ConfigCompileException(
931+
"Multiline symbol must follow the alias_end (=) symbol", token.target);
932+
}
933+
continue;
934+
}
935+
case NEWLINE: { // "\n".
936+
937+
// Skip newlines that are inside a multiline construct.
938+
if(inside_multiline) {
939+
tokenStream.remove(i--); // Remove newline and compensate for it in i.
940+
}
941+
continue;
942+
}
943+
944+
// Remove comments.
945+
case COMMENT:
946+
case SMART_COMMENT: {
947+
tokenStream.remove(i--); // Remove comment and compensate for it in i.
948+
continue;
949+
}
950+
default: {
951+
952+
// Remove newlines that are behind a '\'.
953+
if(token.type != TType.STRING && token.val().equals("\\") && i + 1 < tokenStream.size()
954+
&& tokenStream.get(i + 1).type == TType.NEWLINE) {
955+
tokenStream.remove(i + 1); // Remove newline.
956+
}
957+
}
958+
}
959+
}
960+
961+
assert token != null;
962+
963+
// Handle missing multiline end token.
964+
if(inside_multiline) {
965+
throw new ConfigCompileException("Expecting a multiline end symbol, but your last multiline alias appears to be missing one.", token.target);
966+
}
967+
968+
// Now that we have all lines minified, we should be able to split on newlines
969+
// and easily find the left and right sides.
970+
List<Token> left = new ArrayList<>();
971+
List<Token> right = new ArrayList<>();
972+
List<Script> scripts = new ArrayList<>();
973+
tokenLoop:
974+
for(Iterator<Token> it = tokenStream.iterator(); it.hasNext();) {
975+
Token t = it.next();
976+
977+
// Add all tokens until ALIAS_END (=) or end of stream.
978+
while(t.type != TType.ALIAS_END) {
979+
if(!it.hasNext()) {
980+
break tokenLoop; // End of stream.
981+
}
982+
left.add(t);
983+
t = it.next();
984+
}
985+
986+
// Add all tokens until NEWLINE (\n).
987+
while(t.type != TType.NEWLINE) {
988+
assert it.hasNext(); // All files end with a newline, so end of stream should be impossible here.
989+
right.add(t);
990+
t = it.next();
991+
}
992+
993+
// Create a new script for the obtained left and right if end of stream has not been reached.
994+
if(t.type == TType.NEWLINE) {
995+
996+
// Check for spurious symbols, which indicate an issue with the script, but ignore any whitespace.
997+
for(int j = left.size() - 1; j >= 0; j--) {
998+
if(left.get(j).type == TType.NEWLINE) {
999+
if(j > 0 && left.get(j - 1).type != TType.WHITESPACE) {
1000+
throw new ConfigCompileException(
1001+
"Unexpected token: " + left.get(j - 1).val(), left.get(j - 1).getTarget());
1002+
}
1003+
}
1004+
}
1005+
1006+
// Create a new script from the command descriptor (left) and code (right) and add it to the list.
1007+
Script s = new Script(left, right, null, tokenStream.getFileOptions());
1008+
scripts.add(s);
1009+
1010+
// Create new left and right array for the next script.
1011+
left = new ArrayList<>();
1012+
right = new ArrayList<>();
1013+
}
1014+
}
1015+
1016+
// Return the scripts.
1017+
return scripts;
1018+
}
1019+
9881020
/**
9891021
* Compiles the token stream into a valid ParseTree. This also includes optimization and reduction.
9901022
*
@@ -995,7 +1027,6 @@ public static List<Script> preprocess(TokenStream tokenStream) throws ConfigComp
9951027
* methods may cause compile errors. Any function that can optimize static occurrences and throws a
9961028
* {@link ConfigRuntimeException} will have that exception converted to a ConfigCompileException.
9971029
*/
998-
@SuppressWarnings("UnnecessaryContinue")
9991030
public static ParseTree compile(TokenStream stream) throws ConfigCompileException, ConfigCompileGroupException {
10001031
Set<ConfigCompileException> compilerErrors = new HashSet<>();
10011032
if (stream == null || stream.isEmpty()) {
@@ -1215,7 +1246,7 @@ public static ParseTree compile(TokenStream stream) throws ConfigCompileExceptio
12151246
throw new ConfigCompileException("Unexpected parenthesis", t.target);
12161247
}
12171248
parens--;
1218-
ParseTree function = parents.pop();
1249+
parents.pop(); // Pop function.
12191250
if (constructCount.peek().get() > 1) {
12201251
//We need to autoconcat some stuff
12211252
int stacks = constructCount.peek().get();
@@ -1654,11 +1685,10 @@ private static void link(ParseTree tree, Set<ConfigCompileException> compilerErr
16541685
// Walk the children
16551686
for (ParseTree child : tree.getChildren()) {
16561687
if (child.getData() instanceof CFunction) {
1657-
FunctionBase f = null;
16581688
if (child.getData().val().charAt(0) != '_' || child.getData().val().charAt(1) == '_') {
16591689
// This will throw an exception if the function doesn't exist.
16601690
try {
1661-
f = FunctionList.getFunction(child.getData());
1691+
FunctionList.getFunction(child.getData());
16621692
} catch (ConfigCompileException ex) {
16631693
compilerErrors.add(ex);
16641694
}

0 commit comments

Comments
 (0)