Skip to content

Commit 320830f

Browse files
committed
Lex + preprocess + compile optimization.
Replaced ArrayList usage with a LinkedList. This has the following effects: - get() goes from O(1) to O(n) unless an iterator is used, in which case element access remains O(1). - remove() in an iterator over the list goes from O(n) to O(1). This commit also changes all iterations over the list to iterations using an iterator, so that both element access (previously done through get()) and remove() are O(1).
1 parent 86dfce7 commit 320830f

File tree

4 files changed

+176
-85
lines changed

4 files changed

+176
-85
lines changed

src/main/java/com/laytonsmith/core/MethodScriptCompiler.java

Lines changed: 157 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,11 @@
5353
import java.util.HashMap;
5454
import java.util.HashSet;
5555
import java.util.Iterator;
56+
import java.util.LinkedList;
5657
import java.util.List;
58+
import java.util.ListIterator;
5759
import java.util.Map;
60+
import java.util.NoSuchElementException;
5861
import java.util.Set;
5962
import java.util.Stack;
6063
import java.util.concurrent.atomic.AtomicInteger;
@@ -109,6 +112,7 @@ public static TokenStream lex(String script, File file, boolean inPureMScript) t
109112
public static TokenStream lex(String script, File file, boolean inPureMScript, boolean saveAllTokens) throws ConfigCompileException {
110113
if(script.isEmpty()) {
111114
return new TokenStream(new ArrayList<>(), "");
115+
return new TokenStream(new LinkedList<>(), "");
112116
}
113117
if((int) script.charAt(0) == 65279) {
114118
// Remove the UTF-8 Byte Order Mark, if present.
@@ -119,7 +123,7 @@ public static TokenStream lex(String script, File file, boolean inPureMScript, b
119123
script = script.replaceAll("\r\n", "\n");
120124
script = script + "\n";
121125
final Set<String> keywords = KeywordList.getKeywordNames();
122-
final List<Token> token_list = new ArrayList<>();
126+
final TokenStream token_list = new TokenStream();
123127

124128
// Set our state variables.
125129
boolean state_in_quote = false;
@@ -559,21 +563,23 @@ public static TokenStream lex(String script, File file, boolean inPureMScript, b
559563
// unknown, we may be doing standalone parenthesis, so auto tack on the __autoconcat__
560564
// function.
561565
try {
562-
int count = 1;
563-
while(token_list.get(token_list.size() - count).type == TType.WHITESPACE) {
566+
int count = 0;
567+
Iterator<Token> it = token_list.descendingIterator();
568+
Token t;
569+
while((t = it.next()).type == TType.WHITESPACE) {
564570
count++;
565571
}
566-
if(token_list.get(token_list.size() - count).type == TType.UNKNOWN) {
567-
token_list.get(token_list.size() - count).type = TType.FUNC_NAME;
572+
if(t.type == TType.UNKNOWN) {
573+
t.type = TType.FUNC_NAME;
568574
// Go ahead and remove the whitespace here too, they break things.
569575
count--;
570576
for(int a = 0; a < count; a++) {
571-
token_list.remove(token_list.size() - 1);
577+
token_list.removeLast();
572578
}
573579
} else {
574580
token_list.add(new Token(TType.FUNC_NAME, "__autoconcat__", target));
575581
}
576-
} catch (IndexOutOfBoundsException e) {
582+
} catch (NoSuchElementException e) {
577583
// This is the first element on the list, so, it's another autoconcat.
578584
token_list.add(new Token(TType.FUNC_NAME, "__autoconcat__", target));
579585
}
@@ -792,31 +798,48 @@ public static TokenStream lex(String script, File file, boolean inPureMScript, b
792798

793799
// Look at the tokens and get meaning from them. Also, look for improper symbol locations
794800
// and go ahead and absorb unary +- into the token.
795-
for(int i = 0; i < token_list.size(); i++) {
796-
Token t = token_list.get(i);
797-
Token prev2 = i - 2 >= 0 ? token_list.get(i - 2) : new Token(TType.UNKNOWN, "", t.target);
798-
Token prev1 = i - 1 >= 0 ? token_list.get(i - 1) : new Token(TType.UNKNOWN, "", t.target);
799-
Token next = i + 1 < token_list.size() ? token_list.get(i + 1) : new Token(TType.UNKNOWN, "", t.target);
800-
801+
ListIterator<Token> it = token_list.listIterator(0);
802+
while(it.hasNext()) {
803+
Token t = it.next();
804+
801805
// Combine whitespace tokens into one.
802-
if(t.type == TType.WHITESPACE && next.type == TType.WHITESPACE) {
803-
t.value += next.val();
804-
token_list.remove(i + 1);
805-
i--; // rescan this token
806-
continue;
806+
if(t.type == TType.WHITESPACE && it.hasNext()) {
807+
Token next;
808+
if((next = it.next()).type == TType.WHITESPACE) {
809+
t.value += next.val();
810+
it.remove(); // Remove 'next'.
811+
} else {
812+
it.previous(); // Select 'next' <--.
813+
}
814+
it.previous(); // Select 't' <--.
815+
it.next(); // Select 't' -->.
807816
}
808817

809818
// Convert "-" + number to -number if allowed.
810-
if(t.type == TType.UNKNOWN && prev1.type.isPlusMinus() // Convert "± UNKNOWN".
811-
&& !prev2.type.isIdentifier() // Don't convert "number/string/var ± ...".
812-
&& !prev2.type.equals(TType.FUNC_END) // Don't convert "func() ± ...".
813-
&& !IVAR_PATTERN.matcher(t.val()).matches() // Don't convert "± @var".
814-
&& !VAR_PATTERN.matcher(t.val()).matches()) { // Don't convert "± $var".
815-
// It is a negative/positive number: Absorb the sign.
816-
t.value = prev1.value + t.value;
817-
token_list.remove(i - 1);
818-
i--;
819+
it.previous(); // Select 't' <--.
820+
if(it.hasPrevious()) {
821+
Token prev1 = it.previous(); // Select 'prev1' <--.
822+
if(it.hasPrevious()) {
823+
Token prev2 = it.previous(); // Select 'prev2' <--.
824+
if(t.type == TType.UNKNOWN && prev1.type.isPlusMinus() // Convert "± UNKNOWN".
825+
&& !prev2.type.isIdentifier() // Don't convert "number/string/var ± ...".
826+
&& prev2.type != TType.FUNC_END // Don't convert "func() ± ...".
827+
&& !IVAR_PATTERN.matcher(t.val()).matches() // Don't convert "± @var".
828+
&& !VAR_PATTERN.matcher(t.val()).matches()) { // Don't convert "± $var".
829+
// It is a negative/positive number: Absorb the sign.
830+
t.value = prev1.value + t.value;
831+
it.next(); // Select 'prev2' -->.
832+
it.next(); // Select 'prev1' -->.
833+
it.remove(); // Remove 'prev1'.
834+
} else {
835+
it.next(); // Select 'prev2' -->.
836+
it.next(); // Select 'prev1' -->.
837+
}
838+
} else {
839+
it.next(); // Select 'prev1' -->.
840+
}
819841
}
842+
it.next(); // Select 't' -->.
820843

821844
// Assign a type to all UNKNOWN tokens.
822845
if(t.type == TType.UNKNOWN) {
@@ -841,19 +864,30 @@ public static TokenStream lex(String script, File file, boolean inPureMScript, b
841864

842865
// Skip this check if we're not in pure mscript.
843866
if(inPureMScript) {
844-
if(t.type.isSymbol() && !t.type.isUnary() && !next.type.isUnary()) {
845-
if(prev1.type.equals(TType.FUNC_START) || prev1.type.equals(TType.COMMA)
846-
|| next.type.equals(TType.FUNC_END) || next.type.equals(TType.COMMA)
847-
|| prev1.type.isSymbol() || next.type.isSymbol()) {
848-
throw new ConfigCompileException("Unexpected symbol (" + t.val() + ")", t.getTarget());
867+
if(it.hasNext()) {
868+
Token next = it.next(); // Select 'next' -->.
869+
it.previous(); // Select 'next' <--.
870+
it.previous(); // Select 't' <--.
871+
if(t.type.isSymbol() && !t.type.isUnary() && !next.type.isUnary()) {
872+
if(it.hasPrevious()) {
873+
Token prev1 = it.previous(); // Select 'prev1' <--.
874+
if(prev1.type.equals(TType.FUNC_START) || prev1.type.equals(TType.COMMA)
875+
|| next.type.equals(TType.FUNC_END) || next.type.equals(TType.COMMA)
876+
|| prev1.type.isSymbol() || next.type.isSymbol()) {
877+
throw new ConfigCompileException("Unexpected symbol (" + t.val() + ")", t.getTarget());
878+
}
879+
it.next(); // Select 'prev1' -->.
880+
}
849881
}
882+
it.next(); // Select 't' -->.
850883
}
851884
}
852885

853886
}
854887

855-
// Return the result.
856-
return new TokenStream(token_list, fileOptions.toString());
888+
// Set file options and return the result.
889+
token_list.setFileOptions(fileOptions.toString());
890+
return token_list;
857891
}
858892

859893
/**
@@ -868,40 +902,74 @@ public static List<Script> preprocess(TokenStream tokenStream) throws ConfigComp
868902
return new ArrayList<>();
869903
}
870904

871-
// Remove leading and duplicate newlines.
872-
int index = 0;
873905
int startIndex = 0;
874-
while(startIndex < tokenStream.size() && tokenStream.get(startIndex).type == TType.NEWLINE) {
875-
startIndex++; // Skip leading newlines.
906+
// Remove leading newlines.
907+
while(!tokenStream.isEmpty() && tokenStream.getFirst().type == TType.NEWLINE) {
908+
tokenStream.removeFirst(); // Remove leading newlines.
909+
}
910+
911+
// Return an empty list if there were only newlines.
912+
if(tokenStream.isEmpty()) {
913+
return new ArrayList<>();
876914
}
877-
for(int i = startIndex; i < tokenStream.size(); i++) {
878-
Token token = tokenStream.get(i);
879-
if(token.type == TType.NEWLINE) {
880-
while(i + 1 < tokenStream.size() && tokenStream.get(i + 1).type == TType.NEWLINE) {
881-
i++; // Skip duplicate newlines.
915+
916+
// Remove whitespaces and duplicate newlines.
917+
{
918+
ListIterator<Token> it = tokenStream.listIterator(0);
919+
Token token = it.next();
920+
outerLoop:
921+
while(true) {
922+
switch(token.type) {
923+
case WHITESPACE: {
924+
it.remove(); // Remove whitespaces.
925+
if(!it.hasNext()) {
926+
break outerLoop;
927+
}
928+
token = it.next();
929+
continue outerLoop;
930+
}
931+
case NEWLINE: {
932+
while(true) {
933+
if(!it.hasNext()) {
934+
break outerLoop;
935+
} else if((token = it.next()).type == TType.NEWLINE) {
936+
it.remove(); // Remove duplicate newlines.
937+
} else {
938+
continue outerLoop;
939+
}
940+
}
941+
}
942+
default: {
943+
if(!it.hasNext()) {
944+
break outerLoop;
945+
}
946+
token = it.next();
947+
continue outerLoop;
948+
}
882949
}
883-
tokenStream.set(index++, token);
884-
} else if(token.type != TType.WHITESPACE) {
885-
tokenStream.set(index++, token);
886950
}
887951
}
888-
for(int i = tokenStream.size() - 1; i >= index; i--) {
889-
tokenStream.remove(i); // Remove remaining handled tokens.
890-
}
891952

892953
// Handle multiline constructs.
893954
// Take out newlines between the '= >>>' and '<<<' tokens (also removing the '>>>' and '<<<' tokens).
894955
// Also remove comments and also remove newlines that are behind a '\'.
895956
boolean inside_multiline = false;
957+
ListIterator<Token> it = tokenStream.listIterator(0);
896958
Token token = null;
897-
for(int i = 0; i < tokenStream.size(); i++) {
898-
token = tokenStream.get(i);
959+
while(it.hasNext()) {
960+
token = it.next();
899961

900962
switch(token.type) {
901963
case ALIAS_END: { // "=".
902-
if(i + 1 < tokenStream.size() && tokenStream.get(i + 1).type == TType.MULTILINE_START) { // "= >>>".
903-
inside_multiline = true;
904-
tokenStream.remove(i + 1); // Remove multiline start (>>>).
964+
if(it.hasNext()) {
965+
if(it.next().type == TType.MULTILINE_START) { // "= >>>".
966+
inside_multiline = true;
967+
it.remove(); // Remove multiline start (>>>).
968+
it.previous(); // Select 'token' <---.
969+
it.next(); // Select 'token' -->.
970+
} else {
971+
it.previous(); // Select 'next' <---.
972+
}
905973
}
906974
continue;
907975
}
@@ -914,7 +982,7 @@ public static List<Script> preprocess(TokenStream tokenStream) throws ConfigComp
914982
}
915983

916984
inside_multiline = false;
917-
tokenStream.remove(i--); // Remove multiline end (<<<) and compensate for it in i.
985+
it.remove(); // Remove multiline end (<<<).
918986
continue;
919987
}
920988
case MULTILINE_START: { // ">>>".
@@ -926,33 +994,41 @@ public static List<Script> preprocess(TokenStream tokenStream) throws ConfigComp
926994
}
927995

928996
// Handle multiline start token (>>>) without alias end (=) in front.
929-
if(i > 0 && tokenStream.get(i - 1).type != TType.ALIAS_END) {
997+
it.previous(); // Select 'token' <--.
998+
if(!it.hasPrevious() || it.previous().type != TType.ALIAS_END) {
930999
throw new ConfigCompileException(
9311000
"Multiline symbol must follow the alias_end (=) symbol", token.target);
9321001
}
1002+
it.next(); // Select 'prev' -->.
1003+
it.next(); // Select 'token' -->.
9331004
continue;
9341005
}
9351006
case NEWLINE: { // "\n".
9361007

9371008
// Skip newlines that are inside a multiline construct.
9381009
if(inside_multiline) {
939-
tokenStream.remove(i--); // Remove newline and compensate for it in i.
1010+
it.remove(); // Remove newline.
9401011
}
9411012
continue;
9421013
}
9431014

9441015
// Remove comments.
9451016
case COMMENT:
9461017
case SMART_COMMENT: {
947-
tokenStream.remove(i--); // Remove comment and compensate for it in i.
1018+
it.remove(); // Remove comment.
9481019
continue;
9491020
}
9501021
default: {
9511022

9521023
// Remove newlines that are behind a '\'.
953-
if(token.type != TType.STRING && token.val().equals("\\") && i + 1 < tokenStream.size()
954-
&& tokenStream.get(i + 1).type == TType.NEWLINE) {
955-
tokenStream.remove(i + 1); // Remove newline.
1024+
if(token.type != TType.STRING && token.val().equals("\\") && it.hasNext()) {
1025+
if(it.next().type == TType.NEWLINE) {
1026+
it.remove(); // Remove newline.
1027+
it.previous(); // Select 'token' <--.
1028+
it.next(); // Select 'token' -->.
1029+
} else {
1030+
it.previous(); // Select 'next' <--.
1031+
}
9561032
}
9571033
}
9581034
}
@@ -971,7 +1047,7 @@ public static List<Script> preprocess(TokenStream tokenStream) throws ConfigComp
9711047
List<Token> right = new ArrayList<>();
9721048
List<Script> scripts = new ArrayList<>();
9731049
tokenLoop:
974-
for(Iterator<Token> it = tokenStream.iterator(); it.hasNext();) {
1050+
for(it = tokenStream.listIterator(0); it.hasNext();) {
9751051
Token t = it.next();
9761052

9771053
// Add all tokens until ALIAS_END (=) or end of stream.
@@ -1039,16 +1115,17 @@ public static ParseTree compile(TokenStream stream) throws ConfigCompileExceptio
10391115
} catch (Exception e) {
10401116
unknown = Target.UNKNOWN;
10411117
}
1042-
1043-
List<Token> tempStream = new ArrayList<>(stream.size());
1044-
for (Token t : stream) {
1045-
if (!t.type.isWhitespace()) {
1046-
tempStream.add(t);
1047-
}
1118+
1119+
// Remove all newlines and whitespaces.
1120+
ListIterator<Token> it = stream.listIterator(0);
1121+
while(it.hasNext()) {
1122+
if(it.next().type.isWhitespace()) {
1123+
it.remove();
1124+
}
10481125
}
1049-
stream.clear();
1050-
stream.addAll(tempStream);
1051-
FileOptions fileOptions = stream.getFileOptions();
1126+
1127+
// Get the file options.
1128+
final FileOptions fileOptions = stream.getFileOptions();
10521129

10531130
ParseTree tree = new ParseTree(fileOptions);
10541131
tree.setData(CNull.NULL);
@@ -1080,13 +1157,14 @@ public static ParseTree compile(TokenStream stream) throws ConfigCompileExceptio
10801157

10811158
int bracketCount = 0;
10821159

1083-
for (int i = 0; i < stream.size(); i++) {
1084-
t = stream.get(i);
1085-
//Token prev2 = i - 2 >= 0 ? stream.get(i - 2) : new Token(TType.UNKNOWN, "", t.target);
1086-
Token prev1 = i - 1 >= 0 ? stream.get(i - 1) : new Token(TType.UNKNOWN, "", t.target);
1087-
Token next1 = i + 1 < stream.size() ? stream.get(i + 1) : new Token(TType.UNKNOWN, "", t.target);
1088-
Token next2 = i + 2 < stream.size() ? stream.get(i + 2) : new Token(TType.UNKNOWN, "", t.target);
1089-
Token next3 = i + 3 < stream.size() ? stream.get(i + 3) : new Token(TType.UNKNOWN, "", t.target);
1160+
// Create a Token array to iterate over, rather than using the LinkedList's O(n) get() method.
1161+
Token[] tokenArray = stream.toArray(new Token[stream.size()]);
1162+
for (int i = 0; i < tokenArray.length; i++) {
1163+
t = tokenArray[i];
1164+
Token prev1 = i - 1 >= 0 ? tokenArray[i - 1] : new Token(TType.UNKNOWN, "", t.target);
1165+
Token next1 = i + 1 < stream.size() ? tokenArray[i + 1] : new Token(TType.UNKNOWN, "", t.target);
1166+
Token next2 = i + 2 < stream.size() ? tokenArray[i + 2] : new Token(TType.UNKNOWN, "", t.target);
1167+
Token next3 = i + 3 < stream.size() ? tokenArray[i + 3] : new Token(TType.UNKNOWN, "", t.target);
10901168

10911169
// Brace handling
10921170
if (t.type == TType.LCURLY_BRACKET) {
@@ -1410,7 +1488,7 @@ public static ParseTree compile(TokenStream stream) throws ConfigCompileExceptio
14101488
if(t.value.startsWith("0m")) {
14111489
// CDecimal
14121490
String neg = "";
1413-
if(prev1.value.equals('-')) {
1491+
if(prev1.value.equals("-")) {
14141492
neg = "-";
14151493
}
14161494
c = new CDecimal(neg + t.value.substring(2) + '.' + next2.value, t.target);

src/main/java/com/laytonsmith/core/compiler/CompilerObject.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,7 @@ private Construct resolveIdentifier(Token t) throws ConfigCompileException {
211211
} else if (keywords.contains(t.val())) {
212212
return new CKeyword(t.val(), t.getTarget());
213213
} else {
214-
if (stream.fileOptions.isStrict()) {
214+
if (stream.getFileOptions().isStrict()) {
215215
throw new ConfigCompileException("Bare strings not allowed in strict mode. (" + t.val() + ")", t.getTarget());
216216
} else {
217217
return new CString(t.val(), t.getTarget());

0 commit comments

Comments
 (0)