Skip to content

Optimize parsing of PHP files #1427

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
/*
* SonarQube PHP Plugin
* Copyright (C) 2010-2025 SonarSource SA
* mailto:info AT sonarsource DOT com
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the Sonar Source-Available License Version 1, as published by SonarSource SA.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the Sonar Source-Available License for more details.
*
* You should have received a copy of the Sonar Source-Available License
* along with this program; if not, see https://sonarsource.com/license/ssal/
*/
package org.sonar.php.parser;

import org.sonar.sslr.internal.matchers.Matcher;
import org.sonar.sslr.internal.vm.Machine;
import org.sonar.sslr.internal.vm.NativeExpression;
import org.sonar.sslr.internal.vm.PatternExpression;
import org.sonar.sslr.internal.vm.StringExpression;

/**
 * This is a variant of {@link StringExpression} which does case-insensitive
 * checks to avoid more expensive regex checks that would otherwise be done
 * through {@link PatternExpression}.
 *
 * <p>Only the ASCII letters {@code A-Z} are folded to lower case, which is the
 * behaviour of a {@code (?i)} regular expression compiled without
 * {@code UNICODE_CASE}. Unicode-aware folding would additionally accept
 * look-alike characters (for example U+212A KELVIN SIGN for {@code k}) that
 * such a regular expression rejects.
 */
public class CaseInsensitiveStringExpression extends NativeExpression implements Matcher {

  /** Expected text exactly as given to the constructor; used by {@link #toString()}. */
  private final String string;

  /** ASCII-lower-cased characters of {@link #string}, folded once instead of on every match attempt. */
  private final char[] lowerCased;

  /**
   * @param string the text to match, compared while ignoring the case of ASCII letters;
   *               must not be {@code null}
   */
  public CaseInsensitiveStringExpression(String string) {
    this.string = string;
    this.lowerCased = new char[string.length()];
    for (int i = 0; i < lowerCased.length; i++) {
      lowerCased[i] = toAsciiLowerCase(string.charAt(i));
    }
  }

  /**
   * Compares the leading characters exposed by {@code machine} with the expected text.
   * On a mismatch, or when {@code machine.length()} is smaller than the expected text,
   * the machine is asked to backtrack. Otherwise a leaf node spanning the expected
   * length is created and the machine jumps by one.
   */
  @Override
  public void execute(Machine machine) {
    int expectedLength = lowerCased.length;
    if (machine.length() < expectedLength) {
      machine.backtrack();
      return;
    }
    for (int i = 0; i < expectedLength; i++) {
      if (toAsciiLowerCase(machine.charAt(i)) != lowerCased[i]) {
        machine.backtrack();
        return;
      }
    }
    machine.createLeafNode(this, expectedLength);
    machine.jump(1);
  }

  @Override
  public String toString() {
    return "String " + string;
  }

  /** Maps {@code A-Z} to {@code a-z} and returns every other character unchanged. */
  private static char toAsciiLowerCase(char c) {
    return (c >= 'A' && c <= 'Z') ? (char) (c + ('a' - 'A')) : c;
  }

}
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,7 @@ public static void lexical(LexerlessGrammarBuilder b) {

b.rule(EOF).is(b.token(GenericTokenType.EOF, b.endOfInput())).skip();

b.rule(NULL).is(word(b, "NULL")).skip();
b.rule(NULL).is(word(b, "null")).skip();
b.rule(CLASS_CONSTANT).is(word(b, "__CLASS__")).skip();
b.rule(FILE_CONSTANT).is(word(b, "__FILE__")).skip();
b.rule(DIR_CONSTANT).is(word(b, "__DIR__")).skip();
Expand All @@ -340,41 +340,41 @@ public static void lexical(LexerlessGrammarBuilder b) {
b.rule(TRAIT_CONSTANT).is(word(b, "__TRAIT__")).skip();
b.rule(ENUM).is(word(b, "enum")).skip();

b.rule(BOOLEAN_LITERAL).is(b.firstOf(word(b, "TRUE"), word(b, "FALSE")));
b.rule(BOOLEAN_LITERAL).is(b.firstOf(word(b, "true"), word(b, "false")));

b.rule(NEXT_IS_DOLLAR).is(b.next(PHPPunctuator.DOLLAR));
b.rule(VARIABLE_VARIABLE_DOLLAR).is(PHPPunctuator.DOLLAR, b.nextNot(b.firstOf(IDENTIFIER, KEYWORDS, PHPPunctuator.LCURLYBRACE)));

b.rule(ISSET).is(word(b, "ISSET")).skip();
b.rule(EMPTY).is(word(b, "EMPTY")).skip();
b.rule(INCLUDE_ONCE).is(word(b, "INCLUDE_ONCE")).skip();
b.rule(INCLUDE).is(word(b, "INCLUDE")).skip();
b.rule(EVAL).is(word(b, "EVAL")).skip();
b.rule(REQUIRE_ONCE).is(word(b, "REQUIRE_ONCE")).skip();
b.rule(REQUIRE).is(word(b, "REQUIRE")).skip();
b.rule(CLONE).is(word(b, "CLONE")).skip();
b.rule(PRINT).is(word(b, "PRINT")).skip();

b.rule(GET).is(word(b, "GET")).skip();
b.rule(SET).is(word(b, "SET")).skip();

b.rule(SELF).is(word(b, "SELF")).skip();
b.rule(PARENT).is(word(b, "PARENT")).skip();

b.rule(MIXED).is(word(b, "MIXED")).skip();
b.rule(INTEGER).is(word(b, "INTEGER")).skip();
b.rule(INT).is(word(b, "INT")).skip();
b.rule(DOUBLE).is(word(b, "DOUBLE")).skip();
b.rule(FLOAT).is(word(b, "FLOAT")).skip();
b.rule(REAL).is(word(b, "REAL")).skip();
b.rule(STRING).is(word(b, "STRING")).skip();
b.rule(OBJECT).is(word(b, "OBJECT")).skip();
b.rule(BOOLEAN).is(word(b, "BOOLEAN")).skip();
b.rule(BOOL).is(word(b, "BOOL")).skip();
b.rule(BINARY).is(word(b, "BINARY")).skip();
b.rule(ITERABLE).is(word(b, "ITERABLE")).skip();

b.rule(FROM).is(word(b, "FROM")).skip();
b.rule(ISSET).is(word(b, "isset")).skip();
b.rule(EMPTY).is(word(b, "empty")).skip();
b.rule(INCLUDE_ONCE).is(word(b, "include_once")).skip();
b.rule(INCLUDE).is(word(b, "include")).skip();
b.rule(EVAL).is(word(b, "eval")).skip();
b.rule(REQUIRE_ONCE).is(word(b, "require_once")).skip();
b.rule(REQUIRE).is(word(b, "require")).skip();
b.rule(CLONE).is(word(b, "clone")).skip();
b.rule(PRINT).is(word(b, "print")).skip();

b.rule(GET).is(word(b, "get")).skip();
b.rule(SET).is(word(b, "set")).skip();

b.rule(SELF).is(word(b, "self")).skip();
b.rule(PARENT).is(word(b, "parent")).skip();

b.rule(MIXED).is(word(b, "mixed")).skip();
b.rule(INTEGER).is(word(b, "integer")).skip();
b.rule(INT).is(word(b, "int")).skip();
b.rule(DOUBLE).is(word(b, "double")).skip();
b.rule(FLOAT).is(word(b, "float")).skip();
b.rule(REAL).is(word(b, "real")).skip();
b.rule(STRING).is(word(b, "string")).skip();
b.rule(OBJECT).is(word(b, "object")).skip();
b.rule(BOOLEAN).is(word(b, "boolean")).skip();
b.rule(BOOL).is(word(b, "bool")).skip();
b.rule(BINARY).is(word(b, "binary")).skip();
b.rule(ITERABLE).is(word(b, "iterable")).skip();

b.rule(FROM).is(word(b, "from")).skip();

}

Expand All @@ -385,21 +385,21 @@ private static void keywords(LexerlessGrammarBuilder b) {
PHPKeyword tokenType = PHPKeyword.values()[i];

// PHP keywords are case insensitive
b.rule(tokenType).is(SPACING, keywordRegexp(b, tokenType.getValue()), b.nextNot(b.regexp(LexicalConstant.IDENTIFIER_PART))).skip();
b.rule(tokenType).is(SPACING, caseInsensitive(tokenType.getValue()), b.nextNot(b.regexp(LexicalConstant.IDENTIFIER_PART))).skip();
if (i > 1) {
if (tokenType == PHPKeyword.READONLY) {
// Readonly is only a keyword when it is not used as a function name. SONARPHP-1266
rest[i - 2] = b.sequence(keywordRegexp(b, "readonly"), b.nextNot(b.regexp("[\\s]*\\(")));
rest[i - 2] = b.sequence(caseInsensitive("readonly"), b.nextNot(b.regexp("[\\s]*\\(")));
} else {
rest[i - 2] = keywordRegexp(b, tokenType.getValue());
rest[i - 2] = caseInsensitive(tokenType.getValue());
}
}
}

b.rule(KEYWORDS).is(SPACING,
b.firstOf(
keywordRegexp(b, PHPKeyword.getKeywordValues()[0]),
keywordRegexp(b, PHPKeyword.getKeywordValues()[1]),
caseInsensitive(PHPKeyword.getKeywordValues()[0]),
caseInsensitive(PHPKeyword.getKeywordValues()[1]),
rest),
b.nextNot(b.regexp(LexicalConstant.IDENTIFIER_PART)));
}
Expand All @@ -411,10 +411,10 @@ private static void punctuators(LexerlessGrammarBuilder b) {
}

private static Object word(LexerlessGrammarBuilder b, String word) {
return b.sequence(SPACING, b.regexp("(?i)" + word), b.nextNot(b.regexp(LexicalConstant.IDENTIFIER_PART)));
return b.sequence(SPACING, caseInsensitive(word), b.nextNot(b.regexp(LexicalConstant.IDENTIFIER_PART)));
}

private static Object keywordRegexp(LexerlessGrammarBuilder b, String keywordValue) {
return b.regexp("(?i)" + keywordValue);
private static Object caseInsensitive(String value) {
return new CaseInsensitiveStringExpression(value);
}
}