diff --git a/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/Main.scala b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/Main.scala index 8f8c7b404e8f..9e86a68e18c4 100644 --- a/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/Main.scala +++ b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/Main.scala @@ -9,7 +9,8 @@ import scopt.OParser final case class Config( inputPath: String = "", outputPath: String = X2CpgConfig.defaultOutputPath, - phpIni: Option[String] = None + phpIni: Option[String] = None, + excludeOverrides: Option[List[String]] = None ) extends X2CpgConfig[Config] { override def withInputPath(inputPath: String): Config = @@ -28,7 +29,11 @@ private object Frontend { programName("php2cpg"), opt[String]("php-ini") .action((x, c) => c.copy(phpIni = Some(x))) - .text("php.ini path used by php-parser. Defaults to php.ini shipped with Joern.") + .text("php.ini path used by php-parser. Defaults to php.ini shipped with Joern."), + opt[Seq[String]]("exclude-overrides") + .valueName("regex1,regex2,...") + .action((x, c) => c.copy(excludeOverrides = Some(x.toList))) + .text("Comma separated list of regexes where matches exclude files or directories. Overrides default list.") ) } } diff --git a/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/passes/AstCreationPass.scala b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/passes/AstCreationPass.scala index aea1a0b3d8f1..70c1c428a0ba 100644 --- a/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/passes/AstCreationPass.scala +++ b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/passes/AstCreationPass.scala @@ -4,6 +4,7 @@ import better.files.File import io.joern.php2cpg.Config import io.joern.php2cpg.astcreation.AstCreator import io.joern.php2cpg.parser.PhpParser +import io.joern.php2cpg.utils.PathFilter import io.joern.x2cpg.SourceFiles import io.joern.x2cpg.datastructures.Global import io.shiftleft.codepropertygraph.Cpg @@ -11,6 +12,7 @@ import io.shiftleft.passes.ConcurrentWriterCpgPass import org.slf4j.LoggerFactory import scala.jdk.CollectionConverters._ +import java.nio.file.PathMatcher class AstCreationPass(config: Config, cpg: Cpg) extends ConcurrentWriterCpgPass[String](cpg) { @@ -18,17 +20,24 @@ class AstCreationPass(config: Config, cpg: Cpg) extends ConcurrentWriterCpgPass[ val global = new Global() val PhpSourceFileExtensions: Set[String] = Set(".php") + private val pathFilter = PathFilter(config.excludeOverrides) - override def generateParts(): Array[String] = SourceFiles.determine(config.inputPath, PhpSourceFileExtensions).toArray + override def generateParts(): Array[String] = + SourceFiles.determine(config.inputPath, PhpSourceFileExtensions).toArray override def runOnPart(diffGraph: DiffGraphBuilder, filename: String): Unit = { val relativeFilename = File(config.inputPath).relativize(File(filename)).toString - PhpParser.parseFile(filename, config.phpIni) match { - case Some(parseResult) => - diffGraph.absorb(new AstCreator(relativeFilename, parseResult, global).createAst()) - case None => - logger.warn(s"Could not parse file $filename. Results will be missing!") + if (pathFilter.excluded(relativeFilename)) { + logger.debug(s"Skipping file due to matched exclusion: $relativeFilename") + } else { + PhpParser.parseFile(filename, config.phpIni) match { + case Some(parseResult) => + diffGraph.absorb(new AstCreator(relativeFilename, parseResult, global).createAst()) + + case None => + logger.warn(s"Could not parse file $filename. Results will be missing!") + } } } diff --git a/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/utils/PathFilter.scala b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/utils/PathFilter.scala new file mode 100644 index 000000000000..ebc162af4d75 --- /dev/null +++ b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/utils/PathFilter.scala @@ -0,0 +1,57 @@ +package io.joern.php2cpg.utils + +import io.joern.php2cpg.utils.PathFilter.standardisePath +import org.slf4j.LoggerFactory + +import java.io.File +import java.nio.file.Paths +import scala.util.matching.Regex + +class PathFilter(excludeOverrides: Option[List[String]]) { + private val logger = LoggerFactory.getLogger(this.getClass) + + private val sep = if (File.separator == "/") File.separator else "\\\\" + private val startOrSep = s"^([^$sep]+$sep)*" + private val anyDirSuffix = s"$sep.*" + + def dirExclude(dirName: String): Regex = { + val path = escapeSeparators(standardisePath(dirName)) + s"${startOrSep}$path${anyDirSuffix}".r + } + + private def escapeSeparators(filename: String): String = { + if (File.separator == "/") + filename + else + filename.replaceAll(raw"\\", raw"\\\\") + } + + private val excludeRegexes = { + excludeOverrides.getOrElse(PathFilter.DefaultExcludes).map(dirExclude) + } + + def excluded(filename: String): Boolean = { + excludeRegexes.exists(_.matches(filename)) + } +} + +object PathFilter { + val DefaultExcludes: List[String] = List( + "tests", + // For composer projects + "vendor", + ".git" + ) + + def standardisePath(filename: String): String = { + // Synthetic filename, so don't correct + if (filename.contains("<")) + filename + else + Paths.get(filename).toString + } + + def apply(excludes: Option[List[String]]): PathFilter = { + new PathFilter(excludes) + } +} diff --git a/joern-cli/frontends/php2cpg/src/test/scala/io/joern/php2cpg/querying/FileTests.scala b/joern-cli/frontends/php2cpg/src/test/scala/io/joern/php2cpg/querying/FileTests.scala new file mode 100644 index 000000000000..908880c8eea3 --- /dev/null +++ b/joern-cli/frontends/php2cpg/src/test/scala/io/joern/php2cpg/querying/FileTests.scala @@ -0,0 +1,54 @@ +package io.joern.php2cpg.querying + +import io.joern.php2cpg.testfixtures.PhpCode2CpgFixture +import io.joern.php2cpg.utils.PathFilter +import io.joern.x2cpg.testfixtures.TestCpg +import io.shiftleft.semanticcpg.language._ +import io.joern.php2cpg.Config + +class FileTests extends PhpCode2CpgFixture { + val testFiles = List( + "a.php", + "src/b.php", + "src/sub/c.php", + "tests/d.php", + "tests/sub/e.php", + "vendor/autoload.php", + "vendor/dep/f.php", + "misc/g.php", + ".git/h.php", + "sub/tests/i.php", + "nottests/j.php" + ) + + private def getCpg(): TestCpg = { + val cpg = code("", fileName = testFiles.head) + testFiles.tail.foreach(path => cpg.moreCode("", fileName = PathFilter.standardisePath(path))) + cpg + } + + "test, vendor, and .git directories should be filtered out by default" in { + val cpg = getCpg() + + cpg.file.name.toSet shouldBe Set("a.php", "src/b.php", "src/sub/c.php", "misc/g.php", "nottests/j.php", "") + .map(PathFilter.standardisePath) + } + + "custom excludes should exclude the correct directories" in { + val config = Config(excludeOverrides = Some(List("dep", "src/sub/", "doesntexist"))) + val cpg = getCpg().config(config) + + cpg.file.name.toSet shouldBe Set( + "a.php", + "src/b.php", + "tests/d.php", + "tests/sub/e.php", + "vendor/autoload.php", + "misc/g.php", + ".git/h.php", + "sub/tests/i.php", + "nottests/j.php", + "" + ).map(PathFilter.standardisePath) + } +} diff --git a/joern-cli/frontends/php2cpg/src/test/scala/io/joern/php2cpg/testfixtures/PhpCode2CpgFixture.scala b/joern-cli/frontends/php2cpg/src/test/scala/io/joern/php2cpg/testfixtures/PhpCode2CpgFixture.scala index d09735befe2a..d6fc03841677 100644 --- a/joern-cli/frontends/php2cpg/src/test/scala/io/joern/php2cpg/testfixtures/PhpCode2CpgFixture.scala +++ b/joern-cli/frontends/php2cpg/src/test/scala/io/joern/php2cpg/testfixtures/PhpCode2CpgFixture.scala @@ -7,6 +7,7 @@ import io.shiftleft.codepropertygraph.Cpg import io.shiftleft.semanticcpg.language.{ICallResolver, NoResolve} import java.io.File +import io.joern.x2cpg.X2CpgConfig trait PhpFrontend extends LanguageFrontend { override val fileSuffix: String = ".php" @@ -15,6 +16,15 @@ trait PhpFrontend extends LanguageFrontend { implicit val defaultConfig: Config = Config() new Php2Cpg().createCpg(sourceCodeFile.getAbsolutePath).get } + + override def execute[T <: X2CpgConfig[_]](sourceCodeFile: File, config: T): Cpg = { + config match { + case phpConfig: Config => + new Php2Cpg().createCpg(sourceCodeFile.getAbsolutePath)(phpConfig).get + case _ => + throw new RuntimeException(s"Cannot invoke php2cpg with config type ${config.getClass().getCanonicalName()}") + } + } } class DefaultTestCpgWithPhp extends DefaultTestCpg with PhpFrontend diff --git a/joern-cli/frontends/x2cpg/src/test/scala/io/joern/x2cpg/testfixtures/LanguageFrontend.scala b/joern-cli/frontends/x2cpg/src/test/scala/io/joern/x2cpg/testfixtures/LanguageFrontend.scala index ed25937c9831..be63ee42b9dd 100644 --- a/joern-cli/frontends/x2cpg/src/test/scala/io/joern/x2cpg/testfixtures/LanguageFrontend.scala +++ b/joern-cli/frontends/x2cpg/src/test/scala/io/joern/x2cpg/testfixtures/LanguageFrontend.scala @@ -3,6 +3,8 @@ package io.joern.x2cpg.testfixtures import io.shiftleft.codepropertygraph.Cpg import java.io.File +import io.joern.x2cpg.X2CpgConfig +import scala.annotation.nowarn /** LanguageFrontend encapsulates the logic that translates the source code directory into CPGs */ @@ -19,4 +21,16 @@ trait LanguageFrontend { * CPG representation stored in a file */ def execute(sourceCodeFile: File): Cpg + + /** Generate CPG for the given source directory with the option to override the method to pass config down. + * @param sourceCodeFile + * direcotry where source code is located + * @param config + * frontend config: ignored unless overridden + * @return + * CPG representation stored in a file + */ + def execute[T <: X2CpgConfig[_]](sourceCodeFile: File, @nowarn config: T): Cpg = { + execute(sourceCodeFile) + } } diff --git a/joern-cli/frontends/x2cpg/src/test/scala/io/joern/x2cpg/testfixtures/TestCpg.scala b/joern-cli/frontends/x2cpg/src/test/scala/io/joern/x2cpg/testfixtures/TestCpg.scala index 63aef733f786..7969fdf88a93 100644 --- a/joern-cli/frontends/x2cpg/src/test/scala/io/joern/x2cpg/testfixtures/TestCpg.scala +++ b/joern-cli/frontends/x2cpg/src/test/scala/io/joern/x2cpg/testfixtures/TestCpg.scala @@ -8,6 +8,7 @@ import java.nio.file.{Files, Path, Paths} import java.util.Comparator import scala.annotation.nowarn import scala.collection.mutable +import io.joern.x2cpg.X2CpgConfig // Lazily populated test CPG which is created upon first access to the underlying graph. // The trait LanguageFrontend is mixed in and not property/field of this class in order @@ -16,6 +17,7 @@ abstract class TestCpg extends Cpg() with LanguageFrontend { private var _graph = Option.empty[Graph] private val codeFileNamePairs = mutable.ArrayBuffer.empty[(String, Path)] private var fileNameCounter = 0 + private var _config = Option.empty[X2CpgConfig[_]] @nowarn protected def codeFilePreProcessing(codeFile: Path): Unit = {} @@ -34,6 +36,22 @@ abstract class TestCpg extends Cpg() with LanguageFrontend { this } + def config[T <: X2CpgConfig[_]](newConfig: T): this.type = { + checkConfigEmpty() + this._config = Some(newConfig) + this + } + + private def checkConfigEmpty(): Unit = { + val messages = List( + Option.when(_graph.isDefined)("Modifying test config is not allowed after accessing graph."), + Option.when(_config.isDefined)("Test config may only be added once") + ).flatten + if (messages.nonEmpty) { + throw new RuntimeException(messages.mkString("\n")) + } + } + private def checkGraphEmpty(): Unit = { if (_graph.isDefined) { throw new RuntimeException("Modifying test data is not allowed after accessing graph.") @@ -65,7 +83,9 @@ abstract class TestCpg extends Cpg() with LanguageFrontend { if (_graph.isEmpty) { val codeDir = codeToFileSystem() try { - _graph = Option(execute(codeDir.toFile).graph) + _graph = _config + .map(execute(codeDir.toFile, _).graph) + .orElse(Option(execute(codeDir.toFile).graph)) applyPasses() } finally { deleteDir(codeDir)