Skip to content
This repository was archived by the owner on Oct 15, 2020. It is now read-only.

Commit 32d4f04

Browse files
authored
MetaData and AstCreation Passes (#210)
* Passes up to and including AST creation - minus most tests
* Patches ported over from `fabs/simplify` for `for` handling. Also places method return as last child of method again like it was in the old converter.
* Adapt test to fix
* Port over more tests
* Ported over all AST tests
* Replace `println` with call to logger
* Cleanup
* More cleanup
* More cleanup
* Fix typo
* Some comments
* Merge FileAndNamespaceBlockPass and AstCreatorPass
* Perform id assignment in AstCreationPass
* Set correct CPG version
1 parent b844e92 commit 32d4f04

File tree

10 files changed

+2356
-4
lines changed

10 files changed

+2356
-4
lines changed

build.sbt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,15 @@ organization := "io.shiftleft"
33
scalaVersion := "2.13.1"
44
enablePlugins(GitVersioning)
55

6-
val cpgVersion = "0.11.329"
6+
val cpgVersion = "0.11.331"
77
val antlrVersion = "4.7.2"
88

99
libraryDependencies ++= Seq(
1010
"com.github.scopt" %% "scopt" % "3.7.1",
1111
"org.antlr" % "antlr4-runtime" % antlrVersion,
1212
"io.shiftleft" %% "codepropertygraph" % cpgVersion,
1313
"io.shiftleft" %% "codepropertygraph-protos" % cpgVersion,
14+
"io.shiftleft" %% "semanticcpg" % cpgVersion,
1415

1516
"commons-cli" % "commons-cli" % "1.4",
1617
"com.github.pathikrit" %% "better-files" % "3.8.0",
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
package io.shiftleft.fuzzyc2cpg.passes
2+
3+
import io.shiftleft.codepropertygraph.Cpg
4+
import io.shiftleft.codepropertygraph.generated.{EdgeTypes, nodes}
5+
import io.shiftleft.fuzzyc2cpg.Utils.getGlobalNamespaceBlockFullName
6+
import io.shiftleft.fuzzyc2cpg.passes.astcreation.{AntlrCModuleParserDriver, AstVisitor}
7+
import io.shiftleft.fuzzyc2cpg.{Defines, Global}
8+
import io.shiftleft.passes.{DiffGraph, IntervalKeyPool, KeyPool, ParallelCpgPass}
9+
import org.slf4j.LoggerFactory
10+
11+
/**
  * Given a list of filenames, this pass creates abstract syntax trees for
  * each file, including File and NamespaceBlock nodes. Files are processed in parallel.
  *
  * @param filenames paths of the source files to parse; each filename is one part of the pass
  * @param cpg       the code property graph handed to the underlying `ParallelCpgPass`
  * @param keyPool   key pool that is split into `filenames.size` pools for the parallel parts
  */
class AstCreationPass(filenames: List[String], cpg: Cpg, keyPool: IntervalKeyPool)
    extends ParallelCpgPass[String](cpg, keyPools = Some(keyPool.split(filenames.size))) {

  private val logger = LoggerFactory.getLogger(getClass)

  /** State object passed to every `AstVisitor` created by this pass. */
  val global: Global = Global()

  /** One part per input file. */
  override def partIterator: Iterator[String] = filenames.iterator

  /**
    * Builds the diff graph for a single file: a File node, the file's global
    * NamespaceBlock linked to it via a SOURCE_FILE edge, and whatever the
    * parser driver adds while walking the file.
    *
    * @param filename path of the file to process
    * @return an iterator holding the built diff graph, or an empty iterator
    *         if the file could not be parsed
    */
  override def runOnPart(filename: String): Iterator[DiffGraph] = {
    val diffGraph = DiffGraph.newBuilder
    val absolutePath = new java.io.File(filename).toPath.toAbsolutePath.normalize().toString
    val fileNode = nodes.NewFile(name = absolutePath)
    val namespaceBlock = nodes.NewNamespaceBlock(
      name = Defines.globalNamespaceName,
      fullName = getGlobalNamespaceBlockFullName(Some(fileNode.name))
    )

    // Register each node exactly once; a second addNode for the same file
    // node would record a second node creation in the diff graph.
    diffGraph.addNode(fileNode)
    diffGraph.addNode(namespaceBlock)
    diffGraph.addEdge(namespaceBlock, fileNode, EdgeTypes.SOURCE_FILE)

    val driver = createDriver(fileNode, namespaceBlock)
    tryToParse(driver, filename, diffGraph)
  }

  /**
    * Creates a parser driver for one file, with an `AstVisitor` registered as
    * observer and the file node set on the driver.
    */
  private def createDriver(fileNode: nodes.NewFile,
                           namespaceBlock: nodes.NewNamespaceBlock): AntlrCModuleParserDriver = {
    val driver = new AntlrCModuleParserDriver()
    val astVisitor = new AstVisitor(driver, namespaceBlock, global)
    driver.addObserver(astVisitor)
    driver.setFileNode(fileNode)
    driver
  }

  /**
    * Parses and walks `filename`, writing into `diffGraph`.
    *
    * Parse failures are logged and the file is skipped: in that case nothing
    * is emitted for the file, including the nodes already added to `diffGraph`.
    *
    * @return an iterator with the built diff graph on success, empty otherwise
    */
  private def tryToParse(driver: AntlrCModuleParserDriver,
                         filename: String,
                         diffGraph: DiffGraph.Builder): Iterator[DiffGraph] = {
    try {
      driver.parseAndWalkFile(filename, diffGraph)
      Iterator(diffGraph.build())
    } catch {
      case ex: RuntimeException =>
        // The logger receives the exception (and thus its stack trace); no
        // additional printStackTrace() to stderr is needed.
        logger.warn(s"Cannot parse module: $filename, skipping")
        logger.warn("Complete exception: ", ex)
        Iterator()
      case _: StackOverflowError =>
        // Caught deliberately so that one file exhausting the stack while
        // being parsed does not abort the whole pass.
        logger.warn(s"Cannot parse module: $filename, skipping, StackOverflow")
        Iterator()
    }
  }

}
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
package io.shiftleft.fuzzyc2cpg.passes
2+
3+
import io.shiftleft.codepropertygraph.Cpg
4+
import io.shiftleft.codepropertygraph.generated.{Languages, nodes}
5+
import io.shiftleft.fuzzyc2cpg.Defines
6+
import io.shiftleft.fuzzyc2cpg.Utils.getGlobalNamespaceBlockFullName
7+
import io.shiftleft.passes.{CpgPass, DiffGraph, KeyPool}
8+
9+
/**
  * Pass that emits two nodes in a single diff graph: a MetaData node whose
  * language is set to C, and a NamespaceBlock intended for anything that
  * cannot be assigned to any other namespace.
  *
  * @param cpg     the code property graph handed to the underlying `CpgPass`
  * @param keyPool optional key pool forwarded to `CpgPass`
  */
class CMetaDataPass(cpg: Cpg, keyPool: Option[KeyPool] = None) extends CpgPass(cpg, keyPool = keyPool) {

  /** Builds the diff graph containing the MetaData node followed by the NamespaceBlock. */
  override def run(): Iterator[DiffGraph] = {
    val builder = DiffGraph.newBuilder

    // MetaData node: declares the language of this CPG.
    builder.addNode(nodes.NewMetaData(language = Languages.C))

    // NamespaceBlock whose full name is derived without a file name.
    val anyNamespaceBlock = nodes.NewNamespaceBlock(
      name = Defines.globalNamespaceName,
      fullName = getGlobalNamespaceBlockFullName(None)
    )
    builder.addNode(anyNamespaceBlock)

    Iterator(builder.build())
  }
}
Lines changed: 236 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,236 @@
1+
package io.shiftleft.fuzzyc2cpg.passes.astcreation;
2+
3+
import static org.antlr.v4.runtime.Token.EOF;
4+
5+
import io.shiftleft.codepropertygraph.generated.EdgeTypes;
6+
import io.shiftleft.codepropertygraph.generated.nodes.NewComment;
7+
import io.shiftleft.codepropertygraph.generated.nodes.NewFile;
8+
import io.shiftleft.fuzzyc2cpg.ast.AstNode;
9+
import io.shiftleft.fuzzyc2cpg.ast.AstNodeBuilder;
10+
import io.shiftleft.fuzzyc2cpg.parser.AntlrParserDriverObserver;
11+
import io.shiftleft.fuzzyc2cpg.parser.CommonParserContext;
12+
import io.shiftleft.fuzzyc2cpg.parser.TokenSubStream;
13+
import io.shiftleft.passes.DiffGraph;
14+
import java.io.IOException;
15+
import java.util.ArrayList;
16+
import java.util.List;
17+
import java.util.Stack;
18+
import java.util.function.Consumer;
19+
import jdk.nashorn.internal.runtime.ParserException;
20+
import org.antlr.v4.runtime.BailErrorStrategy;
21+
import org.antlr.v4.runtime.CharStream;
22+
import org.antlr.v4.runtime.CharStreams;
23+
import org.antlr.v4.runtime.CommonTokenStream;
24+
import org.antlr.v4.runtime.DefaultErrorStrategy;
25+
import org.antlr.v4.runtime.Lexer;
26+
import org.antlr.v4.runtime.Parser;
27+
import org.antlr.v4.runtime.ParserRuleContext;
28+
import org.antlr.v4.runtime.RecognitionException;
29+
import org.antlr.v4.runtime.Token;
30+
import org.antlr.v4.runtime.TokenSource;
31+
import org.antlr.v4.runtime.misc.ParseCancellationException;
32+
import org.antlr.v4.runtime.tree.ParseTree;
33+
import org.antlr.v4.runtime.tree.ParseTreeListener;
34+
import org.antlr.v4.runtime.tree.ParseTreeWalker;
35+
import io.shiftleft.codepropertygraph.generated.nodes.File;
36+
import scala.Some;
37+
import scala.collection.immutable.List$;
38+
39+
abstract public class AntlrParserDriver {
40+
// TODO: This class does two things:
41+
// * It is a driver for the ANTLRParser, i.e., the parser
42+
// that creates ParseTrees from Strings. It can also already
43+
// 'walk' the ParseTree to create ASTs.
44+
// * It is an AST provider in that it will notify watchers
45+
// when ASTs are ready.
46+
// We should split this into two classes.
47+
48+
public Stack<AstNodeBuilder<? extends AstNode>> builderStack = new Stack<>();
49+
public TokenSubStream stream;
50+
public String filename;
51+
52+
private ParseTreeListener listener;
53+
private CommonParserContext context = null;
54+
public DiffGraph.Builder cpg;
55+
private final List<AntlrParserDriverObserver> observers = new ArrayList<>();
56+
private NewFile fileNode;
57+
58+
public AntlrParserDriver() {
59+
super();
60+
}
61+
62+
public void setFileNode(NewFile fileNode) {
63+
this.fileNode = fileNode;
64+
}
65+
66+
public abstract ParseTree parseTokenStreamImpl(TokenSubStream tokens);
67+
68+
public abstract Lexer createLexer(CharStream input);
69+
70+
public DiffGraph.Builder parseAndWalkFile(String filename, DiffGraph.Builder diffGraph) throws ParserException {
71+
cpg = diffGraph;
72+
handleHiddenTokens(filename);
73+
TokenSubStream stream = createTokenStreamFromFile(filename);
74+
initializeContextWithFile(filename, stream);
75+
76+
ParseTree tree = parseTokenStream(stream);
77+
walkTree(tree);
78+
return cpg;
79+
}
80+
81+
private void handleHiddenTokens(String filename) {
82+
CommonTokenStream tokenStream = createStreamOfHiddenTokensFromFile(filename);
83+
TokenSource tokenSource = tokenStream.getTokenSource();
84+
85+
while (true){
86+
Token token = tokenSource.nextToken();
87+
if (token.getType() == EOF) {
88+
break;
89+
}
90+
if (token.getChannel() != Token.HIDDEN_CHANNEL) {
91+
continue;
92+
}
93+
int line = token.getLine();
94+
String text = token.getText();
95+
NewComment commentNode = new NewComment(
96+
new Some<>(line),
97+
text
98+
);
99+
cpg.addNode(commentNode);
100+
cpg.addEdge(fileNode, commentNode, EdgeTypes.AST, List$.MODULE$.empty());
101+
}
102+
}
103+
104+
105+
public ParseTree parseTokenStream(TokenSubStream tokens)
106+
throws ParserException {
107+
ParseTree returnTree = parseTokenStreamImpl(tokens);
108+
if (returnTree == null) {
109+
throw new ParserException("");
110+
}
111+
return returnTree;
112+
}
113+
114+
protected TokenSubStream createTokenStreamFromFile(String filename)
115+
throws ParserException {
116+
117+
CharStream input = createInputStreamForFile(filename);
118+
Lexer lexer = createLexer(input);
119+
TokenSubStream tokens = new TokenSubStream(lexer);
120+
return tokens;
121+
122+
}
123+
124+
private CharStream createInputStreamForFile(String filename) {
125+
126+
try {
127+
return CharStreams.fromFileName(filename);
128+
} catch (IOException exception) {
129+
throw new RuntimeException(String.format("Unable to find source file [%s]", filename));
130+
}
131+
132+
}
133+
134+
protected CommonTokenStream createStreamOfHiddenTokensFromFile(String filename) {
135+
CharStream input = createInputStreamForFile(filename);
136+
Lexer lexer = createLexer(input);
137+
return new CommonTokenStream(lexer, Token.HIDDEN_CHANNEL);
138+
}
139+
140+
protected void walkTree(ParseTree tree) {
141+
ParseTreeWalker walker = new ParseTreeWalker();
142+
walker.walk(getListener(), tree);
143+
}
144+
145+
protected void initializeContextWithFile(String filename,
146+
TokenSubStream stream) {
147+
setContext(new CommonParserContext());
148+
getContext().filename = filename;
149+
getContext().stream = stream;
150+
initializeContext(getContext());
151+
}
152+
153+
protected boolean isRecognitionException(RuntimeException ex) {
154+
155+
return ex.getClass() == ParseCancellationException.class
156+
&& ex.getCause() instanceof RecognitionException;
157+
}
158+
159+
protected void setLLStarMode(Parser parser) {
160+
parser.removeErrorListeners();
161+
parser.setErrorHandler(new DefaultErrorStrategy());
162+
}
163+
164+
protected void setSLLMode(Parser parser) {
165+
parser.removeErrorListeners();
166+
parser.setErrorHandler(new BailErrorStrategy());
167+
}
168+
169+
public void initializeContext(CommonParserContext context) {
170+
filename = context.filename;
171+
stream = context.stream;
172+
}
173+
174+
public void setStack(Stack<AstNodeBuilder<? extends AstNode>> aStack) {
175+
builderStack = aStack;
176+
}
177+
178+
// //////////////////
179+
180+
public void addObserver(AntlrParserDriverObserver observer) {
181+
observers.add(observer);
182+
}
183+
184+
private void notifyObservers(Consumer<AntlrParserDriverObserver> function) {
185+
for (AntlrParserDriverObserver observer : observers) {
186+
function.accept(observer);
187+
}
188+
189+
}
190+
191+
public void begin() {
192+
notifyObserversOfBegin();
193+
}
194+
195+
public void end() {
196+
notifyObserversOfEnd();
197+
}
198+
199+
private void notifyObserversOfBegin() {
200+
notifyObservers(AntlrParserDriverObserver::begin);
201+
}
202+
203+
private void notifyObserversOfEnd() {
204+
notifyObservers(AntlrParserDriverObserver::end);
205+
}
206+
207+
public void notifyObserversOfUnitStart(ParserRuleContext ctx) {
208+
notifyObservers(observer -> observer.startOfUnit(ctx, filename));
209+
}
210+
211+
public void notifyObserversOfUnitEnd(ParserRuleContext ctx) {
212+
notifyObservers(observer -> observer.endOfUnit(ctx, filename));
213+
}
214+
215+
public void notifyObserversOfItem(AstNode aItem) {
216+
notifyObservers(observer -> observer.processItem(aItem, builderStack));
217+
}
218+
219+
public ParseTreeListener getListener() {
220+
return listener;
221+
}
222+
223+
public void setListener(ParseTreeListener listener) {
224+
this.listener = listener;
225+
}
226+
227+
public CommonParserContext getContext() {
228+
return context;
229+
}
230+
231+
public void setContext(CommonParserContext context) {
232+
this.context = context;
233+
}
234+
235+
}
236+

0 commit comments

Comments
 (0)