Skip to content

Commit 05a04f4

Browse files
committed
Files.qll library implementation
1 parent 6315621 commit 05a04f4

File tree

1 file changed

+225
-12
lines changed

1 file changed

+225
-12
lines changed

ql/lib/codeql/ruby/frameworks/Files.qll

Lines changed: 225 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,21 +5,223 @@
55
private import ruby
66
private import codeql.ruby.Concepts
77
private import codeql.ruby.ApiGraphs
8+
private import codeql.ruby.DataFlow
9+
private import codeql.ruby.frameworks.StandardLibrary
10+
11+
/**
 * Gets a data-flow node that creates an `IO` object: either an instantiation
 * of the top-level `IO` class, or a call to one of the class methods
 * `IO.for_fd`, `IO.open` or `IO.try_convert`.
 */
private DataFlow::Node ioInstanceInstantiation() {
12+
result = API::getTopLevelMember("IO").getAnInstantiation() or
13+
result = API::getTopLevelMember("IO").getAMethodCall(["for_fd", "open", "try_convert"])
14+
}
15+
16+
/**
 * Gets a data-flow node that may hold an `IO` object: either a node from
 * `ioInstanceInstantiation()`, or any node that an already-known `IO`
 * instance node `flowsTo` (the predicate is recursive).
 */
private DataFlow::Node ioInstance() {
17+
result = ioInstanceInstantiation()
18+
or
19+
exists(DataFlow::Node inst |
20+
inst = ioInstance() and
21+
inst.(DataFlow::LocalSourceNode).flowsTo(result)
22+
)
23+
}
24+
25+
// Match some simple cases where a path argument specifies a shell command to
26+
// be executed. For example, the `"|date"` argument in `IO.read("|date")`, which
27+
// will execute a shell command and read its output rather than reading from the
28+
// filesystem.
29+
private predicate pathArgSpawnsSubprocess(Expr arg) {
30+
arg.(StringlikeLiteral).getValueText().charAt(0) = "|"
31+
}
32+
33+
/**
 * Gets a data-flow node that creates a `File` object: an instantiation of the
 * top-level `File` class, a call to `File.open`, or a `KernelMethodCall` named
 * `open` whose first argument is not recognised by `pathArgSpawnsSubprocess`.
 */
private DataFlow::Node fileInstanceInstantiation() {
34+
result = API::getTopLevelMember("File").getAnInstantiation()
35+
or
36+
result = API::getTopLevelMember("File").getAMethodCall("open")
37+
or
38+
// Calls to `Kernel.open` can yield `File` instances
39+
exists(KernelMethodCall c |
40+
c = result.asExpr().getExpr() and
41+
c.getMethodName() = "open" and
42+
// Assume that calls that don't invoke shell commands will instead open
43+
// a file.
44+
not pathArgSpawnsSubprocess(c.getArgument(0))
45+
)
46+
}
47+
48+
/**
 * Gets a data-flow node that may hold a `File` object: either a node from
 * `fileInstanceInstantiation()`, or any node that an already-known `File`
 * instance node `flowsTo` (the predicate is recursive).
 */
private DataFlow::Node fileInstance() {
49+
result = fileInstanceInstantiation()
50+
or
51+
exists(DataFlow::Node inst |
52+
inst = fileInstance() and
53+
inst.(DataFlow::LocalSourceNode).flowsTo(result)
54+
)
55+
}
56+
57+
/**
 * Gets the name of a class method on `IO` (or `File`) that reads data, such
 * as `read` in `IO.read("foo.txt")`.
 *
 * `try_convert` is intentionally not listed: it converts its argument to an
 * `IO` object rather than reading anything, and is already treated as a
 * source of `IO` instances by `ioInstanceInstantiation()`. Listing it here
 * would make `IO.try_convert(x)` a read access with `x` as its path argument.
 */
private string ioFileReaderClassMethodName() {
58+
result = ["binread", "foreach", "read", "readlines"]
59+
}
60+
61+
/**
 * Gets the name of an instance method that is treated as reading data when
 * called on an `IO` or `File` instance (see its use in `IO::IOReader`).
 */
private string ioFileReaderInstanceMethodName() {
62+
result =
63+
[
64+
"getbyte", "getc", "gets", "pread", "read", "read_nonblock", "readbyte", "readchar",
65+
"readline", "readlines", "readpartial", "sysread"
66+
]
67+
}
68+
69+
/**
 * Gets the name of a reader method. When `classMethodCall` is `true` the
 * result is a name from `ioFileReaderClassMethodName()`; when it is `false`
 * the result is a name from `ioFileReaderInstanceMethodName()`.
 */
private string ioFileReaderMethodName(boolean classMethodCall) {
70+
classMethodCall = true and result = ioFileReaderClassMethodName()
71+
or
72+
classMethodCall = false and result = ioFileReaderInstanceMethodName()
73+
}
874

975
/**
10-
* Classes and predicates for modelling the `File` module from the standard
11-
* library.
76+
* Classes and predicates for modelling the core `IO` module.
1277
*/
13-
private module File {
14-
private class FileModuleReader extends FileSystemReadAccess::Range, DataFlow::CallNode {
15-
FileModuleReader() { this = API::getTopLevelMember("File").getAMethodCall(["new", "open"]) }
78+
module IO {
79+
/**
80+
* An instance of the `IO` class, for example in
81+
*
82+
* ```rb
83+
* rand = IO.new(IO.sysopen("/dev/random", "r"), "r")
84+
* rand_data = rand.read(32)
85+
* ```
86+
*
87+
* there are 3 `IOInstance`s - the call to `IO.new`, the assignment
88+
* `rand = ...`, and the read access to `rand` on the second line.
89+
*/
90+
class IOInstance extends DataFlow::Node {
91+
IOInstance() {
92+
this = ioInstance() or
93+
this = fileInstance()
94+
}
95+
}
96+
97+
// "Direct" `IO` instances, i.e. cases where there is no more specific
98+
// subtype such as `File`
99+
private class IOInstanceStrict extends IOInstance {
100+
IOInstanceStrict() { this = ioInstance() }
101+
}
16102

17-
override DataFlow::Node getAPathArgument() { result = this.getArgument(0) }
103+
/**
104+
* A `DataFlow::CallNode` that reads data using the `IO` class. For example,
105+
* the `IO.read` call in:
106+
*
107+
* ```rb
108+
* IO.read("|date")
109+
* ```
110+
*
111+
* returns the output of the `date` shell command, invoked as a subprocess.
112+
*
113+
* This class includes reads both from shell commands and reads from the
114+
* filesystem. For working with filesystem accesses specifically, see
115+
* `IOFileReader` or the `FileSystemReadAccess` concept.
116+
*/
117+
class IOReader extends DataFlow::CallNode {
118+
private boolean classMethodCall;
119+
private string api;
18120

121+
IOReader() {
122+
// Class methods
123+
api = ["File", "IO"] and
124+
classMethodCall = true and
125+
this = API::getTopLevelMember(api).getAMethodCall(ioFileReaderMethodName(classMethodCall))
126+
or
127+
// IO instance methods
128+
classMethodCall = false and
129+
api = "IO" and
130+
exists(IOInstanceStrict ii |
131+
this.getReceiver() = ii and
132+
this.asExpr().getExpr().(MethodCall).getMethodName() =
133+
ioFileReaderMethodName(classMethodCall)
134+
)
135+
or
136+
// File instance methods
137+
classMethodCall = false and
138+
api = "File" and
139+
exists(File::FileInstance fi |
140+
this.getReceiver() = fi and
141+
this.asExpr().getExpr().(MethodCall).getMethodName() =
142+
ioFileReaderMethodName(classMethodCall)
143+
)
144+
// TODO: enumeration style methods such as `each`, `foreach`, etc.
145+
}
146+
147+
/**
148+
* Returns the most specific core class used for this read, `IO` or `File`
149+
*/
150+
string getAPI() { result = api }
151+
152+
predicate isClassMethodCall() { classMethodCall = true }
153+
}
154+
155+
/**
156+
* A `DataFlow::CallNode` that reads data from the filesystem using the `IO`
157+
* class. For example, the `IO.read` call in:
158+
*
159+
* ```rb
160+
* IO.read("foo.txt")
161+
* ```
162+
*
163+
* reads the file `foo.txt` and returns its contents as a string.
164+
*/
165+
class IOFileReader extends IOReader, FileSystemReadAccess::Range {
166+
IOFileReader() {
167+
this.getAPI() = "File"
168+
or
169+
this.isClassMethodCall() and
170+
// Assume that calls that don't invoke shell commands will instead
171+
// read from a file.
172+
not pathArgSpawnsSubprocess(this.getArgument(0).asExpr().getExpr())
173+
}
174+
175+
// TODO: can we infer a path argument for instance method calls?
176+
// e.g. by tracing back to the instantiation of that instance
177+
override DataFlow::Node getAPathArgument() {
178+
result = this.getArgument(0) and this.isClassMethodCall()
179+
}
180+
181+
// This class represents calls that return data
19182
override DataFlow::Node getADataNode() { result = this }
20183
}
184+
}
185+
186+
/**
187+
* Classes and predicates for modelling the core `File` module.
188+
*
189+
* Because `File` is a subclass of `IO`, all `FileInstance`s and
190+
* `FileModuleReader`s are also `IOInstance`s and `IOFileReader`s
191+
* respectively.
192+
*/
193+
module File {
194+
/**
195+
* An instance of the `File` class, for example in
196+
*
197+
* ```rb
198+
* f = File.new("foo.txt")
199+
* puts f.read()
200+
* ```
201+
*
202+
* there are 3 `FileInstance`s - the call to `File.new`, the assignment
203+
* `f = ...`, and the read access to `f` on the second line.
204+
*/
205+
class FileInstance extends IO::IOInstance {
206+
FileInstance() { this = fileInstance() }
207+
}
208+
209+
/**
210+
* A read using the `File` module, e.g. the `f.read` call in
211+
*
212+
* ```rb
213+
* f = File.new("foo.txt")
214+
* puts f.read()
215+
* ```
216+
*/
217+
class FileModuleReader extends IO::IOFileReader {
218+
FileModuleReader() { this.getAPI() = "File" }
219+
}
21220

22-
private class FileModuleFilenameSource extends FileNameSource {
221+
/**
222+
* A call to a File method that may return one or more filenames.
223+
*/
224+
class FileModuleFilenameSource extends FileNameSource, DataFlow::CallNode {
23225
FileModuleFilenameSource() {
24226
// Class methods
25227
this =
@@ -28,6 +230,12 @@ private module File {
28230
"absolute_path", "basename", "expand_path", "join", "path", "readlink",
29231
"realdirpath", "realpath"
30232
])
233+
or
234+
// Instance methods
235+
exists(FileInstance fi |
236+
this.getReceiver() = fi and
237+
this.asExpr().getExpr().(MethodCall).getMethodName() = ["path", "to_path"]
238+
)
31239
}
32240
}
33241

@@ -50,12 +258,19 @@ private module File {
50258
}
51259
}
52260

53-
private module FileUtils {
54-
private class FileUtilsFilenameSource extends FileNameSource {
261+
/**
262+
* Classes and predicates for modelling the `FileUtils` module from the standard
263+
* library.
264+
*/
265+
module FileUtils {
266+
/**
267+
* A call to a FileUtils method that may return one or more filenames.
268+
*/
269+
class FileUtilsFilenameSource extends FileNameSource {
55270
FileUtilsFilenameSource() {
56271
// Note that many methods in FileUtils accept a `noop` option that will
57272
// perform a dry run of the command. This means that, for instance, `rm`
58-
// and similar methods may not actually delete/unlink a file.
273+
// and similar methods may not actually delete/unlink a file when called.
59274
this =
60275
API::getTopLevelMember("FileUtils")
61276
.getAMethodCall([
@@ -85,5 +300,3 @@ private module FileUtils {
85300
override DataFlow::Node getAPermissionNode() { result = permissionArg }
86301
}
87302
}
88-
89-
private module IO { }

0 commit comments

Comments
 (0)