Skip to content

Commit ebefcb8

Browse files
authored
Merge pull request github#15513 from microsoft/50-model-gettext-family-of-string-operations
Added model for gettext variants.
2 parents 0ee3c99 + 5866fc1 commit ebefcb8

File tree

6 files changed

+153
-16
lines changed

6 files changed

+153
-16
lines changed

cpp/ql/lib/semmle/code/cpp/models/Models.qll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ private import implementations.Deallocation
33
private import implementations.Fread
44
private import implementations.Getenv
55
private import implementations.Gets
6+
private import implementations.GetText
67
private import implementations.IdentityFunction
78
private import implementations.Inet
89
private import implementations.Iterator
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
import semmle.code.cpp.models.interfaces.DataFlow
2+
3+
/**
4+
* Returns the transated text index for a given gettext function `f`
5+
*/
6+
private int getTextArg(Function f) {
7+
// basic variations of gettext
8+
f.hasGlobalOrStdName("gettext") and result = 0
9+
or
10+
f.hasGlobalOrStdName("dgettext") and result = 1
11+
or
12+
f.hasGlobalOrStdName("dcgettext") and result = 1
13+
or
14+
// plural variations of gettext that take one format string for singular and another for plural form
15+
f.hasGlobalOrStdName("ngettext") and
16+
(result = 0 or result = 1)
17+
or
18+
f.hasGlobalOrStdName("dngettext") and
19+
(result = 1 or result = 2)
20+
or
21+
f.hasGlobalOrStdName("dcngettext") and
22+
(result = 1 or result = 2)
23+
}
24+
25+
class GetTextFunction extends DataFlowFunction {
26+
int argInd;
27+
28+
GetTextFunction() { argInd = getTextArg(this) }
29+
30+
override predicate hasDataFlow(FunctionInput input, FunctionOutput output) {
31+
input.isParameterDeref(argInd) and output.isReturnValueDeref()
32+
}
33+
}

cpp/ql/src/Likely Bugs/Format/NonConstantFormat.ql

Lines changed: 8 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
*/
1717

1818
import semmle.code.cpp.ir.dataflow.TaintTracking
19+
import semmle.code.cpp.models.implementations.GetText
1920
import semmle.code.cpp.commons.Printf
2021

2122
// For the following `...gettext` functions, we assume that
@@ -26,30 +27,21 @@ predicate whitelistFunction(Function f, int arg) {
2627
// basic variations of gettext
2728
f.getName() = "_" and arg = 0
2829
or
29-
f.getName() = "gettext" and arg = 0
30-
or
31-
f.getName() = "dgettext" and arg = 1
32-
or
33-
f.getName() = "dcgettext" and arg = 1
34-
or
35-
// plural variations of gettext that take one format string for singular and another for plural form
36-
f.getName() = "ngettext" and
37-
(arg = 0 or arg = 1)
38-
or
39-
f.getName() = "dngettext" and
40-
(arg = 1 or arg = 2)
41-
or
42-
f.getName() = "dcngettext" and
43-
(arg = 1 or arg = 2)
30+
exists(FunctionInput input |
31+
f.(GetTextFunction).hasDataFlow(input, _) and
32+
input.isParameterDeref(arg)
33+
)
4434
}
4535

46-
// we assume that ALL uses of the `_` macro
36+
// we assume that ALL uses of the `_` macro (and calls to `gettext`)
4737
// return constant string literals
4838
predicate underscoreMacro(Expr e) {
4939
exists(MacroInvocation mi |
5040
mi.getMacroName() = "_" and
5141
mi.getExpr() = e
5242
)
43+
or
44+
e = any(GetTextFunction gettext).getACallToThisFunction()
5345
}
5446

5547
/**
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
category: minorAnalysis
3+
---
4+
* Added dataflow models for the `gettext` function variants.

cpp/ql/test/library-tests/dataflow/dataflow-tests/test-source-sink.expected

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,12 @@ irFlow
303303
| test.cpp:914:46:914:53 | source | test.cpp:919:10:919:30 | global_pointer_static |
304304
| test.cpp:915:57:915:76 | *indirect_source(1) | test.cpp:921:19:921:50 | *global_pointer_static_indirect_1 |
305305
| test.cpp:932:23:932:28 | call to source | test.cpp:937:10:937:24 | * ... |
306+
| test.cpp:958:18:958:32 | *call to indirect_source | test.cpp:961:19:961:28 | *translated |
307+
| test.cpp:973:18:973:32 | *call to indirect_source | test.cpp:977:19:977:28 | *translated |
308+
| test.cpp:994:18:994:32 | *call to indirect_source | test.cpp:999:19:999:28 | *translated |
309+
| test.cpp:994:18:994:32 | *call to indirect_source | test.cpp:1003:19:1003:28 | *translated |
310+
| test.cpp:1021:18:1021:32 | *call to indirect_source | test.cpp:1027:19:1027:28 | *translated |
311+
| test.cpp:1021:18:1021:32 | *call to indirect_source | test.cpp:1031:19:1031:28 | *translated |
306312
| true_upon_entry.cpp:9:11:9:16 | call to source | true_upon_entry.cpp:13:8:13:8 | x |
307313
| true_upon_entry.cpp:17:11:17:16 | call to source | true_upon_entry.cpp:21:8:21:8 | x |
308314
| true_upon_entry.cpp:27:9:27:14 | call to source | true_upon_entry.cpp:29:8:29:8 | x |

cpp/ql/test/library-tests/dataflow/dataflow-tests/test.cpp

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -936,4 +936,105 @@ namespace global_variable_conflation_test {
936936
sink(global_pointer); // clean
937937
sink(*global_pointer); // $ ir MISSING: ast
938938
}
939+
}
940+
941+
char* gettext(const char*);
942+
char* dgettext(const char*, const char*);
943+
char* ngettext(const char*, const char*, unsigned long int);
944+
char* dngettext (const char*, const char *, const char *, unsigned long int);
945+
946+
namespace test_gettext {
947+
char* source();
948+
char* indirect_source();
949+
950+
void test_gettext() {
951+
char* data = source();
952+
char* translated = gettext(data);
953+
sink(translated); // clean
954+
indirect_sink(translated); // clean
955+
}
956+
957+
void indirect_test_dgettext() {
958+
char* data = indirect_source();
959+
char* translated = gettext(data);
960+
sink(translated); // clean
961+
indirect_sink(translated); // $ ir MISSING: ast
962+
}
963+
964+
void test_dgettext() {
965+
char* data = source();
966+
char* domain = source(); // Should not trace from this source
967+
char* translated = dgettext(domain, data);
968+
sink(translated); // clean
969+
indirect_sink(translated); // clean
970+
}
971+
972+
void indirect_test_gettext() {
973+
char* data = indirect_source();
974+
char* domain = indirect_source(); // Should not trace from this source
975+
char* translated = dgettext(domain, data);
976+
sink(translated); // clean
977+
indirect_sink(translated); // $ ir MISSING: ast
978+
}
979+
980+
void test_ngettext() {
981+
char* data = source();
982+
char* np = nullptr; // Don't coun't as a source
983+
984+
char* translated = ngettext(data, np, 0);
985+
sink(translated); // clean
986+
indirect_sink(translated); // clean
987+
988+
translated = ngettext(np, data, 0);
989+
sink(translated); // clean
990+
indirect_sink(translated); // clean
991+
}
992+
993+
void indirect_test_ngettext() {
994+
char* data = indirect_source();
995+
char* np = nullptr; // Don't coun't as a source
996+
997+
char* translated = ngettext(data, np, 0);
998+
sink(translated); // clean
999+
indirect_sink(translated); // $ ir MISSING: ast
1000+
1001+
translated = ngettext(np, data, 0);
1002+
sink(translated); // clean
1003+
indirect_sink(translated); // $ ir MISSING: ast
1004+
}
1005+
1006+
void test_dngettext() {
1007+
char* data = source();
1008+
char* np = nullptr; // Don't coun't as a source
1009+
char* domain = source(); // Should not trace from this source
1010+
1011+
char* translated = dngettext(domain, data, np, 0);
1012+
sink(translated); // clean
1013+
indirect_sink(translated); // clean
1014+
1015+
translated = dngettext(domain, np, data, 0);
1016+
sink(translated); // clean
1017+
indirect_sink(translated); // clean
1018+
}
1019+
1020+
void indirect_test_dngettext() {
1021+
char* data = indirect_source();
1022+
char* np = nullptr; // Don't coun't as a source
1023+
char* domain = indirect_source(); // Should not trace from this source
1024+
1025+
char* translated = dngettext(domain, data, np, 0);
1026+
sink(translated); // clean
1027+
indirect_sink(translated); // $ ir MISSING: ast
1028+
1029+
translated = dngettext(domain, np, data, 0);
1030+
sink(translated); // clean
1031+
indirect_sink(translated); // $ ir MISSING: ast
1032+
}
1033+
1034+
void indirect_test_gettext_no_flow_from_domain() {
1035+
char* domain = source(); // Should not trace from this source
1036+
char* translated = dgettext(domain, nullptr);
1037+
sink(translated); // clean
1038+
indirect_sink(translated); // clean
1039+
}
9391040
}

0 commit comments

Comments
 (0)