Skip to content

Commit 18bd0d0

Browse files
authored
Merge pull request #14954 from microsoft/32-cpp-string-concatenation-library
32 cpp string concatenation library
2 parents 2f6f376 + 250ed48 commit 18bd0d0

File tree

6 files changed

+874
-0
lines changed

6 files changed

+874
-0
lines changed
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
/**
2+
* A library for detecting general string concatenations.
3+
*/
4+
5+
import cpp
6+
import semmle.code.cpp.models.implementations.Strcat
7+
import semmle.code.cpp.models.interfaces.FormattingFunction
8+
private import semmle.code.cpp.dataflow.new.DataFlow
9+
10+
/**
 * A call that concatenates strings, where a string is either a C string
 * (a value of type `char*`) or a C++ string (a value of type `std::string`).
 *
 * Covered forms: formatting calls (`sprintf`-like), the `strcat` and `strlcat`
 * families, and the `operator+` / `operator<<` overloads declared in the `std`
 * or `bsl` namespaces that yield a `basic_string` or `basic_ostream`.
 */
class StringConcatenation extends Call {
  StringConcatenation() {
    // Concatenation performed through a format string (sprintf and friends).
    this instanceof FormattingFunctionCall
    or
    // The strcat family.
    this.getTarget() instanceof StrcatFunction
    or
    // The strlcat family.
    this.getTarget() instanceof StrlcatFunction
    or
    // `operator+` and stream insertion (`operator<<`) whose declared type,
    // once stripped, is a string or an output stream.
    exists(Operator concatOp | concatOp = this.getTarget() |
      concatOp.hasQualifiedName(["std", "bsl"], ["operator+", "operator<<"]) and
      concatOp
          .getType()
          .stripType()
          .(UserType)
          .hasQualifiedName(["std", "bsl"], ["basic_string", "basic_ostream"])
    )
  }

  /**
   * Gets one of the string operands that this concatenation joins together.
   *
   * For `sprintf`-like calls the output buffer is never returned; the format
   * string itself, however, does count as an operand.
   */
  Expr getAnOperand() {
    // Every operand is an argument of this call.
    result = this.getAnArgument() and
    // Chained overloaded operators nest: `string s = s1 + s2 + s3` is represented
    // as `(s1.operator+(s2)).operator+(s3)`, so an inner "call to operator+" shows
    // up as an argument of the outer one. Rejecting calls leaves only the leaf
    // operands (variables, literals, ...). Excluding calls, rather than listing the
    // permitted expression kinds, avoids missing corner cases and stays valid if
    // CodeQL grows new expression kinds later.
    not result instanceof Call and
    // Only string-typed operands are of interest: `char`-based or `basic_string`.
    (
      result.getUnderlyingType().stripType().getName() = "char"
      or
      result
          .getType()
          .getUnspecifiedType()
          .(UserType)
          .hasQualifiedName(["std", "bsl"], "basic_string")
    ) and
    // For a formatting call, keep only the format string and the arguments at or
    // after the first format-argument index; any other call is unrestricted here.
    (
      not this instanceof FormattingFunctionCall
      or
      result = this.(FormattingFunctionCall).getFormat()
      or
      exists(int idx |
        idx >= this.(FormattingFunctionCall).getTarget().getFirstFormatArgumentIndex() and
        result = this.getArgument(idx)
      )
    )
  }

  /**
   * Gets a data flow node that holds the outcome of this concatenation.
   */
  DataFlow::Node getResultNode() {
    // strcat family: the destination argument is (re)defined by the call, and the
    // call expression itself is also treated as a result.
    this.getTarget() instanceof StrcatFunction and
    (
      result.asDefiningArgument() =
        this.getArgument(this.getTarget().(StrcatFunction).getParamDest())
      or
      result.asExpr() = this
    )
    or
    // strlcat family: only the destination argument carries the result.
    not this.getTarget() instanceof StrcatFunction and
    this.getTarget() instanceof StrlcatFunction and
    result.asDefiningArgument() =
      this.getArgument(this.getTarget().(StrlcatFunction).getParamDest())
    or
    // Everything else: a formatting call writes into its output argument, while an
    // operator call produces its result as the value of the call expression.
    not this.getTarget() instanceof StrcatFunction and
    not this.getTarget() instanceof StrlcatFunction and
    (
      if this instanceof FormattingFunctionCall
      then result.asDefiningArgument() = this.(FormattingFunctionCall).getOutputArgument(_)
      else result.asExpr() = this
    )
  }
}
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
// #include <iostream>
2+
// #include <string>
3+
// #include <stdio.h>
4+
// #include <string.h>
5+
// #include <sstream>
6+
#include "stl.h"
7+
8+
// Local prototypes for the two C routines called in test2; the real
// <stdio.h>/<string.h> includes are commented out at the top of this file.
int sprintf(char *s, const char *format, ...);
9+
char *strcat(char * s1, const char * s2);
10+
11+
using namespace std;
12+
13+
14+
// Fixture exercising C++ string concatenation: a chained `operator+`
// expression and a chained stream insertion (`<<`) into a stringstream.
// The append/insert/replace/push_back variants are kept only as commented-out
// code because, per the TODO markers, they are not modeled yet.
void test1(){
15+
string str1 = "Hello";
16+
string str2 = "World";
17+
string str3 = "!";
18+
string str4 = "Concatenation";
19+
string str5 = "is";
20+
string str6 = "fun";
21+
22+
// Using the + operator
// (three `+` applications chained in a single expression)
23+
string result1 = str1 + " " + str2 + str3;
24+
25+
// Using the append() function
26+
//----TODO: currently not modeled----
27+
// string result2 = str4.append(" ") + str5.append(" ") + str6;
28+
29+
// Using the insert() function
30+
//----TODO: currently not modeled----
31+
// string result3 = str1.insert(5, " ") + str2.insert(5, "! ");
32+
33+
// Using the replace() function
34+
//----TODO: currently not modeled----
35+
// string result4 = str1.replace(0, 5, "Hi") + str2.replace(0, 5, "There");
36+
37+
// Using the push_back() function
38+
//----TODO: currently not modeled----
39+
// string result5;
40+
// for (char c : str1) {
41+
// result5.push_back(c);
42+
// }
43+
44+
// Using the stream operator
// (`result6` is declared but never assigned in this function; only the
// insertions into `ss` below are performed)
45+
string result6;
46+
std::stringstream ss;
47+
ss << str1 << " " << str2 << str3;
48+
}
49+
50+
51+
// Fixture exercising C-string concatenation through sprintf (format-based)
// and strcat. `ucstr` is supplied by the caller and is used as an operand of
// both calls.
// NOTE(review): neither call limits the length of `ucstr` against the
// fixed-size local buffers — presumably acceptable because this is an
// analysis-only fixture; confirm it is never executed.
void test2(char* ucstr) {
52+
char str1[20] = "Hello";
53+
char str2[20] = "World";
54+
char result[40];
// `result2` is declared but not used anywhere in this function.
55+
char *result2;
56+
57+
// Using sprintf
// (`result` is the output buffer; the format string and the three `%s`
// arguments follow it)
58+
sprintf(result, "%s %s %s", str1, str2, ucstr);
59+
60+
// Using strcat
// (appends `ucstr` onto `str1` in place)
61+
strcat(str1, ucstr);
62+
}

0 commit comments

Comments
 (0)