13
13
*/
14
14
15
15
import cpp
16
+ import semmle.code.cpp.controlflow.Guards
16
17
17
18
class WideCharPointerType extends PointerType {
18
19
WideCharPointerType ( ) { this .getBaseType ( ) instanceof WideCharType }
19
20
}
20
21
22
+ /**
23
+ * Gets `t` itself, or any type reached from `t` by repeatedly stripping
24
+ * one derived-type or typedef layer (i.e., all intermediate base types).
25
+ */
26
+ Type getABaseType ( Type t ) {
27
+ result = t
28
+ or
29
+ result = getABaseType ( t .( DerivedType ) .getBaseType ( ) )
30
+ or
31
+ result = getABaseType ( t .( TypedefType ) .getBaseType ( ) )
32
+ }
33
+
21
34
/**
22
35
* A type that may also be a `CharPointerType`, but that is likely used as an arbitrary buffer.
23
36
*/
24
37
class UnlikelyToBeAStringType extends Type {
25
38
UnlikelyToBeAStringType ( ) {
26
- this .( PointerType ) .getBaseType ( ) .( CharType ) .isUnsigned ( ) or
27
- this .( PointerType ) .getBaseType ( ) .getName ( ) .toLowerCase ( ) .matches ( "%byte" ) or
28
- this .getName ( ) .toLowerCase ( ) .matches ( "%byte" ) or
29
- this .( PointerType ) .getBaseType ( ) .hasName ( "uint8_t" )
39
+ exists ( Type targ | getABaseType ( this ) = targ |
40
+ // NOTE: not using `CharType.isUnsigned()`, but rather looking for any explicitly declared unsigned
41
+ // char types. Assuming these are used for buffers, not strings.
42
+ targ .( CharType ) .getName ( ) .toLowerCase ( ) .matches ( "unsigned%" ) or
43
+ targ .getName ( ) .toLowerCase ( ) .matches ( [ "uint8_t" , "%byte%" ] )
44
+ )
30
45
}
31
46
}
32
47
48
+ // Types that can be wide depending on the UNICODE macro
49
+ // see https://learn.microsoft.com/en-us/windows/win32/winprog/windows-data-types
50
+ class UnicodeMacroDependentWidthType extends Type {
51
+ UnicodeMacroDependentWidthType ( ) {
52
+ exists ( Type targ | getABaseType ( this ) = targ |
53
+ targ .getName ( ) in [
54
+ "LPCTSTR" ,
55
+ "LPTSTR" ,
56
+ "PCTSTR" ,
57
+ "PTSTR" ,
58
+ "TBYTE" ,
59
+ "TCHAR"
60
+ ]
61
+ )
62
+ }
63
+ }
64
+
65
+ class UnicodeMacro extends Macro {
66
+ UnicodeMacro ( ) { this .getName ( ) .toLowerCase ( ) .matches ( "%unicode%" ) }
67
+ }
68
+
69
+ class UnicodeMacroInvocation extends MacroInvocation {
70
+ UnicodeMacroInvocation ( ) { this .getMacro ( ) instanceof UnicodeMacro }
71
+ }
72
+
73
+ /**
74
+ * Holds when `e` is an expression whose type is `UnicodeMacroDependentWidthType`
75
+ * and `e` is observed to be guarded by a check involving a bitwise-and operation
76
+ * with a `UnicodeMacroInvocation`.
77
+ * Such expressions are assumed to be checked dynamically, i.e.,
78
+ * the flag would indicate if UNICODE typing is set correctly to allow
79
+ * or disallow a widening cast.
80
+ */
81
+ predicate isLikelyDynamicallyChecked ( Expr e ) {
82
+ e .getType ( ) instanceof UnicodeMacroDependentWidthType and
83
+ exists ( GuardCondition gc , BitwiseAndExpr bai , UnicodeMacroInvocation umi |
84
+ bai .getAnOperand ( ) = umi .getExpr ( )
85
+ |
86
+ // bai == 0 is false when reaching `e.getBasicBlock()`.
87
+ // That is, bai != 0 when reaching `e.getBasicBlock()`.
88
+ gc .ensuresEq ( bai , 0 , e .getBasicBlock ( ) , false )
89
+ or
90
+ // bai == k and k != 0 is true when reaching `e.getBasicBlock()`.
91
+ gc .ensuresEq ( bai , any ( int k | k != 0 ) , e .getBasicBlock ( ) , true )
92
+ )
93
+ }
94
+
33
95
from Expr e1 , Cast e2
34
96
where
35
97
e2 = e1 .getConversion ( ) and
@@ -42,7 +104,11 @@ where
42
104
not e1 .getType ( ) instanceof UnlikelyToBeAStringType and
43
105
// Avoid casts from 'new' expressions, as these will typically be safe
44
106
// Example: `__Type* ret = reinterpret_cast<__Type*>(New(m_pmo) char[num * sizeof(__Type)]);`
45
- not exists ( NewOrNewArrayExpr newExpr | newExpr .getAChild * ( ) = e1 )
107
+ not exists ( NewOrNewArrayExpr newExpr | newExpr .getAChild * ( ) = e1 ) and
108
+ // Avoid cases where the cast is guarded by a check to determine if
109
+ // unicode encoding is enabled in such a way as to disallow the dangerous cast
110
+ // at runtime.
111
+ not isLikelyDynamicallyChecked ( e1 )
46
112
select e1 ,
47
113
"Conversion from " + e1 .getType ( ) .toString ( ) + " to " + e2 .getType ( ) .toString ( ) +
48
114
". Use of invalid string can lead to undefined behavior."
0 commit comments