Skip to content

Commit e8eedb7

Browse files
committed
C#: Extractor support for UTF-8 encoded strings.
1 parent 72fa3bd commit e8eedb7

File tree

6 files changed

+26
-5
lines changed

6 files changed

+26
-5
lines changed

csharp/extractor/Semmle.Extraction.CSharp/Entities/ExpressionNodeInfo.cs

Lines changed: 16 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,6 @@
22
using Microsoft.CodeAnalysis.CSharp;
33
using Microsoft.CodeAnalysis.CSharp.Syntax;
44
using Semmle.Extraction.CSharp.Populators;
5-
using Semmle.Extraction.Entities;
65
using Semmle.Extraction.Kinds;
76

87
namespace Semmle.Extraction.CSharp.Entities
@@ -107,6 +106,11 @@ public string? ExprValue
107106
return Expression.ValueAsString(val);
108107
}
109108

109+
if (TryGetStringValueFromUtf8Literal(out var s))
110+
{
111+
return s;
112+
}
113+
110114
return null;
111115
}
112116
}
@@ -181,6 +185,17 @@ private bool TryGetBoolValueFromLiteral(out bool val)
181185
return isTrue || isFalse;
182186
}
183187

188+
/// <summary>
/// Attempts to read the string value of a UTF-8 string literal.
/// When <c>Node</c> has kind <c>SyntaxKind.Utf8StringLiteralExpression</c> and is a
/// <c>LiteralExpressionSyntax</c>, sets <paramref name="value"/> to the literal token's
/// <c>ValueText</c> and returns true; otherwise leaves <paramref name="value"/> null and returns false.
/// </summary>
private bool TryGetStringValueFromUtf8Literal(out string? value)
189+
{
190+
value = null;
191+
if (Node.IsKind(SyntaxKind.Utf8StringLiteralExpression) && Node is LiteralExpressionSyntax literal)
192+
{
193+
// The value is taken from the token's ValueText, not from a constant value of the expression.
value = literal.Token.ValueText;
194+
return true;
195+
}
196+
return false;
197+
}
198+
184199
public bool IsBoolLiteral()
185200
{
186201
return TryGetBoolValueFromLiteral(out var _);

csharp/extractor/Semmle.Extraction.CSharp/Entities/Expressions/Factory.cs

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,7 @@ internal static Expression Create(ExpressionNodeInfo info)
4747
case SyntaxKind.FalseLiteralExpression:
4848
case SyntaxKind.TrueLiteralExpression:
4949
case SyntaxKind.StringLiteralExpression:
50+
case SyntaxKind.Utf8StringLiteralExpression:
5051
case SyntaxKind.NullLiteralExpression:
5152
case SyntaxKind.NumericLiteralExpression:
5253
case SyntaxKind.CharacterLiteralExpression:

csharp/extractor/Semmle.Extraction.CSharp/Entities/Expressions/InterpolatedString.cs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,7 @@ protected override void PopulateExpression(TextWriter trapFile)
2727
case SyntaxKind.InterpolatedStringText:
2828
// Create a string literal
2929
var interpolatedText = (InterpolatedStringTextSyntax)c;
30-
new Expression(new ExpressionInfo(Context, Type, Context.CreateLocation(c.GetLocation()), ExprKind.STRING_LITERAL, this, child++, false, interpolatedText.TextToken.ValueText));
30+
new Expression(new ExpressionInfo(Context, Type, Context.CreateLocation(c.GetLocation()), ExprKind.UTF16_STRING_LITERAL, this, child++, false, interpolatedText.TextToken.ValueText));
3131
break;
3232
default:
3333
throw new InternalError(c, $"Unhandled interpolation kind {c.Kind()}");

csharp/extractor/Semmle.Extraction.CSharp/Entities/Expressions/Literal.cs

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,8 @@ private static ExprKind GetKind(ExpressionNodeInfo info)
2020
{
2121
case SyntaxKind.DefaultLiteralExpression:
2222
return ExprKind.DEFAULT;
23+
case SyntaxKind.Utf8StringLiteralExpression:
24+
return ExprKind.UTF8_STRING_LITERAL;
2325
case SyntaxKind.NullLiteralExpression:
2426
info.SetType(null); // Don't use converted type.
2527
return ExprKind.NULL_LITERAL;
@@ -63,7 +65,7 @@ private static ExprKind GetExprKind(ITypeSymbol? type, ExpressionSyntax? expr, E
6365
return ExprKind.FLOAT_LITERAL;
6466

6567
case SpecialType.System_String:
66-
return ExprKind.STRING_LITERAL;
68+
return ExprKind.UTF16_STRING_LITERAL;
6769

6870
case SpecialType.System_UInt16:
6971
case SpecialType.System_UInt32:

csharp/extractor/Semmle.Extraction.CSharp/Kinds/ExprKind.cs

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ public enum ExprKind
1515
ULONG_LITERAL = 7,
1616
FLOAT_LITERAL = 8,
1717
DOUBLE_LITERAL = 9,
18-
STRING_LITERAL = 10,
18+
UTF16_STRING_LITERAL = 10,
1919
NULL_LITERAL = 11,
2020
THIS_ACCESS = 12,
2121
BASE_ACCESS = 13,
@@ -129,6 +129,7 @@ public enum ExprKind
129129
SLICE_PATTERN = 132,
130130
URSHIFT = 133,
131131
ASSIGN_URSHIFT = 134,
132+
UTF8_STRING_LITERAL = 135,
132133
DEFINE_SYMBOL = 999,
133134
}
134135
}

csharp/ql/lib/semmlecode.csharp.dbscheme

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1005,7 +1005,7 @@ case @expr.kind of
10051005
| 7 = @ulong_literal_expr
10061006
| 8 = @float_literal_expr
10071007
| 9 = @double_literal_expr
1008-
| 10 = @string_literal_expr
1008+
| 10 = @utf16_string_literal_expr
10091009
| 11 = @null_literal_expr
10101010
/* primary & unary */
10111011
| 12 = @this_access_expr
@@ -1139,6 +1139,7 @@ case @expr.kind of
11391139
| 132 = @slice_pattern_expr
11401140
| 133 = @urshift_expr
11411141
| 134 = @assign_urshift_expr
1142+
| 135 = @utf8_string_literal_expr
11421143
/* Preprocessor */
11431144
| 999 = @define_symbol_expr
11441145
;
@@ -1152,6 +1153,7 @@ case @expr.kind of
11521153

11531154
@integer_literal_expr = @int_literal_expr | @long_literal_expr | @uint_literal_expr | @ulong_literal_expr;
11541155
@real_literal_expr = @float_literal_expr | @double_literal_expr | @decimal_literal_expr;
1156+
@string_literal_expr = @utf16_string_literal_expr | @utf8_string_literal_expr;
11551157
@literal_expr = @bool_literal_expr | @char_literal_expr | @integer_literal_expr | @real_literal_expr
11561158
| @string_literal_expr | @null_literal_expr;
11571159

0 commit comments

Comments
 (0)