Skip to content

Commit 2375548

Browse files
authored
Merge pull request #255 from mhutch/csharp-type-validation
Fix validation of Using items with generics and escaping
2 parents 4bd136a + 9720806 commit 2375548

File tree

12 files changed

+332
-86
lines changed

12 files changed

+332
-86
lines changed

MonoDevelop.MSBuild.Tests/Analyzers/CoreDiagnosticTests.cs

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -656,5 +656,33 @@ public void SdkNameExpressionWithProperty ()
656656
ignoreDiagnostics: [CoreDiagnostics.UnwrittenProperty]
657657
);
658658
}
659+
660+
[Test]
661+
public void InvalidCSharpType ()
662+
{
663+
var source = TextWithMarkers.Parse (@"<Project>
664+
<PropertyGroup>
665+
<SomeType>|Foo-Bar|</SomeType>
666+
<SomeType>System.Collections.Generic.Dictionary%3CMicrosoft.NET.Sdk.WorkloadManifestReader.WorkloadId, Microsoft.NET.Sdk.WorkloadManifestReader.WorkloadDefinition%3E</SomeType>
667+
</PropertyGroup>
668+
</Project>", '|');
669+
670+
var schema = new MSBuildSchema {
671+
new PropertyInfo ("SomeType", "", MSBuildValueKind.CSharpType)
672+
};
673+
674+
var spans = source.GetMarkedSpans ('|');
675+
676+
var expected = new MSBuildDiagnostic (
677+
CoreDiagnostics.InvalidCSharpType,
678+
spans[0],
679+
messageArgs: [ "Foo-Bar" ]
680+
);
681+
682+
VerifyDiagnostics (source.Text, out _,
683+
includeCoreDiagnostics: true,
684+
expectedDiagnostics: [ expected ],
685+
schema: schema);
686+
}
659687
}
660688
}

MonoDevelop.MSBuild/Language/CoreDiagnostics.cs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -511,10 +511,17 @@ class CoreDiagnostics
511511
public const string InvalidClrType_Id = nameof (InvalidClrType);
512512
public static readonly MSBuildDiagnosticDescriptor InvalidClrType = new (
513513
InvalidClrType_Id,
514-
"Invalid qualified .NET type name",
514+
"Invalid .NET type name",
515515
"The value `{0}` is not a valid qualified .NET type name string",
516516
MSBuildDiagnosticSeverity.Error);
517517

518+
public const string InvalidCSharpType_Id = nameof (InvalidCSharpType);
519+
public static readonly MSBuildDiagnosticDescriptor InvalidCSharpType = new (
520+
InvalidCSharpType_Id,
521+
"Invalid C# type name",
522+
"The value `{0}` is not a valid qualified C# type name string",
523+
MSBuildDiagnosticSeverity.Error);
524+
518525
public const string InvalidClrTypeName_Id = nameof (InvalidClrTypeName);
519526
public static readonly MSBuildDiagnosticDescriptor InvalidClrTypeName = new (
520527
InvalidClrTypeName_Id,

MonoDevelop.MSBuild/Language/Expressions/ExpressionText.cs

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,15 @@ namespace MonoDevelop.MSBuild.Language.Expressions
1010
[DebuggerDisplay ("{Value} (IsPure: {IsPure})")]
1111
public class ExpressionText : ExpressionNode
1212
{
13+
/// <summary>
14+
/// The raw value of this text, including any whitespace and XML entities and MSBuild escape sequences.
15+
/// </summary>
16+
// TODO: audit access to this and make sure they should not be calling GetUnescapedValue instead
1317
public string Value { get; }
1418

1519
/// <summary>
16-
/// Gets the unescaped value of this text, optionally trimming whitespace.
20+
/// Gets the unescaped value of this text, optionally trimming whitespace. This unescapes
21+
/// both XML entities and MSBuild %XX escape sequences.
1722
/// </summary>
1823
/// <param name="trim">Whether to trim leading and trailing whitespace.</param>
1924
/// <param name="trimmedOffset">The offset of the text, taking optional trimming into account.</param>
@@ -41,17 +46,22 @@ public string GetUnescapedValue (bool trim, out int trimmedOffset, out int escap
4146
trimmedOffset = start + Offset;
4247
}
4348

44-
return XmlEscaping.UnescapeEntities (value);
49+
// TODO: technically there could be escaped whitespace, should we trim it too?
50+
var xmlUnescaped = XmlEscaping.UnescapeEntities (value);
51+
var msbuildUnescaped = Util.MSBuildEscaping.UnescapeString (xmlUnescaped);
52+
53+
return msbuildUnescaped;
4554
}
4655

4756
/// <summary>
4857
/// Indicates whether this exists by itself, as opposed to being concatenated with other values.
4958
/// </summary>
5059
public bool IsPure { get; }
5160

52-
public ExpressionText (int offset, string value, bool isPure) : base (offset, value.Length)
61+
// TODO: add ctor that takes unescaped value and audit callers
62+
public ExpressionText (int offset, string rawValue, bool isPure) : base (offset, rawValue.Length)
5363
{
54-
Value = value;
64+
Value = rawValue;
5565
IsPure = isPure;
5666
}
5767

MonoDevelop.MSBuild/Language/MSBuildDocumentValidator.cs

Lines changed: 9 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -857,22 +857,28 @@ void VisitPureLiteral (MSBuildElementSyntax elementSyntax, MSBuildAttributeSynta
857857
break;
858858
}
859859
case MSBuildValueKind.ClrNamespace:
860-
if (!IsValidTypeOrNamespace (value, out _)) {
860+
if (!TypeNameValidation.IsValidClrNamespace (value)) {
861861
AddErrorWithArgs (CoreDiagnostics.InvalidClrNamespace, value);
862862
}
863863
break;
864864

865865
case MSBuildValueKind.ClrType:
866-
if (!IsValidTypeOrNamespace (value, out _)) {
866+
if (!TypeNameValidation.IsValidClrTypeOrNamespace (value, out _, out _)) {
867867
AddErrorWithArgs (CoreDiagnostics.InvalidClrType, value);
868868
}
869869
break;
870870

871871
case MSBuildValueKind.ClrTypeName:
872-
if (!(IsValidTypeOrNamespace (value, out int componentCount) && componentCount == 1)) {
872+
if (!TypeNameValidation.IsValidClrTypeName (value)) {
873873
AddErrorWithArgs (CoreDiagnostics.InvalidClrTypeName, value);
874874
}
875875
break;
876+
877+
case MSBuildValueKind.CSharpType:
878+
if (!TypeNameValidation.IsValidCSharpTypeOrNamespace (value)) {
879+
AddErrorWithArgs (CoreDiagnostics.InvalidCSharpType, value);
880+
}
881+
break;
876882
}
877883

878884
void AddErrorWithArgs (MSBuildDiagnosticDescriptor d, params object[] args) => Diagnostics.Add (d, new TextSpan (trimmedOffset, escapedLength), args);
@@ -889,18 +895,6 @@ void AddMisspelledValueError (MSBuildDiagnosticDescriptor d, params object[] arg
889895
args
890896
);
891897
}
892-
893-
static bool IsValidTypeOrNamespace (string value, out int componentCount)
894-
{
895-
string[] components = value.Split ('.');
896-
componentCount = components.Length;
897-
foreach (var component in components) {
898-
if (!System.CodeDom.Compiler.CodeGenerator.IsValidLanguageIndependentIdentifier (component)) {
899-
return false;
900-
}
901-
}
902-
return true;
903-
}
904898
}
905899

906900
void CheckPropertyWrite (PropertyInfo resolvedProperty, TextSpan span)
Lines changed: 230 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,230 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
#if NETCOREAPP
5+
#nullable enable
6+
#endif
7+
8+
using System.Globalization;
9+
10+
namespace MonoDevelop.MSBuild.Language;
11+
12+
static class TypeNameValidation
13+
{
14+
public static bool IsValidCSharpTypeOrNamespace (string value) => IsValidCSharpTypeOrNamespace(value, out _, out _);
15+
16+
public static bool IsValidCSharpTypeOrNamespace (string value, out int componentCount, out bool isGenericType)
17+
{
18+
int offset = 0;
19+
if (!ConsumeCSharpGenericTypeOrNamespace(value, ref offset, out componentCount, out isGenericType)) {
20+
return false;
21+
}
22+
return offset == value.Length;
23+
}
24+
25+
26+
static bool ConsumeCSharpGenericTypeOrNamespace (string value, ref int offset, out int componentCount, out bool isGenericType)
27+
{
28+
isGenericType = false;
29+
30+
componentCount = ConsumeIdentifiers(value, ref offset);
31+
32+
if (componentCount == 0) {
33+
return false;
34+
}
35+
36+
if (offset == value.Length) {
37+
return true;
38+
}
39+
40+
// try to consume generics in the form SomeType<SomeOtherType,AnotherType>
41+
if (value[offset] != '<') {
42+
// complete but not a generic type, we are done consuming, caller decides what to do with next char
43+
return true;
44+
}
45+
offset++;
46+
47+
isGenericType = true;
48+
do {
49+
// Ignore whitespace after a comma as that's pretty common e.g. SomeType<SomeOtherType, AnotherType>
50+
// and is valid if this is going to be injected verbatim into a C# file.
51+
// We could be more strict and disallow whitespace, but that will likely trip people up.
52+
// We could also be more liberal and allow general whitespace around the angle brackets and commas,
53+
// but that's a bit of a rabbit hole and not likely to be very useful.
54+
if (value[offset] == ',') {
55+
offset++;
56+
ConsumeSpaces(value, ref offset);
57+
}
58+
if (!ConsumeCSharpGenericTypeOrNamespace(value, ref offset, out int consumedComponents, out _)) {
59+
return false;
60+
}
61+
} while (value[offset] == ',');
62+
63+
if (offset < value.Length && value[offset] == '>') {
64+
offset++;
65+
return true;
66+
}
67+
68+
return false;
69+
}
70+
71+
static void ConsumeSpaces(string value, ref int offset)
72+
{
73+
while (value[offset] == ' ') {
74+
offset++;
75+
}
76+
}
77+
78+
public static bool IsValidClrNamespace (string value) => IsValidClrTypeOrNamespace(value, out _, out var hasGenerics) && !hasGenerics;
79+
80+
public static bool IsValidClrTypeName (string value) => IsValidClrTypeOrNamespace(value, out var componentCount, out var hasGenerics) && componentCount == 1 && !hasGenerics;
81+
82+
public static bool IsValidClrTypeOrNamespace (string value) => IsValidClrTypeOrNamespace(value, out _, out _);
83+
84+
public static bool IsValidClrTypeOrNamespace (string value, out int componentCount, out bool isGenericType)
85+
{
86+
int offset = 0;
87+
if (!ConsumeClrGenericTypeOrNamespace(value, ref offset, out componentCount, out isGenericType)) {
88+
return false;
89+
}
90+
return offset == value.Length;
91+
}
92+
93+
static bool ConsumeClrGenericTypeOrNamespace (string value, ref int offset, out int componentCount, out bool isGenericType)
94+
{
95+
isGenericType = false;
96+
97+
componentCount = ConsumeIdentifiers(value, ref offset);
98+
99+
if (componentCount == 0) {
100+
return false;
101+
}
102+
103+
if (offset == value.Length) {
104+
return true;
105+
}
106+
107+
// try to consume generics in the form SomeType`2[SomeOtherType,AnotherType]
108+
if (value[offset] != '`') {
109+
// complete but not a generic type, we are done consuming, caller decides what to do with next char
110+
return true;
111+
}
112+
offset++;
113+
114+
int argCountOffset = offset;
115+
if(!ConsumeNumbers(value, ref offset) || !int.TryParse(value.Substring(argCountOffset, offset - argCountOffset), out int genericArgCount)) {
116+
return false;
117+
}
118+
119+
isGenericType = true;
120+
if (value[offset++] != '[') {
121+
return false;
122+
}
123+
124+
for (int i = 0; i < genericArgCount; i++) {
125+
// recursively consume the generic type arguments
126+
if (!ConsumeClrGenericTypeOrNamespace(value, ref offset, out int consumedComponents, out _)) {
127+
return false;
128+
}
129+
if (i < genericArgCount - 1) {
130+
if (offset < value.Length && value[offset] == ',') {
131+
offset++;
132+
} else {
133+
return false;
134+
}
135+
}
136+
}
137+
138+
if (offset < value.Length && value[offset] == ']') {
139+
offset++;
140+
return true;
141+
}
142+
143+
return false;
144+
}
145+
146+
static bool ConsumeNumbers (string value, ref int offset)
147+
{
148+
int start = offset;
149+
while(char.IsDigit(value[offset])) {
150+
offset++;
151+
}
152+
return offset > start;
153+
}
154+
155+
static int ConsumeIdentifiers (string value, ref int offset)
156+
{
157+
int componentCount = 0;
158+
159+
while(ConsumeIdentifier(value, ref offset)) {
160+
componentCount++;
161+
if (offset >= value.Length || value[offset] != '.') {
162+
return componentCount;
163+
}
164+
offset++;
165+
}
166+
167+
if (componentCount > 0 && value[offset - 1] == '.') {
168+
// we consumed a dot but no identifier followed it, so walk it back
169+
offset--;
170+
}
171+
172+
return componentCount;
173+
}
174+
175+
static bool ConsumeIdentifier (string value, ref int offset)
176+
{
177+
bool nextMustBeStartChar = true;
178+
179+
while (offset < value.Length) {
180+
char ch = value[offset];
181+
// based on https://github.com/dotnet/runtime/blob/96be510135829ff199c6c341ad461c36bab4809e/src/libraries/Common/src/System/CSharpHelpers.cs#L141
182+
UnicodeCategory uc = CharUnicodeInfo.GetUnicodeCategory(ch);
183+
switch (uc) {
184+
case UnicodeCategory.UppercaseLetter: // Lu
185+
case UnicodeCategory.LowercaseLetter: // Ll
186+
case UnicodeCategory.TitlecaseLetter: // Lt
187+
case UnicodeCategory.ModifierLetter: // Lm
188+
case UnicodeCategory.LetterNumber: // Lm
189+
case UnicodeCategory.OtherLetter: // Lo
190+
nextMustBeStartChar = false;
191+
offset++;
192+
continue;
193+
194+
case UnicodeCategory.NonSpacingMark: // Mn
195+
case UnicodeCategory.SpacingCombiningMark: // Mc
196+
case UnicodeCategory.ConnectorPunctuation: // Pc
197+
case UnicodeCategory.DecimalDigitNumber: // Nd
198+
// Underscore is a valid starting character, even though it is a ConnectorPunctuation.
199+
if (nextMustBeStartChar && ch != '_')
200+
return false;
201+
offset++;
202+
continue;
203+
204+
default:
205+
// unlike CSharpHelpers.cs, don't check IsSpecialTypeChar here because we're only looking
206+
// for identifier components that are valid for all languages, and generic syntax is
207+
// handled separately.
208+
//
209+
break;
210+
}
211+
break;
212+
}
213+
214+
// return true if we have been able to consume a valid identifier. it need
215+
// not be the entire string.
216+
return !nextMustBeStartChar;
217+
}
218+
219+
public static bool IsValidCSharpType (string value, out int componentCount)
220+
{
221+
string[] components = value.Split ('.');
222+
componentCount = components.Length;
223+
foreach (var component in components) {
224+
if (!System.CodeDom.Compiler.CodeGenerator.IsValidLanguageIndependentIdentifier (component)) {
225+
return false;
226+
}
227+
}
228+
return true;
229+
}
230+
}

0 commit comments

Comments
 (0)