Skip to content

Commit f5cb4fc

Browse files
committed
Merge pull request #309 from PowerShell/RuleForUnEncodedScriptBranch
Rule to validate the encoding scheme for missing BOM in a file
2 parents f9b0911 + 6ba70f7 commit f5cb4fc

10 files changed

+248
-4
lines changed

Engine/ScriptAnalyzer.cs

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -962,7 +962,7 @@ private IEnumerable<DiagnosticRecord> AnalyzeFile(string filePath)
962962
{
963963
ScriptBlockAst scriptAst = null;
964964
Token[] scriptTokens = null;
965-
ParseError[] errors;
965+
ParseError[] errors = null;
966966

967967
this.outputWriter.WriteVerbose(string.Format(CultureInfo.CurrentCulture, Strings.VerboseFileMessage, filePath));
968968

@@ -972,7 +972,15 @@ private IEnumerable<DiagnosticRecord> AnalyzeFile(string filePath)
972972
// processing for non help script
973973
if (!(Path.GetFileName(filePath).StartsWith("about_") && Path.GetFileName(filePath).EndsWith(".help.txt")))
974974
{
975-
scriptAst = Parser.ParseFile(filePath, out scriptTokens, out errors);
975+
try
976+
{
977+
scriptAst = Parser.ParseFile(filePath, out scriptTokens, out errors);
978+
}
979+
catch (Exception e)
980+
{
981+
this.outputWriter.WriteWarning(e.ToString());
982+
return null;
983+
}
976984

977985
if (errors != null && errors.Length > 0)
978986
{
@@ -983,7 +991,7 @@ private IEnumerable<DiagnosticRecord> AnalyzeFile(string filePath)
983991
}
984992
}
985993

986-
if (errors.Length > 10)
994+
if (errors != null && errors.Length > 10)
987995
{
988996
string manyParseErrorMessage = String.Format(CultureInfo.CurrentCulture, Strings.ParserErrorMessage, System.IO.Path.GetFileName(filePath));
989997
this.outputWriter.WriteError(new ErrorRecord(new ParseException(manyParseErrorMessage), manyParseErrorMessage, ErrorCategory.ParserError, filePath));

Rules/ScriptAnalyzerBuiltinRules.csproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@
7070
<Compile Include="AvoidUsingWriteHost.cs" />
7171
<Compile Include="DscTestsPresent.cs" />
7272
<Compile Include="DscExamplesPresent.cs" />
73+
<Compile Include="UseBOMForUnicodeEncodedFile.cs" />
7374
<Compile Include="UseOutputTypeCorrectly.cs" />
7475
<Compile Include="MissingModuleManifestField.cs" />
7576
<Compile Include="PossibleIncorrectComparisonWithNull.cs" />

Rules/Strings.Designer.cs

Lines changed: 37 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Rules/Strings.resx

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -744,4 +744,16 @@
744744
<data name="UseUTF8EncodingForHelpFileName" xml:space="preserve">
745745
<value>UseUTF8EncodingForHelpFile</value>
746746
</data>
747+
<data name="UseBOMForUnicodeEncodedFileCommonName" xml:space="preserve">
748+
<value>Use BOM encoding for non-ASCII files</value>
749+
</data>
750+
<data name="UseBOMForUnicodeEncodedFileDescription" xml:space="preserve">
751+
<value>For a file encoded with a format other than ASCII, ensure BOM is present to ensure that any application consuming this file can interpret it correctly.</value>
752+
</data>
753+
<data name="UseBOMForUnicodeEncodedFileError" xml:space="preserve">
754+
<value>Missing BOM encoding for non-ASCII encoded file '{0}'</value>
755+
</data>
756+
<data name="UseBOMForUnicodeEncodedFileName" xml:space="preserve">
757+
<value>UseBOMForUnicodeEncodedFile</value>
758+
</data>
747759
</root>

Rules/UseBOMForUnicodeEncodedFile.cs

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
//
2+
// Copyright (c) Microsoft Corporation.
3+
//
4+
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
5+
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
6+
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
7+
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
8+
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
9+
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
10+
// THE SOFTWARE.
11+
//
12+
13+
using System;
14+
using System.Collections.Generic;
15+
using System.Management.Automation.Language;
16+
using Microsoft.Windows.PowerShell.ScriptAnalyzer.Generic;
17+
using System.ComponentModel.Composition;
18+
using System.Globalization;
19+
using System.IO;
20+
using System.Text;
21+
using System.Linq;
22+
23+
namespace Microsoft.Windows.PowerShell.ScriptAnalyzer.BuiltinRules
24+
{
25+
/// <summary>
/// UseBOMForUnicodeEncodedFile: Checks that a file containing non-ASCII bytes starts with a byte order mark (BOM).
/// </summary>
[Export(typeof(IScriptRule))]
public class UseBOMForUnicodeEncodedFile : IScriptRule
{
    /// <summary>
    /// AnalyzeScript: For a file that has no BOM, check that its content is pure ASCII.
    /// A violation is reported when no BOM is found and at least one byte is greater than 0x7F.
    /// </summary>
    /// <param name="ast">The script's ast (not inspected by this rule; the raw file bytes are read instead)</param>
    /// <param name="fileName">The path of the script file to inspect</param>
    /// <returns>At most one diagnostic record describing the missing BOM</returns>
    public IEnumerable<DiagnosticRecord> AnalyzeScript(Ast ast, string fileName)
    {
        // Without a file path there are no bytes on disk to inspect, so there is nothing to report.
        if (string.IsNullOrWhiteSpace(fileName))
        {
            yield break;
        }

        byte[] byteStream = File.ReadAllBytes(fileName);

        // A recognized BOM means the encoding is declared explicitly.
        if (null != GetByteStreamEncoding(byteStream))
        {
            yield break;
        }

        // Did not detect the presence of BOM.
        // Any byte > 127 (0x7F) means the file is not ASCII encoded; Any() stops at the first such byte.
        if (byteStream.Any(b => b > 0x7F))
        {
            yield return new DiagnosticRecord(
                string.Format(CultureInfo.CurrentCulture, Strings.UseBOMForUnicodeEncodedFileError, System.IO.Path.GetFileName(fileName)),
                null, GetName(), DiagnosticSeverity.Warning, fileName);
        }
    }

    /// <summary>
    /// GetByteStreamEncoding: Detect the file encoding from the BOM at the start of the file's byte stream
    /// </summary>
    /// <param name="byteStream">The raw bytes of the file</param>
    /// <returns>The encoding matching the detected BOM, or null when no known BOM is present</returns>
    private Encoding GetByteStreamEncoding(byte[] byteStream)
    {
        // Order matters: the UTF-32 little-endian BOM (FF FE 00 00) begins with the
        // UTF-16 little-endian BOM (FF FE), so the 4-byte marks are tested first.
        if (HasPrefix(byteStream, 0x00, 0x00, 0xFE, 0xFF))
        {
            // UTF-32, big-endian
            return Encoding.GetEncoding("utf-32BE");
        }

        if (HasPrefix(byteStream, 0xFF, 0xFE, 0x00, 0x00))
        {
            // UTF-32, little-endian
            return Encoding.UTF32;
        }

        if (HasPrefix(byteStream, 0xFE, 0xFF))
        {
            // UTF-16, big-endian
            return Encoding.BigEndianUnicode;
        }

        if (HasPrefix(byteStream, 0xFF, 0xFE))
        {
            // UTF-16, little-endian
            return Encoding.Unicode;
        }

        if (HasPrefix(byteStream, 0xEF, 0xBB, 0xBF))
        {
            // UTF-8
            return Encoding.UTF8;
        }

        if (HasPrefix(byteStream, 0x2B, 0x2F, 0x76))
        {
            // UTF7
            return Encoding.UTF7;
        }

        // Did not detect BOM OR Unknown File encoding
        return null;
    }

    /// <summary>
    /// HasPrefix: Checks whether the byte stream starts with the given sequence of bytes
    /// </summary>
    /// <param name="byteStream">The bytes to inspect</param>
    /// <param name="prefix">The leading bytes to look for</param>
    /// <returns>True when the stream is at least as long as the prefix and begins with it</returns>
    private static bool HasPrefix(byte[] byteStream, params byte[] prefix)
    {
        if (byteStream.Length < prefix.Length)
        {
            return false;
        }

        for (int i = 0; i < prefix.Length; i++)
        {
            if (byteStream[i] != prefix[i])
            {
                return false;
            }
        }

        return true;
    }

    /// <summary>
    /// GetName: Retrieves the name of this rule.
    /// </summary>
    /// <returns>The name of this rule</returns>
    public string GetName()
    {
        return string.Format(CultureInfo.CurrentCulture, Strings.NameSpaceFormat, GetSourceName(), Strings.UseBOMForUnicodeEncodedFileName);
    }

    /// <summary>
    /// GetCommonName: Retrieves the common name of this rule.
    /// </summary>
    /// <returns>The common name of this rule</returns>
    public string GetCommonName()
    {
        return string.Format(CultureInfo.CurrentCulture, Strings.UseBOMForUnicodeEncodedFileCommonName);
    }

    /// <summary>
    /// GetDescription: Retrieves the description of this rule.
    /// </summary>
    /// <returns>The description of this rule</returns>
    public string GetDescription()
    {
        return string.Format(CultureInfo.CurrentCulture, Strings.UseBOMForUnicodeEncodedFileDescription);
    }

    /// <summary>
    /// GetSourceType: Retrieves the type of the rule, Builtin, Managed or Module.
    /// </summary>
    /// <returns>SourceType.Builtin</returns>
    public SourceType GetSourceType()
    {
        return SourceType.Builtin;
    }

    /// <summary>
    /// GetSeverity: Retrieves the severity of the rule: error, warning or information.
    /// </summary>
    /// <returns>RuleSeverity.Warning</returns>
    public RuleSeverity GetSeverity()
    {
        return RuleSeverity.Warning;
    }

    /// <summary>
    /// GetSourceName: Retrieves the name of the module/assembly the rule is from.
    /// </summary>
    /// <returns>The source name of this rule</returns>
    public string GetSourceName()
    {
        return string.Format(CultureInfo.CurrentCulture, Strings.SourceName);
    }
}
146+
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# Test Script
Binary file not shown.
Binary file not shown.
Binary file not shown.
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
Import-Module PSScriptAnalyzer

# Expected diagnostic messages and the rule name used to filter analyzer output.
$violationMessageOne = "Missing BOM encoding for non-ASCII encoded file 'BOMAbsent_UTF16EncodedScript.ps1'"
$violationMessageTwo = "Missing BOM encoding for non-ASCII encoded file 'BOMAbsent_UnknownEncodedScript.ps1'"
$violationName = "PSUseBOMForUnicodeEncodedFile"

# Test fixtures live in the TestFiles folder next to this script.
$directory = Split-Path -Parent $MyInvocation.MyCommand.Path
$testFiles = Join-Path $directory "TestFiles"

# @(...) forces an array so .Count and [0] behave the same for zero, one or many results.
$violationsOne = @(Invoke-ScriptAnalyzer (Join-Path $testFiles "BOMAbsent_UTF16EncodedScript.ps1") | Where-Object {$_.RuleName -eq $violationName})
$violationsTwo = @(Invoke-ScriptAnalyzer (Join-Path $testFiles "BOMAbsent_UnknownEncodedScript.ps1") | Where-Object {$_.RuleName -eq $violationName})
$noViolationsOne = @(Invoke-ScriptAnalyzer (Join-Path $testFiles "BOMPresent_UTF16EncodedScript.ps1") | Where-Object {$_.RuleName -eq $violationName})
$noViolationsTwo = @(Invoke-ScriptAnalyzer (Join-Path $testFiles "BOMAbsent_ASCIIEncodedScript.ps1") | Where-Object {$_.RuleName -eq $violationName})

Describe "UseBOMForUnicodeEncodedFile" {
    Context "When there are violations" {
        It "has 1 rule violation for BOM Absent - UTF16 Encoded file" {
            $violationsOne.Count | Should Be 1
        }

        It "has the correct description message for BOM Absent - UTF16 Encoded file" {
            # Should Match takes a regular expression; escape the message so '.' is matched literally.
            $violationsOne[0].Message | Should Match ([regex]::Escape($violationMessageOne))
        }

        It "has 1 rule violation for BOM Absent - Unknown Encoded file" {
            $violationsTwo.Count | Should Be 1
        }

        It "has the correct description message for BOM Absent - Unknown Encoded file" {
            # Should Match takes a regular expression; escape the message so '.' is matched literally.
            $violationsTwo[0].Message | Should Match ([regex]::Escape($violationMessageTwo))
        }
    }

    Context "When there are no violations" {
        It "returns no violations for BOM Present - UTF16 Encoded File" {
            $noViolationsOne.Count | Should Be 0
        }

        It "returns no violations for BOM Absent - ASCII Encoded File" {
            $noViolationsTwo.Count | Should Be 0
        }
    }
}

0 commit comments

Comments
 (0)