Skip to content

Commit 9ba4ffd

Browse files
authored
Merge pull request #81 from microsoft/open-source-powershell-extractor
PS: Open source the powershell extractor
2 parents 0550ff1 + 95d02e6 commit 9ba4ffd

File tree

194 files changed

+15234
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

194 files changed

+15234
-0
lines changed

powershell/README.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# Powershell Extractor
2+
3+
## Directories:
4+
- `extractor`
5+
- Powershell extractor source code
6+
- `ql`
7+
- QL libraries and queries for Powershell (to be written)
8+
- `tools`
9+
- Directory containing files that must be copied to powershell/tools in the directory containing the CodeQL CLI. This will be done automatically by `build.ps1` (see below).
10+
11+
## How to build the Powershell extractor:
12+
- Run `build.ps1 path-to-codeql-cli-folder` where `path-to-codeql-cli-folder` is the path to the folder containing the CodeQL CLI (i.e., `codeql.exe`).

powershell/build.ps1

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Builds the PowerShell extractor and copies it, together with the extractor
# configuration, the dbscheme files and the "tools" directory, into the
# "powershell" folder of the given CodeQL CLI folder.
param (
    # Path to the folder containing the CodeQL CLI (i.e., codeql.exe).
    [Parameter(Mandatory=$true)][string]$cliFolder
)

# Resolve every input relative to the location of this script rather than the
# current working directory, so the script behaves the same wherever it is invoked from.
$sourceRoot = $PSScriptRoot

$powershellFolder = Join-Path -Path $cliFolder -ChildPath "powershell"
$toolsWin64Folder = Join-Path (Join-Path $powershellFolder "tools") "win64"

# Publish the extractor straight into <cli>/powershell/tools/win64.
$extractorSolution = Join-Path (Join-Path $sourceRoot "extractor") "powershell.sln"
dotnet publish $extractorSolution -o $toolsWin64Folder
if ($LASTEXITCODE -ne 0) {
    Write-Host "Build failed"
    exit 1
}

# Copy the extractor configuration and the database schema next to the tools.
Copy-Item -Path (Join-Path $sourceRoot "codeql-extractor.yml") -Destination $powershellFolder -Force
$qlLibFolder = Join-Path (Join-Path $sourceRoot "ql") "lib"
Copy-Item -Path (Join-Path $qlLibFolder "semmlecode.powershell.dbscheme") -Destination $powershellFolder -Force
Copy-Item -Path (Join-Path $qlLibFolder "semmlecode.powershell.dbscheme.stats") -Destination $powershellFolder -Force

# Copy the checked-in "tools" directory into <cli>/powershell.
Copy-Item -Path (Join-Path $sourceRoot "tools") -Destination $powershellFolder -Recurse -Force

powershell/codeql-extractor.yml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
name: "powershell"
2+
display_name: "powershell"
3+
version: 0.0.1
4+
column_kind: "utf16"
5+
legacy_qltest_extraction: true
6+
file_types:
7+
- name: powershell
8+
display_name: powershellscripts
9+
extensions:
10+
- .ps1
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
2+
3+
<PropertyGroup>
4+
<TargetFramework>net7.0</TargetFramework>
5+
<ImplicitUsings>enable</ImplicitUsings>
6+
<Nullable>enable</Nullable>
7+
8+
<IsPackable>false</IsPackable>
9+
<IsTestProject>true</IsTestProject>
10+
</PropertyGroup>
11+
12+
<ItemGroup>
13+
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.3.2" />
14+
<PackageReference Include="xunit" Version="2.4.2" />
15+
<PackageReference Include="xunit.runner.visualstudio" Version="2.4.5">
16+
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
17+
<PrivateAssets>all</PrivateAssets>
18+
</PackageReference>
19+
<PackageReference Include="coverlet.collector" Version="3.1.2">
20+
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
21+
<PrivateAssets>all</PrivateAssets>
22+
</PackageReference>
23+
</ItemGroup>
24+
25+
<ItemGroup>
26+
<ProjectReference Include="..\Semmle.Extraction.PowerShell.Standalone\Semmle.Extraction.PowerShell.Standalone.csproj" />
27+
</ItemGroup>
28+
29+
</Project>
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
using System.Text;
2+
using System.Text.RegularExpressions;
3+
4+
namespace Microsoft.Extraction.Tests;
5+
6+
/// <summary>
/// This class provides a method for sanitizing trap files in tests so they can be validated.
/// The resulting trap file will not be valid for making a codeqldb due to missing file metadata,
/// which is removed to ensure the test case can match the expected trap.
/// </summary>
internal class TrapSanitizer
{
    // Regex to match the IDs in the file (# followed by digits)
    private static readonly Regex CaptureId = new Regex("#([0-9]+)");

    /// <summary>
    /// Sanitize a Trap file to check equality by removing run specific things like file names and squashing ids
    /// to a consistent range.
    /// </summary>
    /// <remarks>
    /// Every line up to and including the first line starting with <c>numlines</c> is dropped. The largest
    /// leading ID seen on those dropped lines determines the offset subtracted from the IDs that follow.
    /// </remarks>
    /// <param name="TrapContents">The lines of the trap file to sanitize</param>
    /// <returns>A string containing the sanitized contents, one line per retained input line</returns>
    /// <exception cref="ArgumentNullException"><paramref name="TrapContents"/> is null</exception>
    /// <exception cref="ArgumentException">No line starting with <c>numlines</c> was found</exception>
    public static string SanitizeTrap(string[] TrapContents)
    {
        ArgumentNullException.ThrowIfNull(TrapContents);

        int largestId = 0;
        int startingLineActual = -1;
        for (int i = 0; i < TrapContents.Length; i++)
        {
            // The first line with actual extracted contents will be after the numlines line
            if (TrapContents[i].StartsWith("numlines", StringComparison.Ordinal))
            {
                startingLineActual = i + 1;
                break;
            }

            // If a line before numlines has an ID, its first ID is a candidate for largest id found
            Match firstId = CaptureId.Match(TrapContents[i]);
            if (firstId.Success)
            {
                largestId = Math.Max(largestId, int.Parse(firstId.Groups[1].Value));
            }
        }

        // Without the numlines marker there is no defined start of the extracted contents;
        // fail with a clear message instead of indexing the array at -1.
        if (startingLineActual < 0)
        {
            throw new ArgumentException("The trap contents do not contain a line starting with 'numlines'.", nameof(TrapContents));
        }

        StringBuilder sb = new();

        // Starting from the line after numlines declaration
        for (int i = startingLineActual; i < TrapContents.Length; i++)
        {
            // Replace IDs in each line based on the largest previous ID found
            // Reserve #1 for the File
            sb.Append(SanitizeLine(TrapContents[i], largestId - 1));
            sb.Append(Environment.NewLine);
        }

        return sb.ToString();
    }

    /// <summary>
    /// Sanitize a single line of trap content by subtracting the offset from every ID greater than 1.
    /// IDs of 1 or less are written back unchanged.
    /// </summary>
    /// <param name="trapContent">A single line of trap content</param>
    /// <param name="offset">The offset to apply</param>
    /// <returns>A sanitized line</returns>
    private static string SanitizeLine(string trapContent, int offset)
    {
        var matches = CaptureId.Matches(trapContent);
        if (matches.Count == 0)
        {
            return trapContent;
        }

        var sb = new StringBuilder();
        int lastIndex = 0;
        foreach (Match match in matches)
        {
            // Group 1 holds only the digits, so the leading '#' is copied with the preceding text
            Group digits = match.Groups[1];
            sb.Append(trapContent[lastIndex..digits.Index]);
            lastIndex = digits.Index + digits.Length;

            int id = int.Parse(digits.Value);
            sb.Append(id > 1 ? id - offset : id);
        }

        // Copy whatever follows the last ID
        sb.Append(trapContent[lastIndex..]);
        return sb.ToString();
    }
}
Lines changed: 212 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,212 @@
1+
using System.Reflection;
2+
using System.Text.RegularExpressions;
3+
using Microsoft.Extraction.Tests;
4+
using Semmle.Extraction;
5+
using Semmle.Extraction.PowerShell.Standalone;
6+
using Xunit.Abstractions;
7+
using Xunit.Sdk;
8+
using Semmle.Extraction.PowerShell;
9+
10+
namespace Microsoft.Extractor.Tests;
11+
12+
/// <summary>
/// Relative locations of the sample sources, expected traps, schema and generated traps used by the tests.
/// All paths are relative and therefore resolved against the current working directory of the test run.
/// </summary>
internal static class PathHolder
{
    // Directory holding the sample PowerShell sources passed to the extractor
    internal static readonly string powershellSource = Path.Join("..", "..", "..", "..", "..", "samples", "code");

    // Directory holding the expected trap files the generated traps are compared against
    internal static readonly string expectedTraps = Path.Join("..", "..", "..", "..", "..", "samples", "traps");

    // The dbscheme file parsed by the schema test
    internal static readonly string schemaPath = Path.Join("..", "..", "..", "..", "..", "config", "semmlecode.powershell.dbscheme");

    // Directory the generated traps are read from: the full path of the sample sources with ':' replaced
    // by '_', placed under the current directory.
    // Must stay declared after powershellSource: static field initializers run in textual order.
    internal static readonly string generatedTraps = Path.Join(".", Path.GetFullPath(powershellSource).Replace(":", "_"));
}
19+
/// <summary>
/// Fixture shared by the trap tests; its only job is to delete the generated trap directory on disposal.
/// </summary>
public class TrapTestFixture : IDisposable
{
    public TrapTestFixture()
    {
        // Setup here
    }

    /// <summary>
    /// Delete the generated traps, if any were produced.
    /// </summary>
    public void Dispose()
    {
        // The directory only exists once traps have been generated; deleting a missing
        // directory would throw from Dispose and hide the actual test failure.
        if (Directory.Exists(PathHolder.generatedTraps))
        {
            Directory.Delete(PathHolder.generatedTraps, true);
        }
    }
}
32+
33+
/// <summary>
/// Tests that compare the dbscheme with the implemented Tuples methods and that compare
/// generated trap files with the expected sample traps.
/// </summary>
public class Traps : IClassFixture<TrapTestFixture>
{
    private readonly ITestOutputHelper _output;

    public Traps(ITestOutputHelper output)
    {
        _output = output;
    }

    // Matches an identifier immediately followed by '(' and captures the identifier (the table name)
    private static readonly Regex schemaDeclStart = new("([a-zA-Z_]+)\\(");
    // Matches a ')' at the very start of a line, which closes a table declaration
    private static readonly Regex schemaEnd = new("^\\)");
    // Matches the terminator of a block comment
    private static readonly Regex commentEnd = new("\\*/");

    /// <summary>
    /// Naively parse the schema and try to determine how many parameters each table expects.
    /// Every line between a declaration start and the closing ')' line counts as one parameter.
    /// </summary>
    /// <param name="schemaContents">The lines of the dbscheme file</param>
    /// <returns>Dictionary mapping table name to number of parameters</returns>
    private static Dictionary<string, int> ParseSchema(string[] schemaContents)
    {
        bool isParsingTable = false;
        int expectedNumEntries = 0;
        string targetName = string.Empty;
        Dictionary<string, int> output = new();
        foreach (string line in schemaContents)
        {
            if (!isParsingTable)
            {
                Match declaration = schemaDeclStart.Match(line);
                if (declaration.Success)
                {
                    targetName = declaration.Groups[1].Value;
                    isParsingTable = true;
                    expectedNumEntries = 0;
                }
                continue;
            }

            if (commentEnd.IsMatch(line))
            {
                // A comment terminator abandons the current declaration; presumably the
                // declaration-like text that started it was part of a block comment.
                isParsingTable = false;
                expectedNumEntries = 0;
            }
            if (schemaEnd.IsMatch(line))
            {
                // The counter is reset when the next declaration starts, so it needs no update here
                output.Add(targetName, expectedNumEntries);
                isParsingTable = false;
            }
            else
            {
                expectedNumEntries++;
            }
        }

        return output;
    }

    /// <summary>
    /// Check that the Schema entries match the implemented methods in Tuples.cs
    /// </summary>
    [Fact]
    public void Schema_Matches_Tuples()
    {
        string[] schemaContents = File.ReadLines(PathHolder.schemaPath).ToArray();
        Dictionary<string, int> expected = ParseSchema(schemaContents);
        // Get all the nonpublic static methods from the Tuples classes and reduce each to its name and
        // expected number of parameters. Materialized once so reflection is not repeated on every lookup.
        var methods = typeof(Semmle.Extraction.PowerShell.Tuples)
            .GetMethods(BindingFlags.Static | BindingFlags.NonPublic)
            .Union(typeof(Semmle.Extraction.Tuples).GetMethods(BindingFlags.Static | BindingFlags.NonPublic))
            .Select(method =>
            {
                ParameterInfo[] parameters = method.GetParameters();
                // The expected number of parameters is one fewer than actual if the first is a TextWriter.
                // Methods without parameters are handled explicitly instead of indexing an empty array.
                bool startsWithWriter = parameters.Length > 0 && parameters[0].ParameterType.Name.Equals("TextWriter");
                return (Name: method.Name, ParameterCount: startsWithWriter ? parameters.Length - 1 : parameters.Length);
            })
            .ToList();
        List<string> errors = new();
        List<string> warnings = new();
        // If a tuple method exists and doesn't have a matching schema entry that is an error,
        // as the produced traps won't match the schema
        foreach (var method in methods)
        {
            if (expected.TryGetValue(method.Name, out int schemaCount) && schemaCount == method.ParameterCount)
            {
                continue;
            }
            errors.Add($"Tuple {method.Name} does not match any schema entry, expected {method.ParameterCount} parameters.");
        }
        // If the schema has a superfluous entity that is a warning, as the extractor simply cannot produce those things
        var implemented = methods.ToHashSet();
        foreach (var entry in expected)
        {
            if (implemented.Contains((entry.Key, entry.Value)))
            {
                continue;
            }
            warnings.Add($"Schema entry {entry.Key} does not match any implemented Tuple, expected {entry.Value} parameters.");
        }

        foreach (var warning in warnings)
        {
            _output.WriteLine($"Warning: {warning}");
        }
        foreach (var error in errors)
        {
            _output.WriteLine($"Error: {error}");
        }
        Assert.Empty(errors);
    }

    /// <summary>
    /// Check that none of the expected sample traps contains extractor messages.
    /// </summary>
    [Fact]
    public void Verify_Sample_Traps()
    {
        string[] expectedTrapsFiles = Directory.GetFiles(PathHolder.expectedTraps);
        int numFailures = 0;
        foreach (string expected in expectedTrapsFiles)
        {
            if (File.ReadAllText(expected).Contains("extractor_messages"))
            {
                numFailures++;
                _output.WriteLine($"Expected sample trap {expected} has extractor error messages.");
            }
        }

        if (numFailures > 0)
        {
            _output.WriteLine($"{numFailures} errors were detected.");
        }
        Assert.Equal(0, numFailures);
    }

    /// <summary>
    /// Run the extractor on the sample sources and check that the sanitized generated traps
    /// are identical to the sanitized expected traps, file by file.
    /// </summary>
    [Fact]
    public void Compare_Generated_Traps()
    {
        string[] args = new string[] { PathHolder.powershellSource };
        int exitcode = Program.Main(args);
        Assert.Equal(0, exitcode);
        string[] generatedTrapsFiles = Directory.GetFiles(PathHolder.generatedTraps);
        string[] expectedTrapsFiles = Directory.GetFiles(PathHolder.expectedTraps);

        Assert.NotEmpty(generatedTrapsFiles);
        int numFailures = 0;
        // Pair each file name with its full path so files can be matched by name across directories
        var generatedFileNames = generatedTrapsFiles.Select(x => (FileName: Path.GetFileName(x), FullPath: x)).ToList();
        var expectedFileNames = expectedTrapsFiles.Select(x => (FileName: Path.GetFileName(x), FullPath: x)).ToList();
        foreach (var expectedTrapFile in expectedFileNames)
        {
            if (generatedFileNames.Any(x => x.FileName == expectedTrapFile.FileName))
            {
                continue;
            }
            numFailures++;
            _output.WriteLine($"{expectedTrapFile} has no matching filename in generated.");
        }
        foreach (var generated in generatedFileNames)
        {
            // FirstOrDefault yields a tuple of nulls when no expected file has this name
            var expected = expectedFileNames.FirstOrDefault(filePath => filePath.FileName.Equals(generated.FileName));
            if (expected.FileName is null || expected.FullPath is null)
            {
                numFailures++;
                _output.WriteLine($"{generated.FileName} has no matching filename in expected.");
            }
            else
            {
                if (File.ReadAllText(generated.FullPath).Contains("extractor_messages"))
                {
                    _output.WriteLine($"Test generated trap {generated} has extractor error messages.");
                    numFailures++;
                    continue;
                }
                string generatedFileSanitized = TrapSanitizer.SanitizeTrap(File.ReadAllLines(generated.FullPath));
                string expectedFileSanitized = TrapSanitizer.SanitizeTrap(File.ReadAllLines(expected.FullPath));
                if (!generatedFileSanitized.Equals(expectedFileSanitized))
                {
                    numFailures++;
                    _output.WriteLine($"{generated} does not match {expected}");
                }
            }
        }

        if (numFailures > 0)
        {
            _output.WriteLine($"{numFailures} errors were detected.");
        }
        Assert.Equal(expectedTrapsFiles.Length, generatedTrapsFiles.Length);
        Assert.Equal(0, numFailures);
    }
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
global using Xunit;

0 commit comments

Comments
 (0)