Skip to content

Commit d3570a6

Browse files
authored
fix: improve EmbededImagesRepairToolBase.GetImageFormat to support additional image header variants (#3049)
### Summary

This PR fixes an issue where `EmbededImagesRepairToolBase.GetImageFormat` could not correctly identify certain image formats due to incomplete header checks.

### Changes

- Added support for additional JPEG APP markers (ICC profile, Photoshop IRB, Adobe).
- Improved detection for PNG and TIFF by checking full signatures.

### Why

Previously, some valid images were misclassified as authentication errors or not detected at all. This update ensures broader and more accurate format detection, although some gaps might still remain.

### Testing

- Added unit tests for JPEG variants (APP2, APP13, APP14) and other formats.
- Verified detection works for PNG, TIFF, BMP, GIF, both for "fake headers" and real test assets.

Closes #3048

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->

## Summary by CodeRabbit

* **New Features**
  * Added SVG image format support.
* **Bug Fixes**
  * Improved detection and validation for BMP, GIF, PNG, TIFF and JPEG, with more robust handling of short or null inputs.
* **Tests**
  * Added comprehensive tests for image format detection and included image assets for test runs.
* **Chores**
  * Updated gitignore to exclude development container files.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->
2 parents 29aaddf + def3063 commit d3570a6

File tree

10 files changed

+293
-33
lines changed

10 files changed

+293
-33
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,8 @@ ClientBin/
117117
*.[Pp]ublish.xml
118118
*.pfx
119119
*.publishsettings
120+
.devcontainer
121+
.devcontainer/*
120122

121123
# RIA/Silverlight projects
122124
Generated_Code/

src/MigrationTools.Tests/MigrationTools.Tests.csproj

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,27 @@
2222
<ProjectReference Include="..\MigrationTools.Clients.FileSystem\MigrationTools.Clients.FileSystem.csproj" />
2323
<ProjectReference Include="..\MigrationTools.Shadows\MigrationTools.Shadows.csproj" />
2424
<ProjectReference Include="..\MigrationTools\MigrationTools.csproj" />
25+
</ItemGroup>
26+
27+
<ItemGroup>
28+
<Content Include="..\MigrationTools.Tests\Tools\Infrastructure\Assets\bmpsample.bmp">
29+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
30+
</Content>
31+
<Content Include="..\MigrationTools.Tests\Tools\Infrastructure\Assets\gifsample.gif">
32+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
33+
</Content>
34+
<Content Include="..\MigrationTools.Tests\Tools\Infrastructure\Assets\jpgsample.jpg">
35+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
36+
</Content>
37+
<Content Include="..\MigrationTools.Tests\Tools\Infrastructure\Assets\tiffsample.tiff">
38+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
39+
</Content>
40+
<Content Include="..\MigrationTools.Tests\Tools\Infrastructure\Assets\pngsample.png">
41+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
42+
</Content>
43+
<Content Include="..\MigrationTools.Tests\Tools\Infrastructure\Assets\svgsample.svg">
44+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
45+
</Content>
2546
</ItemGroup>
2647

2748
<ItemGroup>
Binary file not shown.
3.85 KB
Loading
5.51 KB
Loading
5.13 KB
Loading
Lines changed: 6 additions & 0 deletions
Loading
Binary file not shown.
Lines changed: 221 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,221 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Drawing;
4+
using System.IO;
5+
using System.Text;
6+
using Microsoft.Extensions.Logging;
7+
using Microsoft.Extensions.Options;
8+
using Microsoft.VisualStudio.TestTools.UnitTesting;
9+
using MigrationTools.DataContracts;
10+
using MigrationTools.Options;
11+
using MigrationTools.Tools;
12+
using MigrationTools.Tools.Infrastructure;
13+
14+
namespace MigrationTools.Tests.Tools.Infrastructure
15+
{
16+
/// <summary>
/// Tests for the image format sniffing exposed through
/// <see cref="TestableEmbededImagesRepairTool.CallGetImageFormat"/>.
/// Covers real sample files copied to the test output directory as well as
/// minimal hand-written headers for each supported format.
/// </summary>
[TestClass]
public class EmbededImagesRepairToolBaseTests
{
    /// <summary>
    /// Reads a sample asset from Tools/Infrastructure/Assets below the test base directory.
    /// </summary>
    /// <param name="name">File name of the asset, e.g. "pngsample.png".</param>
    /// <returns>The raw bytes of the asset.</returns>
    private static byte[] LoadAsset(string name)
    {
        string path = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "Tools", "Infrastructure", "Assets", name);
        return File.ReadAllBytes(path);
    }

    // ---- Real sample files -------------------------------------------------

    [TestMethod]
    public void ShouldDetectJpegWithPhotoshopMetadata_FromFile()
    {
        byte[] bytes = LoadAsset("jpgsample.jpg");
        var format = TestableEmbededImagesRepairTool.CallGetImageFormat(bytes);
        Assert.AreEqual("jpeg", format);
    }

    [TestMethod]
    public void ShouldDetectPng_FromFile()
    {
        byte[] bytes = LoadAsset("pngsample.png");
        var format = TestableEmbededImagesRepairTool.CallGetImageFormat(bytes);
        Assert.AreEqual("png", format);
    }

    [TestMethod]
    public void ShouldDetectTiff_FromFile()
    {
        byte[] bytes = LoadAsset("tiffsample.tiff");
        var format = TestableEmbededImagesRepairTool.CallGetImageFormat(bytes);
        Assert.AreEqual("tiff", format);
    }

    [TestMethod]
    public void ShouldDetectGif_FromFile()
    {
        byte[] bytes = LoadAsset("gifsample.gif");
        var format = TestableEmbededImagesRepairTool.CallGetImageFormat(bytes);
        Assert.AreEqual("gif", format);
    }

    [TestMethod]
    public void ShouldDetectSvg_FromFile()
    {
        byte[] bytes = LoadAsset("svgsample.svg");
        var format = TestableEmbededImagesRepairTool.CallGetImageFormat(bytes);
        Assert.AreEqual("svg", format);
    }

    // ---- Minimal synthetic headers -----------------------------------------

    [TestMethod]
    public void ShouldDetectBmp()
    {
        byte[] bmpHeader = new byte[] { 66, 77 }; // "BM"
        var format = TestableEmbededImagesRepairTool.CallGetImageFormat(bmpHeader);
        Assert.AreEqual("bmp", format);
    }

    [TestMethod]
    public void ShouldDetectSVG()
    {
        byte[] svgHeader = Encoding.ASCII.GetBytes("<svg");
        var format = TestableEmbededImagesRepairTool.CallGetImageFormat(svgHeader);
        Assert.AreEqual("svg", format);
    }

    [TestMethod]
    public void ShouldDetectSvgWithXmlProlog()
    {
        byte[] svg = Encoding.UTF8.GetBytes("<?xml version='1.0'?><svg xmlns='http://www.w3.org/2000/svg'/>");
        var format = TestableEmbededImagesRepairTool.CallGetImageFormat(svg);
        Assert.AreEqual("svg", format);
    }

    [TestMethod]
    public void ShouldDetectSvgWithLeadingWhitespace()
    {
        byte[] svg = Encoding.UTF8.GetBytes("  \r\n<svg/>");
        var format = TestableEmbededImagesRepairTool.CallGetImageFormat(svg);
        Assert.AreEqual("svg", format);
    }

    [TestMethod]
    public void ShouldDetectGIF87a()
    {
        byte[] gifHeader = Encoding.ASCII.GetBytes("GIF87a");
        var format = TestableEmbededImagesRepairTool.CallGetImageFormat(gifHeader);
        Assert.AreEqual("gif", format);
    }

    [TestMethod]
    public void ShouldDetectGIF89a()
    {
        byte[] gifHeader = Encoding.ASCII.GetBytes("GIF89a");
        var format = TestableEmbededImagesRepairTool.CallGetImageFormat(gifHeader);
        Assert.AreEqual("gif", format);
    }

    [TestMethod]
    public void ShouldDetectPng()
    {
        byte[] pngHeader = new byte[] { 137, 80, 78, 71, 13, 10, 26, 10 }; // 89 50 4E 47 0D 0A 1A 0A
        var format = TestableEmbededImagesRepairTool.CallGetImageFormat(pngHeader);
        Assert.AreEqual("png", format);
    }

    [TestMethod]
    public void ShouldDetectTiffLittleEndian()
    {
        byte[] tiffHeader = new byte[] { 73, 73, 42, 0 }; // II followed by 2A 00
        var format = TestableEmbededImagesRepairTool.CallGetImageFormat(tiffHeader);
        Assert.AreEqual("tiff", format);
    }

    [TestMethod]
    public void ShouldDetectTiffBigEndian()
    {
        byte[] tiffHeader = new byte[] { 77, 77, 0, 42 }; // MM followed by 00 2A
        var format = TestableEmbededImagesRepairTool.CallGetImageFormat(tiffHeader);
        Assert.AreEqual("tiff", format);
    }

    // ---- JPEG: SOI (FF D8) followed by different marker segments ------------

    [TestMethod]
    public void ShouldDetectJpegStandard()
    {
        byte[] jpegHeader = new byte[] { 255, 216, 255, 224 }; // FF D8 FF E0 (APP0)
        var format = TestableEmbededImagesRepairTool.CallGetImageFormat(jpegHeader);
        Assert.AreEqual("jpeg", format);
    }

    [TestMethod]
    public void ShouldDetectJpegCanon()
    {
        byte[] jpegHeader = new byte[] { 255, 216, 255, 225 }; // FF D8 FF E1 (APP1)
        var format = TestableEmbededImagesRepairTool.CallGetImageFormat(jpegHeader);
        Assert.AreEqual("jpeg", format);
    }

    [TestMethod]
    public void ShouldDetectJpegWithSPIFF()
    {
        byte[] jpegHeader = new byte[] { 255, 216, 255, 232 }; // FF D8 FF E8 (APP8)
        var format = TestableEmbededImagesRepairTool.CallGetImageFormat(jpegHeader);
        Assert.AreEqual("jpeg", format);
    }

    [TestMethod]
    public void ShouldDetectJpegWithAdobeMarker()
    {
        // The Adobe segment is APP14 (0xEE). The previous value 0xEF is APP15,
        // which is still covered by ShouldDetectJpegWithAnyAppMarker.
        byte[] jpegHeader = new byte[] { 255, 216, 255, 238 }; // FF D8 FF EE (APP14)
        var format = TestableEmbededImagesRepairTool.CallGetImageFormat(jpegHeader);
        Assert.AreEqual("jpeg", format);
    }

    [TestMethod]
    public void ShouldDetectJpegWithICCProfile()
    {
        // ICC profiles are carried in APP2 (0xE2). The previous value 0xE3 is APP3,
        // which is still covered by ShouldDetectJpegWithAnyAppMarker.
        byte[] jpegHeader = new byte[] { 255, 216, 255, 226 }; // FF D8 FF E2 (APP2)
        var format = TestableEmbededImagesRepairTool.CallGetImageFormat(jpegHeader);
        Assert.AreEqual("jpeg", format);
    }

    [TestMethod]
    public void ShouldDetectJpegWithIRBMetadata()
    {
        byte[] jpegHeader = new byte[] { 255, 216, 255, 237 }; // FF D8 FF ED (APP13)
        var format = TestableEmbededImagesRepairTool.CallGetImageFormat(jpegHeader);
        Assert.AreEqual("jpeg", format);
    }

    [TestMethod]
    public void ShouldDetectJpegWithQuantizationTable()
    {
        byte[] jpegHeader = new byte[] { 255, 216, 255, 219 }; // FF D8 FF DB (DQT)
        var format = TestableEmbededImagesRepairTool.CallGetImageFormat(jpegHeader);
        Assert.AreEqual("jpeg", format);
    }

    [TestMethod]
    public void ShouldDetectJpegWithPhotoshopMetadata()
    {
        byte[] jpegHeader = new byte[] { 255, 216, 255, 238 }; // FF D8 FF EE (APP14)
        var format = TestableEmbededImagesRepairTool.CallGetImageFormat(jpegHeader);
        Assert.AreEqual("jpeg", format);
    }

    [TestMethod]
    public void ShouldDetectJpegWithCanonMarker()
    {
        byte[] jpegHeader = new byte[] { 255, 216, 255, 226 }; // FF D8 FF E2 (APP2)
        var format = TestableEmbededImagesRepairTool.CallGetImageFormat(jpegHeader);
        Assert.AreEqual("jpeg", format);
    }

    [TestMethod]
    public void ShouldDetectJpegWithAnyAppMarker()
    {
        // Detection keys on the SOI marker only, so every APPn segment (E0..EF) must be accepted.
        for (int marker = 0xE0; marker <= 0xEF; marker++)
        {
            byte[] jpegHeader = new byte[] { 0xFF, 0xD8, 0xFF, (byte)marker };
            var format = TestableEmbededImagesRepairTool.CallGetImageFormat(jpegHeader);
            Assert.AreEqual("jpeg", format, $"APP{marker - 0xE0} (0x{marker:X2}) was not detected as JPEG");
        }
    }

    // ---- Inputs that must not be classified --------------------------------

    [TestMethod]
    public void ShouldReturnUnknownForNull()
    {
        var format = TestableEmbededImagesRepairTool.CallGetImageFormat(null);
        Assert.AreEqual("unknown", format);
    }

    [TestMethod]
    public void ShouldReturnUnknownForEmptyInput()
    {
        var format = TestableEmbededImagesRepairTool.CallGetImageFormat(new byte[0]);
        Assert.AreEqual("unknown", format);
    }

    [TestMethod]
    public void ShouldReturnUnknownForTruncatedHeader()
    {
        // Only BMP may be recognised from fewer than four bytes; a bare JPEG SOI is too short.
        byte[] truncated = new byte[] { 0xFF, 0xD8 };
        var format = TestableEmbededImagesRepairTool.CallGetImageFormat(truncated);
        Assert.AreEqual("unknown", format);
    }

    [TestMethod]
    public void ShouldReturnUnknownForUnrecognisedContent()
    {
        byte[] text = Encoding.ASCII.GetBytes("plain text");
        var format = TestableEmbededImagesRepairTool.CallGetImageFormat(text);
        Assert.AreEqual("unknown", format);
    }
}
194+
195+
/// <summary>
/// Test-only subclass of EmbededImagesRepairToolBase that makes the protected
/// static GetImageFormat reachable from the unit tests.
/// </summary>
public class TestableEmbededImagesRepairTool : EmbededImagesRepairToolBase<TestToolOptions>
{
    public TestableEmbededImagesRepairTool(
        IOptions<TestToolOptions> options,
        IServiceProvider services,
        ILogger<ITool> logger,
        ITelemetryLogger telemetry)
        : base(options, services, logger, telemetry)
    {
    }

    /// <summary>
    /// Forwards <paramref name="bytes"/> to GetImageFormat and returns the
    /// result of calling ToString() on the detected format.
    /// </summary>
    public static string CallGetImageFormat(byte[] bytes)
    {
        var detected = GetImageFormat(bytes);
        return detected.ToString();
    }

    // Not exercised by the tests; only present to satisfy the abstract base class.
    protected override void FixEmbededImages(WorkItemData wi, string oldTfsurl, string newTfsurl, string sourcePersonalAccessToken = "")
        => throw new NotImplementedException();
}
214+
215+
/// <summary>
/// Placeholder options type used as the generic argument of
/// TestableEmbededImagesRepairTool. Every member throws
/// <see cref="NotImplementedException"/> when accessed.
/// </summary>
public class TestToolOptions : IToolOptions
{
    public ConfigurationMetadata ConfigurationMetadata
    {
        get { throw new NotImplementedException(); }
    }

    public bool Enabled
    {
        get { throw new NotImplementedException(); }
        set { throw new NotImplementedException(); }
    }
}
221+
}

src/MigrationTools/Tools/Infrastructure/EmbededImagesRepairEnricherBase.cs

Lines changed: 43 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -56,51 +56,60 @@ protected static HttpResponseMessage DownloadFile(HttpClient httpClient, string
5656
/// Retrieve Image Format for a given byte array
5757
/// </summary>
5858
/// <param name="bytes">Image to check</param>
59-
/// <remarks>From https://stackoverflow.com/a/9446045/1317161</remarks>
6059
/// <returns>Image format</returns>
6160
protected static ImageFormat GetImageFormat(byte[] bytes)
{
    // Nothing shorter than the two-byte BMP magic can be classified.
    if (bytes == null || bytes.Length < 2)
        return ImageFormat.unknown;

    // BMP: 42 4D ("BM"). This is the only format accepted from fewer than four bytes.
    if (StartsWithSignature(bytes, new byte[] { 0x42, 0x4D }))
        return ImageFormat.bmp;

    // All remaining checks require at least four bytes of input.
    if (bytes.Length < 4)
        return ImageFormat.unknown;

    // GIF: "GIF87a" or "GIF89a"
    if (StartsWithSignature(bytes, Encoding.ASCII.GetBytes("GIF87a")) ||
        StartsWithSignature(bytes, Encoding.ASCII.GetBytes("GIF89a")))
        return ImageFormat.gif;

    // PNG: 89 50 4E 47 0D 0A 1A 0A
    if (StartsWithSignature(bytes, new byte[] { 0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A }))
        return ImageFormat.png;

    // TIFF: "II" 2A 00 (little endian) or "MM" 00 2A (big endian)
    if (StartsWithSignature(bytes, new byte[] { 0x49, 0x49, 0x2A, 0x00 }) ||
        StartsWithSignature(bytes, new byte[] { 0x4D, 0x4D, 0x00, 0x2A }))
        return ImageFormat.tiff;

    // JPEG: FF D8 (start-of-image). Whatever marker segment follows is not inspected,
    // so every APPn / DQT variant is accepted.
    if (StartsWithSignature(bytes, new byte[] { 0xFF, 0xD8 }))
        return ImageFormat.jpeg;

    // SVG is text rather than a binary signature. A UTF-8 byte order mark survives
    // decoding as U+FEFF, which TrimStart() does not treat as whitespace, so it is
    // stripped explicitly before the leading whitespace is removed.
    var text = Encoding.UTF8.GetString(bytes).TrimStart('\uFEFF').TrimStart();

    if (text.StartsWith("<svg", StringComparison.OrdinalIgnoreCase))
        return ImageFormat.svg;

    // An XML prolog may precede the root element; match the <svg tag with the same
    // case-insensitive comparison that is used for the prefix checks.
    if (text.StartsWith("<?xml", StringComparison.OrdinalIgnoreCase) &&
        text.IndexOf("<svg", StringComparison.OrdinalIgnoreCase) >= 0)
        return ImageFormat.svg;

    return ImageFormat.unknown;
}

/// <summary>
/// Determines whether <paramref name="data"/> begins with the bytes of <paramref name="signature"/>.
/// </summary>
/// <param name="data">Buffer to inspect; must not be null.</param>
/// <param name="signature">Expected leading bytes; must not be null.</param>
/// <returns>true when data is at least as long as signature and its leading bytes are equal to it.</returns>
private static bool StartsWithSignature(byte[] data, byte[] signature)
{
    if (data.Length < signature.Length)
        return false;

    for (var i = 0; i < signature.Length; i++)
    {
        if (data[i] != signature[i])
            return false;
    }

    return true;
}
@@ -130,7 +139,8 @@ protected enum ImageFormat
130139
gif,
131140
png,
132141
tiff,
133-
jpeg
142+
jpeg,
143+
svg
134144
}
135145
}
136146
}

0 commit comments

Comments
 (0)