Skip to content
This repository was archived by the owner on Jun 21, 2023. It is now read-only.

Commit 50b9e91

Browse files
authored
Merge pull request #1268 from github/fixes/1241-carriage-return-in-diff
Fix diff parsing when file contains loose carriage return
2 parents a9ddfd0 + 66c84b1 commit 50b9e91

File tree

2 files changed

+221
-54
lines changed

2 files changed

+221
-54
lines changed

src/GitHub.Exports/Models/DiffUtilities.cs

Lines changed: 123 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
using System.Collections.Generic;
44
using System.Text.RegularExpressions;
55
using System.Diagnostics.CodeAnalysis;
6+
using GitHub.Extensions;
67

78
namespace GitHub.Models
89
{
@@ -12,73 +13,74 @@ public static class DiffUtilities
1213

1314
public static IEnumerable<DiffChunk> ParseFragment(string diff)
1415
{
15-
using (var reader = new StringReader(diff))
16+
var reader = new LineReader(diff);
17+
string line;
18+
DiffChunk chunk = null;
19+
int diffLine = -1;
20+
int oldLine = -1;
21+
int newLine = -1;
22+
23+
while ((line = reader.ReadLine()) != null)
1624
{
17-
string line;
18-
DiffChunk chunk = null;
19-
int diffLine = -1;
20-
int oldLine = -1;
21-
int newLine = -1;
25+
var headerMatch = ChunkHeaderRegex.Match(line);
2226

23-
while ((line = reader.ReadLine()) != null)
27+
if (headerMatch.Success)
2428
{
25-
var headerMatch = ChunkHeaderRegex.Match(line);
26-
27-
if (headerMatch.Success)
29+
if (chunk != null)
2830
{
29-
if (chunk != null)
30-
{
31-
yield return chunk;
32-
}
31+
yield return chunk;
32+
}
3333

34-
if (diffLine == -1) diffLine = 0;
34+
if (diffLine == -1) diffLine = 0;
3535

36-
chunk = new DiffChunk
37-
{
38-
OldLineNumber = oldLine = int.Parse(headerMatch.Groups[1].Value),
39-
NewLineNumber = newLine = int.Parse(headerMatch.Groups[2].Value),
40-
DiffLine = diffLine,
41-
};
42-
}
43-
else if (chunk != null)
36+
chunk = new DiffChunk
4437
{
45-
var type = GetLineChange(line[0]);
38+
OldLineNumber = oldLine = int.Parse(headerMatch.Groups[1].Value),
39+
NewLineNumber = newLine = int.Parse(headerMatch.Groups[2].Value),
40+
DiffLine = diffLine,
41+
};
42+
}
43+
else if (chunk != null)
44+
{
45+
var type = GetLineChange(line[0]);
46+
47+
// This might contain info about previous line (e.g. "\ No newline at end of file").
48+
if (type != DiffChangeType.Control)
49+
{
50+
chunk.Lines.Add(new DiffLine
51+
{
52+
Type = type,
53+
OldLineNumber = type != DiffChangeType.Add ? oldLine : -1,
54+
NewLineNumber = type != DiffChangeType.Delete ? newLine : -1,
55+
DiffLineNumber = diffLine,
56+
Content = line,
57+
});
58+
59+
var lineCount = 1;
60+
lineCount += LineReader.CountCarriageReturns(line);
4661

47-
// This might contain info about previous line (e.g. "\ No newline at end of file").
48-
if (type != DiffChangeType.Control)
62+
switch (type)
4963
{
50-
chunk.Lines.Add(new DiffLine
51-
{
52-
Type = type,
53-
OldLineNumber = type != DiffChangeType.Add ? oldLine : -1,
54-
NewLineNumber = type != DiffChangeType.Delete ? newLine : -1,
55-
DiffLineNumber = diffLine,
56-
Content = line,
57-
});
58-
59-
switch (type)
60-
{
61-
case DiffChangeType.None:
62-
++oldLine;
63-
++newLine;
64-
break;
65-
case DiffChangeType.Delete:
66-
++oldLine;
67-
break;
68-
case DiffChangeType.Add:
69-
++newLine;
70-
break;
71-
}
64+
case DiffChangeType.None:
65+
oldLine += lineCount;
66+
newLine += lineCount;
67+
break;
68+
case DiffChangeType.Delete:
69+
oldLine += lineCount;
70+
break;
71+
case DiffChangeType.Add:
72+
newLine += lineCount;
73+
break;
7274
}
7375
}
74-
75-
if (diffLine != -1) ++diffLine;
7676
}
7777

78-
if (chunk != null)
79-
{
80-
yield return chunk;
81-
}
78+
if (diffLine != -1) ++diffLine;
79+
}
80+
81+
if (chunk != null)
82+
{
83+
yield return chunk;
8284
}
8385
}
8486

@@ -113,6 +115,73 @@ public static DiffLine Match(IEnumerable<DiffChunk> diff, IList<DiffLine> target
113115
return null;
114116
}
115117

118+
/// Here are some alternative implementations we tried:
119+
/// https://gist.github.com/shana/200e4719d4f571caab9dbf5921fa5276
120+
/// Scanning with `text.IndexOf('\n', index)` appears to the the best compromise for average .diff files.
121+
/// It's likely that `text.IndexOfAny(new [] {'\r', '\n'}, index)` would be faster if lines were much longer.
122+
public class LineReader
123+
{
124+
readonly string text;
125+
int index = 0;
126+
127+
public LineReader(string text)
128+
{
129+
Guard.ArgumentNotNull(text, nameof(text));
130+
131+
this.text = text;
132+
}
133+
134+
public string ReadLine()
135+
{
136+
if (EndOfText)
137+
{
138+
if (StartOfText)
139+
{
140+
index = -1;
141+
return string.Empty;
142+
}
143+
144+
return null;
145+
}
146+
147+
var startIndex = index;
148+
index = text.IndexOf('\n', index);
149+
var endIndex = index != -1 ? index : text.Length;
150+
var length = endIndex - startIndex;
151+
152+
if (index != -1)
153+
{
154+
if (index > 0 && text[index - 1] == '\r')
155+
{
156+
length--;
157+
}
158+
159+
index++;
160+
}
161+
162+
return text.Substring(startIndex, length);
163+
}
164+
165+
public static int CountCarriageReturns(string text)
166+
{
167+
Guard.ArgumentNotNull(text, nameof(text));
168+
169+
int count = 0;
170+
int index = 0;
171+
while ((index = text.IndexOf('\r', index)) != -1)
172+
{
173+
index++;
174+
count++;
175+
}
176+
177+
return count;
178+
}
179+
180+
bool StartOfText => index == 0;
181+
182+
bool EndOfText => index == -1 || index == text.Length;
183+
}
184+
116185
[SuppressMessage("Microsoft.Globalization", "CA1305:SpecifyIFormatProvider", MessageId = "System.String.Format(System.String,System.Object)")]
117186
static DiffChangeType GetLineChange(char c)
118187
{

test/GitHub.InlineReviews.UnitTests/Models/DiffUtilitiesTests.cs

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,52 @@ public void NoNewLineNotAtEndOfChunk_CheckDiffLineNumber()
9696
Assert.Equal(3, line.DiffLineNumber);
9797
}
9898

99+
[Theory]
100+
[InlineData("+foo\n+bar\n", "+foo", "+bar")]
101+
[InlineData("+fo\ro\n+bar\n", "+fo\ro", "+bar")]
102+
[InlineData("+foo\r\r\n+bar\n", "+foo\r", "+bar")]
103+
[InlineData("+\\r\n+\r\n", "+\\r", "+")]
104+
public void FirstChunk_CheckLineContent(string diffLines, string contentLine0, string contentLine1)
105+
{
106+
var header = "@@ -1 +1 @@";
107+
var diff = header + "\n" + diffLines;
108+
109+
var chunk = DiffUtilities.ParseFragment(diff).First();
110+
111+
Assert.Equal(contentLine0, chunk.Lines[0].Content);
112+
Assert.Equal(contentLine1, chunk.Lines[1].Content);
113+
}
114+
115+
[Theory]
116+
[InlineData("+foo\n+bar\n", 1, 2)]
117+
[InlineData("+fo\ro\n+bar\n", 1, 3)]
118+
[InlineData("+foo\r\r\n+bar\n", 1, 3)]
119+
public void FirstChunk_CheckNewLineNumber(string diffLines, int lineNumber0, int lineNumber1)
120+
{
121+
var header = "@@ -1 +1 @@";
122+
var diff = header + "\n" + diffLines;
123+
124+
var chunk = DiffUtilities.ParseFragment(diff).First();
125+
126+
Assert.Equal(lineNumber0, chunk.Lines[0].NewLineNumber);
127+
Assert.Equal(lineNumber1, chunk.Lines[1].NewLineNumber);
128+
}
129+
130+
[Theory]
131+
[InlineData("-foo\n-bar\n", 1, 2)]
132+
[InlineData("-fo\ro\n-bar\n", 1, 3)]
133+
[InlineData("-foo\r\r\n-bar\n", 1, 3)]
134+
public void FirstChunk_CheckOldLineNumber(string diffLines, int lineNumber0, int lineNumber1)
135+
{
136+
var header = "@@ -1 +1 @@";
137+
var diff = header + "\n" + diffLines;
138+
139+
var chunk = DiffUtilities.ParseFragment(diff).First();
140+
141+
Assert.Equal(lineNumber0, chunk.Lines[0].OldLineNumber);
142+
Assert.Equal(lineNumber1, chunk.Lines[1].OldLineNumber);
143+
}
144+
99145
[Fact]
100146
public void FirstChunk_CheckDiffLineZeroBased()
101147
{
@@ -269,5 +315,57 @@ public void NoLineMatchesFromNoLines()
269315
Assert.Equal(null, line);
270316
}
271317
}
318+
319+
public class TheLineReaderClass
320+
{
321+
[Theory]
322+
[InlineData("", new[] { "", null })]
323+
[InlineData("\n", new[] { "", null })]
324+
[InlineData("\r\n", new[] { "", null })]
325+
[InlineData("1", new[] { "1", null })]
326+
[InlineData("1\n2\n", new[] { "1", "2", null })]
327+
[InlineData("1\n2", new[] { "1", "2", null })]
328+
[InlineData("1\r\n2\n", new[] { "1", "2", null })]
329+
[InlineData("1\r\n2", new[] { "1", "2", null })]
330+
[InlineData("\r", new[] { "\r", null })]
331+
[InlineData("\r\r", new[] { "\r\r", null })]
332+
[InlineData("\r\r\n", new[] { "\r", null })]
333+
[InlineData("\r_\n", new[] { "\r_", null })]
334+
public void ReadLines(string text, string[] expectLines)
335+
{
336+
var lineReader = new DiffUtilities.LineReader(text);
337+
338+
foreach (var expectLine in expectLines)
339+
{
340+
var line = lineReader.ReadLine();
341+
Assert.Equal(expectLine, line);
342+
}
343+
}
344+
345+
[Fact]
346+
public void Constructor_NullText_ArgumentNullException()
347+
{
348+
Assert.Throws<ArgumentNullException>(() => new DiffUtilities.LineReader(null));
349+
}
350+
351+
[Theory]
352+
[InlineData("", 0)]
353+
[InlineData("\r", 1)]
354+
[InlineData("\r\n", 1)]
355+
[InlineData("\r\r", 2)]
356+
[InlineData("\r-\r", 2)]
357+
public void CountCarriageReturns(string text, int expectCount)
358+
{
359+
var count = DiffUtilities.LineReader.CountCarriageReturns(text);
360+
361+
Assert.Equal(expectCount, count);
362+
}
363+
364+
[Fact]
365+
public void CountCarriageReturns_NullText_ArgumentNullException()
366+
{
367+
Assert.Throws<ArgumentNullException>(() => DiffUtilities.LineReader.CountCarriageReturns(null));
368+
}
369+
}
272370
}
273371
}

0 commit comments

Comments
 (0)