Skip to content

Commit 6fba565

Browse files
rhuijben, BobLd
authored and committed
Avoid doing a true file seek when simply peeking at the next char in the token parser
1 parent 3592fc8 commit 6fba565

File tree

4 files changed

+36
-21
lines changed

4 files changed

+36
-21
lines changed

src/UglyToad.PdfPig.Core/StreamInputBytes.cs

Lines changed: 24 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -11,11 +11,12 @@ public sealed class StreamInputBytes : IInputBytes
1111
{
1212
private readonly Stream stream;
1313
private readonly bool shouldDispose;
14+
private byte? peekByte;
1415

1516
private bool isAtEnd;
1617

1718
/// <inheritdoc />
18-
public long CurrentOffset => stream.Position;
19+
public long CurrentOffset => peekByte.HasValue ? stream.Position - 1 : stream.Position;
1920

2021
/// <inheritdoc />
2122
public byte CurrentByte { get; private set; }
@@ -52,7 +53,8 @@ public StreamInputBytes(Stream stream, bool shouldDispose = true)
5253
/// <inheritdoc />
5354
public bool MoveNext()
5455
{
55-
var b = stream.ReadByte();
56+
var b = peekByte ?? stream.ReadByte();
57+
peekByte = null;
5658

5759
if (b == -1)
5860
{
@@ -68,18 +70,21 @@ public bool MoveNext()
6870
/// <inheritdoc />
6971
public byte? Peek()
7072
{
71-
var current = CurrentOffset;
72-
73-
var b = stream.ReadByte();
74-
75-
stream.Seek(current, SeekOrigin.Begin);
76-
77-
if (b == -1)
73+
if (!peekByte.HasValue)
7874
{
79-
return null;
75+
var v = stream.ReadByte();
76+
77+
if (v >= 0)
78+
{
79+
peekByte = (byte)v;
80+
}
81+
else
82+
{
83+
return null;
84+
}
8085
}
8186

82-
return (byte)b;
87+
return peekByte;
8388
}
8489

8590
/// <inheritdoc />
@@ -92,6 +97,7 @@ public bool IsAtEnd()
9297
public void Seek(long position)
9398
{
9499
isAtEnd = false;
100+
peekByte = null;
95101

96102
if (position == 0)
97103
{
@@ -112,9 +118,15 @@ public int Read(Span<byte> buffer)
112118
{
113119
return 0;
114120
}
121+
else if (peekByte.HasValue)
122+
{
123+
buffer[0] = peekByte.Value;
124+
peekByte = null;
115125

116-
int read = stream.Read(buffer);
126+
return Read(buffer.Slice(1)) + 1;
127+
}
117128

129+
int read = stream.Read(buffer);
118130
if (read > 0)
119131
{
120132
CurrentByte = buffer[read - 1];

src/UglyToad.PdfPig/Parser/PdfDocumentFactory.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,9 @@
2525

2626
internal static class PdfDocumentFactory
2727
{
28-
public static PdfDocument Open(byte[] fileBytes, ParsingOptions? options = null)
28+
public static PdfDocument Open(ReadOnlyMemory<byte> memory, ParsingOptions? options = null)
2929
{
30-
var inputBytes = new MemoryInputBytes(fileBytes);
30+
var inputBytes = new MemoryInputBytes(memory);
3131

3232
return Open(inputBytes, options);
3333
}

src/UglyToad.PdfPig/PdfDocument.cs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,14 @@ internal PdfDocument(
102102
/// <returns>A <see cref="PdfDocument"/> providing access to the file contents.</returns>
103103
public static PdfDocument Open(byte[] fileBytes, ParsingOptions? options = null) => PdfDocumentFactory.Open(fileBytes, options);
104104

105+
/// <summary>
106+
/// Creates a <see cref="PdfDocument"/> for reading from the provided file bytes.
107+
/// </summary>
108+
/// <param name="memory">The bytes of the PDF file.</param>
109+
/// <param name="options">Optional parameters controlling parsing.</param>
110+
/// <returns>A <see cref="PdfDocument"/> providing access to the file contents.</returns>
111+
public static PdfDocument Open(ReadOnlyMemory<byte> memory, ParsingOptions? options = null) => PdfDocumentFactory.Open(memory, options);
112+
105113
/// <summary>
106114
/// Opens a file and creates a <see cref="PdfDocument"/> for reading from the provided file path.
107115
/// </summary>

src/UglyToad.PdfPig/Tokenization/Scanner/PdfTokenScanner.cs

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -342,14 +342,9 @@ private bool TryReadStream(long startStreamTokenOffset, bool getLength, [NotNull
342342

343343
if ((char)inputBytes.CurrentByte == '\r')
344344
{
345-
if (!inputBytes.MoveNext())
345+
if (inputBytes.Peek() == '\n')
346346
{
347-
return false;
348-
}
349-
350-
if ((char)inputBytes.CurrentByte != '\n')
351-
{
352-
inputBytes.Seek(inputBytes.CurrentOffset - 1);
347+
inputBytes.MoveNext();
353348
}
354349
break;
355350
}

0 commit comments

Comments
 (0)