Skip to content

Commit e8f79a8

Browse files
authored
Merge pull request #227 from jafin/feature/2gbfiles
feat: Add basic support for reading/writing files >2gb
2 parents 1591788 + f18063c commit e8f79a8

File tree

11 files changed

+593
-67
lines changed

11 files changed

+593
-67
lines changed
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
namespace PdfSharpCore.Test.Helpers
2+
{
3+
/// <summary>
/// Shared sample data for the PdfSharpCore tests.
/// </summary>
public static class TestData
{
    /// <summary>
    /// Eight paragraphs of Lorem ipsum placeholder text, separated by blank lines.
    /// Declared <c>readonly</c> so a test cannot accidentally reassign the shared fixture.
    /// </summary>
    public static readonly string LoremIpsumText =
        @"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed non sapien leo. Aliquam elementum volutpat lacus, sit amet aliquet est volutpat at. Nam eleifend vehicula bibendum. Sed lacinia velit ex, id auctor tellus varius a. Vivamus cursus ut nulla quis pretium. Nunc accumsan felis nec tortor fermentum iaculis. Vivamus non lacus ullamcorper, porta justo ut, dictum nibh. Cras scelerisque in risus vitae hendrerit. Duis venenatis felis in lacinia vestibulum. Proin mauris ex, efficitur nec tincidunt in, imperdiet eget risus. Nulla porttitor mollis pellentesque. Fusce pretium ut odio et imperdiet.

Vivamus euismod velit id massa mollis, quis congue metus faucibus. Donec ante enim, vehicula a cursus ut, porta vel dui. Proin porta faucibus dolor non consequat. Mauris aliquam, leo a interdum pretium, tellus nisi semper libero, at suscipit dui mi bibendum turpis. Phasellus tempor mauris a eleifend placerat. Fusce in velit ut lectus sagittis varius. Etiam vulputate, libero sit amet posuere posuere, orci nunc ultricies velit, non porttitor dui ante feugiat purus. Ut semper congue lacinia. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Sed facilisis ante at sapien dignissim, ut volutpat elit dictum. Morbi nulla ante, laoreet non vulputate et, facilisis ut lectus. Mauris mollis mi nec venenatis congue. Fusce efficitur vitae massa non vehicula. Fusce euismod molestie posuere. Nunc facilisis ipsum nec justo scelerisque, nec cursus diam gravida.

Morbi at sapien risus. Nunc dui tellus, faucibus rhoncus euismod vitae, maximus at lacus. Nam neque dui, venenatis at euismod vel, ullamcorper a velit. Maecenas placerat tellus quis justo feugiat molestie. Mauris tristique sollicitudin nisi at sagittis. Integer eleifend, velit et facilisis rutrum, nisi ipsum sodales turpis, non dictum odio dui sed velit. Aliquam consequat ac lorem non hendrerit. Morbi quis mollis nibh, et mattis nisi. Donec nec ultricies dui, nec pellentesque sapien. Vivamus sed lectus fermentum mauris placerat sodales ut sed quam. Nam facilisis, risus quis hendrerit mattis, velit nisi placerat neque, in feugiat urna nisl in nibh. Suspendisse potenti. Proin sollicitudin maximus ligula, eu sodales tellus posuere vitae. Integer orci magna, ultricies in luctus a, euismod quis lectus. Phasellus blandit justo mauris, nec maximus sapien tempor non. Ut eu quam aliquam, facilisis nibh sit amet, blandit eros.

Nulla accumsan augue diam, sed egestas ipsum vestibulum vel. Praesent vitae dignissim quam. Nunc mollis tincidunt vehicula. In vitae euismod urna, eget tincidunt magna. In quis ligula eu magna vehicula venenatis. Vestibulum sapien sem, congue in congue et, vehicula et risus. Suspendisse aliquam, est et commodo porta, nibh tortor mollis nisi, quis porttitor ligula leo a ipsum. Aliquam viverra sodales leo quis faucibus.

In sit amet laoreet nisl. Aenean et tortor diam. Maecenas imperdiet massa eget justo faucibus dignissim. Donec nec finibus diam, vel lobortis neque. Ut justo felis, suscipit et justo ultrices, fringilla convallis nisi. Vestibulum egestas felis vehicula tellus maximus, a ultrices magna aliquet. Cras eu laoreet mauris. Duis id euismod est. Nulla facilisi. Vivamus fermentum metus velit, vitae vulputate lectus laoreet in. Aenean semper ante odio. Ut aliquam ligula eu enim ullamcorper, ac pellentesque enim hendrerit. Vestibulum eu efficitur nisl. Aenean tempor diam nec nulla rutrum, sit amet congue urna blandit.

Duis ullamcorper tellus ac mattis vestibulum. Integer nibh nunc, commodo sit amet pharetra sit amet, molestie a sapien. Nulla magna orci, semper at enim non, ultricies faucibus nulla. Aenean leo ex, rutrum ut fermentum ut, fringilla sit amet diam. Morbi et congue tortor, non maximus tellus. Nunc sit amet placerat ipsum, at molestie tellus. Etiam tincidunt eleifend ligula, quis dapibus elit tempus tempor. Curabitur feugiat mattis leo eu blandit. Maecenas sed vestibulum mi. In molestie eleifend euismod. Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Vivamus id turpis a magna sollicitudin pellentesque.

Donec sollicitudin efficitur sapien iaculis fringilla. Donec rutrum sagittis dolor, at auctor turpis finibus et. Aliquam et dictum risus. Praesent viverra efficitur congue. Suspendisse condimentum posuere risus, id dignissim enim viverra ut. Aliquam eleifend nisl eu dolor luctus, nec rhoncus nisi porta. Donec ut lacinia risus. Curabitur bibendum lacus consequat sem consequat euismod. Donec auctor cursus ante quis rutrum. Aliquam mattis eget libero non porta.

Maecenas mollis sollicitudin felis at imperdiet. Duis dignissim purus quis interdum mattis. Nam sit amet quam quis enim hendrerit tincidunt. Aliquam euismod metus justo, non lobortis risus vehicula in. Pellentesque tempus, leo at placerat interdum, diam lectus gravida purus, id placerat justo quam nec mauris. Ut ullamcorper commodo dui in pretium. Suspendisse luctus mauris lacinia neque faucibus sollicitudin. Pellentesque ut ipsum metus. Quisque rutrum, risus eget feugiat vestibulum, enim nisl ornare risus, sit amet interdum arcu lacus at turpis. Mauris nec tristique massa. Curabitur diam urna, dapibus eget lorem porta, venenatis mattis justo. Sed eleifend accumsan lectus, id tempor metus semper in.";
}
22+
}

PdfSharpCore.Test/IO/IoBaseTest.cs

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
using System.IO;
2+
using System.Text;
3+
using FluentAssertions;
4+
using PdfSharpCore.Pdf;
5+
using PdfSharpCore.Pdf.IO;
6+
using PdfSharpCore.Test.Helpers;
7+
using Xunit;
8+
9+
namespace PdfSharpCore.Test.IO
10+
{
11+
/// <summary>
/// Base class for PDF I/O tests. Resolves file names against the "Out" directory below the
/// test root and offers helpers to save a document, clear a previous result, and check that
/// a written file looks like a PDF and can be opened again.
/// </summary>
public abstract class IoBaseTest
{
    private const string OutputDirName = "Out";

    private readonly string _rootPath = PathHelper.GetInstance().RootDir;

    /// <summary>
    /// Opens the named file from the output directory in import mode and asserts that
    /// its document information is present.
    /// </summary>
    /// <param name="fileName">File name relative to the output directory.</param>
    public void CanReadPdf(string fileName)
    {
        var path = GetOutFilePath(fileName);
        using var fs = File.OpenRead(path);
        var inputDocument = Pdf.IO.PdfReader.Open(fs, PdfDocumentOpenMode.Import);
        var info = inputDocument.Info;
        info.Should().NotBeNullOrEmpty();
    }

    /// <summary>
    /// Saves <paramref name="document"/> under the output directory, creating the
    /// directory first when necessary.
    /// </summary>
    /// <param name="document">The document to write.</param>
    /// <param name="name">File name relative to the output directory.</param>
    protected void SaveDocument(PdfDocument document, string name)
    {
        var outFilePath = GetOutFilePath(name);
        var dir = Path.GetDirectoryName(outFilePath);
        if (!string.IsNullOrEmpty(dir))
        {
            // CreateDirectory does nothing when the directory already exists,
            // so no separate existence check is needed.
            Directory.CreateDirectory(dir);
        }

        document.Save(outFilePath);
    }

    /// <summary>
    /// Asserts that the named output file exists, is longer than one byte, and starts
    /// with the PDF header signature.
    /// </summary>
    /// <param name="v">File name relative to the output directory.</param>
    protected void ValidateFileIsPdf(string v)
    {
        var path = GetOutFilePath(v);
        Assert.True(File.Exists(path));
        var fi = new FileInfo(path);
        Assert.True(fi.Length > 1);

        using var stream = File.OpenRead(path);
        ReadStreamAndVerifyPdfHeaderSignature(stream);
    }

    /// <summary>
    /// Reads the first bytes of <paramref name="stream"/> and asserts that they equal "%PDF-".
    /// </summary>
    /// <param name="stream">A readable stream positioned at the start of the file.</param>
    private static void ReadStreamAndVerifyPdfHeaderSignature(Stream stream)
    {
        var pdfSignature = Encoding.ASCII.GetBytes("%PDF-"); // PDF must start with %PDF-
        var readBuffer = new byte[pdfSignature.Length];

        // Stream.Read may return fewer bytes than requested, so keep reading until the
        // buffer is full or the stream ends.
        var totalRead = 0;
        while (totalRead < readBuffer.Length)
        {
            var read = stream.Read(readBuffer, totalRead, readBuffer.Length - totalRead);
            if (read == 0)
            {
                break;
            }

            totalRead += read;
        }

        Assert.Equal(pdfSignature.Length, totalRead);
        readBuffer.Should().Equal(pdfSignature);
    }

    /// <summary>
    /// Deletes a previous copy of the named output file, if any, and asserts that the
    /// target path is free afterwards.
    /// </summary>
    /// <param name="file">File name relative to the output directory.</param>
    protected void ValidateTargetAvailable(string file)
    {
        var path = GetOutFilePath(file);
        if (File.Exists(path))
        {
            File.Delete(path);
        }

        Assert.False(File.Exists(path));
    }

    /// <summary>
    /// Builds the full path of a file inside the output directory.
    /// </summary>
    /// <param name="name">File name relative to the output directory.</param>
    /// <returns>The root path, the output directory name, and <paramref name="name"/> combined.</returns>
    protected string GetOutFilePath(string name)
    {
        return Path.Combine(_rootPath, OutputDirName, name);
    }
}
73+
}
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
using PdfSharpCore.Drawing;
2+
using PdfSharpCore.Drawing.Layout;
3+
using PdfSharpCore.Pdf;
4+
using PdfSharpCore.Test.Helpers;
5+
using PdfSharpCore.Test.IO;
6+
using Xunit;
7+
using Xunit.Abstractions;
8+
9+
namespace PdfSharpCore.Test
10+
{
11+
public class LargePDFReadWrite : IoBaseTest
12+
{
13+
private readonly ITestOutputHelper output;
14+
15+
public LargePDFReadWrite(ITestOutputHelper output)
16+
{
17+
this.output = output;
18+
}
19+
20+
[Fact(Skip = "Too slow for Unit test runner")]
21+
public void CanCreatePdfOver2gb()
22+
{
23+
const string outName = "CreateLargePdf.pdf";
24+
int pageCount = 70000; //2.1gb @ 369sec to create
25+
ValidateTargetAvailable(outName);
26+
27+
var document = new PdfDocument();
28+
var watch = new System.Diagnostics.Stopwatch();
29+
var font = new XFont("Arial", 10);
30+
31+
watch.Start();
32+
for (var i = 0; i < pageCount; i++)
33+
{
34+
AddAPage(document, font);
35+
}
36+
37+
watch.Stop();
38+
39+
SaveDocument(document, outName);
40+
output.WriteLine($"CreatePDF took {watch.Elapsed.TotalSeconds} sec");
41+
ValidateFileIsPdf(outName);
42+
CanReadPdf(outName);
43+
}
44+
45+
private void AddAPage(PdfDocument document, XFont font)
46+
{
47+
const int x = 40;
48+
const int y = 50;
49+
var page = document.AddPage();
50+
var renderer = XGraphics.FromPdfPage(page);
51+
var tf = new XTextFormatter(renderer);
52+
var width = page.Width.Value - 50 - x;
53+
var height = page.Height.Value - 50 - y;
54+
var rect = new XRect(40, 50, width, height);
55+
renderer.DrawRectangle(XBrushes.SeaShell, rect);
56+
tf.DrawString(TestData.LoremIpsumText, font, XBrushes.Black, rect, XStringFormats.TopLeft);
57+
}
58+
}
59+
}

PdfSharpCore/Pdf.Advanced/PdfReference.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ public PdfReference(PdfObject pdfObject)
7979
/// <summary>
8080
/// Initializes a new PdfReference instance from the specified object identifier and file position.
8181
/// </summary>
82-
public PdfReference(PdfObjectID objectID, int position)
82+
public PdfReference(PdfObjectID objectID, long position)
8383
{
8484
_objectID = objectID;
8585
_position = position;
@@ -152,12 +152,12 @@ public int GenerationNumber
152152
/// <summary>
153153
/// Gets or sets the file position of the related PdfObject.
154154
/// </summary>
155-
public int Position
155+
public long Position
156156
{
157157
get { return _position; }
158158
set { _position = value; }
159159
}
160-
int _position; // I know it should be long, but I have never seen a 2GB PDF file.
160+
long _position;
161161

162162
//public bool InUse
163163
//{

PdfSharpCore/Pdf.IO/Lexer.cs

Lines changed: 37 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,6 @@
3535
using PdfSharpCore.Internal;
3636
using PdfSharpCore.Pdf.Internal;
3737

38-
#pragma warning disable 1591
39-
4038
namespace PdfSharpCore.Pdf.IO
4139
{
4240
/// <summary>
@@ -54,15 +52,15 @@ public class Lexer
5452
public Lexer(Stream pdfInputStream)
5553
{
5654
_pdfSteam = pdfInputStream;
57-
_pdfLength = (int)_pdfSteam.Length;
55+
_pdfLength = _pdfSteam.Length;
5856
_idxChar = 0;
5957
Position = 0;
6058
}
6159

6260
/// <summary>
6361
/// Gets or sets the position within the PDF stream.
6462
/// </summary>
65-
public int Position
63+
public long Position
6664
{
6765
get { return _idxChar; }
6866
set
@@ -173,7 +171,7 @@ public Symbol ScanNextToken()
173171
/// </summary>
174172
public byte[] ReadStream(int length)
175173
{
176-
int pos;
174+
long pos;
177175

178176
// Skip illegal blanks behind «stream».
179177
while (_currChar == Chars.SP)
@@ -203,7 +201,7 @@ public byte[] ReadStream(int length)
203201
/// <summary>
204202
/// Reads a string in raw encoding.
205203
/// </summary>
206-
public String ReadRawString(int position, int length)
204+
public string ReadRawString(long position, int length)
207205
{
208206
_pdfSteam.Position = position;
209207
byte[] bytes = new byte[length];
@@ -303,23 +301,17 @@ public Symbol ScanNumber()
303301
long l = Int64.Parse(_token.ToString(), CultureInfo.InvariantCulture);
304302
if (l >= Int32.MinValue && l <= Int32.MaxValue)
305303
return Symbol.Integer;
306-
if (l > 0 && l <= UInt32.MaxValue)
307-
return Symbol.UInteger;
304+
if (l >= Int64.MinValue && l <= Int64.MaxValue)
305+
return Symbol.Long;
308306

309307
// Got an AutoCAD PDF file that contains this: /C 264584027963392
310308
// Best we can do is to convert it to real value.
311309
return Symbol.Real;
312-
//thr ow new PdfReaderException("Number exceeds integer range.");
313310
}
314311

315312
public Symbol ScanNumberOrReference()
316313
{
317314
Symbol result = ScanNumber();
318-
if (result == Symbol.Integer)
319-
{
320-
int pos = Position;
321-
string objectNumber = Token;
322-
}
323315
return result;
324316
}
325317

@@ -643,7 +635,7 @@ bool PeekReference()
643635
// A Reference has the form "nnn mmm R". The implementation of the parser used a
644636
// reduce/shift algorithm in the first place. But this case is the only one we need to
645637
// look ahead 3 tokens.
646-
int positon = Position;
638+
var positon = Position;
647639

648640
// Skip digits.
649641
while (char.IsDigit(_currChar))
@@ -731,32 +723,32 @@ public char MoveToNonWhiteSpace()
731723
return _currChar;
732724
}
733725

734-
#if DEBUG
735-
public string SurroundingsOfCurrentPosition(bool hex)
736-
{
737-
const int range = 20;
738-
int start = Math.Max(Position - range, 0);
739-
int length = Math.Min(2 * range, PdfLength - start);
740-
long posOld = _pdfSteam.Position;
741-
_pdfSteam.Position = start;
742-
byte[] bytes = new byte[length];
743-
_pdfSteam.Read(bytes, 0, length);
744-
_pdfSteam.Position = posOld;
745-
string result = "";
746-
if (hex)
747-
{
748-
for (int idx = 0; idx < length; idx++)
749-
result += ((int)bytes[idx]).ToString("x2");
750-
//result += string.Format("{0:", (int) bytes[idx]);
751-
}
752-
else
753-
{
754-
for (int idx = 0; idx < length; idx++)
755-
result += (char)bytes[idx];
756-
}
757-
return result;
758-
}
759-
#endif
726+
// #if DEBUG
727+
// public string SurroundingsOfCurrentPosition(bool hex)
728+
// {
729+
// const int range = 20;
730+
// int start = Math.Max(Position - range, 0);
731+
// int length = Math.Min(2 * range, PdfLength - start);
732+
// long posOld = _pdfSteam.Position;
733+
// _pdfSteam.Position = start;
734+
// byte[] bytes = new byte[length];
735+
// _pdfSteam.Read(bytes, 0, length);
736+
// _pdfSteam.Position = posOld;
737+
// string result = "";
738+
// if (hex)
739+
// {
740+
// for (int idx = 0; idx < length; idx++)
741+
// result += ((int)bytes[idx]).ToString("x2");
742+
// //result += string.Format("{0:", (int) bytes[idx]);
743+
// }
744+
// else
745+
// {
746+
// for (int idx = 0; idx < length; idx++)
747+
// result += (char)bytes[idx];
748+
// }
749+
// return result;
750+
// }
751+
// #endif
760752

761753
/// <summary>
762754
/// Gets the current symbol.
@@ -810,6 +802,7 @@ public uint TokenToUInteger
810802
return uint.Parse(_token.ToString(), CultureInfo.InvariantCulture);
811803
}
812804
}
805+
public long TokenToLong => long.Parse(_token.ToString(), CultureInfo.InvariantCulture);
813806

814807
/// <summary>
815808
/// Interprets current token as real or integer literal.
@@ -876,13 +869,13 @@ internal static bool IsDelimiter(char ch)
876869
/// <summary>
877870
/// Gets the length of the PDF output.
878871
/// </summary>
879-
public int PdfLength
872+
public long PdfLength
880873
{
881874
get { return _pdfLength; }
882875
}
883876

884-
readonly int _pdfLength;
885-
int _idxChar;
877+
readonly long _pdfLength;
878+
long _idxChar;
886879
char _currChar;
887880
char _nextChar;
888881
StringBuilder _token;

0 commit comments

Comments
 (0)