Skip to content

Commit 96e8ca2

Browse files
committed
Fixed some bugs related to how duplicate filenames and duplicate files were handled. Plus some typo fixes.
1 parent 29df2d3 commit 96e8ca2

File tree

3 files changed

+95
-53
lines changed

3 files changed

+95
-53
lines changed

EaPdf/Helpers/Pdf/ITextSharpHelpers.cs

Lines changed: 34 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,15 +1,44 @@
11
using iTextSharp.text.pdf;
2-
using System;
3-
using System.Collections.Generic;
4-
using System.Linq;
5-
using System.Text;
6-
using System.Threading.Tasks;
72

83
namespace UIUCLibrary.EaPdf.Helpers.Pdf
94
{
105
public static class ITextSharpHelpers
116
{
7+
/// <summary>
/// Make the names unique by appending a number in parentheses to the name if it is not unique.
/// The first occurrence of a name is kept as-is; later occurrences become "name (1)", "name (2)", etc.
/// A suffixed candidate is skipped if it has already been emitted (for example because the input
/// itself contains a literal "name (1)"), so the keys of the returned list are always distinct.
/// </summary>
/// <param name="nameList">The name / indirect reference pairs, possibly containing duplicate names</param>
/// <returns>A new list, in the same order as the input, pairing each value with a unique name</returns>
public static List<KeyValuePair<string, PdfIndirectReference>> MakeUniqueNames(List<KeyValuePair<string, PdfIndirectReference>> nameList)
{
    //last numeric suffix handed out for each original name
    Dictionary<string, int> lastSuffixByName = new();

    //every name already placed in the result, original or generated
    HashSet<string> usedNames = new();

    List<KeyValuePair<string, PdfIndirectReference>> result = new(nameList.Count);

    foreach (var kvp in nameList)
    {
        var uniqueName = kvp.Key;

        if (!usedNames.Add(uniqueName))
        {
            //TryGetValue leaves suffix at 0 when this name has not been suffixed before
            lastSuffixByName.TryGetValue(kvp.Key, out int suffix);

            //keep advancing until the candidate does not clash with anything already emitted
            do
            {
                suffix++;
                uniqueName = $"{kvp.Key} ({suffix})";
            }
            while (!usedNames.Add(uniqueName));

            lastSuffixByName[kvp.Key] = suffix;
        }

        result.Add(new KeyValuePair<string, PdfIndirectReference>(uniqueName, kvp.Value));
    }

    return result;
}
34+
1235

36+
/// <summary>
37+
/// Compare two PdfIndirectReference objects for equality.
38+
/// </summary>
39+
/// <param name="indRef1"></param>
40+
/// <param name="indRef2"></param>
41+
/// <returns></returns>
1342
public static bool EqualsIndRef(this PdfIndirectReference indRef1, PdfIndirectReference indRef2)
1443
{
1544
if (indRef1 == null || indRef2 == null)

EaPdf/Helpers/Pdf/ITextSharpPdfEnhancer.cs

Lines changed: 60 additions & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@ public class ITextSharpPdfEnhancer : IPdfEnhancer
3131

3232
/// <summary>
3333
/// Track references to Filespec dictionaries by checksum and messageId
34-
/// Assumes that the same message will not contain multiple attachments of the exact same file or checksum
34+
/// Assumes that the _same message_ will not contain multiple attachments of the exact same file or checksum
3535
/// </summary>
3636
private Dictionary<(string checksum, string messageId), PdfIndirectReference> FilespecsByCheckSum = new();
3737

@@ -205,7 +205,7 @@ private void CorrectForDummyPlaceholderFilespecs(List<(PdfDictionary? annotation
205205
throw new Exception("Real filespecs not found");
206206
}
207207

208-
//get the real EmbeddedFile stream and makler sure that each real filespec points to the same EmbeddedFile
208+
//get the real EmbeddedFile stream and make sure that each real filespec points to the same EmbeddedFile
209209
PdfIndirectReference? realFIndRef = null;
210210
PdfStream? realFStream = null;
211211
foreach (var (annotation, filespec, indRef) in realFileSpecs)
@@ -495,7 +495,9 @@ private void ReaddCatalogNamesEmbeddedFiles()
495495
/// <returns></returns>
496496
private PdfDictionary ConvertToNameTree(List<KeyValuePair<string, PdfIndirectReference>> nameList)
497497
{
498-
var sortedNameList = nameList.OrderBy(kv => kv.Key).ToList();
498+
var uniqNameList = ITextSharpHelpers.MakeUniqueNames(nameList);
499+
500+
var sortedNameList = uniqNameList.OrderBy(kv => kv.Key,StringComparer.Ordinal).ToList();
499501

500502
//Start with the most basic structure, a single Names dictionary at the root; this seems to be what FOP already uses regardless of the number of entries
501503
var names = new PdfArray();
@@ -524,7 +526,8 @@ private string GetNameTreeName(PdfDictionary fileSpec)
524526
var f = fileSpec.GetAsString(PdfName.F) ?? throw new Exception("Filespec F not found");
525527

526528
//Just use the filename as the name tree name; there might be duplicates which can cause problems for some viewers
527-
return f.ToString();
529+
//Some names might have file path components which should be removed
530+
return Path.GetFileName(f.ToString());
528531

529532
////Use the file extension and the checksum to create a unique name for the name tree
530533
////This avoids duplicates, but causes some viewers to display the wrong attachment files
@@ -558,60 +561,67 @@ private void AddFileAttachmentAnnots(List<EmbeddedFile> embeddedFiles, Dictionar
558561
{
559562
var embeddedFile = embeddedFileGrp.First();
560563

561-
var descs = embeddedFileGrp.Select(e => e.Description).ToList();
562-
563-
var objs = GetObjsFromNameTreeStartingWith(destsDict, "X_" + embeddedFile.Hash);
564-
if (objs.Count == 0) continue;
564+
var dests = GetObjsFromNameTreeStartingWith(destsDict, "X_" + embeddedFile.Hash);
565+
if (dests.Count == 0)
566+
{
567+
_logger.LogTrace($"AddFileAttachmentAnnots: No Dest found matching 'X_{embeddedFile.Hash}...'");
568+
continue;
569+
}
565570

566571
var annotFileSpecList = annotFileSpecDict[embeddedFileGrp.Key];
572+
if (annotFileSpecList.Count == 0)
573+
{
574+
throw new Exception("AddFileAttachmentAnnots: There should be a filespec for every embedded file.");
575+
}
576+
577+
if(annotFileSpecList.Count != dests.Count)
578+
{
579+
throw new Exception("AddFileAttachmentAnnots: The number of matching links should match the number of file specs.");
580+
}
581+
582+
int objNum = 0;
567583
foreach (var annotFileSpec in annotFileSpecList) //There can be multiple annotations pointing to the same embedded file
568584
{
585+
var fileSpecIndRef = annotFileSpec.indRef;
569586

570-
for (int objNum = 0; objNum < objs.Count; objNum++)
587+
var (dest, destIndRef) = dests[objNum];
588+
var linkAnnots = linkAnnotations[destIndRef.ToString()];
589+
foreach (var linkAnnot in linkAnnots)
571590
{
572-
573-
var fileSpecIndRef = annotFileSpec.indRef;
574-
if (objNum > 0)
575-
{
576-
//if there are multiple annotations pointing to the same embedded file, create a new filespec for each annotation
577-
var embeddedFileIndRef = annotFileSpec.filespec.GetAsDict(PdfName.EF).GetAsIndirectObject(PdfName.F);
578-
fileSpecIndRef = AddFilespec(embeddedFileGrp.ElementAt(objNum), embeddedFileIndRef);
579-
}
580-
581-
var (obj, indRef) = objs[objNum];
582-
var linkAnnots = linkAnnotations[indRef.ToString()];
583-
foreach (var linkAnnot in linkAnnots)
584-
{
585-
var d = new PdfDate();
586-
587-
//convert the link annotation into a file attachment annotation
588-
linkAnnot.Put(PdfName.Subtype, PdfName.Fileattachment);
589-
linkAnnot.Put(PdfName.Name, new PdfName("Paperclip"));
590-
linkAnnot.Put(PdfName.Nm, new PdfString(embeddedFileGrp.Key, PdfObject.TEXT_UNICODE));
591-
linkAnnot.Put(PdfName.Contents, new PdfString(embeddedFileGrp.ElementAt(objNum).Description, PdfObject.TEXT_UNICODE));
592-
linkAnnot.Put(PdfName.T, new PdfString($"{System.Reflection.Assembly.GetExecutingAssembly().GetName().Name} {System.Reflection.Assembly.GetExecutingAssembly().GetName().Version}", PdfObject.TEXT_UNICODE));
593-
linkAnnot.Put(PdfName.Creationdate, d);
594-
linkAnnot.Put(PdfName.M, d);
595-
linkAnnot.Put(PdfName.Fs, fileSpecIndRef);
596-
597-
//to ensure PDF/A-3 compliance, the file attachment annotation must be listed in an AF array somewhere in the document
598-
var af = new PdfArray();
599-
af.Add(fileSpecIndRef);
600-
linkAnnot.Put(new PdfName("AF"), af);
601-
602-
var ap = new PdfDictionary();
603-
ap.Put(PdfName.N, annotAppearanceStream.IndirectReference);
604-
linkAnnot.Put(PdfName.Ap, ap);
605-
linkAnnot.Remove(PdfName.A);
606-
linkAnnot.Remove(PdfName.H);
607-
linkAnnot.Remove(PdfName.Structparent);
608-
}
591+
UpdateLinkAnnot(linkAnnot, embeddedFileGrp.Key, embeddedFileGrp.ElementAt(objNum).Description, fileSpecIndRef, annotAppearanceStream);
609592
}
593+
objNum++;
610594
}
611-
612595
}
613596
}
614597

598+
/// <summary>
/// Rewrite a link annotation dictionary in place so that it becomes a file attachment annotation
/// referring to the given filespec.
/// </summary>
/// <param name="linkAnnot">The annotation dictionary to modify</param>
/// <param name="nm">Value written to the annotation's NM entry</param>
/// <param name="desc">Value written to the annotation's Contents entry</param>
/// <param name="fileSpecIndRef">Reference to the filespec; written to the FS entry and to a one-element AF array</param>
/// <param name="annotAppearanceStream">Object whose indirect reference is used as the normal (N) appearance</param>
private void UpdateLinkAnnot(PdfDictionary linkAnnot, string nm, string desc, PdfIndirectReference fileSpecIndRef, PdfIndirectObject annotAppearanceStream)
{
    //a single date instance is shared by the CreationDate and M entries
    var timestamp = new PdfDate();

    //the T entry is "<assembly name> <assembly version>" of the executing assembly
    var executingAssembly = System.Reflection.Assembly.GetExecutingAssembly();
    var titleText = $"{executingAssembly.GetName().Name} {executingAssembly.GetName().Version}";

    //convert the link annotation into a file attachment annotation
    linkAnnot.Put(PdfName.Subtype, PdfName.Fileattachment);
    linkAnnot.Put(PdfName.Name, new PdfName("Paperclip"));
    linkAnnot.Put(PdfName.Nm, new PdfString(nm, PdfObject.TEXT_UNICODE));
    linkAnnot.Put(PdfName.Contents, new PdfString(desc, PdfObject.TEXT_UNICODE));
    linkAnnot.Put(PdfName.T, new PdfString(titleText, PdfObject.TEXT_UNICODE));
    linkAnnot.Put(PdfName.Creationdate, timestamp);
    linkAnnot.Put(PdfName.M, timestamp);
    linkAnnot.Put(PdfName.Fs, fileSpecIndRef);

    //to ensure PDF/A-3 compliance, the file attachment annotation must be listed in an AF array somewhere in the document
    var associatedFiles = new PdfArray();
    associatedFiles.Add(fileSpecIndRef);
    linkAnnot.Put(new PdfName("AF"), associatedFiles);

    //appearance dictionary with only the N entry
    var appearance = new PdfDictionary();
    appearance.Put(PdfName.N, annotAppearanceStream.IndirectReference);
    linkAnnot.Put(PdfName.Ap, appearance);

    //drop the A, H and StructParent entries carried over from the link annotation
    linkAnnot.Remove(PdfName.A);
    linkAnnot.Remove(PdfName.H);
    linkAnnot.Remove(PdfName.Structparent);
}
624+
615625
private PdfIndirectObject AddAnnotAppearanceStream()
616626
{
617627
_logger.LogTrace("ITextSharpPdfEnhancer: AddAnnotAppearanceStream");
@@ -1292,6 +1302,9 @@ protected virtual void Dispose(bool disposing)
12921302
}
12931303
}
12941304

1305+
/// <summary>
1306+
/// Cleanup and close the PdfReader and PdfStamper objects
1307+
/// </summary>
12951308
private void CloseAndDispose()
12961309
{
12971310
_reader.RemoveUnusedObjects(); //this gets rid of orphaned XMP metadata objects, maybe among others

EaPdf/Helpers/Pdf/XepToPdfTransformer.cs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -82,7 +82,7 @@ public int Transform(string sourceFoFilePath, string outputPdfFilePath, string?
8282

8383
//Get rid of superfluous or misleading messages
8484

85-
messages.RemoveAll(m => m.message.StartsWith("[warning] PDF Version 1.4 doesn't support Tab Order")); //this is corrected in post-processing, so ne need for the warning
85+
messages.RemoveAll(m => m.message.StartsWith("[warning] PDF Version 1.4 doesn't support Tab Order")); //this is corrected in post-processing, so no need for the warning
8686

8787
List<(LogLevel level, string message)> ret = new();
8888

0 commit comments

Comments
 (0)