Skip to content

Commit febfa4d

Browse files
theolivenbaum authored and BobLd committed
Fix usage of List.Contains
1 parent 0ebbe05 commit febfa4d

File tree

3 files changed

+65
-9
lines changed

3 files changed

+65
-9
lines changed

src/UglyToad.PdfPig.DocumentLayoutAnalysis/DecorationTextBlockClassifier.cs

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -128,13 +128,13 @@ public static IReadOnlyList<IReadOnlyList<TextBlock>> Get(IReadOnlyList<IReadOnl
128128
throw new ArgumentException("The algorithm cannot be used with a document of less than 2 pages.", nameof(pagesTextBlocks));
129129
}
130130

131-
ConcurrentDictionary<int, List<TextBlock>> pageDecorations = new ConcurrentDictionary<int, List<TextBlock>>();
131+
ConcurrentDictionary<int, OrderedSet<TextBlock>> pageDecorations = new ConcurrentDictionary<int, OrderedSet<TextBlock>>();
132132

133133
ParallelOptions parallelOptions = new ParallelOptions() { MaxDegreeOfParallelism = maxDegreeOfParallelism };
134134

135135
Parallel.For(0, pagesTextBlocks.Count, parallelOptions, p =>
136136
{
137-
if (!pageDecorations.TryAdd(p, new List<TextBlock>()))
137+
if (!pageDecorations.TryAdd(p, new OrderedSet<TextBlock>()))
138138
{
139139
throw new ArgumentException("Cannot add element with index " + p + " in ConcurrentDictionary.");
140140
}
@@ -165,7 +165,7 @@ public static IReadOnlyList<IReadOnlyList<TextBlock>> Get(IReadOnlyList<IReadOnl
165165
var score = Score(current, previousPage, nextPage, minimumEditDistanceNormalised, similarityThreshold, n);
166166
if (score >= similarityThreshold)
167167
{
168-
if (!pageDecorations[p].Contains(current)) pageDecorations[p].Add(current);
168+
pageDecorations[p].TryAdd(current);
169169
}
170170
}
171171

@@ -180,7 +180,7 @@ public static IReadOnlyList<IReadOnlyList<TextBlock>> Get(IReadOnlyList<IReadOnl
180180
var score = Score(current, previousPage, nextPage, minimumEditDistanceNormalised, similarityThreshold, n);
181181
if (score >= similarityThreshold)
182182
{
183-
if (!pageDecorations[p].Contains(current)) pageDecorations[p].Add(current);
183+
pageDecorations[p].TryAdd(current);
184184
}
185185
}
186186

@@ -195,7 +195,7 @@ public static IReadOnlyList<IReadOnlyList<TextBlock>> Get(IReadOnlyList<IReadOnl
195195
var score = Score(current, previousPage, nextPage, minimumEditDistanceNormalised, similarityThreshold, n);
196196
if (score >= similarityThreshold)
197197
{
198-
if (!pageDecorations[p].Contains(current)) pageDecorations[p].Add(current);
198+
pageDecorations[p].TryAdd(current);
199199
}
200200
}
201201

@@ -210,12 +210,12 @@ public static IReadOnlyList<IReadOnlyList<TextBlock>> Get(IReadOnlyList<IReadOnl
210210
var score = Score(current, previousPage, nextPage, minimumEditDistanceNormalised, similarityThreshold, n);
211211
if (score >= similarityThreshold)
212212
{
213-
if (!pageDecorations[p].Contains(current)) pageDecorations[p].Add(current);
213+
pageDecorations[p].TryAdd(current);
214214
}
215215
}
216216
});
217217

218-
return pageDecorations.OrderBy(x => x.Key).Select(x => x.Value).ToList();
218+
return pageDecorations.OrderBy(x => x.Key).Select(x => x.Value.GetList()).ToList();
219219
}
220220

221221
/// <summary>
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
namespace UglyToad.PdfPig.DocumentLayoutAnalysis
2+
{
3+
using System;
4+
using System.Collections;
5+
using System.Collections.Generic;
6+
using System.Text;
7+
8+
/// <summary>
/// A collection of unique items that remembers the order in which each item was first added.
/// </summary>
/// <remarks>
/// Uniqueness is tracked by a <see cref="HashSet{T}"/>; insertion order is tracked by a
/// parallel <see cref="List{T}"/>. The two are only ever modified together by this class.
/// The class performs no synchronisation of its own.
/// </remarks>
/// <typeparam name="T">The type of the items in the set.</typeparam>
internal class OrderedSet<T>
{
    private readonly HashSet<T> _set;
    private readonly List<T> _list;

    /// <summary>
    /// Creates an empty set that uses <see cref="EqualityComparer{T}.Default"/> to compare items.
    /// </summary>
    public OrderedSet() : this(EqualityComparer<T>.Default)
    {
    }

    /// <summary>
    /// Creates an empty set that uses <paramref name="comparer"/> to compare items.
    /// </summary>
    /// <param name="comparer">The equality comparer used to decide whether two items are the same.</param>
    public OrderedSet(IEqualityComparer<T> comparer)
    {
        _set = new HashSet<T>(comparer);
        _list = new List<T>();
    }

    /// <summary>
    /// The number of distinct items currently in the set.
    /// </summary>
    public int Count => _set.Count;

    /// <summary>
    /// Adds <paramref name="item"/> to the end of the set if it is not already present.
    /// </summary>
    /// <param name="item">The item to add.</param>
    /// <returns><c>true</c> if the item was added; <c>false</c> if it was already present.</returns>
    public bool TryAdd(T item)
    {
        // HashSet<T>.Add already reports whether the item was new, so a separate
        // Contains call would only hash the item a second time.
        if (!_set.Add(item))
        {
            return false;
        }

        _list.Add(item);
        return true;
    }

    /// <summary>
    /// Removes every item from the set.
    /// </summary>
    public void Clear()
    {
        _list.Clear();
        _set.Clear();
    }

    /// <summary>
    /// Determines whether <paramref name="item"/> is in the set.
    /// </summary>
    /// <param name="item">The item to look for.</param>
    /// <returns><c>true</c> if the item is present; otherwise <c>false</c>.</returns>
    public bool Contains(T item)
    {
        // No special-casing of null: TryAdd stores whatever it is given, so the
        // answer here must agree with what TryAdd actually put in the set.
        return _set.Contains(item);
    }

    /// <summary>
    /// Copies the items, in insertion order, into <paramref name="array"/> starting at <paramref name="arrayIndex"/>.
    /// </summary>
    /// <param name="array">The destination array.</param>
    /// <param name="arrayIndex">The zero-based index in <paramref name="array"/> at which copying begins.</param>
    public void CopyTo(T[] array, int arrayIndex)
    {
        _list.CopyTo(array, arrayIndex);
    }

    /// <summary>
    /// Gets the items in insertion order.
    /// </summary>
    /// <remarks>
    /// This returns the backing list itself, not a copy. Adding to or removing from it directly
    /// bypasses the uniqueness tracking, so callers should treat it as read-only.
    /// </remarks>
    /// <returns>The list holding the items in the order they were first added.</returns>
    public List<T> GetList()
    {
        return _list;
    }
}
56+
}

src/UglyToad.PdfPig/PdfExtensions.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -122,12 +122,12 @@ private static double GetEstimatedSizeMultiplier(IFilter filter)
122122
/// Returns an equivalent token where any indirect references of child objects are
123123
/// recursively traversed and resolved.
124124
/// </summary>
125-
internal static T? Resolve<T>(this T? token, IPdfTokenScanner scanner, List<IndirectReference>? visited = null) where T : IToken
125+
internal static T? Resolve<T>(this T? token, IPdfTokenScanner scanner, HashSet<IndirectReference>? visited = null) where T : IToken
126126
{
127127
return (T?)ResolveInternal(token, scanner, visited ?? []);
128128
}
129129

130-
private static IToken? ResolveInternal(this IToken? token, IPdfTokenScanner scanner, List<IndirectReference> visited)
130+
private static IToken? ResolveInternal(this IToken? token, IPdfTokenScanner scanner, HashSet<IndirectReference> visited)
131131
{
132132
if (token is StreamToken stream)
133133
{

0 commit comments

Comments
 (0)