@@ -66,40 +66,34 @@ public RegexBasedLocationExtractionStrategy(Regex pattern) {
6666
6767 public virtual ICollection < IPdfTextLocation > GetResultantLocations ( ) {
6868 // align characters in "logical" order
69- JavaCollectionsUtil . Sort ( parseResult , new TextChunkLocationBasedComparator ( new DefaultTextChunkLocationComparator ( ) ) ) ;
69+ JavaCollectionsUtil . Sort ( parseResult , new TextChunkLocationBasedComparator ( new DefaultTextChunkLocationComparator
70+ ( ) ) ) ;
7071 // process parse results
7172 IList < IPdfTextLocation > retval = new List < IPdfTextLocation > ( ) ;
7273 CharacterRenderInfo . StringConversionInfo txt = CharacterRenderInfo . MapString ( parseResult ) ;
73- Match mat = iText . IO . Util . StringUtil . Match ( pattern , txt . text ) ;
74- while ( mat . Success )
75- {
76- int ? startIndex = GetStartIndex ( txt . indexMap , mat . Index , txt . text ) ;
77- int ? endIndex = GetEndIndex ( txt . indexMap , mat . Index + mat . Length - 1 ) ;
78- if ( startIndex != null && endIndex != null && startIndex <= endIndex )
79- {
80- foreach ( Rectangle r in ToRectangles ( parseResult . SubList ( startIndex . Value , endIndex . Value + 1 ) ) )
81- {
82- retval . Add ( new DefaultPdfTextLocation ( 0 , r , iText . IO . Util . StringUtil . Group ( mat , 0 ) ) ) ;
74+ Matcher mat = iText . IO . Util . Matcher . Match ( pattern , txt . text ) ;
75+ while ( mat . Find ( ) ) {
76+ int ? startIndex = GetStartIndex ( txt . indexMap , mat . Start ( ) , txt . text ) ;
77+ int ? endIndex = GetEndIndex ( txt . indexMap , mat . End ( ) - 1 ) ;
78+ if ( startIndex != null && endIndex != null && startIndex <= endIndex ) {
79+ foreach ( Rectangle r in ToRectangles ( parseResult . SubList ( startIndex . Value , endIndex . Value + 1 ) ) ) {
80+ retval . Add ( new DefaultPdfTextLocation ( 0 , r , mat . Group ( 0 ) ) ) ;
8381 }
8482 }
85-
86- mat = mat . NextMatch ( ) ;
8783 }
8884 /* sort
8985 * even though the return type is ICollection<IPdfTextLocation>, we apply a sorting algorithm here
9086 * This is to ensure that tests that use this functionality (for instance to generate pdf with
9187 * areas of interest highlighted) will not break when compared.
9288 */
93- JavaCollectionsUtil . Sort ( retval , new _IComparer_54 ( ) ) ;
94-
89+ JavaCollectionsUtil . Sort ( retval , new _IComparer_103 ( ) ) ;
9590 // ligatures can produce the same rectangle
96- removeDuplicates ( retval ) ;
97-
91+ RemoveDuplicates ( retval ) ;
9892 return retval ;
9993 }
10094
101- private sealed class _IComparer_54 : IComparer < IPdfTextLocation > {
102- public _IComparer_54 ( ) {
95+ private sealed class _IComparer_103 : IComparer < IPdfTextLocation > {
96+ public _IComparer_103 ( ) {
10397 }
10498
10599 public int Compare ( IPdfTextLocation l1 , IPdfTextLocation l2 ) {
@@ -113,8 +107,8 @@ public int Compare(IPdfTextLocation l1, IPdfTextLocation l2) {
113107 }
114108 }
115109 }
116-
117- private void removeDuplicates ( IList < IPdfTextLocation > sortedList ) {
110+
111+ private void RemoveDuplicates ( IList < IPdfTextLocation > sortedList ) {
118112 IPdfTextLocation lastItem = null ;
119113 int orgSize = sortedList . Count ;
120114 for ( int i = orgSize - 1 ; i >= 0 ; i -- ) {
@@ -127,7 +121,6 @@ private void removeDuplicates(IList<IPdfTextLocation> sortedList) {
127121 }
128122 }
129123
130-
131124 public virtual void EventOccurred ( IEventData data , EventType type ) {
132125 if ( data is TextRenderInfo ) {
133126 parseResult . AddAll ( ToCRI ( ( TextRenderInfo ) data ) ) ;
@@ -140,30 +133,39 @@ public virtual ICollection<EventType> GetSupportedEvents() {
140133
141134 /// <summary>
142135 /// Convert
143- /// <c> TextRenderInfo</c >
136+ /// <see cref="iText.Kernel.Pdf.Canvas.Parser.Data.TextRenderInfo"/>
144137 /// to
145- /// <c>CharacterRenderInfo</c>
138+ /// <see cref="CharacterRenderInfo"/>
139+ /// This method is public and not final so that custom implementations can choose to override it.
140+ /// </summary>
141+ /// <remarks>
142+ /// Convert
143+ /// <see cref="iText.Kernel.Pdf.Canvas.Parser.Data.TextRenderInfo"/>
144+ /// to
145+ /// <see cref="CharacterRenderInfo"/>
146146 /// This method is public and not final so that custom implementations can choose to override it.
147147 /// Other implementations of
148148 /// <c>CharacterRenderInfo</c>
149149 /// may choose to store different properties than
150150 /// merely the
151- /// <c> Rectangle</c >
151+ /// <see cref="iText.Kernel.Geom.Rectangle"/>
152152 /// describing the bounding box. E.g. a custom implementation might choose to
153153 /// store
154- /// <c> Color</c >
154+ /// <see cref="iText.Kernel.Colors.Color"/>
155155 /// information as well, to better match the content surrounding the redaction
156- /// <c>Rectangle</c>
157- /// .
158- /// </summary>
156+ /// <see cref="iText.Kernel.Geom.Rectangle"/>.
157+ /// </remarks>
159158 /// <param name="tri">
160- /// <see cref="TextRenderInfo"/>
159+ ///
160+ /// <see cref="iText.Kernel.Pdf.Canvas.Parser.Data.TextRenderInfo"/>
161161 /// object
162162 /// </param>
163- /// <returns>a list of
164- /// <see cref="CharacterRenderInfo"/>s
165- /// which represents the passed
166- /// <see cref="TextRenderInfo"/>
163+ /// <returns>
164+ /// a list of
165+ /// <see cref="CharacterRenderInfo"/>
166+ /// s which represents the passed
167+ /// <see cref="iText.Kernel.Pdf.Canvas.Parser.Data.TextRenderInfo"/>
168+ /// .
167169 /// </returns>
168170 protected internal virtual IList < CharacterRenderInfo > ToCRI ( TextRenderInfo tri ) {
169171 IList < CharacterRenderInfo > cris = new List < CharacterRenderInfo > ( ) ;
@@ -175,28 +177,37 @@ protected internal virtual IList<CharacterRenderInfo> ToCRI(TextRenderInfo tri)
175177
176178 /// <summary>
177179 /// Converts
178- /// <c> CharacterRenderInfo</c >
180+ /// <see cref="CharacterRenderInfo"/>
179181 /// objects to
180- /// <c>Rectangles</c>
182+ /// <see cref="iText.Kernel.Geom.Rectangle"/>
183+ /// s
184+ /// This method is protected and not final so that custom implementations can choose to override it.
185+ /// </summary>
186+ /// <remarks>
187+ /// Converts
188+ /// <see cref="CharacterRenderInfo"/>
189+ /// objects to
190+ /// <see cref="iText.Kernel.Geom.Rectangle"/>
191+ /// s
181192 /// This method is protected and not final so that custom implementations can choose to override it.
182193 /// E.g. other implementations may choose to add padding/margin to the Rectangles.
183194 /// This method also offers a convenient access point to the mapping of
184- /// <c> CharacterRenderInfo</c >
195+ /// <see cref="CharacterRenderInfo"/>
185196 /// to
186- /// <c>Rectangle</c>
187- /// .
197+ /// <see cref="iText.Kernel.Geom.Rectangle"/>.
188198 /// This mapping enables (custom implementations) to match color of text in redacted Rectangles,
189199 /// or match color of background, by the mere virtue of offering access to the
190- /// <c> CharacterRenderInfo</c >
200+ /// <see cref="CharacterRenderInfo"/>
191201 /// objects
192202 /// that generated the
193- /// <c> Rectangle</c>
194- /// .
195- /// </summary >
196- /// <param name="cris"> list of
203+ /// <see cref="iText.Kernel.Geom.Rectangle"/>.
204+ /// </remarks>
205+ /// <param name="cris" >
206+ /// list of
197207 /// <see cref="CharacterRenderInfo"/>
198- /// objects</param>
199- /// <returns>an array containing elements of this list</returns>
208+ /// objects
209+ /// </param>
210+ /// <returns>a list of <see cref="iText.Kernel.Geom.Rectangle"/> objects built from the bounding boxes of the given <see cref="CharacterRenderInfo"/> objects</returns>
200211 protected internal virtual IList < Rectangle > ToRectangles ( IList < CharacterRenderInfo > cris ) {
201212 IList < Rectangle > retval = new List < Rectangle > ( ) ;
202213 if ( cris . IsEmpty ( ) ) {
@@ -214,13 +225,12 @@ protected internal virtual IList<Rectangle> ToRectangles(IList<CharacterRenderIn
214225 resultRectangle = Rectangle . GetCommonRectangle ( resultRectangle , cri . GetBoundingBox ( ) ) ;
215226 }
216227 retval . Add ( resultRectangle ) ;
217-
218228 prev = curr ;
219229 }
220230 // return
221231 return retval ;
222232 }
223-
233+
224234 private static int ? GetStartIndex ( IDictionary < int , int ? > indexMap , int index , String txt ) {
225235 while ( ! indexMap . ContainsKey ( index ) && index < txt . Length ) {
226236 index ++ ;
0 commit comments