Skip to content

Commit e67ee35

Browse files
author
Joris Schellekens
committed
correct off-by-one mistake in RegexBasedLocationExtractionStrategy
add tests for RegexBasedLocationExtractionStrategy DEVSIX-1816
1 parent 1c8b16b commit e67ee35

File tree

3 files changed

+57
-1
lines changed

3 files changed

+57
-1
lines changed

kernel/src/main/java/com/itextpdf/kernel/pdf/canvas/parser/listener/RegexBasedLocationExtractionStrategy.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ public Collection<IPdfTextLocation> getResultantLocations() {
8787
Matcher mat = pattern.matcher(txt.text);
8888
while (mat.find()) {
8989
int startIndex = txt.indexMap.get(mat.start());
90-
int endIndex = txt.indexMap.get(mat.end());
90+
int endIndex = txt.indexMap.get(mat.end() - 1);
9191
for (Rectangle r : toRectangles(parseResult.subList(startIndex, endIndex))) {
9292
retval.add(new DefaultPdfTextLocation(0, r, mat.group(0)));
9393
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
package com.itextpdf.kernel.pdf.canvas.parser.listener;
2+
3+
import com.itextpdf.kernel.pdf.PdfDocument;
4+
import com.itextpdf.kernel.pdf.PdfReader;
5+
import com.itextpdf.kernel.pdf.canvas.parser.PdfCanvasProcessor;
6+
import com.itextpdf.test.ExtendedITextTest;
7+
import org.junit.Assert;
8+
import org.junit.Test;
9+
10+
import java.io.File;
11+
import java.io.IOException;
12+
import java.util.ArrayList;
13+
import java.util.Collection;
14+
import java.util.Iterator;
15+
import java.util.List;
16+
import java.util.regex.Pattern;
17+
18+
public class RegexBasedLocationExtractionStrategyTest extends ExtendedITextTest {
19+
20+
private static final String sourceFolder = "./src/test/resources/com/itextpdf/kernel/parser/RegexBasedLocationExtractionStrategyTest/";
21+
22+
@Test
23+
public void test01() throws IOException {
24+
System.out.println(new File(sourceFolder).getAbsolutePath());
25+
26+
PdfDocument pdfDocument = new PdfDocument(new PdfReader(sourceFolder + "in01.pdf"));
27+
28+
// build strategy
29+
RegexBasedLocationExtractionStrategy extractionStrategy = new RegexBasedLocationExtractionStrategy(Pattern.compile("\\{\\{Signature\\}\\}"));
30+
31+
// get locations
32+
List<IPdfTextLocation> locationList = new ArrayList<>();
33+
for (int x = 1; x <= pdfDocument.getNumberOfPages(); x++) {
34+
new PdfCanvasProcessor(extractionStrategy).processPageContent(pdfDocument.getPage(x));
35+
for(IPdfTextLocation location : extractionStrategy.getResultantLocations()) {
36+
if(location != null) {
37+
locationList.add(location);
38+
}
39+
}
40+
}
41+
42+
// compare
43+
Assert.assertEquals(locationList.size(), 1);
44+
45+
IPdfTextLocation loc = locationList.get(0);
46+
47+
Assert.assertEquals(loc.getText(), "{{Signature}}");
48+
Assert.assertEquals(23, (int) loc.getRectangle().getX());
49+
Assert.assertEquals(375, (int) loc.getRectangle().getY());
50+
Assert.assertEquals(52, (int) loc.getRectangle().getWidth());
51+
Assert.assertEquals(11, (int) loc.getRectangle().getHeight());
52+
53+
// close
54+
pdfDocument.close();
55+
}
56+
}

0 commit comments

Comments
 (0)