Skip to content

Commit ef8b0fa

Browse files
committed
PDFBOX-6047: add text extraction options submenu
git-svn-id: https://svn.apache.org/repos/asf/pdfbox/trunk@1927521 13f79535-47bb-0310-9956-ffa450edef68
1 parent 296cf4b commit ef8b0fa

File tree

3 files changed

+79
-1
lines changed

3 files changed

+79
-1
lines changed

debugger/src/main/java/org/apache/pdfbox/debugger/pagepane/PagePane.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@
6969
import org.apache.pdfbox.debugger.ui.ImageTypeMenu;
7070
import org.apache.pdfbox.debugger.ui.RenderDestinationMenu;
7171
import org.apache.pdfbox.debugger.ui.TextDialog;
72+
import org.apache.pdfbox.debugger.ui.TextStripperMenu;
7273
import org.apache.pdfbox.pdmodel.common.PDRectangle;
7374
import org.apache.pdfbox.pdmodel.fixup.AcroFormDefaultFixup;
7475
import org.apache.pdfbox.pdmodel.fixup.PDDocumentFixup;
@@ -215,7 +216,7 @@ private void collectFieldLocations() throws IOException
215216
// (checking widget.getPage() also works, but it is sometimes null)
216217
if (dictionarySet.contains(widget.getCOSObject()) && widget.getRectangle() != null)
217218
{
218-
rectMap.put(widget.getRectangle(), "Field name: " + field.getFullyQualifiedName());
219+
rectMap.put(widget.getRectangle(), "Field name: " + field.getFullyQualifiedName() + ", value: " + field.getValueAsString());
219220
}
220221
}
221222
}
@@ -310,6 +311,8 @@ private void startExtracting()
310311
PDFTextStripper stripper = new PDFTextStripper();
311312
stripper.setStartPage(pageIndex + 1);
312313
stripper.setEndPage(pageIndex + 1);
314+
stripper.setSortByPosition(TextStripperMenu.isSorted());
315+
stripper.setIgnoreContentStreamSpaceGlyphs(TextStripperMenu.isIgnoreSpaces());
313316
textDialog.setText(stripper.getText(document));
314317
}
315318
catch (IOException ex)
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.pdfbox.debugger.ui;
18+
19+
import javax.swing.JCheckBoxMenuItem;
20+
import javax.swing.JMenu;
21+
22+
/**
23+
*
24+
* @author Tilman Hausherr
25+
*/
26+
public class TextStripperMenu extends MenuBase
27+
{
28+
private static TextStripperMenu instance;
29+
private static JCheckBoxMenuItem sortOptionMenuItem;
30+
private static JCheckBoxMenuItem ignoreSpacesOptionMenuItem;
31+
32+
/**
33+
* Constructor.
34+
*/
35+
private TextStripperMenu()
36+
{
37+
JMenu menu = new JMenu("Text extraction options");
38+
39+
sortOptionMenuItem = new JCheckBoxMenuItem("sort");
40+
menu.add(sortOptionMenuItem);
41+
42+
ignoreSpacesOptionMenuItem = new JCheckBoxMenuItem("ignore spaces");
43+
menu.add(ignoreSpacesOptionMenuItem);
44+
45+
setMenu(menu);
46+
}
47+
48+
/**
49+
* Provides the TextStripperMenu instance.
50+
*
51+
* @return TextStripperMenu instance.
52+
*/
53+
public static TextStripperMenu getInstance()
54+
{
55+
if (instance == null)
56+
{
57+
instance = new TextStripperMenu();
58+
}
59+
return instance;
60+
}
61+
62+
public static boolean isSorted()
63+
{
64+
return sortOptionMenuItem.isSelected();
65+
}
66+
67+
public static boolean isIgnoreSpaces()
68+
{
69+
return ignoreSpacesOptionMenuItem.isSelected();
70+
}
71+
}

debugger/src/main/java/org/apache/pdfbox/debugger/ui/ViewMenu.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,10 @@ private JMenu createViewMenu()
229229
extractTextMenuItem.setEnabled(false);
230230
viewMenu.add(extractTextMenuItem);
231231

232+
TextStripperMenu textStripperMenu = TextStripperMenu.getInstance();
233+
textStripperMenu.setEnableMenu(false);
234+
viewMenu.add(textStripperMenu.getMenu());
235+
232236
viewMenu.addSeparator();
233237

234238
repairAcroFormMenuItem = new JCheckBoxMenuItem(REPAIR_ACROFORM);

0 commit comments

Comments
 (0)