Skip to content

Commit f4e8035

Browse files
committed
Popup warning for training with LSTM or WordStr box files
1 parent 9de0ac0 commit f4e8035

File tree

6 files changed

+14
-14
lines changed

6 files changed

+14
-14
lines changed

src/net/sourceforge/tessboxeditor/Gui.java

Lines changed: 1 addition & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -34,8 +34,6 @@
3434
import java.awt.image.BufferedImage;
3535
import java.io.*;
3636
import java.nio.charset.StandardCharsets;
37-
import java.nio.file.Files;
38-
import java.nio.file.Paths;
3937
import java.text.*;
4038
import java.util.*;
4139
import java.util.logging.Level;
@@ -1583,7 +1581,7 @@ void loadBoxes(File boxFile) {
15831581
try {
15841582
boxPages.clear();
15851583

1586-
String str = readBoxFile(boxFile);
1584+
String str = Utils.readTextFile(boxFile);
15871585
// load into textarea
15881586
this.jTextAreaBoxData.setText(str);
15891587
boxPages = parseBoxString(str, imageList);
@@ -1607,10 +1605,6 @@ void loadBoxes(File boxFile) {
16071605
}
16081606
}
16091607

1610-
String readBoxFile(File boxFile) throws IOException {
1611-
return new String(Files.readAllBytes(Paths.get(boxFile.getPath())), StandardCharsets.UTF_8);
1612-
}
1613-
16141608
List<TessBoxCollection> parseBoxString(String boxStr, List<BufferedImage> imageList) throws IOException {
16151609
List<TessBoxCollection> allBoxPages = new ArrayList<TessBoxCollection>();
16161610
isWordStrFormat = false;

src/net/sourceforge/tessboxeditor/GuiWithEdit.java

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -43,6 +43,7 @@
4343
import static net.sourceforge.tessboxeditor.Gui.prefs;
4444
import net.sourceforge.tessboxeditor.datamodel.TessBox;
4545
import net.sourceforge.tessboxeditor.datamodel.TessBoxCollection;
46+
import net.sourceforge.vietocr.util.Utils;
4647

4748
public class GuiWithEdit extends GuiWithMRU implements PropertyChangeListener {
4849

@@ -361,7 +362,7 @@ protected Void doInBackground() throws Exception {
361362
}
362363

363364
List<BufferedImage> imageList = ImageIOHelper.getImageList(imageFile);
364-
String str = readBoxFile(boxFile);
365+
String str = Utils.readTextFile(boxFile);
365366
List<TessBoxCollection> boxPages = parseBoxString(str, imageList);
366367
performSegment(imageList, boxPages, instance);
367368

src/net/sourceforge/tessboxeditor/TessTrainer.java

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -426,11 +426,16 @@ public boolean accept(File dir, String filename) {
426426
*
427427
* @return
428428
*/
429-
String[] getImageFilesWithBox() {
429+
String[] getImageFilesWithBox() throws IOException {
430430
List<String> filesWithBox = new ArrayList<String>();
431431
for (String file : getImageFiles()) {
432432
String withoutExt = TextUtilities.stripExtension(file);
433-
if (new File(inputDataDir, withoutExt + ".box").exists()) {
433+
File boxFile = new File(inputDataDir, withoutExt + ".box");
434+
if (boxFile.exists()) {
435+
String str = Utils.readTextFile(boxFile);
436+
if (str.contains("WordStr") || str.contains("\t")) {
437+
throw new RuntimeException("Cannot train with LSTM or WordStr box files.\nTraining for Tesseract 4.0x is not supported.");
438+
}
434439
filesWithBox.add(file);
435440
}
436441
}

src/net/sourceforge/tessboxeditor/config.xml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,6 @@
22
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
33
<properties>
44
<comment>Configuration File</comment>
5-
<entry key="ReleaseDate">2020/3/6</entry>
5+
<entry key="ReleaseDate">2020/3/21</entry>
66
<entry key="Version">v2.3.0</entry>
77
</properties>

src/net/sourceforge/vietocr/util/Utils.java

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -137,9 +137,9 @@ public static String join(Collection<?> s, String delimiter) {
137137
*
138138
* @param textFile
139139
* @return
140-
* @throws Exception
140+
* @throws IOException
141141
*/
142-
public static String readTextFile(File textFile) throws Exception {
142+
public static String readTextFile(File textFile) throws IOException {
143143
return new String(Files.readAllBytes(textFile.toPath()), StandardCharsets.UTF_8); // Java 7 API
144144
}
145145

versionchanges.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -135,7 +135,7 @@ Version 2.2.1 (25 August 2019)
135135
Version 2.2.2 (8 October 2019)
136136
- Update dependencies
137137

138-
Version 2.3.0 (6 March 2020)
138+
Version 2.3.0 (21 March 2020)
139139
- Support LSTM & WordStr box format
140140
- Support reordering boxes through table row drag-and-drop
141141
- Fix column alignment

0 commit comments

Comments
 (0)