Skip to content

Commit 5e082ae

Browse files
committed
changed id for arendt specific directory walk
1 parent 00df9e1 commit 5e082ae

File tree

3 files changed

+3
-12
lines changed

3 files changed

+3
-12
lines changed

pom.xml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
<modelVersion>4.0.0</modelVersion>
44
<groupId>de.sub.goettingen.arendt</groupId>
55
<artifactId>ocr2tei</artifactId>
6-
<version>1.1-SNAPSHOT</version>
6+
<version>1.1.1-SNAPSHOT</version>
77
<name>OCR2XML</name>
88
<dependencies>
99
<dependency>

src/main/java/de/sub/goettingen/arendt/ocrmapping/Cli.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -173,7 +173,7 @@ public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IO
173173
// with file IV-NUMBER_TEI_Originalfilename.xml
174174
if (Cli.commandDir.specificWalk) {
175175
String newTEIDirname = file.getParent().getFileName().toString().replace("_xml", "_TEI");
176-
documentId = "Volume-"+newTEIDirname;
176+
documentId = newTEIDirname.substring(0, newTEIDirname.length()-4);
177177
Path newTEIDir= file.getParent().resolveSibling(newTEIDirname);
178178

179179
String teiFilename = newTEIDirname +"_"+ file.getFileName().toString().replace("ocr_", "");

src/main/java/de/sub/goettingen/arendt/ocrmapping/ReadXML.java

Lines changed: 1 addition & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -59,16 +59,7 @@ public static void main(String args[]) throws Exception {
5959
//TEI tei = convert2Tei("/tmp/ocr_1504081850380.xml", true);
6060
TEI tei = convert2Tei("/Users/Johannes/Documents/Arendt/oc2tei/cloud/Hannah-Arendt/Scans/Band-XIV-NL/XIV-NL-02/ocr-03/Arendt-XIV-NL-02-03_xml/ocr_1504604413610.xml",
6161
true, "Lokaltest", true);
62-
// distinct-values(//formatting/@lang)
63-
//https://stackoverflow.com/questions/8607464/how-to-count-each-instance-of-distinct-values-in-xquery
64-
/*
65-
*
66-
for $v in distinct-values(//formatting/@lang)
67-
return
68-
($v, count(index-of(//formatting/@lang, $v)))
6962

70-
http://www.baeldung.com/java-xpath
71-
*/
7263
OutputXML.writeXMLOutputToStdout(tei);
7364
}
7465

@@ -116,7 +107,7 @@ public static TEI convert2Tei (String inputFilePath,
116107
// this should exactly represent that file. We can't get the original filename from the XML
117108
// also if we have a document Id (CLI) option we add this as prefix, too.
118109
if (documentId != null)
119-
oPb.setN(documentId + String.format("%06d", pageNumber));
110+
oPb.setN(documentId + "-" + String.format("%08d", pageNumber));
120111
else
121112
oPb.setN(String.format("%06d", pageNumber));
122113

0 commit comments

Comments
 (0)