Skip to content

Commit 9db5b4c

Browse files
committed
🚧 Add OCR-D parameter for plain text encoding
1 parent 5578ce8 commit 9db5b4c

File tree

2 files changed

+7
-0
lines changed

2 files changed

+7
-0
lines changed

‎src/dinglehopper/ocrd-tool.json‎

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,11 @@
2525
"enum": ["region", "line"],
2626
"default": "region",
2727
"description": "PAGE XML hierarchy level to extract the text from"
28+
},
29+
"plain_encoding": {
30+
"type": "string",
31+
"default": "autodetect",
32+
"description": "Encoding (e.g. \"utf-8\") of plain text files"
2833
}
2934
}
3035
}

‎src/dinglehopper/ocrd_cli.py‎

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,7 @@ def process_page_file(self, *input_files: Optional[OcrdFileType]) -> None:
2626
assert self.parameter
2727
metrics = self.parameter["metrics"]
2828
textequiv_level = self.parameter["textequiv_level"]
29+
plain_encoding = self.parameter["plain_encoding"]
2930

3031
# wrong number of inputs: let fail
3132
gt_file, ocr_file = input_files
@@ -52,6 +53,7 @@ def process_page_file(self, *input_files: Optional[OcrdFileType]) -> None:
5253
self.output_file_grp,
5354
metrics=metrics,
5455
textequiv_level=textequiv_level,
56+
plain_encoding=plain_encoding,
5557
)
5658

5759
# Add reports to the workspace

0 commit comments

Comments
 (0)