Skip to content

Commit f53438a

Browse files
chg: Add default line_limit
* Adds console feedback
* Linting 💄
1 parent 96bba34 commit f53438a

File tree

1 file changed

+9
-3
lines changed

1 file changed

+9
-3
lines changed

deep_reference_parser/prodigy/prodigy_to_tsv.py

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -12,10 +12,13 @@
1212
import numpy as np
1313
import plac
1414

15-
from ..io import read_jsonl
15+
from wasabi import Printer
1616

17+
from ..io import read_jsonl
1718
from ..logger import logger
1819

20+
msg = Printer()
21+
1922

2023
class TokenLabelPairs:
2124
"""
@@ -211,12 +214,16 @@ def yield_token_label_pair(self, doc, lists=False):
211214
int
212215
)
213216
)
214-
def prodigy_to_tsv(input_file, output_file, respect_lines, respect_docs, line_limit):
217+
def prodigy_to_tsv(input_file, output_file, respect_lines, respect_docs, line_limit=250):
215218
"""
216219
Convert token annotated jsonl to token annotated tsv ready for use in the
217220
Rodrigues model.
218221
"""
219222

223+
msg.info(f"Respect line endings: {respect_lines}")
224+
msg.info(f"Respect doc endings: {respect_docs}")
225+
msg.info(f"Line limit: {line_limit}")
226+
220227
annotated_data = read_jsonl(input_file)
221228

222229
logger.info("Loaded %s prodigy docs", len(annotated_data))
@@ -236,4 +243,3 @@ def prodigy_to_tsv(input_file, output_file, respect_lines, respect_docs, line_li
236243

237244
logger.info("Wrote %s token/label pairs to %s", len(token_label_pairs),
238245
output_file)
239-

0 commit comments

Comments
 (0)