Skip to content

Commit e1f56aa

Browse files
committed
Merge remote-tracking branch 'origin/dev' into corefud_v1.3
2 parents 6defc5c + f0b4cee commit e1f56aa

File tree

13 files changed

+113
-12
lines changed

13 files changed

+113
-12
lines changed

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@
6464
'Programming Language :: Python :: 3.10',
6565
'Programming Language :: Python :: 3.11',
6666
'Programming Language :: Python :: 3.12',
67+
'Programming Language :: Python :: 3.13',
6768
],
6869

6970
# What does your project relate to?

stanza/models/depparse/scorer.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
logger = logging.getLogger('stanza')
1111

12-
def score_named_dependencies(pred_doc, gold_doc):
12+
def score_named_dependencies(pred_doc, gold_doc, output_latex=False):
1313
if len(pred_doc.sentences) != len(gold_doc.sentences):
1414
logger.warning("Not evaluating individual dependency F1 on account of document length mismatch")
1515
return
@@ -32,7 +32,13 @@ def score_named_dependencies(pred_doc, gold_doc):
3232
labels = sorted(set(tp.keys()).union(fp.keys()).union(fn.keys()))
3333
max_len = max(len(x) for x in labels)
3434
log_lines = []
35-
log_line_fmt = "%" + str(max_len) + "s: p %.4f r %.4f f1 %.4f (%d actual)"
35+
#log_line_fmt = "%" + str(max_len) + "s: p %.4f r %.4f f1 %.4f (%d actual)"
36+
if output_latex:
37+
log_lines.append(r"\begin{tabular}{lrr}")
38+
log_lines.append(r"Reln & F1 & Total \\")
39+
log_line_fmt = "{label} & {f1:0.4f} & {actual} \\\\"
40+
else:
41+
log_line_fmt = "{label:>" + str(max_len) + "s}: p {precision:0.4f} r {recall:0.4f} f1 {f1:0.4f} ({actual} actual)"
3642
for label in labels:
3743
if tp[label] == 0:
3844
precision = 0
@@ -42,7 +48,17 @@ def score_named_dependencies(pred_doc, gold_doc):
4248
precision = tp[label] / (tp[label] + fp[label])
4349
recall = tp[label] / (tp[label] + fn[label])
4450
f1 = 2 * (precision * recall) / (precision + recall)
45-
log_lines.append(log_line_fmt % (label, precision, recall, f1, tp[label] + fn[label]))
51+
actual = tp[label] + fn[label]
52+
template = {
53+
'label': label,
54+
'precision': precision,
55+
'recall': recall,
56+
'f1': f1,
57+
'actual': actual
58+
}
59+
log_lines.append(log_line_fmt.format(**template))
60+
if output_latex:
61+
log_lines.append(r"\end{tabular}")
4662
logger.info("F1 scores for each dependency:\n Note that unlabeled attachment errors hurt the labeled attachment scores\n%s" % "\n".join(log_lines))
4763

4864
def score(system_conllu_file, gold_conllu_file, verbose=True):

stanza/models/parser.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ def build_argparse():
4747
parser.add_argument('--eval_file', type=str, default=None, help='Input file for data loader.')
4848
parser.add_argument('--output_file', type=str, default=None, help='Output CoNLL-U file.')
4949
parser.add_argument('--no_gold_labels', dest='gold_labels', action='store_false', help="Don't score the eval file - perhaps it has no gold labels, for example. Cannot be used at training time")
50+
parser.add_argument('--output_latex', default=False, action='store_true', help='Output the per-relation table in LaTeX form')
5051
parser.add_argument('--mode', default='train', choices=['train', 'predict'])
5152
parser.add_argument('--lang', type=str, help='Language')
5253
parser.add_argument('--shorthand', type=str, help="Treebank shorthand")
@@ -422,7 +423,7 @@ def evaluate_trainer(args, trainer, pretrain):
422423
if word.deprel is None:
423424
raise ValueError("Gold document {} has a None at sentence {} word {}\n{:C}".format(args['eval_file'], sent_idx, word_idx, sentence))
424425

425-
scorer.score_named_dependencies(batch.doc, gold_doc)
426+
scorer.score_named_dependencies(batch.doc, gold_doc, args['output_latex'])
426427
_, _, score = scorer.score(system_pred_file, args['eval_file'])
427428

428429
logger.info("Parser score:")

stanza/pipeline/demo/Astloch-Bold.ttf

73.2 KB
Binary file not shown.
Binary file not shown.
Binary file not shown.

stanza/pipeline/demo/demo_server.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,11 @@ def get_file(path):
1212
return res
1313

1414
@app.route('/<path:path>')
15+
@app.route('/static/fonts/<path:path>')
1516
def static_file(path):
1617
if path in ['stanza-brat.css', 'stanza-brat.js', 'stanza-parseviewer.js', 'loading.gif',
17-
'favicon.png', 'stanza-logo.png']:
18+
'favicon.png', 'stanza-logo.png',
19+
'Astloch-Bold.ttf', 'Liberation_Sans-Regular.ttf', 'PT_Sans-Caption-Web-Regular.ttf']:
1820
return app.send_static_file(path)
1921
elif path in 'index.html':
2022
return app.send_static_file('stanza-brat.html')

stanza/pipeline/demo/stanza-brat.html

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
<html>
2-
<head profile="http://www.w3.org/2005/10/profile">
2+
<head profile="https://www.w3.org/2005/10/profile">
33
<link rel='icon' href='favicon.png' type='image/png'/ >
44
<!-- JQuery -->
55
<script src="https://code.jquery.com/jquery-2.1.4.min.js"></script>
@@ -15,7 +15,7 @@
1515
<script type="text/javascript" src="https://nlp.stanford.edu/js/brat/client/lib/head.load.min.js"></script>
1616
<!-- d3 -->
1717
<script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/d3/3.5.17/d3.min.js"></script>
18-
<script type="text/javascript" src="http://cdnjs.cloudflare.com/ajax/libs/dagre-d3/0.4.17/dagre-d3.min.js"></script>
18+
<script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/dagre-d3/0.4.17/dagre-d3.min.js"></script>
1919

2020
<!-- CoreNLP -->
2121
<link rel="stylesheet" type="text/css" href="stanza-brat.css"/>

stanza/resources/prepare_resources.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -421,6 +421,11 @@ def get_default_processors(resources, lang):
421421
expected_lemma = default_package + "_nocharlm"
422422
if expected_lemma in resources[lang]['lemma']:
423423
default_processors['lemma'] = expected_lemma
424+
else:
425+
expected_lemma = default_package + "_charlm"
426+
if expected_lemma in resources[lang]['lemma']:
427+
default_processors['lemma'] = expected_lemma
428+
print("WARNING: nocharlm lemmatizer for %s model does not exist, but %s does" % (default_package, expected_lemma))
424429
elif lang not in allowed_empty_languages:
425430
default_processors['lemma'] = 'identity'
426431

@@ -603,6 +608,11 @@ def process_packages(args):
603608
lemma_package = package + "_nocharlm"
604609
if lemma_package in resources[lang]["lemma"]:
605610
processors["lemma"] = lemma_package
611+
else:
612+
lemma_package = package + "_charlm"
613+
if lemma_package in resources[lang]['lemma']:
614+
processors['lemma'] = lemma_package
615+
print("WARNING: nocharlm lemmatizer for %s model does not exist, but %s does" % (package, lemma_package))
606616

607617
if "depparse" in resources[lang] and "pos" in processors:
608618
depparse_package = None

stanza/server/semgrex.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ def annotate_doc(doc, semgrex_result, semgrex_patterns, matches_only, exclude_ma
115115
sentence.add_comment("# semgrex pattern |%s| did not match!" % semgrex_pattern)
116116
else:
117117
sentence_matched = True
118+
highlight_tokens = []
118119
for match in pattern_result.match:
119120
match_word = "%d:%s" % (match.matchIndex, sentence.words[match.matchIndex-1].text)
120121
if len(match.node) == 0:
@@ -124,7 +125,8 @@ def annotate_doc(doc, semgrex_result, semgrex_patterns, matches_only, exclude_ma
124125
for node in match.node]
125126
node_matches = " " + " ".join(node_matches)
126127
sentence.add_comment("# semgrex pattern |%s| matched at %s%s" % (semgrex_pattern, match_word, node_matches))
127-
sentence.add_comment("# highlight tokens = %d" % match.matchIndex)
128+
highlight_tokens.append(match.matchIndex)
129+
sentence.add_comment("# highlight tokens = %s" % (" ".join("%d" % x for x in highlight_tokens)))
128130
if sentence_matched:
129131
matching_sentences.append(sentence)
130132
else:

0 commit comments

Comments
 (0)