|
469 | 469 | https://github.com/UniversalDependencies/UD_Hebrew-IAHLTknesset |
470 | 470 | - change to the dev branch in that repo |
471 | 471 | python3 stanza/utils/datasets/ner/prepare_ner_dataset.py he_iahlt |
| 472 | +
|
| 473 | +ang_ewt is an Old English dataset available here: |
| 474 | + https://github.com/dmetola/Old_English-OEDT/tree/main |
| 475 | + More information, including a citation, will be added here in the future |
| 476 | + - install in NERBASE: |
| 477 | + mkdir $NERBASE/ang |
| 478 | + cd $NERBASE/ang |
| 479 | + git clone git@github.com:dmetola/Old_English-OEDT.git |
| 480 | + - python3 stanza/utils/datasets/ner/prepare_ner_dataset.py ang_ewt |
472 | 481 | """ |
473 | 482 |
|
474 | 483 | import glob |
@@ -1471,8 +1480,13 @@ def process_he_iahlt(paths, short_name): |
1471 | 1480 | base_output_path = paths["NER_DATA_DIR"] |
1472 | 1481 | convert_he_iahlt.convert_iahlt(udbase, base_output_path, "he_iahlt") |
1473 | 1482 |
|
| 1483 | +def process_ang_ewt(paths, short_name): |
| 1484 | + assert short_name == 'ang_ewt' |
| 1485 | + base_input_path = os.path.join(paths["NERBASE"], "ang", "Old_English-OEDT") |
| 1486 | + convert_bio_to_json(base_input_path, paths["NER_DATA_DIR"], short_name) |
1474 | 1487 |
|
1475 | 1488 | DATASET_MAPPING = { |
| 1489 | + "ang_ewt": process_ang_ewt, |
1476 | 1490 | "ar_aqmar": process_ar_aqmar, |
1477 | 1491 | "bn_daffodil": process_bn_daffodil, |
1478 | 1492 | "da_ddt": process_da_ddt, |
|
0 commit comments