|
11 | 11 | from danlp.datasets.word_sim import WordSim353Da |
12 | 12 | from danlp.utils import write_simple_ner_dataset, read_simple_ner_dataset |
13 | 13 |
|
| 14 | +DANLP_STORAGE_URL = 'http://danlp-downloads.alexandra.dk' |
14 | 15 |
|
15 | 16 | class TestNerDatasets(unittest.TestCase): |
16 | 17 |
|
@@ -95,35 +96,35 @@ def test_ddt_dataset_with_spacy(self): |
95 | 96 |
|
96 | 97 | self.assertIsInstance(corpus, GoldCorpus) |
97 | 98 | self.assertEqual(self.train_len, num_sents_train) |
98 | | -# temporary omitted due to changes in storage |
99 | | -# def test_wikiann_dataset(self): |
100 | | -# # Change to a sample of the full wikiann to ease test computation |
101 | | -# DATASETS['wikiann']['url'] = "https://danlp.s3.eu-central-1.amazonaws.com/test-datasets/da.tar.gz" |
102 | | -# DATASETS['wikiann']['size'] = 2502 |
103 | | -# DATASETS['wikiann']['md5_checksum'] = 'd0271de38ae23f215b5117450efb9ace' |
| 99 | + |
| 100 | + def test_wikiann_dataset(self): |
| 101 | + # Change to a sample of the full wikiann to ease test computation |
|     | 102 | +        DATASETS['wikiann']['url'] = DANLP_STORAGE_URL + "/tests/da.tar.gz" |
| 103 | + DATASETS['wikiann']['size'] = 2502 |
| 104 | + DATASETS['wikiann']['md5_checksum'] = 'd0271de38ae23f215b5117450efb9ace' |
104 | 105 |
|
105 | | -# wikiann = WikiAnn() |
| 106 | + wikiann = WikiAnn() |
106 | 107 |
|
107 | | -# corpus = wikiann.load_with_flair() |
| 108 | + corpus = wikiann.load_with_flair() |
108 | 109 |
|
109 | | -# self.assertEqual([len(corpus.train), len(corpus.dev), len(corpus.test)], [21, 2, 3]) |
| 110 | + self.assertEqual([len(corpus.train), len(corpus.dev), len(corpus.test)], [21, 2, 3]) |
110 | 111 |
|
111 | | -# ner_tags = corpus.make_tag_dictionary('ner').idx2item |
112 | | -# asserted_ner_tags = [ |
113 | | -# b'B-ORG', b'B-PER', b'B-LOC', |
114 | | -# b'I-ORG', b'I-PER', b'I-LOC', |
115 | | -# b'O', b'<START>', b'<STOP>', b'<unk>' |
116 | | -# ] |
117 | | -# self.assertCountEqual(ner_tags, asserted_ner_tags) |
| 112 | + ner_tags = corpus.make_tag_dictionary('ner').idx2item |
| 113 | + asserted_ner_tags = [ |
| 114 | + b'B-ORG', b'B-PER', b'B-LOC', |
| 115 | + b'I-ORG', b'I-PER', b'I-LOC', |
| 116 | + b'O', b'<START>', b'<STOP>', b'<unk>' |
| 117 | + ] |
| 118 | + self.assertCountEqual(ner_tags, asserted_ner_tags) |
118 | 119 |
|
119 | | -# spacy_gold = wikiann.load_with_spacy() |
120 | | -# self.assertIsInstance(spacy_gold, GoldCorpus) |
| 120 | + spacy_gold = wikiann.load_with_spacy() |
| 121 | + self.assertIsInstance(spacy_gold, GoldCorpus) |
121 | 122 |
|
122 | | -# num_train_sents = len(list(spacy_gold.train_tuples)[0][1]) |
123 | | -# num_dev_sents = len(list(spacy_gold.dev_tuples)[0][1]) |
124 | | -# self.assertEqual(num_dev_sents + num_train_sents, 26) |
| 123 | + num_train_sents = len(list(spacy_gold.train_tuples)[0][1]) |
| 124 | + num_dev_sents = len(list(spacy_gold.dev_tuples)[0][1]) |
| 125 | + self.assertEqual(num_dev_sents + num_train_sents, 26) |
125 | 126 |
|
126 | | -# shutil.rmtree(wikiann.dataset_dir) |
| 127 | + shutil.rmtree(wikiann.dataset_dir) |
127 | 128 |
|
128 | 129 | class TestSimilarityDatasets(unittest.TestCase): |
129 | 130 | def test_wordsim353(self): |
|
0 commit comments