Skip to content
This repository was archived by the owner on Mar 22, 2025. It is now read-only.

Commit 757a480

Browse files
committed
Change storage location
The storage location is updated to http://danlp-downloads.alexandra.dk so that the URL can remain the same even if the underlying storage server changes.
1 parent ae514f0 commit 757a480

File tree

2 files changed

+24
-23
lines changed

2 files changed

+24
-23
lines changed

danlp/download.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
DEFAULT_CACHE_DIR = os.path.join(str(Path.home()), '.danlp')
1515

16-
DANLP_STORAGE_URL = 'https://danlp.alexandra.dk/304bd159d5de'
16+
DANLP_STORAGE_URL = 'http://danlp-downloads.alexandra.dk'
1717

1818
# The naming convention of the word embedding are on the form <dataset>.<lang>.<type>
1919
# The <type> can be subword vectors=swv or word vectors=wv

tests/test_datasets.py

Lines changed: 23 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from danlp.datasets.word_sim import WordSim353Da
1212
from danlp.utils import write_simple_ner_dataset, read_simple_ner_dataset
1313

14+
DANLP_STORAGE_URL = 'http://danlp-downloads.alexandra.dk'
1415

1516
class TestNerDatasets(unittest.TestCase):
1617

@@ -95,35 +96,35 @@ def test_ddt_dataset_with_spacy(self):
9596

9697
self.assertIsInstance(corpus, GoldCorpus)
9798
self.assertEqual(self.train_len, num_sents_train)
98-
# temporary omitted due to changes in storage
99-
# def test_wikiann_dataset(self):
100-
# # Change to a sample of the full wikiann to ease test computation
101-
# DATASETS['wikiann']['url'] = "https://danlp.s3.eu-central-1.amazonaws.com/test-datasets/da.tar.gz"
102-
# DATASETS['wikiann']['size'] = 2502
103-
# DATASETS['wikiann']['md5_checksum'] = 'd0271de38ae23f215b5117450efb9ace'
99+
100+
def test_wikiann_dataset(self):
101+
# Change to a sample of the full wikiann to ease test computation
102+
DATASETS['wikiann']['url'] = DANLP_STORAGE_URL+ "/tests/da.tar.gz"
103+
DATASETS['wikiann']['size'] = 2502
104+
DATASETS['wikiann']['md5_checksum'] = 'd0271de38ae23f215b5117450efb9ace'
104105

105-
# wikiann = WikiAnn()
106+
wikiann = WikiAnn()
106107

107-
# corpus = wikiann.load_with_flair()
108+
corpus = wikiann.load_with_flair()
108109

109-
# self.assertEqual([len(corpus.train), len(corpus.dev), len(corpus.test)], [21, 2, 3])
110+
self.assertEqual([len(corpus.train), len(corpus.dev), len(corpus.test)], [21, 2, 3])
110111

111-
# ner_tags = corpus.make_tag_dictionary('ner').idx2item
112-
# asserted_ner_tags = [
113-
# b'B-ORG', b'B-PER', b'B-LOC',
114-
# b'I-ORG', b'I-PER', b'I-LOC',
115-
# b'O', b'<START>', b'<STOP>', b'<unk>'
116-
# ]
117-
# self.assertCountEqual(ner_tags, asserted_ner_tags)
112+
ner_tags = corpus.make_tag_dictionary('ner').idx2item
113+
asserted_ner_tags = [
114+
b'B-ORG', b'B-PER', b'B-LOC',
115+
b'I-ORG', b'I-PER', b'I-LOC',
116+
b'O', b'<START>', b'<STOP>', b'<unk>'
117+
]
118+
self.assertCountEqual(ner_tags, asserted_ner_tags)
118119

119-
# spacy_gold = wikiann.load_with_spacy()
120-
# self.assertIsInstance(spacy_gold, GoldCorpus)
120+
spacy_gold = wikiann.load_with_spacy()
121+
self.assertIsInstance(spacy_gold, GoldCorpus)
121122

122-
# num_train_sents = len(list(spacy_gold.train_tuples)[0][1])
123-
# num_dev_sents = len(list(spacy_gold.dev_tuples)[0][1])
124-
# self.assertEqual(num_dev_sents + num_train_sents, 26)
123+
num_train_sents = len(list(spacy_gold.train_tuples)[0][1])
124+
num_dev_sents = len(list(spacy_gold.dev_tuples)[0][1])
125+
self.assertEqual(num_dev_sents + num_train_sents, 26)
125126

126-
# shutil.rmtree(wikiann.dataset_dir)
127+
shutil.rmtree(wikiann.dataset_dir)
127128

128129
class TestSimilarityDatasets(unittest.TestCase):
129130
def test_wordsim353(self):

0 commit comments

Comments
 (0)