-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_data_content.py
More file actions
50 lines (41 loc) · 1.49 KB
/
test_data_content.py
File metadata and controls
50 lines (41 loc) · 1.49 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# Copyright 2025 (AGPL-3.0-or-later), Miles K. Bertrand et al.
import pytest
import re
import datamanage
# Data files found under 'breviarium-1888'. datamanage.getwalk yields pairs;
# only the second item of each pair is kept, the first is not used here.
files = [file for _, file in datamanage.getwalk('breviarium-1888')]
# Keys and tags may only consist of lowercase ASCII letters and hyphens.
_SLUG_RE = re.compile(r'^[a-z-]+$')

# Characters that must not occur in any value.
# NOTE(review): these look like mistyped acute accents (an acute landing on a
# consonant, or a stray backtick) -- confirm the intent with the data owners.
_FORBIDDEN_CHARS = '`ẃŕṕśǵḱĺźćǘńḿ'


@pytest.mark.parametrize('file', files)
def test_data(file) -> None:
    """Validate the keys, tags and values of every entry in one data file.

    For each entry returned by ``datamanage.load_data(file)``:

    * the entry must have a ``'tags'`` key (a missing key raises ``KeyError``);
    * every key must match ``^[a-z-]+$`` and must not be ``'forwards-to'``;
    * when the tags value is a list, every item of every element must match
      the same pattern; otherwise every element of the tags value must match;
    * no value may contain any character from ``_FORBIDDEN_CHARS``;
    * an entry whose tags contain ``'collecta'`` must have a ``'terminatio'`` key.

    The failing entry is attached to each assertion as its message.
    """
    for entry in datamanage.load_data(file):
        # Looked up before the per-key checks so a missing 'tags' fails first.
        tags = entry['tags']
        for key, value in entry.items():
            assert _SLUG_RE.search(key), entry
            assert key != 'forwards-to', entry
            if key == 'tags':
                if isinstance(value, list):
                    # Each element of the list is itself iterated item by item.
                    assert all(
                        all(_SLUG_RE.search(item) for item in element)
                        for element in value
                    ), entry
                else:
                    assert all(_SLUG_RE.search(element) for element in value), entry
            # NOTE(review): for a string value this is a substring test; for a
            # list value `in` only compares whole elements, so strings nested
            # inside a list are not inspected -- confirm that is sufficient.
            assert all(char not in value for char in _FORBIDDEN_CHARS), entry
        if 'collecta' in tags:
            assert 'terminatio' in entry, entry
# Words containing œ/Œ but no acute accent; filled in by idoes().
oes = set()

# A word is a run of these letters. '&' is included because idoes() substitutes
# it for the combining acute accent (U+0301) before matching.
_WORD_RE = re.compile('[a-zA-ZáéíóúýÁÉÍÓÚÝæœÆŒǽǼ&]+')

# Characters whose presence means the word carries an acute accent.
# NOTE(review): ǽ/Ǽ are accepted as word characters above but are not listed
# here, so a word with both œ and ǽ is still recorded -- confirm intended.
_ACUTE_MARKS = frozenset('&áéíóúýÁÉÍÓÚÝ')


def idoes(txt: str) -> None:
    """Add to ``oes`` every word of *txt* that has an œ/Œ and no acute accent.

    Args:
        txt: Text to scan. Words are the runs matched by ``_WORD_RE``.

    Returns:
        None. Matching words are added to the module-level set ``oes``.
    """
    # Replace the combining acute with a plain character so it stays inside
    # the matched word and can be detected like the precomposed vowels.
    marked = txt.replace('\u0301', '&')
    for word in _WORD_RE.findall(marked):
        if ('œ' in word or 'Œ' in word) and _ACUTE_MARKS.isdisjoint(word):
            oes.add(word)
# Feed every string value, and every string item of every list value, from all
# data files to idoes(), then print the words it collected in `oes`.
# NOTE(review): this is module-level code, so it also runs whenever the module
# is imported (e.g. during pytest collection) -- confirm that is intended.
for file in files:
    for entry in datamanage.load_data(file):
        for value in entry.values():
            if isinstance(value, str):
                idoes(value)
            elif isinstance(value, list):
                for item in value:
                    # Non-string list items are skipped.
                    if isinstance(item, str):
                        idoes(item)
print(oes)