-
Notifications
You must be signed in to change notification settings - Fork 128
Expand file tree
/
Copy pathtest_annotation_database_parser.py
More file actions
151 lines (117 loc) · 4.8 KB
/
test_annotation_database_parser.py
File metadata and controls
151 lines (117 loc) · 4.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
import os
import shutil
import sys
import tempfile
import pandas as pd
import pytest
from histomicstk.annotations_and_masks.annotation_database_parser import (
dump_annotations_locally, parse_annotations_to_local_tables)
thisDir = os.path.dirname(os.path.realpath(__file__))
sys.path.insert(0, os.path.join(thisDir, '../../../tests'))
import htk_test_utilities as utilities # noqa
from htk_test_utilities import getTestFilePath, girderClient # noqa
class Cfg:
    """Mutable holder for state shared by the tests in this module."""

    def __init__(self):
        # Both attributes start empty; test_prep assigns the girder client
        # and the id of the folder that the parser tests operate on.
        self.gc = self.folderid = None


cfg = Cfg()
# NOTE: pytest runs tests in the order they appear in the module, so test_prep
# must stay first -- it populates ``cfg`` for the tests defined after it.
@pytest.mark.usefixtures('girderClient')  # noqa
def test_prep(girderClient):  # noqa
    """Build the girder folder tree that the parser tests read from.

    Stores the client and the id of a new ``test-parser`` folder on the
    module-level ``cfg`` object, creates a ``test-parser-sub`` folder inside
    it, and copies the TCGA-A2-A0YE-01Z-00-DX1 item (annotations included)
    twice into each of the two folders.
    """
    client = cfg.gc = girderClient

    # the first search hit for the slide serves as the item to copy
    matches = client.get(
        '/item', parameters={'text': 'TCGA-A2-A0YE-01Z-00-DX1'})
    source_item = matches[0]

    # folder to "back up", created next to the source item
    parent_folder = client.post(
        '/folder', data={
            'parentId': source_item['folderId'],
            'name': 'test-parser',
        })
    cfg.folderid = parent_folder['_id']

    # nested folder so that recursion gets exercised
    child_folder = client.post(
        '/folder', data={
            'parentId': cfg.folderid,
            'name': 'test-parser-sub',
        })

    # populate both folders with copies of the item to form a dummy database
    copy_route = '/item/%s/copy' % source_item['_id']
    for copy_idx in range(2):
        for destination in (cfg.folderid, child_folder['_id']):
            client.post(
                copy_route, data={
                    'name': 'test_dbsqlite-%d' % copy_idx,
                    'copyAnnotations': True,
                    'folderId': destination,
                })
class TestDatabaseParser:
    """Test girder database parser.

    The tests read the girder client and target folder id from the
    module-level ``cfg`` object, which ``test_prep`` fills in.
    """

    @staticmethod
    def _read_sql(savepath, query):
        """Run ``query`` on ``<savepath>/test-parser.sqlite``.

        The connection is closed and the engine disposed before returning,
        so no handle on the sqlite file outlives the call and the caller can
        delete ``savepath`` afterwards.

        Parameters
        ----------
        savepath : str
            Directory containing ``test-parser.sqlite``.
        query : str
            SQL statement to execute.

        Returns
        -------
        pandas.DataFrame
            The rows returned by ``query``.

        """
        # sqlalchemy is only needed for these checks, so import it locally
        import sqlalchemy as db
        from sqlalchemy import text

        sql_engine = db.create_engine(
            'sqlite:///%s/test-parser.sqlite' % savepath)
        try:
            with sql_engine.connect() as dbcon:
                return pd.read_sql_query(text(query), dbcon)
        finally:
            sql_engine.dispose()

    def test_dump_annotations_locally_1(self):
        """Dump JSONs + sqlite and check the files and table row counts."""
        savepath = tempfile.mkdtemp()
        try:
            # recursively save annotations -- JSONs + sqlite for folders/items
            dump_annotations_locally(
                cfg.gc, folderid=cfg.folderid, local=savepath,
                save_json=True, save_sqlite=True)

            expected = {
                'test-parser.json',
                'test-parser.sqlite',
                'test_dbsqlite-0.json',
                'test_dbsqlite-0_annotations.json',
                'test_dbsqlite-1.json',
                'test_dbsqlite-1_annotations.json',
                'test-parser-sub',
            }
            # name the absent entries so a failure is self-explanatory
            missing = expected - set(os.listdir(savepath))
            assert not missing, 'missing from dump: %s' % sorted(missing)

            result = self._read_sql(
                savepath, """SELECT count(*) FROM 'folders';""")
            assert int(result.iloc[0, 0]) == 2

            result = self._read_sql(
                savepath, """SELECT count(*) FROM 'items';""")
            assert int(result.iloc[0, 0]) == 4
        finally:
            # cleanup, even when one of the assertions above fails
            shutil.rmtree(savepath)

    def test_dump_annotations_locally_2(self):
        """Dump with the table-parsing callback and check csv + sqlite."""
        savepath = tempfile.mkdtemp()
        try:
            # recursively save annotations -- parse to csv + sqlite
            dump_annotations_locally(
                cfg.gc, folderid=cfg.folderid, local=savepath,
                save_json=False, save_sqlite=True,
                callback=parse_annotations_to_local_tables,
                callback_kwargs={
                    'save_csv': True,
                    'save_sqlite': True,
                },
            )

            expected = {
                'test-parser.sqlite',
                'test_dbsqlite-0_docs.csv',
                'test_dbsqlite-0_elements.csv',
                'test_dbsqlite-1_docs.csv',
                'test_dbsqlite-1_elements.csv',
                'test-parser-sub',
            }
            # name the absent entries so a failure is self-explanatory
            missing = expected - set(os.listdir(savepath))
            assert not missing, 'missing from dump: %s' % sorted(missing)

            # the subfolder must hold one docs/elements csv pair per item
            files = os.listdir(os.path.join(savepath, 'test-parser-sub'))
            assert sum(j.endswith('_docs.csv') for j in files) == 2
            assert sum(j.endswith('_elements.csv') for j in files) == 2

            result = self._read_sql(
                savepath, """SELECT * FROM 'annotation_docs';""")
            assert result.shape == (32, 13)
            assert set(result.columns) == {
                '_modelType', '_version', 'annotation_girder_id',
                'created', 'creatorId', 'element_count',
                'element_details', 'groups', 'itemId', 'item_name',
                'public', 'updated', 'updatedId'}

            result = self._read_sql(
                savepath, """SELECT * FROM 'annotation_elements';""")
            assert result.shape == (304, 13)
            assert set(result.columns) == {
                'annotation_girder_id', 'bbox_area', 'color',
                'coords_x', 'coords_y', 'element_girder_id', 'group', 'label',
                'type', 'xmax', 'xmin', 'ymax', 'ymin'}
        finally:
            # cleanup, even when one of the assertions above fails
            shutil.rmtree(savepath)