Skip to content

Commit 83a4551

Browse files
new tcia endpoints
1 parent 68752ce commit 83a4551

File tree

10 files changed

+277
-42
lines changed

10 files changed

+277
-42
lines changed

src/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# encoding: utf-8
2+
3+
"""Initialization module for gdcapiwrapper package."""
4+
5+
__version__ = "0.1"

src/gdcapiwrapper/__init__.py

Lines changed: 0 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +0,0 @@
1-
# encoding: utf-8
2-
3-
import os
4-
import requests
5-
6-
__version__ = "0.1"
7-
GDC_API_TOKEN = os.environ.get("GCC_API_TOKEN", None)
8-
GDC_API_BASE_URL = os.environ.get("GDC_API_BASE_URL", "https://api.gdc.cancer.gov/")
9-
10-
11-
class APIBaseURLStatusError(Exception):
12-
pass
13-
14-
15-
class APITokenMissingError(Exception):
16-
pass
17-
18-
19-
request = requests.get(f"{GDC_API_BASE_URL}/status")
20-
21-
22-
if request.status_code != 200:
23-
raise APIBaseURLStatusError(
24-
f"{GDC_API_BASE_URL} status: {request.status_code}."
25-
"The resource seems to be unavailable"
26-
)
27-
28-
session = requests.Session()
29-
session.params = {"api_token": GDC_API_TOKEN, "api_base_url": GDC_API_BASE_URL}
30-
31-
from .data import Data # isort:skip # noqa

src/gdcapiwrapper/enums.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# encoding: utf-8
2+
3+
from enum import Enum
4+
5+
6+
class FORMAT_TYPE(Enum):
    """Output formats understood by the wrapped APIs.

    The first four members are the individual formats; their values are the
    upper-case strings sent to the service.  ``TCIA_ALLOWED_FORMATS`` is an
    additional member whose ``.value`` is the frozenset of those four strings,
    used by callers to validate a requested format.
    """

    # Individual formats.
    CSV = "CSV"
    HTML = "HTML"
    JSON = "JSON"
    XML = "XML"

    # Inside the class body the names above are still plain strings, so this
    # builds a frozenset of strings rather than a set of enum members.
    TCIA_ALLOWED_FORMATS = frozenset({CSV, HTML, JSON, XML})

src/gdcapiwrapper/exceptions.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# encoding: utf-8
2+
3+
4+
class APIBaseURLStatusError(Exception):
    """Raised when an API base URL does not answer its status check with HTTP 200."""
6+
7+
8+
class APITokenMissingError(Exception):
    """Error type for a missing API token.

    NOTE(review): no raise site is visible alongside this definition; confirm
    where (and whether) it is used.
    """

src/gdcapiwrapper/tcga/__init__.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# encoding: utf-8
2+
3+
import os
4+
import requests
5+
6+
from ..exceptions import APIBaseURLStatusError
7+
8+
9+
# Token and endpoint come from the environment; the token is None when the
# variable is unset and the base URL falls back to the public GDC API.
TCGA_API_TOKEN = os.environ.get("TCGA_API_TOKEN", None)
TCGA_API_BASE_URL = os.environ.get("TCGA_API_BASE_URL", "https://api.gdc.cancer.gov/")


# Probe the service once, at import time.  The default base URL ends with "/",
# so strip it before appending the path to avoid requesting "...gov//status".
request = requests.get(f"{TCGA_API_BASE_URL.rstrip('/')}/status")


if request.status_code != 200:
    # The two literals are concatenated, so the first one must end with a
    # space to keep the sentences apart in the final message.
    raise APIBaseURLStatusError(
        f"{TCGA_API_BASE_URL} status: {request.status_code}. "
        "The resource seems to be unavailable"
    )


# Shared session; sibling modules read the configuration back through
# ``session.params.get(...)``.
session = requests.Session()
session.params = {"api_token": TCGA_API_TOKEN, "api_base_url": TCGA_API_BASE_URL}
24+
25+
from .tcga import Data # isort:skip # noqa

src/gdcapiwrapper/data.py renamed to src/gdcapiwrapper/tcga/tcga.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
# encoding: utf-8
22

3-
43
import os
54
import re
65
from datetime import datetime
@@ -11,15 +10,15 @@
1110
from tqdm import tqdm
1211

1312
from . import session
14-
from .util import copyfileobj
13+
from ..util import copyfileobj
1514

1615
__data_endpoint__ = "data"
1716

1817
base_url = f"{session.params.get('api_base_url')}/{__data_endpoint__}"
1918

2019

2120
class Data(object):
22-
""" Provides Data objects for https://api.gdc.cancer.gov/data/ `Data Endpoints`
21+
"""Provides Data objects for https://api.gdc.cancer.gov/data/ `Data Endpoints`
2322
2423
Includes endpoints for file(s) download
2524
"""
@@ -46,7 +45,7 @@ def download(
4645
"""
4746
url = f"{base_url}/{uuid}"
4847

49-
local_filename = uuid if not name else name
48+
local_filename = name if name else uuid
5049
with requests.get(url, stream=True) as r:
5150
total_size = int(r.headers.get("content-length", 0))
5251
bar = tqdm(total=total_size, unit="iB", unit_scale=True)

src/gdcapiwrapper/tcia/__init__.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# encoding: utf-8
2+
3+
import os
4+
import requests
5+
6+
7+
# Token and endpoint come from the environment.  The token is None when the
# variable is unset; the base URL falls back to the public TCIA v4 service.
TCIA_API_TOKEN = os.environ.get("TCIA_API_TOKEN")
TCIA_API_BASE_URL = os.environ.get(
    "TCIA_API_BASE_URL",
    "https://services.cancerimagingarchive.net/services/v4/TCIA",
)


# Shared session used as the configuration holder for this sub-package.
# NOTE(review): ``Session.params`` are query-string parameters that requests
# merges into every call made *through the session*; confirm that is intended
# before issuing requests with this object.
session = requests.Session()
session.params = dict(api_token=TCIA_API_TOKEN, api_base_url=TCIA_API_BASE_URL)
15+
16+
from .tcia import Data # isort:skip # noqa

src/gdcapiwrapper/tcia/tcia.py

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
# encoding: utf-8
2+
3+
import os
4+
from typing import Tuple
5+
6+
import requests
7+
from responses import Response
8+
from tqdm import tqdm
9+
10+
from ..enums import FORMAT_TYPE as FT
11+
from . import session
12+
from ..util import copyfileobj
13+
14+
# Path segment appended to the configured API base URL.
__data_endpoint__ = "query"

# Root URL for the requests built in this module, computed once at import time
# from the base URL stored on the shared session.
base_url = "{}/{}".format(session.params.get("api_base_url"), __data_endpoint__)
17+
18+
19+
class Data(object):
    """Provides Data objects for
    https://services.cancerimagingarchive.net/services/v4/TCIA/ `Data Endpoints`

    All methods are classmethods that issue a GET request below the
    module-level ``base_url`` and return the ``requests`` response together
    with either the name of the file that was written or the decoded JSON.
    The HTTP status is not checked here; callers should inspect
    ``response.ok`` (or call ``response.raise_for_status()``) themselves.
    """

    @classmethod
    def _stream_to_file(
        cls, endpoint: str, params: dict, path: str, local_filename: str
    ) -> Tuple[requests.Response, str]:
        """Stream ``{base_url}/{endpoint}`` into ``path/local_filename``.

        Shared implementation of the two download methods.
        """
        # ``base_url`` is looked up at call time so tests can patch it.
        url = f"{base_url}/{endpoint}"
        # Passing ``params`` lets requests URL-encode the query string.
        with requests.get(url, params=params, stream=True) as r:
            total_size = int(r.headers.get("content-length", 0))
            # Use the bar as a context manager so it is always closed.
            with tqdm(total=total_size, unit="iB", unit_scale=True) as bar:
                with open(os.path.join(path, local_filename), "wb") as f:
                    copyfileobj(r.raw, f, bar)
        # The response is closed at this point, but status/headers stay usable.
        return r, local_filename

    @classmethod
    def download_single_image(
        cls,
        series_instance_uid: str,
        sop_instance_uid: str,
        path: str = ".",
        name: str = None,
    ) -> Tuple[requests.Response, str]:
        """Download a SINGLE DICOM object.

        A single image is identified by its SeriesInstanceUID and
        SOPInstanceUID; the latter can be obtained from `sop_instance_uids`.

        Parameters
        ---------
        series_instance_uid : str
            SeriesInstance UID
        sop_instance_uid: str
            SOPInstance UID
        path: str
            Local directory where the file is saved (default: current path)
        name: str
            Filename. If not provided, the file is saved as
            ``<sop_instance_uid>.dcm``

        Returns
        -------
        tuple
            response, filename (the bare name used inside `path`, not an
            absolute path)
        """
        local_filename = name if name else f"{sop_instance_uid}.dcm"
        return cls._stream_to_file(
            "getSingleImage",
            {
                "SeriesInstanceUID": series_instance_uid,
                "SOPInstanceUID": sop_instance_uid,
            },
            path,
            local_filename,
        )

    @classmethod
    def download_series_instance_images(
        cls, series_instance_uid: str, path: str = ".", name: str = None
    ) -> Tuple[requests.Response, str]:
        """Download a single Zip file with the images of the given SeriesInstance.

        Parameters
        ---------
        series_instance_uid : str
            SeriesInstance UID
        path: str
            Local directory where the file is saved (default: current path)
        name: str
            Filename. If not provided, the file is saved as
            ``<series_instance_uid>.zip``

        Returns
        -------
        tuple
            response, filename (the bare name used inside `path`, not an
            absolute path)
        """
        local_filename = name if name else f"{series_instance_uid}.zip"
        return cls._stream_to_file(
            "getImage",
            {"SeriesInstanceUID": series_instance_uid},
            path,
            local_filename,
        )

    @classmethod
    def sop_instance_uids(
        cls,
        series_instance_uid: str,
        format_: str = "JSON",
        path: str = ".",
        name: str = None,
    ) -> Tuple[requests.Response, str]:
        """Return the list of SOPInstanceUID for a given SeriesInstanceUID

        Parameters
        ---------
        series_instance_uid : str
            SeriesInstance UID
        format_ : str
            Output format, case-insensitive. This endpoint supports
            CSV/HTML/XML/JSON
        path: str
            Local directory where the file is saved (default: current path).
            Ignored for JSON
        name: str
            Filename. If not provided, the file is saved as
            ``<series_instance_uid>.<format>``. Ignored for JSON

        Returns
        -------
        tuple
            response, decoded JSON when the format is JSON; otherwise
            response, filename (the bare name used inside `path`)

        Raises
        ------
        ValueError
            If `format_` is not one of the allowed formats
        """
        # Normalise once so validation, the request and the file extension
        # all agree on the same spelling.
        fmt = format_.upper()
        allowed = FT.TCIA_ALLOWED_FORMATS.value
        if fmt not in allowed:
            # Sorted for a stable message; the frozenset has no fixed order.
            raise ValueError(
                f"Format not allowed. Allowed formats: {sorted(allowed)}, "
                f"got {format_}."
            )

        r = requests.get(
            f"{base_url}/getSOPInstanceUIDs",
            params={"SeriesInstanceUID": series_instance_uid, "format": fmt},
        )
        if fmt == "JSON":
            return r, r.json()

        local_filename = name if name else f"{series_instance_uid}.{fmt.lower()}"
        with open(os.path.join(path, local_filename), "wb") as f:
            f.write(r.content)
        return r, local_filename

tests/mockserver.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212

1313
class MockServerRequestHandler(BaseHTTPRequestHandler):
14-
API_PATTERN = re.compile(r"/data|/")
14+
API_PATTERN = re.compile(r"/data|query|/")
1515

1616
def do_GET(self):
1717
if re.search(self.API_PATTERN, self.path):

tests/unit/test_data.py

Lines changed: 66 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,13 @@
66
import pytest
77
from requests.exceptions import ChunkedEncodingError
88

9-
from gdcapiwrapper.data import Data
9+
from gdcapiwrapper.tcga import Data as TCGAData
10+
from gdcapiwrapper.tcia import Data as TCIAData
1011

1112
from ..mockserver import get_free_port, start_mock_server
1213

1314

14-
class TestData(object):
15+
class TestTCGAData(object):
1516
@classmethod
1617
def setup_class(cls):
1718
cls.mock_server_port = get_free_port()
@@ -20,8 +21,10 @@ def setup_class(cls):
2021
def test_download(self, tmpdir):
2122
base_url = "http://localhost:{port}/data".format(port=self.mock_server_port)
2223

23-
with mock.patch.dict("gdcapiwrapper.data.__dict__", {"base_url": base_url}):
24-
response, filename = Data.download(
24+
with mock.patch.dict(
25+
"gdcapiwrapper.tcga.tcga.__dict__", {"base_url": base_url}
26+
):
27+
response, filename = TCGAData.download(
2528
uuid="fakeuuid", path=tmpdir, name="fakefilename"
2629
)
2730

@@ -31,12 +34,69 @@ def test_download(self, tmpdir):
3134
def test_download_multiple(self, tmpdir):
3235
base_url = "http://localhost:{port}".format(port=self.mock_server_port)
3336
try:
34-
with mock.patch.dict("gdcapiwrapper.data.__dict__", {"base_url": base_url}):
35-
response, filename = Data.download_multiple(
37+
with mock.patch.dict(
38+
"gdcapiwrapper.tcga.tcga.__dict__", {"base_url": base_url}
39+
):
40+
response, filename = TCGAData.download_multiple(
3641
uuid_list=["1", "2"], path=tmpdir
3742
)
3843
except ChunkedEncodingError:
3944
pytest.skip("Flaky ConnectionResetError")
4045

4146
assert response.ok is True
4247
assert os.path.exists(os.path.join(tmpdir, "fake.gzip")) is True
48+
49+
50+
class TestTCIAData(object):
    """Exercise the TCIA ``Data`` endpoints against the local mock server.

    Each test patches ``base_url`` in ``gdcapiwrapper.tcia.tcia`` so requests
    go to the mock server started in ``setup_class``.
    """

    @classmethod
    def setup_class(cls):
        cls.mock_server_port = get_free_port()
        start_mock_server(cls.mock_server_port)

    def _query_url(self):
        """Return the mock server's stand-in for the TCIA ``query`` base URL."""
        return "http://localhost:{port}/query".format(port=self.mock_server_port)

    def test_json_sop_instance_uids(self):
        base_url = self._query_url()
        with mock.patch.dict(
            "gdcapiwrapper.tcia.tcia.__dict__", {"base_url": base_url}
        ):
            # Named ``payload`` so the local does not shadow the ``json`` module.
            response, payload = TCIAData.sop_instance_uids(
                series_instance_uid="fakeuid"
            )

        assert response.ok is True
        assert payload == []

    def test_other_formats_sop_instance_uids(self, tmpdir):
        base_url = self._query_url()
        with mock.patch.dict(
            "gdcapiwrapper.tcia.tcia.__dict__", {"base_url": base_url}
        ):
            response, filename = TCIAData.sop_instance_uids(
                series_instance_uid="fakeuid", format_="CSV", path=tmpdir
            )

        assert response.ok is True
        assert os.path.exists(os.path.join(tmpdir, "fakeuid.csv")) is True

    def test_download_single_image(self, tmpdir):
        base_url = self._query_url()
        with mock.patch.dict(
            "gdcapiwrapper.tcia.tcia.__dict__", {"base_url": base_url}
        ):
            response, filename = TCIAData.download_single_image(
                series_instance_uid="fakeuid",
                sop_instance_uid="sopfakeuid",
                path=tmpdir,
            )

        assert response.ok is True
        assert os.path.exists(os.path.join(tmpdir, "sopfakeuid.dcm")) is True

    # Renamed with the ``test_`` prefix: under the original name pytest never
    # collected this method, so the endpoint was silently untested.
    def test_download_series_instance_images(self, tmpdir):
        base_url = self._query_url()
        with mock.patch.dict(
            "gdcapiwrapper.tcia.tcia.__dict__", {"base_url": base_url}
        ):
            response, filename = TCIAData.download_series_instance_images(
                series_instance_uid="fakeuid", path=tmpdir
            )

        assert response.ok is True
        assert os.path.exists(os.path.join(tmpdir, "fakeuid.zip")) is True

0 commit comments

Comments
 (0)