Skip to content

Commit 62ee1a2

Browse files
Merge pull request #1 from histolab/basic-api-request-implementation
Basic api request implementation
2 parents 1e8a577 + 5577e47 commit 62ee1a2

17 files changed

+357
-16
lines changed

.coveragerc

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# .coveragerc to control coverage.py
2+
[run]
3+
branch = True
4+
omit =
5+
*/site-packages/*
6+
*/distutils/*
7+
8+
[report]
9+
show_missing = true
10+
precision = 2
11+
# Regexes for lines to exclude from consideration
12+
exclude_lines =
13+
# Have to re-enable the standard pragma
14+
pragma: no cover
15+
16+
# Don't complain about missing debug-only code:
17+
def __repr__
18+
if self\.debug
19+
20+
# Don't complain if tests don't hit defensive assertion code:
21+
raise AssertionError
22+
raise NotImplementedError
23+
24+
# Don't complain if non-runnable code isn't run:
25+
if 0:
26+
if __name__ == .__main__.:
27+
28+
ignore_errors = True
29+
30+
[html]
31+
directory = coverage_html_report

.flake8

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# Recommended flake8 settings while editing this project; Black has the final say in how code is formatted
2+
#
3+
# pip install flake8 flake8-bugbear
4+
#
5+
# This will warn/error on things that black does not fix, on purpose.
6+
7+
[flake8]
8+
# max line length is set to 88 in black, here it is set to 80 and we enable bugbear's B950 warning, which is:
9+
#
10+
# B950: Line too long. This is a pragmatic equivalent of pycodestyle’s E501: it
11+
# considers “max-line-length” but only triggers when the value has been
12+
# exceeded by more than 10%. You will no longer be forced to reformat code due
13+
# to the closing parenthesis being one character too far to satisfy the linter.
14+
# At the same time, if you do significantly violate the line length, you will
15+
# receive a message that states what the actual limit is. This is inspired by
16+
# Raymond Hettinger’s “Beyond PEP 8” talk and highway patrol not stopping you
17+
# if you drive < 5mph too fast. Disable E501 to avoid duplicate warnings.
18+
19+
# remove the exclude line below when filter is ok
exclude = src/histolab/filter.py
20+
21+
max-line-length = 80
22+
max-complexity = 12
23+
select = E,F,W,C,B,B9
24+
ignore =
25+
# E123 closing bracket does not match indentation of opening bracket’s line
26+
E123
27+
# E203 whitespace before ‘:’ (Not PEP8 compliant, Python Black)
28+
E203
29+
# E501 line too long (82 > 79 characters) (replaced by B950 from flake8-bugbear, https://github.com/PyCQA/flake8-bugbear)
30+
E501
31+
# W503 line break before binary operator (Not PEP8 compliant, Python Black)
32+
W503
33+
# C901 function too complex - ignored because many functions are too complex, with a lot of if branching
34+
C901

.gitignore

Lines changed: 6 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
#IDE
2+
.idea
3+
14
# Byte-compiled / optimized / DLL files
25
__pycache__/
36
*.py[cod]
@@ -50,23 +53,12 @@ coverage.xml
5053
*.py,cover
5154
.hypothesis/
5255
.pytest_cache/
56+
coverage_html_report
5357

5458
# Translations
5559
*.mo
5660
*.pot
5761

58-
# Django stuff:
59-
*.log
60-
local_settings.py
61-
db.sqlite3
62-
db.sqlite3-journal
63-
64-
# Flask stuff:
65-
instance/
66-
.webassets-cache
67-
68-
# Scrapy stuff:
69-
.scrapy
7062

7163
# Sphinx documentation
7264
docs/_build/
@@ -90,13 +82,11 @@ ipython_config.py
9082
# having no cross-platform support, pipenv may install dependencies that don't work, or not
9183
# install all needed dependencies.
9284
#Pipfile.lock
85+
Pipfile
9386

9487
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
9588
__pypackages__/
9689

97-
# Celery stuff
98-
celerybeat-schedule
99-
celerybeat.pid
10090

10191
# SageMath parsed files
10292
*.sage.py
@@ -110,6 +100,7 @@ ENV/
110100
env.bak/
111101
venv.bak/
112102

103+
113104
# Spyder project settings
114105
.spyderproject
115106
.spyproject

.pre-commit-config.yaml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
repos:
2+
- repo: https://github.com/ambv/black
3+
rev: stable
4+
hooks:
5+
- id: black
6+
language_version: python3.7
7+
- repo: https://gitlab.com/pycqa/flake8
8+
rev: 3.7.9
9+
hooks:
10+
- id: flake8

.travis.yml

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# Wire up travis
2+
language: python
3+
sudo: false
4+
5+
python:
6+
- "3.6"
7+
- "3.7"
8+
9+
10+
# command to install dependencies
11+
install:
12+
- pip install -r requirements.txt
13+
- pip install -r requirements-dev.txt
14+
15+
# command to run tests
16+
script:
17+
- flake8 .
18+
- pytest --cov=gdcapiwrapper
19+
20+
after_success:
21+
- coveralls
22+
23+
# NOTE(review): this Slack credential is committed in plain text. Travis CI
# recommends encrypting it (`travis encrypt "<account>:<token>" --add
# notifications.slack`); presumably this token should be rotated - confirm.
notifications:
24+
slack: mpba:fBrjUWlUSa3XKeCA8zFsTFu2

README.md

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,12 @@
1-
# gdc-api-wrapper
21
Genomic Data Commons API wrapper
2+
================================
3+
A simple Python wrapper for the [GDC Application Programming Interface (API)](https://portal.gdc.cancer.gov/)
4+
5+
[![Build Status](https://travis-ci.com/histolab/gdc-api-wrapper.svg?branch=master)](https://travis-ci.com/histolab/gdc-api-wrapper)
6+
[![Coverage Status](https://coveralls.io/repos/github/histolab/gdc-api-wrapper/badge.svg?branch=master)](https://coveralls.io/github/histolab/gdc-api-wrapper?branch=master)
7+
8+
The GDC API drives the GDC Data and Submission Portals and provides programmatic access to GDC functionality. This includes searching for, downloading, and submitting data and metadata.
9+
10+
## Features implemented
11+
- Downloading a Single File using GET
12+
- Downloading Multiple Files using POST

gdcapiwrapper/__init__.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# encoding: utf-8
2+
3+
import os
4+
5+
import requests
6+
7+
# Optional access token. ``GDC_API_TOKEN`` is the intended variable name; the
# misspelled ``GCC_API_TOKEN`` is still honoured so existing setups keep working.
GDC_API_TOKEN = os.environ.get("GDC_API_TOKEN", os.environ.get("GCC_API_TOKEN"))
GDC_API_BASE_URL = os.environ.get("GDC_API_BASE_URL", "https://api.gdc.cancer.gov/")


class APIBaseURLStatusError(Exception):
    """Raised at import time when the API status endpoint does not return 200."""


class APITokenMissingError(Exception):
    """Error type for a missing API token (not raised anywhere in this module)."""


# Availability check performed on import. The default base URL ends with "/",
# so strip it before joining to avoid requesting "<base>//status".
request = requests.get(f"{GDC_API_BASE_URL.rstrip('/')}/status")
if request.status_code != 200:
    raise APIBaseURLStatusError(
        f"{GDC_API_BASE_URL} status: {request.status_code}. "
        "The resource seems to be unavailable"
    )

# ``session.params`` is used as a shared settings holder: other modules read
# "api_base_url" from it.
# NOTE(review): requests sends ``Session.params`` as query-string data on every
# request made through the session, so the token would end up in URLs if this
# session is ever used for HTTP calls - confirm this is intended.
session = requests.Session()
session.params = {"api_token": GDC_API_TOKEN, "api_base_url": GDC_API_BASE_URL}

# Imported last because ``.data`` imports ``session`` from this module.
from .data import Data  # isort:skip  # noqa

gdcapiwrapper/data.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
# encoding: utf-8
2+
3+
4+
import os
5+
import re
6+
from datetime import datetime
7+
from typing import Tuple
8+
9+
import requests
10+
from responses import Response
11+
from tqdm import tqdm
12+
13+
from . import session
14+
from .util import copyfileobj
15+
16+
__data_endpoint__ = "data"

# The configured base URL may end with "/" (the package default does); strip it
# so the endpoint URL does not contain a double slash ("<base>//data").
base_url = f"{session.params.get('api_base_url').rstrip('/')}/{__data_endpoint__}"
19+
20+
21+
class Data(object):
    """Download helpers for the ``data`` endpoint."""

    @classmethod
    def download(
        cls, uuid: str, path: str = ".", name: str = None
    ) -> Tuple[requests.Response, str]:
        """Download a single file with a GET request.

        Args:
            uuid: Identifier appended to the data endpoint URL.
            path: Directory in which the file is written.
            name: Local file name; when empty or ``None`` the UUID is used.

        Returns:
            A tuple ``(response, local_filename)``. The response has already
            been closed when this method returns.
        """
        url = f"{base_url}/{uuid}"
        local_filename = name if name else uuid
        with requests.get(url, stream=True) as r:
            total_size = int(r.headers.get("content-length", 0))
            # The bar is used as a context manager so it is closed even when
            # opening the file or copying fails.
            with tqdm(total=total_size, unit="iB", unit_scale=True) as bar:
                with open(os.path.join(path, local_filename), "wb") as f:
                    # NOTE(review): ``r.raw`` is the undecoded stream; if the
                    # server applies a content-encoding the bytes are written
                    # as received - confirm this is acceptable.
                    copyfileobj(r.raw, f, bar)
        return r, local_filename

    @classmethod
    def download_multiple(
        cls, uuid_list: list, path: str = "."
    ) -> Tuple[requests.Response, str]:
        """Download several files in one POST request.

        The local file name is taken from the ``content-disposition`` response
        header. When the header is missing or carries no ``filename=`` part, a
        timestamped ``gdc_download_<YYYYmmddHHMMSS>.tar.gz`` name is used.

        Args:
            uuid_list: Identifiers sent as the ``ids`` form field.
            path: Directory in which the file is written.

        Returns:
            A tuple ``(response, local_filename)``. The response has already
            been closed when this method returns.
        """
        with requests.post(base_url, stream=True, data={"ids": uuid_list}) as r:
            # ``.get`` + ``re.search`` so that a missing header or a header
            # without a filename reaches the fallback instead of raising
            # KeyError / IndexError.
            disposition = r.headers.get("content-disposition", "")
            match = re.search("filename=(.+)", disposition)
            if match:
                local_filename = match.group(1)
            else:
                timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
                local_filename = f"gdc_download_{timestamp}.tar.gz"
            total_size = int(r.headers.get("content-length", 0))
            with tqdm(total=total_size, unit="iB", unit_scale=True) as bar:
                with open(os.path.join(path, local_filename), "wb") as f:
                    for data in r.iter_content(chunk_size=1024):
                        size = f.write(data)
                        bar.update(size)
        return r, local_filename

gdcapiwrapper/util.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# encoding: utf-8
2+
3+
4+
def copyfileobj(fsrc, fdst, progressbar, length=16 * 1024):
    """Copy data from file-like object fsrc to file-like object fdst.

    Args:
        fsrc: Source object exposing ``read(size)``.
        fdst: Destination object exposing ``write(data)``.
        progressbar: Object exposing ``update(n)``; it is called once per
            chunk with the size of the chunk that was just written.
        length: Maximum number of bytes requested from ``fsrc`` per read.
    """
    while True:
        buf = fsrc.read(length)
        if not buf:
            break
        fdst.write(buf)
        # Report progress only after the chunk has been written, and never
        # with a zero-length update at end of stream.
        progressbar.update(len(buf))

pyproject.toml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
[tool.black]
2+
line-length = 88
3+
include = '\.pyi?$'
4+
exclude = '''
5+
/(
6+
\.git
7+
| \.hg
8+
| \.mypy_cache
9+
| \.tox
10+
| \.venv
11+
| _build
12+
| buck-out
13+
| build
14+
| dist
15+
)/
16+
'''

0 commit comments

Comments
 (0)