Merge pull request #1 from histolab/basic-api-request-implementation

ernestoarbitrio · web-flow · commit 62ee1a2052b1 · 2020-06-26T00:35:14.000+02:00
Basic api request implementation
diff --git a/.coveragerc b/.coveragerc
@@ -0,0 +1,31 @@
+# .coveragerc to control coverage.py
+[run]
+branch = True
+omit =
+    */site-packages/*
+    */distutils/*
+
+[report]
+show_missing = true
+precision = 2
+# Regexes for lines to exclude from consideration
+exclude_lines =
+    # Have to re-enable the standard pragma
+    pragma: no cover
+
+    # Don't complain about missing debug-only code:
+    def __repr__
+    if self\.debug
+
+    # Don't complain if tests don't hit defensive assertion code:
+    raise AssertionError
+    raise NotImplementedError
+
+    # Don't complain if non-runnable code isn't run:
+    if 0:
+    if __name__ == .__main__.:
+
+ignore_errors = True
+
+[html]
+directory = coverage_html_report
diff --git a/.flake8 b/.flake8
@@ -0,0 +1,34 @@
+# Recommended flake8 settings while editing this project; we use Black for the final say in how code is formatted
+#
+# pip install flake8 flake8-bugbear
+#
+# This will warn/error on things that black does not fix, on purpose.
+
+[flake8]
+# max line length is set to 88 in black, here it is set to 80 and we enable bugbear's B950 warning, which is:
+#
+# B950: Line too long. This is a pragmatic equivalent of pycodestyle’s E501: it
+# considers “max-line-length” but only triggers when the value has been
+# exceeded by more than 10%. You will no longer be forced to reformat code due
+# to the closing parenthesis being one character too far to satisfy the linter.
+# At the same time, if you do significantly violate the line length, you will
+# receive a message that states what the actual limit is. This is inspired by
+# Raymond Hettinger’s “Beyond PEP 8” talk and highway patrol not stopping you
+# if you drive < 5mph too fast. Disable E501 to avoid duplicate warnings.
+
+exclude = src/histolab/filter.py # remove this line when filter is ok
+
+max-line-length = 80
+max-complexity = 12
+select = E,F,W,C,B,B9
+ignore =
+    # E123 closing bracket does not match indentation of opening bracket’s line
+    E123
+    # E203 whitespace before ‘:’ (Not PEP8 compliant, Python Black)
+    E203
+    # E501 line too long (82 > 79 characters) (replaced by B950 from flake8-bugbear, https://github.com/PyCQA/flake8-bugbear)
+    E501
+    # W503 line break before binary operator (Not PEP8 compliant, Python Black)
+    W503
+    # C901 function too complex - since many of this project's functions are too complex with a lot of if branching
+    C901
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,6 @@
+#IDE
+.idea
+
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
@@ -50,23 +53,12 @@ coverage.xml
 *.py,cover
 .hypothesis/
 .pytest_cache/
+coverage_html_report
 
 # Translations
 *.mo
 *.pot
 
-# Django stuff:
-*.log
-local_settings.py
-db.sqlite3
-db.sqlite3-journal
-
-# Flask stuff:
-instance/
-.webassets-cache
-
-# Scrapy stuff:
-.scrapy
 
 # Sphinx documentation
 docs/_build/
@@ -90,13 +82,11 @@ ipython_config.py
 #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 #   install all needed dependencies.
 #Pipfile.lock
+Pipfile
 
 # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 __pypackages__/
 
-# Celery stuff
-celerybeat-schedule
-celerybeat.pid
 
 # SageMath parsed files
 *.sage.py
@@ -110,6 +100,7 @@ ENV/
 env.bak/
 venv.bak/
 
+
 # Spyder project settings
 .spyderproject
 .spyproject
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -0,0 +1,10 @@
+repos:
+-   repo: https://github.com/ambv/black
+    rev: stable
+    hooks:
+    - id: black
+      language_version: python3.7
+-   repo: https://gitlab.com/pycqa/flake8
+    rev: 3.7.9
+    hooks:
+    - id: flake8
diff --git a/.travis.yml b/.travis.yml
@@ -0,0 +1,24 @@
+# Wire up travis
+language: python
+sudo: false
+
+python:
+  - "3.6"
+  - "3.7"
+
+
+# command to install dependencies
+install:
+  - pip install -r requirements.txt
+  - pip install -r requirements-dev.txt
+
+# command to run tests
+script:
+  - flake8 .
+  - pytest --cov=gdcapiwrapper
+
+after_success:
+  - coveralls
+
+notifications:
+  slack: mpba:fBrjUWlUSa3XKeCA8zFsTFu2
diff --git a/README.md b/README.md
@@ -1,2 +1,12 @@
-# gdc-api-wrapper
 Genomic Data Commons API wrapper
+================================
+A simple Python wrapper for the [GDC Application Programming Interface (API)](https://portal.gdc.cancer.gov/)
+
+[![Build Status](https://travis-ci.com/histolab/gdc-api-wrapper.svg?branch=master)](https://travis-ci.com/histolab/gdc-api-wrapper)
+[![Coverage Status](https://coveralls.io/repos/github/histolab/gdc-api-wrapper/badge.svg?branch=master)](https://coveralls.io/github/histolab/gdc-api-wrapper?branch=master)
+
+The GDC API drives the GDC Data and Submission Portals and provides programmatic access to GDC functionality. This includes searching for, downloading, and submitting data and metadata.
+
+## Features implemented
+- Downloading a Single File using GET 
+- Downloading Multiple Files using POST
diff --git a/gdcapiwrapper/__init__.py b/gdcapiwrapper/__init__.py
@@ -0,0 +1,42 @@
+# encoding: utf-8
+
+"""Package initialisation for ``gdcapiwrapper``.
+
+Importing this package reads its settings from the environment, sends one
+HTTP GET to ``<base url>/status`` and raises ``APIBaseURLStatusError`` unless
+the answer is 200, then creates the ``session`` object that ``data`` imports.
+"""
+
+import os
+
+import requests
+
+# The token is read from ``GDC_API_TOKEN``; the misspelled ``GCC_API_TOKEN``
+# variable is still honoured as a fallback so existing setups keep working.
+GDC_API_TOKEN = os.environ.get("GDC_API_TOKEN", os.environ.get("GCC_API_TOKEN"))
+GDC_API_BASE_URL = os.environ.get("GDC_API_BASE_URL", "https://api.gdc.cancer.gov/")
+
+
+class APIBaseURLStatusError(Exception):
+    """Raised on import when the status endpoint does not answer 200."""
+
+
+class APITokenMissingError(Exception):
+    """Error type for a missing API token; not raised in this module."""
+
+
+# Strip a trailing slash so the default base URL does not produce ``//status``.
+request = requests.get(f"{GDC_API_BASE_URL.rstrip('/')}/status")
+if request.status_code != 200:
+    raise APIBaseURLStatusError(
+        f"{GDC_API_BASE_URL} status: {request.status_code}. "
+        "The resource seems to be unavailable"
+    )
+
+session = requests.Session()
+# NOTE: ``session.params`` holds the package settings; the ``data`` module
+# reads ``api_base_url`` back from it.
+session.params = {"api_token": GDC_API_TOKEN, "api_base_url": GDC_API_BASE_URL}
+
+# Imported last because ``data`` imports ``session`` from this package.
+from .data import Data  # isort:skip # noqa
diff --git a/gdcapiwrapper/data.py b/gdcapiwrapper/data.py
@@ -0,0 +1,87 @@
+# encoding: utf-8
+
+"""Download helpers for the ``data`` endpoint of the GDC API."""
+
+import os
+import re
+from datetime import datetime
+from typing import Optional, Tuple
+
+import requests
+# NOTE(review): the methods below return objects produced by ``requests``, yet
+# this ``Response`` comes from the ``responses`` package - confirm the type.
+from responses import Response
+from tqdm import tqdm
+
+from . import session
+from .util import copyfileobj
+
+__data_endpoint__ = "data"
+
+# Trailing slashes are stripped so base URL and endpoint join with one "/".
+base_url = f"{session.params.get('api_base_url').rstrip('/')}/{__data_endpoint__}"
+
+
+class Data(object):
+    """Download files from the ``data`` endpoint of the configured API."""
+
+    @classmethod
+    def download(
+        cls, uuid: str, path: str = ".", name: Optional[str] = None
+    ) -> Tuple[Response, str]:
+        """Download one file with a GET request and write it into ``path``.
+
+        The HTTP status is not checked; whatever body the server sends is
+        written to disk, so callers should inspect the returned response.
+
+        Args:
+            uuid: Identifier appended to the data endpoint URL.
+            path: Directory the file is written into.
+            name: File name to use; ``uuid`` is used when this is empty.
+
+        Returns:
+            The response object and the file name, without ``path``.
+        """
+        url = f"{base_url}/{uuid}"
+        local_filename = name if name else uuid
+        with requests.get(url, stream=True) as r:
+            total_size = int(r.headers.get("content-length", 0))
+            # Context manager so the progress bar is closed even on errors.
+            with tqdm(total=total_size, unit="iB", unit_scale=True) as bar:
+                with open(os.path.join(path, local_filename), "wb") as f:
+                    copyfileobj(r.raw, f, bar)
+        return r, local_filename
+
+    @classmethod
+    def download_multiple(
+        cls, uuid_list: list, path: str = "."
+    ) -> Tuple[Response, str]:
+        """Download several files with one POST request into ``path``.
+
+        The file name comes from the ``content-disposition`` response header.
+        When that header is absent or carries no ``filename=`` part, a
+        timestamped ``gdc_download_<YYYYmmddHHMMSS>.tar.gz`` name is used.
+
+        Args:
+            uuid_list: Identifiers sent as ``ids`` in the POST data.
+            path: Directory the file is written into.
+
+        Returns:
+            The response object and the file name, without ``path``.
+        """
+        with requests.post(base_url, stream=True, data={"ids": uuid_list}) as r:
+            # A missing header, or one without ``filename=``, must fall through
+            # to the generated name rather than raise KeyError/IndexError.
+            disposition = r.headers.get("content-disposition", "")
+            match = re.search("filename=(.+)", disposition)
+            local_filename = (
+                match.group(1)
+                if match
+                else f"gdc_download_{datetime.now().strftime('%Y%m%d%H%M%S')}.tar.gz"
+            )
+            total_size = int(r.headers.get("content-length", 0))
+            with tqdm(total=total_size, unit="iB", unit_scale=True) as bar:
+                with open(os.path.join(path, local_filename), "wb") as f:
+                    for chunk in r.iter_content(chunk_size=1024):
+                        bar.update(f.write(chunk))
+        return r, local_filename
diff --git a/gdcapiwrapper/util.py b/gdcapiwrapper/util.py
@@ -0,0 +1,21 @@
+# encoding: utf-8
+
+
+def copyfileobj(fsrc, fdst, progressbar, length=16 * 1024):
+    """Copy ``fsrc`` to ``fdst`` chunk by chunk, updating ``progressbar``.
+
+    ``progressbar.update`` is called with the size of every chunk read,
+    including the final empty one that ends the copy.
+
+    Args:
+        fsrc: Object whose ``read(length)`` returns the next chunk.
+        fdst: Object whose ``write`` receives each non-empty chunk.
+        progressbar: Object whose ``update`` receives each chunk length.
+        length: Chunk size requested on every read.
+    """
+    chunk = fsrc.read(length)
+    progressbar.update(len(chunk))
+    while chunk:
+        fdst.write(chunk)
+        chunk = fsrc.read(length)
+        progressbar.update(len(chunk))
diff --git a/pyproject.toml b/pyproject.toml
@@ -0,0 +1,16 @@
+[tool.black]
+line-length = 88
+include = '\.pyi?$'
+exclude = '''
+/(
+    \.git
+  | \.hg
+  | \.mypy_cache
+  | \.tox
+  | \.venv
+  | _build
+  | buck-out
+  | build
+  | dist
+)/
+'''
diff --git a/requirements-dev.txt b/requirements-dev.txt
@@ -0,0 +1,7 @@
+pytest
+pytest-cov==2.8.1
+coveralls
+coverage
+flake8
+ipdb
+isort
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,3 @@
+requests[security]
+responses
+tqdm
diff --git a/setup.cfg b/setup.cfg
@@ -0,0 +1,21 @@
+[tool:pytest]
+looponfailroots = tests
+python_classes = Test Describe
+python_files = test_*.py
+python_functions = test_ it_ they_ but_ and_it_ or_it
+testpaths =
+    tests
+
+[flake8]
+ignore =
+    E203  ; whitespace before ':' like list[len(x) :] as Black prefers it
+    W503  ; line break before binary operator (e.g. '+', 'and', '|')
+max-line-length = 88
+
+[settings]
+multi_line_output=3
+include_trailing_comma=True
+force_grid_wrap=0
+use_parentheses=True
+line_length=88
+not_skip = __init__.py
diff --git a/tests/__init__.py b/tests/__init__.py
diff --git a/tests/mockserver.py b/tests/mockserver.py
@@ -0,0 +1,64 @@
+# encoding: utf-8
+
+"""Small local HTTP server that stands in for the remote API in tests."""
+
+import json
+import re
+import socket
+import time
+from http.server import BaseHTTPRequestHandler, HTTPServer
+from threading import Thread
+
+import requests
+
+
+class MockServerRequestHandler(BaseHTTPRequestHandler):
+    """Answer matching GET and POST requests with 200 and an empty JSON list."""
+
+    # NOTE: the bare ``/`` alternative matches any path containing a slash.
+    API_PATTERN = re.compile(r"/data|/")
+
+    def _send_empty_json(self, extra_headers=()):
+        """Send a 200 response with body ``[]`` and any ``extra_headers``."""
+        # Add response status code.
+        self.send_response(requests.codes.ok)
+
+        # Add response headers.
+        self.send_header("Content-Type", "application/json; charset=utf-8")
+        for header, value in extra_headers:
+            self.send_header(header, value)
+        self.end_headers()
+
+        # Add response content.
+        self.wfile.write(json.dumps([]).encode("utf-8"))
+
+    def do_GET(self):
+        """Reply to a GET whose path matches ``API_PATTERN``."""
+        if self.API_PATTERN.search(self.path):
+            self._send_empty_json()
+
+    def do_POST(self):
+        """Reply to a matching POST and advertise ``fake.gzip`` as file name."""
+        if self.API_PATTERN.search(self.path):
+            self._send_empty_json(
+                [("content-disposition", "attachment; filename=fake.gzip")]
+            )
+
+
+def get_free_port():
+    """Return a TCP port number that was free on localhost when probed."""
+    # Binding to port 0 lets the OS pick the port; the ``with`` block closes
+    # the socket even if ``bind`` raises.
+    with socket.socket(socket.AF_INET, type=socket.SOCK_STREAM) as s:
+        s.bind(("localhost", 0))
+        return s.getsockname()[1]
+
+
+def start_mock_server(port):
+    """Serve ``MockServerRequestHandler`` on localhost in a daemon thread."""
+    # NOTE(review): the reason for this one-second pause is not visible here.
+    time.sleep(1)
+    mock_server = HTTPServer(("localhost", port), MockServerRequestHandler)
+    # ``daemon=True`` replaces ``Thread.setDaemon``, deprecated in Python 3.10.
+    mock_server_thread = Thread(target=mock_server.serve_forever, daemon=True)
+    mock_server_thread.start()
diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py
diff --git a/tests/unit/test_data.py b/tests/unit/test_data.py