100% test coverage on wr.torch

igorborgest · igorborgest · commit 910e3b69a467 · 2020-04-27T20:11:08.000-03:00
diff --git a/.github/workflows/static-checking.yml b/.github/workflows/static-checking.yml
@@ -24,12 +24,8 @@ jobs:
         uses: actions/setup-python@v1
         with:
           python-version: ${{ matrix.python-version }}
-      - name: Install dependencies
-        run: |
-          python -m pip install --upgrade pip
-          pip install -r requirements.txt
-          pip install -r requirements-dev.txt
-          pip install -r requirements-torch.txt
+      - name: Setup Environment
+        run: ./setup-dev-env.sh
       - name: CloudFormation Lint
         run: cfn-lint -t testing/cloudformation.yaml
       - name: Documentation Lint
diff --git a/awswrangler/torch.py b/awswrangler/torch.py
@@ -4,7 +4,6 @@
 import os
 import pathlib
 import re
-import tarfile
 from collections.abc import Iterable
 from io import BytesIO
 from typing import Any, Callable, Iterator, List, Optional, Tuple, Union
@@ -64,12 +63,12 @@ def _fetch_data(self, path: str) -> Any:
     def _load_data(data: io.BytesIO, path: str) -> Any:
         if path.endswith(".pt"):
             data = torch.load(data)
-        elif path.endswith(".tar.gz") or path.endswith(".tgz"):
-            tarfile.open(fileobj=data)
+        elif path.endswith(".tar.gz") or path.endswith(".tgz"):  # pragma: no cover
             raise NotImplementedError("Tar loader not implemented!")
+            # tarfile.open(fileobj=data)
             # tar = tarfile.open(fileobj=data)
             # for member in tar.getmembers():
-        else:
+        else:  # pragma: no cover
             raise NotImplementedError()
 
         return data
@@ -86,10 +85,10 @@ def __getitem__(self, index):
     def __len__(self):
         return len(self._paths)
 
-    def _data_fn(self, data) -> Any:
+    def _data_fn(self, data) -> Any:  # pragma: no cover
         raise NotImplementedError()
 
-    def _label_fn(self, path: str) -> Any:
+    def _label_fn(self, path: str) -> Any:  # pragma: no cover
         raise NotImplementedError()
 
 
@@ -100,7 +99,7 @@ def _label_fn(self, path: str) -> torch.Tensor:
         label = int(re.findall(r"/(.*?)=(.*?)/", path)[-1][1])
         return torch.tensor([label])  # pylint: disable=not-callable
 
-    def _data_fn(self, data) -> Any:
+    def _data_fn(self, data) -> Any:  # pragma: no cover
         raise NotImplementedError()
 
 
@@ -383,9 +382,8 @@ def __iter__(self) -> Union[Iterator[torch.Tensor], Iterator[Tuple[torch.Tensor,
                 pass
             elif isinstance(data, Iterable) and all([isinstance(d, torch.Tensor) for d in data]):
                 data = zip(*data)
-            else:
+            else:  # pragma: no cover
                 raise NotImplementedError(f"ERROR: Type: {type(data)} has not been implemented!")
-
             for d in data:
                 yield d
 
@@ -436,7 +434,7 @@ def __init__(
     def __iter__(self) -> Union[Iterator[torch.Tensor], Iterator[Tuple[torch.Tensor, torch.Tensor]]]:
         """Iterate over the Dataset."""
         if torch.utils.data.get_worker_info() is not None:  # type: ignore
-            raise NotImplementedError()
+            raise NotImplementedError()  # pragma: no cover
         db._validate_engine(con=self._con)  # pylint: disable=protected-access
         with self._con.connect() as con:
             cursor: Any = con.execute(self._sql)
diff --git a/testing/test_awswrangler/test_data_lake.py b/testing/test_awswrangler/test_data_lake.py
@@ -708,7 +708,7 @@ def test_parquet_validate_schema(bucket, database):
     df2 = pd.DataFrame({"id2": [1, 2, 3], "val": ["foo", "boo", "bar"]})
     path_file2 = f"s3://{bucket}/test_parquet_file_validate/1.parquet"
     wr.s3.to_parquet(df=df2, path=path_file2)
-    wr.s3.wait_objects_exist(paths=[path_file2])
+    wr.s3.wait_objects_exist(paths=[path_file2], use_threads=False)
     df3 = wr.s3.read_parquet(path=path, validate_schema=False)
     assert len(df3.index) == 6
     assert len(df3.columns) == 3
diff --git a/testing/test_awswrangler/test_torch.py b/testing/test_awswrangler/test_torch.py
@@ -84,7 +84,8 @@ def test_torch_sql(parameters, db_type, chunksize):
 
 @pytest.mark.parametrize("chunksize", [None, 1, 10])
 @pytest.mark.parametrize("db_type", ["mysql", "redshift", "postgresql"])
-def test_torch_sql_label(parameters, db_type, chunksize):
+@pytest.mark.parametrize("label_col", [2, "c"])
+def test_torch_sql_label(parameters, db_type, chunksize, label_col):
     schema = parameters[db_type]["schema"]
     table = f"test_torch_sql_label_{db_type}_{str(chunksize).lower()}"
     engine = wr.catalog.get_engine(connection=f"aws-data-wrangler-{db_type}")
@@ -99,7 +100,9 @@ def test_torch_sql_label(parameters, db_type, chunksize):
         chunksize=None,
         method=None,
     )
-    ts = list(wr.torch.SQLDataset(f"SELECT * FROM {schema}.{table}", con=engine, chunksize=chunksize, label_col=2))
+    ts = list(
+        wr.torch.SQLDataset(f"SELECT * FROM {schema}.{table}", con=engine, chunksize=chunksize, label_col=label_col)
+    )
     assert torch.all(ts[0][0].eq(torch.tensor([1.0, 4.0])))
     assert torch.all(ts[0][1].eq(torch.tensor([7], dtype=torch.long)))
     assert torch.all(ts[1][0].eq(torch.tensor([2.0, 5.0])))