
Commit b5e49fa

FelixAbrahamsson committed
Fix linting errors/warnings and remove unused imports
1 parent 019b2bc commit b5e49fa

3 files changed: +18 −20 lines changed


datastream/dataset.py

Lines changed: 7 additions & 8 deletions
@@ -4,8 +4,6 @@
     Tuple, Callable, Any, Union, List, TypeVar, Generic, Dict, Optional
 )
 from pathlib import Path
-from functools import partial
-from itertools import repeat, chain
 import numpy as np
 import pandas as pd
 import torch
@@ -15,9 +13,10 @@
 T = TypeVar('T')
 R = TypeVar('R')
 
+
 class Dataset(BaseModel, torch.utils.data.Dataset, Generic[T]):
     '''
-    A ``Dataset[T]`` is a mapping that allows pipelining of functions in a
+    A ``Dataset[T]`` is a mapping that allows pipelining of functions in a
     readable syntax returning an item of type ``T``.
 
     >>> from datastream import Dataset
@@ -67,8 +66,8 @@ def from_subscriptable(subscriptable) -> Dataset:
         Create ``Dataset`` based on subscriptable i.e. implements
         ``__getitem__`` and ``__len__``. Should only be used for simple
         examples as a ``Dataset`` created with this method does not support
-        methods that require a source dataframe (i.e. :func:``Dataset.split``
-        and :func:``Dataset.subset``)
+        methods that require a source dataframe (i.e. :func:`Dataset.split`
+        and :func:`Dataset.subset`)
         '''
 
         return (
@@ -138,7 +137,7 @@ def map(
     def subset(
         self, mask_fn: Callable[
             [pd.DataFrame], Union[pd.Series, np.array, List[bool]]
-        ]
+        ]
     ) -> Dataset[T]:
         '''
         Select a subset of the dataset using a function that receives the
@@ -191,7 +190,7 @@ def split(
         safely use a seed instead of a filepath.
 
         Saved splits can continue from the old split and handles:
-
+
         * New examples
         * Changing test size
         * Adapt after removing examples from dataset
@@ -441,7 +440,7 @@ def test_combine_dataset():
         )
         for index, inner_indices in enumerate(indices)
     )
-
+
 
 def test_split_dataset():
     dataset = Dataset.from_dataframe(pd.DataFrame(dict(

datastream/datastream.py

Lines changed: 4 additions & 3 deletions
@@ -5,9 +5,7 @@
     Dict,
     List,
     Callable,
-    Any,
     Optional,
-    Iterable,
     TypeVar,
     Generic,
     Union,
@@ -28,6 +26,7 @@
 T = TypeVar('T')
 R = TypeVar('R')
 
+
 class Datastream(BaseModel, Generic[T]):
     '''
     ``Datastream[T]`` combines a ``Dataset[T]`` and a sampler into a stream of
@@ -294,7 +293,9 @@ def ZippedMergedDatastream():
         (ZippedMergedDatastream(), 5),
     ])
 
-    it = iter(datastream.data_loader(batch_size=16, n_batches_per_epoch=10))
+    it = iter(datastream.data_loader(
+        batch_size=16, n_batches_per_epoch=10
+    ))
     for _ in range(10):
         print(next(it))
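
The rewrapped test call above shows the typical way a ``Datastream`` is consumed. A rough, self-contained sketch of that pattern, assuming the constructor simply pairs a ``Dataset`` with a default sampler as the class docstring suggests (nothing here beyond ``data_loader``'s arguments comes from this commit):

from datastream import Dataset, Datastream

# Assumed: Datastream(dataset) wraps the dataset with a default sampler,
# per the docstring "combines a Dataset[T] and a sampler into a stream".
dataset = Dataset.from_subscriptable(list(range(100)))
datastream = Datastream(dataset)

# Same call pattern as the test above: a loader yielding
# n_batches_per_epoch batches of batch_size examples per epoch.
it = iter(datastream.data_loader(
    batch_size=16, n_batches_per_epoch=10
))
for _ in range(10):
    batch = next(it)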

datastream/samplers.py

Lines changed: 7 additions & 9 deletions
@@ -1,13 +1,10 @@
 from __future__ import annotations
 from pydantic import BaseModel
-from typing import Tuple, Dict, Callable, Any, Optional, Iterable
+from typing import Tuple, Callable, Iterable
 from functools import partial
-from itertools import repeat, chain, islice
-from collections import namedtuple
-import numpy as np
-import pandas as pd
+from itertools import chain
 import torch
-from datastream.tools import starcompose, star, repeat_map_chain
+from datastream.tools import starcompose, repeat_map_chain
 from datastream import Dataset
 
 
@@ -47,7 +44,7 @@ def weight(self, index):
 
     def update_weights_(self, function):
         self.sampler.weights[:] = function(self.sampler.weights)
-
+
     def update_example_weight_(self, weight, index):
         if hasattr(weight, 'item'):
             weight = weight.item()
@@ -213,7 +210,9 @@ def update_weights_(self, function):
 
     def update_example_weight_(self, weights, index):
         inner_indices = self.from_mapping(index)
-        for sampler, weight, inner_index in zip(self.samplers, weights, inner_indices):
+        for sampler, weight, inner_index in zip(
+            self.samplers, weights, inner_indices
+        ):
             sampler.update_example_weight_(
                 weight, inner_index
             )
@@ -318,7 +317,6 @@ def load_state_dict(self, state_dict):
         sampler.load_state_dict(state_dict)
 
 
-
 class RepeatSampler(BaseModel, torch.utils.data.Sampler):
     sampler: torch.utils.data.Sampler
     length: int
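
``RepeatSampler`` above is declared with just a wrapped ``sampler`` and a ``length``. As a generic, self-contained illustration of that idea only (not the library's implementation, which is pydantic- and torch-based):

from itertools import islice

def repeat_indices(sampler, length):
    # Yield indices from `sampler` over and over, truncated to `length`.
    def repeated():
        while True:
            yield from sampler
    return islice(repeated(), length)

# A short sampler stretched to 10 draws:
print(list(repeat_indices([0, 1, 2], length=10)))
# [0, 1, 2, 0, 1, 2, 0, 1, 2, 0]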
