Linting with flake8 (#111)

mistercrunch · web-flow · commit e8d978b4544c · 2018-01-05T15:51:44.000-08:00
diff --git a/.flake8 b/.flake8
@@ -0,0 +1,10 @@
+[flake8]
+application-import-names = pydruid
+exclude =
+    docs
+    env
+    tests
+    .eggs
+    build
+import-order-style = google
+max-line-length = 90
diff --git a/.gitignore b/.gitignore
@@ -11,3 +11,5 @@ __pycache__
 \#*#
 .#*
 *~
+.tox
+env
diff --git a/.travis.yml b/.travis.yml
@@ -1,9 +1,10 @@
 language: python
 python:
-  - "2.6"
   - "2.7"
-  - "3.2"
-  - "3.3"
   - "3.4"
-install: "python setup.py install"
+  - "3.6"
+install:
+  - pip install flake8
+  - python setup.py install
+before_script: flake8
 script: py.test
diff --git a/pydruid/async_client.py b/pydruid/async_client.py
@@ -29,10 +29,12 @@
 
 class AsyncPyDruid(BaseDruidClient):
     """
-    Asynchronous PyDruid client which mirrors functionality of the synchronous PyDruid, but it executes queries
+    Asynchronous PyDruid client which mirrors functionality of the synchronous
+    PyDruid, but it executes queries
     asynchronously (using an asynchronous http client from Tornado framework).
 
-    Returns Query objects that can be used for exporting query results into TSV files or pandas.DataFrame objects
+    Returns Query objects that can be used for exporting query results into
+    TSV files or pandas.DataFrame objects
     for subsequent analysis.
 
     :param str url: URL of Broker node in the Druid cluster
@@ -83,7 +85,8 @@ class AsyncPyDruid(BaseDruidClient):
 
             >>> print top.result
             >>> [{'timestamp': '2013-10-04T00:00:00.000Z',
-                'result': [{'count': 7.0, 'user_name': 'user_1'}, {'count': 6.0, 'user_name': 'user_2'}]}]
+                'result': [{'count': 7.0, 'user_name': 'user_1'},
+                {'count': 6.0, 'user_name': 'user_2'}]}]
 
             >>> df = top.export_pandas()
             >>> print df
@@ -100,7 +103,8 @@ def _post(self, query):
         http_client = AsyncHTTPClient()
         try:
             headers, querystr, url = self._prepare_url_headers_and_body(query)
-            response = yield http_client.fetch(url, method='POST', headers=headers, body=querystr)
+            response = yield http_client.fetch(
+                url, method='POST', headers=headers, body=querystr)
         except HTTPError as e:
             self.__handle_http_error(e, query)
         else:
diff --git a/pydruid/client.py b/pydruid/client.py
diff --git a/pydruid/query.py b/pydruid/query.py
@@ -27,8 +27,10 @@
 
 class Query(collections.MutableSequence):
     """
-    Query objects are produced by PyDruid clients and can be used for exporting query results into TSV files or
-    pandas.DataFrame objects for subsequent analysis. They also hold information about the issued query.
+    Query objects are produced by PyDruid clients and can be used for
+    exporting query results into TSV files or
+    pandas.DataFrame objects for subsequent analysis. They also hold
+    information about the issued query.
 
     Query acts as a wrapper over raw result list of dictionaries.
 
@@ -100,7 +102,8 @@ def export_tsv(self, dest_path):
             header.append('timestamp')
             header.append('version')
         else:
-            raise NotImplementedError('TSV export not implemented for query type: {0}'.format(self.query_type))
+            raise NotImplementedError(
+                'TSV export not implemented for query type: {0}'.format(self.query_type))
 
         w.writerow(header)
 
@@ -177,7 +180,9 @@ def export_pandas(self):
                 for item in self.result:
                     nres += [e.get('event') for e in item['result'].get('events')]
             else:
-                raise NotImplementedError('Pandas export not implemented for query type: {0}'.format(self.query_type))
+                raise NotImplementedError(
+                    'Pandas export not implemented for query '
+                    'type: {0}'.format(self.query_type))
 
             df = pandas.DataFrame(nres)
             return df
@@ -210,18 +215,22 @@ def parse_datasource(datasource, query_type):
         """
         Parse an input datasource object into valid dictionary
 
-        Input can be a string, in which case it is simply returned, or a list, when it is turned into
-        a UNION datasource.
+        Input can be a string, in which case it is simply returned, or a
+        list, when it is turned into a UNION datasource.
 
         :param datasource: datasource parameter
         :param string query_type: query type
         :raise ValueError: if input is not string or list of strings
         """
         if not (
                     isinstance(datasource, six.string_types) or
-                    (isinstance(datasource, list) and all([isinstance(x, six.string_types) for x in datasource]))
+                    (
+                        isinstance(datasource, list) and
+                        all([isinstance(x, six.string_types) for x in datasource])
+                    )
                 ):
-            raise ValueError('Datasource definition not valid. Must be string or list of strings')
+            raise ValueError(
+                'Datasource definition not valid. Must be string or list of strings')
         if isinstance(datasource, six.string_types):
             return datasource
         else:
@@ -232,8 +241,9 @@ def validate_query(query_type, valid_parts, args):
         """
         Validate the query parts so only allowed objects are sent.
 
-        Each query type can have an optional 'context' object attached which is used to set certain
-        query context settings, etc. timeout or priority. As each query can have this object, there's
+        Each query type can have an optional 'context' object attached which
+        is used to set certain query context settings, etc. timeout or
+        priority. As each query can have this object, there's
         no need for it to be sent - it might as well be added here.
 
         :param string query_type: a type of query
@@ -265,7 +275,8 @@ def build_query(self, query_type, args):
             if key == 'aggregations':
                 query_dict[key] = build_aggregators(val)
             elif key == 'post_aggregations':
-                query_dict['postAggregations'] = Postaggregator.build_post_aggregators(val)
+                query_dict['postAggregations'] = \
+                    Postaggregator.build_post_aggregators(val)
             elif key == 'context':
                 query_dict['context'] = val
             elif key == 'datasource':
@@ -290,8 +301,10 @@ def build_query(self, query_type, args):
 
     def topn(self, args):
         """
-        A TopN query returns a set of the values in a given dimension, sorted by a specified metric. Conceptually, a
-        topN can be thought of as an approximate GroupByQuery over a single dimension with an Ordering spec. TopNs are
+        A TopN query returns a set of the values in a given dimension,
+        sorted by a specified metric. Conceptually, a
+        topN can be thought of as an approximate GroupByQuery over a
+        single dimension with an Ordering spec. TopNs are
         faster and more resource efficient than GroupBy for this use case.
 
         :param dict args: dict of arguments
@@ -310,7 +323,8 @@ def topn(self, args):
 
     def timeseries(self, args):
         """
-        A timeseries query returns the values of the requested metrics (in aggregate) for each timestamp.
+        A timeseries query returns the values of the requested metrics
+        (in aggregate) for each timestamp.
 
         :param dict args: dict of args
 
@@ -327,7 +341,8 @@ def timeseries(self, args):
 
     def groupby(self, args):
         """
-        A group-by query groups a results set (the requested aggregate metrics) by the specified dimension(s).
+        A group-by query groups a results set (the requested aggregate
+        metrics) by the specified dimension(s).
 
         :param dict args: dict of args
 
diff --git a/pydruid/utils/aggregators.py b/pydruid/utils/aggregators.py
@@ -18,8 +18,13 @@
 from .filters import Filter
 
 
-def thetasketch(raw_column, isinputthetasketch = False, size = 16384):
-    return {"type": "thetaSketch", "fieldName": raw_column, "isInputThetaSketch": isinputthetasketch, "size": size}
+def thetasketch(raw_column, isinputthetasketch=False, size=16384):
+    return {
+        "type": "thetaSketch",
+        "fieldName": raw_column,
+        "isInputThetaSketch": isinputthetasketch,
+        "size": size,
+    }
 
 
 def min(raw_metric):
@@ -79,12 +84,16 @@ def filtered(filter, agg):
             "filter": Filter.build_filter(filter),
             "aggregator": agg}
 
+
 def javascript(columns_list, fn_aggregate, fn_combine, fn_reset):
-    return {"type": "javascript",
-            "fieldNames": columns_list,
-            "fnAggregate":fn_aggregate,
-            "fnCombine":fn_combine,
-            "fnReset":fn_reset}
+    return {
+        "type": "javascript",
+        "fieldNames": columns_list,
+        "fnAggregate": fn_aggregate,
+        "fnCombine": fn_combine,
+        "fnReset": fn_reset,
+    }
+
 
 def build_aggregators(agg_input):
     return [_build_aggregator(name, kwargs)
diff --git a/pydruid/utils/filters.py b/pydruid/utils/filters.py
@@ -128,7 +128,8 @@ def __eq__(self, func):
 
 class Bound(Filter):
     """
-    Bound filter can be used to filter by comparing dimension values to an upper value or/and a lower value. 
+    Bound filter can be used to filter by comparing dimension values to an
+    upper value or/and a lower value.
 
     :ivar str dimension: Dimension to filter on.
     :ivar str lower: Lower bound.
@@ -137,9 +138,11 @@ class Bound(Filter):
     :ivar bool upperStrict: Strict upper inclusion. Initial value: False
     :ivar bool alphaNumeric: Numeric comparison. Initial value: False
     """
-    def __init__(self, dimension, lower, upper, lowerStrict=False, upperStrict=False, alphaNumeric=False):
-
-        Filter.__init__(self,
+    def __init__(
+            self, dimension, lower, upper, lowerStrict=False,
+            upperStrict=False, alphaNumeric=False):
+        Filter.__init__(
+            self,
             type='bound', dimension=dimension,
             lower=lower, upper=upper,
             lowerStrict=lowerStrict, upperStrict=upperStrict,
@@ -148,14 +151,15 @@ def __init__(self, dimension, lower, upper, lowerStrict=False, upperStrict=False
 
 class Interval(Filter):
     """
-    Interval filter can be used to filter by comparing dimension(__time) values to a list of intervals.
+    Interval filter can be used to filter by comparing dimension(__time)
+    values to a list of intervals.
 
     :ivar str dimension: Dimension to filter on.
     :ivar list intervals: List of ISO-8601 intervals of data to filter out.
     """
     def __init__(self, dimension, intervals):
 
-        Filter.__init__(self,
+        Filter.__init__(
+            self,
             type='interval', dimension=dimension,
             intervals=intervals)
-
diff --git a/pydruid/utils/having.py b/pydruid/utils/having.py
@@ -22,7 +22,7 @@
 class Having:
     def __init__(self, **args):
 
-        if args['type'] in ('equalTo', 'lessThan','greaterThan'):
+        if args['type'] in ('equalTo', 'lessThan', 'greaterThan'):
             self.having = {'having': {'type': args['type'],
                                       'aggregation': args['aggregation'],
                                       'value': args['value']}}
@@ -50,12 +50,12 @@ def _combine(self, typ, x):
         if self.having['having']['type'] == typ:
             havingSpecs = self.having['having']['havingSpecs'] + [x.having['having']]
             return Having(type=typ, havingSpecs=havingSpecs)
-        elif x.having['having']['type']==typ:
+        elif x.having['having']['type'] == typ:
             havingSpecs = [self.having['having']] + x.having['having']['havingSpecs']
             return Having(type=typ, havingSpecs=havingSpecs)
         else:
             return Having(type=typ,
-                      havingSpecs=[self.having['having'], x.having['having']])
+                          havingSpecs=[self.having['having'], x.having['having']])
 
     def __and__(self, x):
         return self._combine('and', x)
diff --git a/pydruid/utils/postaggregator.py b/pydruid/utils/postaggregator.py
@@ -110,7 +110,9 @@ def __init__(self, fields, output_name=None):
 
         Postaggregator.__init__(self, None, None, name)
         self.post_aggregator = {
-                'type': 'doubleGreatest', 'name': name, 'fields': [f.post_aggregator for f in fields]}
+                'type': 'doubleGreatest',
+                'name': name,
+                'fields': [f.post_aggregator for f in fields]}
 
 
 class DoubleLeast(Postaggregator):
@@ -123,7 +125,9 @@ def __init__(self, fields, output_name=None):
 
         Postaggregator.__init__(self, None, None, name)
         self.post_aggregator = {
-                'type': 'doubleLeast', 'name': name, 'fields': [f.post_aggregator for f in fields]}
+                'type': 'doubleLeast',
+                'name': name,
+                'fields': [f.post_aggregator for f in fields]}
 
 
 class LongGreatest(Postaggregator):
@@ -136,7 +140,9 @@ def __init__(self, fields, output_name=None):
 
         Postaggregator.__init__(self, None, None, name)
         self.post_aggregator = {
-                'type': 'longGreatest', 'name': name, 'fields': [f.post_aggregator for f in fields]}
+                'type': 'longGreatest',
+                'name': name,
+                'fields': [f.post_aggregator for f in fields]}
 
 
 class LongLeast(Postaggregator):
@@ -149,7 +155,9 @@ def __init__(self, fields, output_name=None):
 
         Postaggregator.__init__(self, None, None, name)
         self.post_aggregator = {
-                'type': 'longLeast', 'name': name, 'fields': [f.post_aggregator for f in fields]}
+                'type': 'longLeast',
+                'name': name,
+                'fields': [f.post_aggregator for f in fields]}
 
 
 class ThetaSketchOp:
@@ -162,15 +170,15 @@ def __init__(self, fn, fields, name):
 
     def __or__(self, other):
         return ThetaSketchOp('UNION', self.fields(other),
-                              self.name + '_OR_' + other.name)
+                             self.name + '_OR_' + other.name)
 
     def __and__(self, other):
         return ThetaSketchOp('INTERSECT', self.fields(other),
-                              self.name + '_AND_' + other.name)
+                             self.name + '_AND_' + other.name)
 
     def __ne__(self, other):
         return ThetaSketchOp('NOT', self.fields(other),
-                              self.name + '_NOT_' + other.name)
+                             self.name + '_NOT_' + other.name)
 
     def fields(self, other):
         return [self.post_aggregator, other.post_aggregator]
@@ -194,10 +202,11 @@ def __init__(self, name):
 
 class ThetaSketchEstimate(Postaggregator):
     def __init__(self, fields):
+        field = fields.post_aggregator \
+            if type(fields) in [ThetaSketch, ThetaSketchOp] else fields
         self.post_aggregator = {
             'type': 'thetaSketchEstimate',
             'name': 'thetasketchestimate',
-            'field': fields.post_aggregator if type(fields) in [ThetaSketch, ThetaSketchOp] else fields}
+            'field': field,
+        }
         self.name = 'thetasketchestimate'
-
-
diff --git a/pydruid/utils/query_utils.py b/pydruid/utils/query_utils.py
@@ -20,7 +20,7 @@
 # this is necessary because the values in druid are not all ASCII.
 
 
-class UnicodeWriter:
+class UnicodeWriter(object):
 
     # delimiter="\t"
     def __init__(self, f, dialect="excel-tab", encoding="utf-8", **kwds):
@@ -31,7 +31,8 @@ def __init__(self, f, dialect="excel-tab", encoding="utf-8", **kwds):
     def __encode(self, data):
         data = str(data) if isinstance(data, six.integer_types) else data
         if not six.PY3:
-            data = data.encode('utf-8') if isinstance(data, unicode) else data
+            data = data.encode('utf-8') \
+                if isinstance(data, unicode) else data  # noqa
             data = data.decode('utf-8')
             return self.encoder.encode(data)
         return data
diff --git a/tests/utils/test_postaggregators.py b/tests/utils/test_postaggregators.py