Improved CSV I/O support and refactored io dict and utils.

fabiocaccamo · fabiocaccamo · commit b61eb4ea4479 · 2019-11-07T17:45:45.000+01:00
diff --git a/benedict/dicts/__init__.py b/benedict/dicts/__init__.py
@@ -52,8 +52,13 @@ def fromkeys(cls, sequence, value=None):
 
     @staticmethod
     @benediction
-    def from_base64(s, format='json', **kwargs):
-        return IODict.from_base64(s, format, **kwargs)
+    def from_base64(s, subformat='json', encoding='utf-8', **kwargs):
+        return IODict.from_base64(s, subformat=subformat, encoding=encoding, **kwargs)
+
+    @staticmethod
+    @benediction
+    def from_csv(s, columns=None, columns_row=True, **kwargs):
+        return IODict.from_csv(s, columns=columns, columns_row=columns_row, **kwargs)
 
     @staticmethod
     @benediction
diff --git a/benedict/dicts/io.py b/benedict/dicts/io.py
@@ -8,30 +8,34 @@
 class IODict(dict):
 
     def __init__(self, *args, **kwargs):
-        # if first argument is data-string,
-        # try to decode it using all decoders.
+        # if first argument is data-string try to decode it.
+        # use 'format' kwarg to specify the decoder to use, default 'json'.
         if len(args) and isinstance(args[0], string_types):
-            d = IODict._from_any_data_string(args[0], **kwargs)
+            s = args[0]
+            format = kwargs.pop('format', 'json').lower()
+            if format in ['b64', 'base64']:
+                kwargs.setdefault('subformat', 'json')
+            # decode data-string and initialize with dict data.
+            d = IODict._decode(s, format, **kwargs)
             if d and isinstance(d, dict):
-                args = list(args)
-                args[0] = d
-                args = tuple(args)
+                super(IODict, self).__init__(d)
             else:
                 raise ValueError('Invalid string data input.')
-        super(IODict, self).__init__(*args, **kwargs)
+        else:
+            super(IODict, self).__init__(*args, **kwargs)
 
     @staticmethod
-    def _decode(s, decoder, **kwargs):
+    def _decode(s, format, **kwargs):
         d = None
         try:
             content = io_util.read_content(s)
-            # decode content using the given decoder
-            data = decoder(content, **kwargs)
+            # decode content using the given format
+            data = io_util.decode(content, format, **kwargs)
             if isinstance(data, dict):
                 d = data
             elif isinstance(data, list):
                 # force list to dict
-                d = { 'values':data }
+                d = { 'values': data }
             else:
                 raise ValueError(
                     'Invalid data type: {}, expected dict or list.'.format(type(data)))
@@ -41,90 +45,66 @@ def _decode(s, decoder, **kwargs):
         return d
 
     @staticmethod
-    def _encode(d, encoder, filepath=None, **kwargs):
-        s = encoder(d, **kwargs)
+    def _encode(d, format, **kwargs):
+        filepath = kwargs.pop('filepath', None)
+        s = io_util.encode(d, format, **kwargs)
         if filepath:
             io_util.write_file(filepath, s)
         return s
 
     @staticmethod
-    def _from_any_data_string(s, **kwargs):
-        funcs = [
-            IODict.from_base64,
-            IODict.from_json,
-            IODict.from_query_string,
-            IODict.from_toml,
-            IODict.from_xml,
-            IODict.from_yaml,
-        ]
-        for f in funcs:
-            try:
-                options = kwargs.copy()
-                d = f(s, **options)
-                return d
-            except ValueError:
-                pass
+    def from_base64(s, subformat='json', encoding='utf-8', **kwargs):
+        kwargs['subformat'] = subformat
+        kwargs['encoding'] = encoding
+        return IODict._decode(s, 'base64', **kwargs)
 
     @staticmethod
-    def from_base64(s, format='json', encoding='utf-8', **kwargs):
-        kwargs['format'] = format
-        kwargs['encoding'] = encoding
-        return IODict._decode(s,
-            decoder=io_util.decode_base64, **kwargs)
+    def from_csv(s, columns=None, columns_row=True, **kwargs):
+        kwargs['columns'] = columns
+        kwargs['columns_row'] = columns_row
+        return IODict._decode(s, 'csv', **kwargs)
 
     @staticmethod
     def from_json(s, **kwargs):
-        return IODict._decode(s,
-            decoder=io_util.decode_json, **kwargs)
+        return IODict._decode(s, 'json', **kwargs)
 
     @staticmethod
     def from_query_string(s, **kwargs):
-        return IODict._decode(s,
-            decoder=io_util.decode_query_string, **kwargs)
+        return IODict._decode(s, 'query_string', **kwargs)
 
     @staticmethod
     def from_toml(s, **kwargs):
-        return IODict._decode(s,
-            decoder=io_util.decode_toml, **kwargs)
+        return IODict._decode(s, 'toml', **kwargs)
 
     @staticmethod
     def from_xml(s, **kwargs):
-        return IODict._decode(s,
-            decoder=io_util.decode_xml, **kwargs)
+        return IODict._decode(s, 'xml', **kwargs)
 
     @staticmethod
     def from_yaml(s, **kwargs):
-        return IODict._decode(s,
-            decoder=io_util.decode_yaml, **kwargs)
+        return IODict._decode(s, 'yaml', **kwargs)
 
-    def to_base64(self, filepath=None, format='json', encoding='utf-8', **kwargs):
-        kwargs['format'] = format
+    def to_base64(self, subformat='json', encoding='utf-8', **kwargs):
+        kwargs['subformat'] = subformat
         kwargs['encoding'] = encoding
-        return IODict._encode(self,
-            encoder=io_util.encode_base64,
-            filepath=filepath, **kwargs)
-
-    def to_json(self, filepath=None, **kwargs):
-        return IODict._encode(self,
-            encoder=io_util.encode_json,
-            filepath=filepath, **kwargs)
-
-    def to_query_string(self, filepath=None, **kwargs):
-        return IODict._encode(self,
-            encoder=io_util.encode_query_string,
-            filepath=filepath, **kwargs)
-
-    def to_toml(self, filepath=None, **kwargs):
-        return IODict._encode(self,
-            encoder=io_util.encode_toml,
-            filepath=filepath, **kwargs)
-
-    def to_xml(self, filepath=None, **kwargs):
-        return IODict._encode(self,
-            encoder=io_util.encode_xml,
-            filepath=filepath, **kwargs)
-
-    def to_yaml(self, filepath=None, **kwargs):
-        return IODict._encode(self,
-            encoder=io_util.encode_yaml,
-            filepath=filepath, **kwargs)
+        return IODict._encode(self, 'base64', **kwargs)
+
+    def to_csv(self, key='values', columns=None, columns_row=True, **kwargs):
+        kwargs['columns'] = columns
+        kwargs['columns_row'] = columns_row
+        return IODict._encode(self[key], 'csv', **kwargs)
+
+    def to_json(self, **kwargs):
+        return IODict._encode(self, 'json', **kwargs)
+
+    def to_query_string(self, **kwargs):
+        return IODict._encode(self, 'query_string', **kwargs)
+
+    def to_toml(self, **kwargs):
+        return IODict._encode(self, 'toml', **kwargs)
+
+    def to_xml(self, **kwargs):
+        return IODict._encode(self, 'xml', **kwargs)
+
+    def to_yaml(self, **kwargs):
+        return IODict._encode(self, 'yaml', **kwargs)
diff --git a/benedict/utils/io_util.py b/benedict/utils/io_util.py
@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 
 from six import binary_type, string_types, StringIO
+from slugify import slugify
 
 import base64
 import csv
@@ -27,6 +28,16 @@
     from urlparse import parse_qs
 
 
+def decode(s, format, **kwargs):
+    decode_func = _get_format_decoder(format)
+    if decode_func:
+        decode_opts = kwargs.copy()
+        data = decode_func(s.strip(), **decode_opts)
+        return data
+    else:
+        raise ValueError('Invalid format: {}.'.format(format))
+
+
 def decode_base64(s, **kwargs):
     # fix urlencoded chars
     s = unquote(s)
@@ -35,18 +46,12 @@ def decode_base64(s, **kwargs):
     if m != 0:
         s += '=' * (4 - m)
     data = base64.b64decode(s)
-    format = kwargs.pop('format', None)
-    encoding = kwargs.pop('encoding', 'utf-8' if format else None)
+    subformat = kwargs.pop('subformat', None)
+    encoding = kwargs.pop('encoding', 'utf-8' if subformat else None)
     if encoding:
         data = data.decode(encoding)
-        if format:
-            decoders = {
-                'json': decode_json,
-                'toml': decode_toml,
-                'yaml': decode_yaml,
-                'xml': decode_xml,
-            }
-            decode_func = decoders.get(format.lower(), '')
+        if subformat:
+            decode_func = _get_format_decoder(subformat)
             if decode_func:
                 data = decode_func(data, **kwargs)
     return data
@@ -109,18 +114,21 @@ def decode_yaml(s, **kwargs):
     return data
 
 
+def encode(d, format, **kwargs):
+    encode_func = _get_format_encoder(format)
+    if encode_func:
+        s = encode_func(d, **kwargs)
+        return s
+    else:
+        raise ValueError('Invalid format: {}.'.format(format))
+
+
 def encode_base64(d, **kwargs):
     data = d
-    format = kwargs.pop('format', None)
-    encoding = kwargs.pop('encoding', 'utf-8' if format else None)
-    if not isinstance(data, string_types) and format:
-        encoders = {
-            'json': encode_json,
-            'toml': encode_toml,
-            'yaml': encode_yaml,
-            'xml': encode_xml,
-        }
-        encode_func = encoders.get(format.lower(), '')
+    subformat = kwargs.pop('subformat', None)
+    encoding = kwargs.pop('encoding', 'utf-8' if subformat else None)
+    if not isinstance(data, string_types) and subformat:
+        encode_func = _get_format_encoder(subformat)
         if encode_func:
             data = encode_func(data, **kwargs)
     if isinstance(data, string_types) and encoding:
@@ -225,3 +233,60 @@ def write_file(filepath, content):
     handler.write(content)
     handler.close()
     return True
+
+
+_formats = {
+    'b64': {
+        'decoder': decode_base64,
+        'encoder': encode_base64,
+    },
+    'base64': {
+        'decoder': decode_base64,
+        'encoder': encode_base64,
+    },
+    'csv': {
+        'decoder': decode_csv,
+        'encoder': encode_csv,
+    },
+    'json': {
+        'decoder': decode_json,
+        'encoder': encode_json,
+    },
+    'qs': {
+        'decoder': decode_query_string,
+        'encoder': encode_query_string,
+    },
+    'query_string': {
+        'decoder': decode_query_string,
+        'encoder': encode_query_string,
+    },
+    'toml': {
+        'decoder': decode_toml,
+        'encoder': encode_toml,
+    },
+    'yaml': {
+        'decoder': decode_yaml,
+        'encoder': encode_yaml,
+    },
+    'yml': {
+        'decoder': decode_yaml,
+        'encoder': encode_yaml,
+    },
+    'xml': {
+        'decoder': decode_xml,
+        'encoder': encode_xml,
+    },
+}
+
+
+def _get_format(format):
+    return _formats.get(
+        slugify(format, separator='_'), {})
+
+
+def _get_format_decoder(format):
+    return _get_format(format).get('decoder', None)
+
+
+def _get_format_encoder(format):
+    return _get_format(format).get('encoder', None)
diff --git a/tests/input/invalid-content.csv b/tests/input/invalid-content.csv
@@ -0,0 +1,2 @@
+Lorem ipsum consectetur sint id aute officia sed excepteur consectetur labore laboris dolore in labore consequat ut in eu ut deserunt.
+Elit aliqua velit aliquip voluptate consequat reprehenderit occaecat dolor ut esse aute laboris cillum fugiat esse est laborum.
diff --git a/tests/input/valid-content.csv b/tests/input/valid-content.csv
@@ -0,0 +1,5 @@
+id,name,age,height,weight
+1,Alice,20,62,120.6
+2,Freddie,21,74,190.6
+3,Bob,17,68,120.0
+4,François,32,75,110.05
diff --git a/tests/test_benedict.py b/tests/test_benedict.py
diff --git a/tests/test_io_dict.py b/tests/test_io_dict.py

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+Lorem ipsum consectetur sint id aute officia sed excepteur consectetur labore laboris dolore in labore consequat ut in eu ut deserunt.`
	`2`	`+Elit aliqua velit aliquip voluptate consequat reprehenderit occaecat dolor ut esse aute laboris cillum fugiat esse est laborum.`