Skip to content

Commit 83d2eef

Browse files
authored
Implemented table.size/hash (#256)
* Implemented table.size/hash * Bump tabulator
1 parent 8e092af commit 83d2eef

File tree

5 files changed

+83
-10
lines changed

5 files changed

+83
-10
lines changed

README.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,14 @@ Constructor to instantiate `Table` class. If `references` argument is provided,
206206

207207
- `(Schema)` - returns schema class instance
208208

209+
#### `table.size`
210+
211+
- `(int/None)` - returns the table's size in BYTES if it has already been read (e.g. using `table.read`); otherwise returns `None`. In the middle of an iteration it returns the size of the contents read so far.
212+
213+
#### `table.hash`
214+
215+
- `(str/None)` - returns the table's SHA256 hash if it has already been read (e.g. using `table.read`); otherwise returns `None`. In the middle of an iteration it returns the hash of the contents read so far.
216+
209217
#### `table.iter(keyed=False, extended=False, cast=True, relations=False, foreign_keys_values=False)`
210218

211219
Iterates through the table data and emits rows cast based on table schema. Data casting can be disabled.

data/data.csv.zip

259 Bytes
Binary file not shown.

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ def read(*paths):
2929
'unicodecsv>=0.14',
3030
'isodate>=0.5.4',
3131
'rfc3986>=1.1.0',
32-
'tabulator>=1.20',
32+
'tabulator>=1.29',
3333
]
3434
TESTS_REQUIRE = [
3535
'mock',

tableschema/table.py

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ class Table(object):
2222

2323
def __init__(self, source, schema=None, strict=False,
2424
post_cast=[], storage=None, **options):
25-
"""https://github.com/frictionlessdata/tableschema-py#schema
25+
"""https://github.com/frictionlessdata/tableschema-py#table
2626
"""
2727

2828
# Set attributes
@@ -56,19 +56,33 @@ def __init__(self, source, schema=None, strict=False,
5656

5757
@property
5858
def headers(self):
59-
"""https://github.com/frictionlessdata/tableschema-py#schema
59+
"""https://github.com/frictionlessdata/tableschema-py#table
6060
"""
6161
return self.__headers
6262

6363
@property
6464
def schema(self):
65-
"""https://github.com/frictionlessdata/tableschema-py#schema
65+
"""https://github.com/frictionlessdata/tableschema-py#table
6666
"""
6767
return self.__schema
6868

69+
@property
70+
def size(self):
71+
"""https://github.com/frictionlessdata/tableschema-py#table
72+
"""
73+
if self.__stream:
74+
return self.__stream.size
75+
76+
@property
77+
def hash(self):
78+
"""https://github.com/frictionlessdata/tableschema-py#table
79+
"""
80+
if self.__stream:
81+
return self.__stream.hash
82+
6983
def iter(self, keyed=False, extended=False, cast=True, relations=False,
7084
foreign_keys_values=False):
71-
"""https://github.com/frictionlessdata/tableschema-py#schema
85+
"""https://github.com/frictionlessdata/tableschema-py#table
7286
"""
7387

7488
# Prepare unique checks
@@ -160,7 +174,7 @@ def iter(self, keyed=False, extended=False, cast=True, relations=False,
160174

161175
def read(self, keyed=False, extended=False, cast=True, relations=False, limit=None,
162176
foreign_keys_values=False):
163-
"""https://github.com/frictionlessdata/tableschema-py#schema
177+
"""https://github.com/frictionlessdata/tableschema-py#table
164178
"""
165179
result = []
166180
rows = self.iter(keyed=keyed, extended=extended, cast=cast, relations=relations,
@@ -172,7 +186,7 @@ def read(self, keyed=False, extended=False, cast=True, relations=False, limit=No
172186
return result
173187

174188
def infer(self, limit=100, confidence=0.75):
175-
"""https://github.com/frictionlessdata/tableschema-py#schema
189+
"""https://github.com/frictionlessdata/tableschema-py#table
176190
"""
177191
if self.__schema is None or self.__headers is None:
178192

@@ -198,7 +212,7 @@ def infer(self, limit=100, confidence=0.75):
198212
return self.__schema.descriptor
199213

200214
def save(self, target, storage=None, **options):
201-
"""https://github.com/frictionlessdata/tableschema-py#schema
215+
"""https://github.com/frictionlessdata/tableschema-py#table
202216
"""
203217

204218
# Save (tabulator)

tests/test_table.py

Lines changed: 53 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from __future__ import absolute_import
55
from __future__ import unicode_literals
66

7+
import six
78
import pytest
89
from copy import deepcopy
910
from mock import Mock, patch
@@ -193,6 +194,56 @@ def test_read_with_headers_field_names_mismatch():
193194
assert 'match schema field names' in str(excinfo.value)
194195

195196

197+
# Stats
198+
199+
def test_size():
200+
table = Table('data/data.csv')
201+
table.read()
202+
assert table.size == 63
203+
204+
205+
@pytest.mark.skipif(six.PY2, reason='Support only for Python3')
206+
def test_size_compressed():
207+
table = Table('data/data.csv.zip')
208+
table.read()
209+
assert table.size == 63
210+
211+
212+
def test_size_remote():
213+
table = Table(BASE_URL % 'data/data.csv')
214+
table.read()
215+
assert table.size == 63
216+
217+
218+
def test_size_not_read():
219+
table = Table(BASE_URL % 'data/data.csv')
220+
assert table.size is None
221+
222+
223+
def test_hash():
224+
table = Table('data/data.csv')
225+
table.read()
226+
assert table.hash == '328adab247692a1a405e83c2625d52e366389eabf8a1824931187877e8644774'
227+
228+
229+
@pytest.mark.skipif(six.PY2, reason='Support only for Python3')
230+
def test_hash_compressed():
231+
table = Table('data/data.csv.zip')
232+
table.read()
233+
assert table.hash == '328adab247692a1a405e83c2625d52e366389eabf8a1824931187877e8644774'
234+
235+
236+
def test_hash_remote():
237+
table = Table(BASE_URL % 'data/data.csv')
238+
table.read()
239+
assert table.hash == '328adab247692a1a405e83c2625d52e366389eabf8a1824931187877e8644774'
240+
241+
242+
def test_hash_not_read():
243+
table = Table(BASE_URL % 'data/data.csv')
244+
assert table.hash is None
245+
246+
196247
# Foreign keys
197248

198249
FK_SOURCE = [
@@ -367,5 +418,5 @@ def test_multiple_foreign_keys_same_field_invalid():
367418
table = Table(FK_SOURCE, schema=schema)
368419
with pytest.raises(exceptions.RelationError) as excinfo:
369420
table.read(relations=relations)
370-
assert 'Foreign key' in str(excinfo.value)
371-
421+
assert 'Foreign key' in str(excinfo.value)
422+

0 commit comments

Comments
 (0)