fixes for pypi

alimanfoo · alimanfoo · commit 778283f1362c · 2015-12-18T14:22:37.000Z
diff --git a/README.md b/README.md
diff --git a/README.rst b/README.rst
@@ -0,0 +1,97 @@
+zarr
+====
+
+A minimal implementation of chunked, compressed, N-dimensional arrays for 
+Python.
+
+Installation
+------------
+
+Install from GitHub (requires NumPy and Cython pre-installed)::
+
+    $ pip install -U git+https://github.com/alimanfoo/zarr.git@master
+
+Status
+------
+
+Highly experimental, pre-alpha. Bug reports and pull requests very welcome.
+
+Design goals
+------------
+
+* Chunking in multiple dimensions
+* Resize any dimension
+* Concurrent reads
+* Concurrent writes
+* Release the GIL during compression and decompression
+
+Usage
+-----
+
+Create an array::
+
+    >>> import numpy as np
+    >>> import zarr
+    >>> z = zarr.empty((10000, 1000), dtype='i4', chunks=(1000, 100))
+    >>> z
+    zarr.ext.Array((10000, 1000), int32, chunks=(1000, 100), nbytes=38.1M, cbytes=0, cname=blosclz, clevel=5, shuffle=1)
+
+Fill it with some data::
+
+    >>> z[:] = np.arange(10000000, dtype='i4').reshape(10000, 1000)
+    >>> z
+    zarr.ext.Array((10000, 1000), int32, chunks=(1000, 100), nbytes=38.1M, cbytes=2.0M, cratio=19.3, cname=blosclz, clevel=5, shuffle=1)
+
+Obtain a NumPy array::
+
+    >>> z[:]
+    array([[      0,       1,       2, ...,     997,     998,     999],
+           [   1000,    1001,    1002, ...,    1997,    1998,    1999],
+           [   2000,    2001,    2002, ...,    2997,    2998,    2999],
+           ...,
+           [9997000, 9997001, 9997002, ..., 9997997, 9997998, 9997999],
+           [9998000, 9998001, 9998002, ..., 9998997, 9998998, 9998999],
+           [9999000, 9999001, 9999002, ..., 9999997, 9999998, 9999999]], dtype=int32)
+
+Resize the array and add more data::
+
+    >>> z.resize(20000, 1000)
+    >>> z
+    zarr.ext.Array((20000, 1000), int32, chunks=(1000, 100), nbytes=76.3M, cbytes=2.0M, cratio=38.5, cname=blosclz, clevel=5, shuffle=1)
+    >>> z[10000:, :] = np.arange(10000000, dtype='i4').reshape(10000, 1000)
+    >>> z
+    zarr.ext.Array((20000, 1000), int32, chunks=(1000, 100), nbytes=76.3M, cbytes=4.0M, cratio=19.3, cname=blosclz, clevel=5, shuffle=1)
+
+For convenience, an ``append`` method is also available, which can be used to
+append data to any axis::
+
+    >>> a = np.arange(10000000, dtype='i4').reshape(10000, 1000)
+    >>> z = zarr.array(a, chunks=(1000, 100))
+    >>> z
+    zarr.ext.Array((10000, 1000), int32, chunks=(1000, 100), nbytes=38.1M, cbytes=2.0M, cratio=19.3, cname=blosclz, clevel=5, shuffle=1)
+    >>> z.append(a+a)
+    >>> z
+    zarr.ext.Array((20000, 1000), int32, chunks=(1000, 100), nbytes=76.3M, cbytes=3.6M, cratio=21.2, cname=blosclz, clevel=5, shuffle=1)
+    >>> z.append(np.vstack([a, a]), axis=1)
+    >>> z
+    zarr.ext.Array((20000, 2000), int32, chunks=(1000, 100), nbytes=152.6M, cbytes=7.6M, cratio=20.2, cname=blosclz, clevel=5, shuffle=1)
+
+Tuning
+------
+
+``zarr`` is designed for use in parallel computations working chunk-wise
+over data. Try it with `dask.array <http://dask.pydata.org/en/latest/array.html>`_.
+
+``zarr`` is optimised for accessing and storing data in contiguous slices
+of the same size as or larger than chunks. It is not and will never be
+optimised for single item access.
+
+Chunk sizes >= 1M are generally good. Optimal chunk shape will depend on
+the correlation structure in your data.
+
+Acknowledgments
+---------------
+
+``zarr`` uses `c-blosc <https://github.com/Blosc/c-blosc>`_ internally for
+compression and decompression and borrows code heavily from
+`bcolz <http://bcolz.blosc.org/>`_.
diff --git a/setup.py b/setup.py
@@ -26,17 +26,17 @@
 
 
 extra_compile_args = []
-if re.match("i.86|x86|AMD", platform.machine()) is not None:
+if re.match('i.86|x86|AMD', platform.machine()) is not None:
     # always enable SSE2 for AMD/Intel machines
     extra_compile_args.append('-DSHUFFLE_SSE2_ENABLED')
 
 is_32bit = ctypes.sizeof(ctypes.c_voidp) == 4
 if is_32bit:
     if os.name == 'posix':
-        extra_compile_args.append("-msse2")
+        extra_compile_args.append('-msse2')
     elif os.name == 'nt':
         # this is currently broken for windows
-        extra_compile_args.append("/arch:sse2")
+        extra_compile_args.append('/arch:sse2')
 
 
 import numpy as np
@@ -56,7 +56,7 @@
 description = 'A minimal implementation of chunked, compressed, ' \
               'N-dimensional arrays for Python.'
 
-with open('README.md') as f:
+with open('README.rst') as f:
     long_description = f.read()
 
 setup(
@@ -75,6 +75,8 @@
         'setuptools-scm>1.5.4'
     ],
     ext_modules=ext_modules,
+    package_dir={'': '.'},
+    packages=['zarr', 'zarr.tests'],
     classifiers=[
         'Development Status :: 2 - Pre-Alpha',
         'Intended Audience :: Developers',