Updated version to 0.4

Ståle Undheim · Ståle Undheim · commit ce072f0c177d · 2014-05-28T08:32:06.000+02:00
diff --git a/.gitignore b/.gitignore
@@ -5,3 +5,4 @@ ignored-files/
 *.sublime-*
 .pypirc
 dist/
+.tox
diff --git a/README.rst b/README.rst
@@ -1,17 +1,26 @@
 python-xlsx
 ===========
 
-A small footprint xslx reader that understands shared strings and can process
+A small footprint xlsx reader that understands shared strings and can process
 excel dates.
 
+
+Requirements
+------------
+
+No external requirements.  Supports Python versions 2.6+ and 3.2+.
+
+
 Usage
-+++++++
+-----
 
 ::
 
     book = Workbook('filename or filedescriptor') #Open xlsx file
     for sheet in book:
         print sheet.name
+        # for larger workbooks, use sheet.rowsIter() instead of
+        # sheet.rows().iteritems()
         for row, cells in sheet.rows().iteritems(): # or sheet.cols()
             print row # prints row number
             for cell in cells:
@@ -22,6 +31,7 @@ Usage
     some_sheet = book['some sheet name']
     ...
 
+
 Alternatives
 ------------
 
diff --git a/setup.py b/setup.py
@@ -1,7 +1,7 @@
 from setuptools import setup
 
 setup(
-    version="0.2",
+    version="0.4",
     name='py-xlsx',
     description="""Tiny python code for parsing data from Microsoft's Office
     Open XML Spreadsheet format""",
@@ -13,10 +13,18 @@
         'License :: OSI Approved :: MIT License',
         'Operating System :: OS Independent',
         'Programming Language :: Python',
+        'Programming Language :: Python :: 2',
+        'Programming Language :: Python :: 2.6',
+        'Programming Language :: Python :: 2.7',
+        'Programming Language :: Python :: 3',
+        'Programming Language :: Python :: 3.2',
+        'Programming Language :: Python :: 3.3',
+        'Programming Language :: Python :: 3.4',
     ],
     author='Staale Undheim',
     author_email='staale@staale.org',
     url='http://github.com/staale/python-xlsx',
+    tests_require = ['six'],
     packages=[
         "xlsx"
     ],
diff --git a/tox.ini b/tox.ini
@@ -0,0 +1,18 @@
+[tox]
+envlist = py26, py27, py32, py33, py34
+
+
+[testenv]
+usedevelop = True
+deps=
+    six
+    pytest
+
+commands =
+    py.test xlsx/tests
+
+
+# py34 env not available in tox <= 1.7, so create it ourselves.
+[testenv:py34]
+basepython = python3.4
+deps = {[testenv]deps}
diff --git a/xlsx/__init__.py b/xlsx/__init__.py
@@ -1,11 +1,15 @@
 # -*- coding: utf-8 -*-
 """ Small footprint xlsx reader """
+
+from __future__ import unicode_literals
+
 __author__="Ståle Undheim <staale@staale.org>"
 
 import re
 import zipfile
-from xldate import xldate_as_tuple
-from formatting import is_date_format_string
+from xlsx.xldate import xldate_as_tuple
+from xlsx.formatting import is_date_format_string
+from xlsx.timemachine import UnicodeMixin
 
 try:
     from xml.etree import cElementTree as ET
@@ -26,6 +30,7 @@ def __init__(self, filename):
 
         """
 
+        self.ziphandle = None
         self.ziphandle = zipfile.ZipFile(filename, 'r')
 
     def __getitem__(self, key):
@@ -41,7 +46,8 @@ def __getitem__(self, key):
     def __del__(self):
         """Close the zip file when finished"""
 
-        self.ziphandle.close()
+        if self.ziphandle:
+            self.ziphandle.close()
 
 class Workbook(object):
     """Main class that contains sheets organized by name or by id.
@@ -127,17 +133,16 @@ def __init__(self, workbook, id, name):
         self.loaded = False
         self.addrPattern = re.compile("([a-zA-Z]*)(\d*)")
         self.__cells = {}
-        self.__cols = {}
-        self.__rows = {}
+        self.__cols = None
+        self.__rows = None
 
-    def __load(self):
+    def rowsIter(self):
         sheetDoc = self.workbook.domzip["xl/worksheets/sheet%d.xml" % self.id]
         sheetData = sheetDoc.find("{http://schemas.openxmlformats.org/spreadsheetml/2006/main}sheetData")
         # @type sheetData Element
-        rows = {}
-        columns = {}
         for rowNode in sheetData:
             rowNum = int(rowNode.get("r"))
+            rowCells = []
             for columnNode in rowNode:
                 colType = columnNode.get("t")
                 cellId = columnNode.get("r")
@@ -167,14 +172,22 @@ def __load(self):
                         formula = columnNode.find("{http://schemas.openxmlformats.org/spreadsheetml/2006/main}f").text
                 except Exception:
                     raise #pass
-                if not rowNum in rows:
-                    rows[rowNum] = []
+                cell = Cell(rowNum, colNum, data, formula=formula)
+                rowCells.append(cell)
+            yield rowNum, rowCells
+
+    def __load(self):
+        rows = {}
+        columns = {}
+        for rowNum, row in self.rowsIter():
+            rows[rowNum] = row
+
+            for cell in row:
+                colNum = cell.column
                 if not colNum in columns:
                     columns[colNum] = []
-                cell = Cell(rowNum, colNum, data, formula=formula)
-                rows[rowNum].append(cell)
+                self.__cells[cell.id] = cell
                 columns[colNum].append(cell)
-                self.__cells[cellId] = cell
         self.__rows = rows
         self.__cols = columns
         self.loaded=True
@@ -207,10 +220,16 @@ def __iter__(self):
             self.__load()
         return self.__cells.__iter__()
 
+<<<<<<< HEAD
     def __repr__(self):
         return "%r[%r]"%(self.workbook, self.name)
 
 class Cell(object):
+=======
+
+class Cell(UnicodeMixin):
+
+>>>>>>> 5fa8fa8761d3bfcb3ce1b1b730913f6d4d0ab0c9
     def __init__(self, row, column, value, formula=None):
         self.row = int(row)
         self.column = column
@@ -248,5 +267,5 @@ def __ge__(self, other):
         return self.__cmp__(other) != -1
 
     def __unicode__(self):
-        return u"<Cell [%s] : \"%s\" (%s)>" % (self.id, self.value,
-                                               self.formula, )
+        return "<Cell [%s] : \"%s\" (%s)>" % (self.id, self.value,
+                                              self.formula, )
diff --git a/xlsx/formatting.py b/xlsx/formatting.py
@@ -26,32 +26,33 @@
 # 2007-09-08 SJM Work around corrupt STYLE record
 # 2007-07-11 SJM Allow for BIFF2/3-style FORMAT record in BIFF4/8 file
 
+from __future__ import unicode_literals
 import re
 
 
-date_chars = u'ymdhs' # year, month/minute, day, hour, second
+date_chars = 'ymdhs' # year, month/minute, day, hour, second
 date_char_dict = {}
 for _c in date_chars + date_chars.upper():
     date_char_dict[_c] = 5
 del _c, date_chars
 
 skip_char_dict = {}
-for _c in u'$-+/(): ':
+for _c in '$-+/(): ':
     skip_char_dict[_c] = 1
 
 num_char_dict = {
-    u'0': 5,
-    u'#': 5,
-    u'?': 5,
+    '0': 5,
+    '#': 5,
+    '?': 5,
     }
 
 non_date_formats = {
-    u'0.00E+00':1,
-    u'##0.0E+0':1,
-    u'General' :1,
-    u'GENERAL' :1, # OOo Calc 1.1.4 does this.
-    u'general' :1,  # pyExcelerator 0.6.3 does this.
-    u'@'       :1,
+    '0.00E+00':1,
+    '##0.0E+0':1,
+    'General' :1,
+    'GENERAL' :1, # OOo Calc 1.1.4 does this.
+    'general' :1,  # pyExcelerator 0.6.3 does this.
+    '@'       :1,
     }
 
 fmt_bracketed_sub = re.compile(r'\[[^]]*\]').sub
@@ -69,35 +70,35 @@ def is_date_format_string(fmt):
     # TODO: u'[h]\\ \\h\\o\\u\\r\\s' ([h] means don't care about hours > 23)
     state = 0
     s = ''
-    ignorable = skip_char_dict.has_key
+    ignorable = lambda key: key in skip_char_dict
     for c in fmt:
         if state == 0:
-            if c == u'"':
+            if c == '"':
                 state = 1
-            elif c in ur"\_*":
+            elif c in r"\_*":
                 state = 2
             elif ignorable(c):
                 pass
             else:
                 s += c
         elif state == 1:
-            if c == u'"':
+            if c == '"':
                 state = 0
         elif state == 2:
             # Ignore char after backslash, underscore or asterisk
             state = 0
         assert 0 <= state <= 2
     s = fmt_bracketed_sub('', s)
-    if non_date_formats.has_key(s):
+    if s in non_date_formats:
         return False
     state = 0
     separator = ";"
     got_sep = 0
     date_count = num_count = 0
     for c in s:
-        if date_char_dict.has_key(c):
+        if c in date_char_dict:
             date_count += date_char_dict[c]
-        elif num_char_dict.has_key(c):
+        elif c in num_char_dict:
             num_count += num_char_dict[c]
         elif c == separator:
             got_sep = 1
diff --git a/xlsx/tests/test_basic.py b/xlsx/tests/test_basic.py
diff --git a/xlsx/timemachine.py b/xlsx/timemachine.py
diff --git a/xlsx/xldate.py b/xlsx/xldate.py

-Original file line number
+Diff line change
 *.sublime-*
 .pypirc
 dist/
 +.tox