Skip to content

Commit ce072f0

Browse files
author
Ståle Undheim
committed
Updated version to 0.4
2 parents 05c147b + 5fa8fa8 commit ce072f0

File tree

9 files changed

+235
-157
lines changed

9 files changed

+235
-157
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,4 @@ ignored-files/
55
*.sublime-*
66
.pypirc
77
dist/
8+
.tox

README.rst

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,26 @@
11
python-xlsx
22
===========
33

4-
A small footprint xslx reader that understands shared strings and can process
4+
A small footprint xlsx reader that understands shared strings and can process
55
Excel dates.
66

7+
8+
Requirements
9+
------------
10+
11+
No external requirements. Supports Python versions 2.6+ and 3.2+.
12+
13+
714
Usage
8-
+++++++
15+
-----
916

1017
::
1118

1219
book = Workbook('filename or filedescriptor') #Open xlsx file
1320
for sheet in book:
1421
print sheet.name
22+
# for larger workbooks, use sheet.rowsIter() instead of
23+
# sheet.rows().iteritems()
1524
for row, cells in sheet.rows().iteritems(): # or sheet.cols()
1625
print row # prints row number
1726
for cell in cells:
@@ -22,6 +31,7 @@ Usage
2231
some_sheet = book['some sheet name']
2332
...
2433

34+
2535
Alternatives
2636
------------
2737

setup.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from setuptools import setup
22

33
setup(
4-
version="0.2",
4+
version="0.4",
55
name='py-xlsx',
66
description="""Tiny python code for parsing data from Microsoft's Office
77
Open XML Spreadsheet format""",
@@ -13,10 +13,18 @@
1313
'License :: OSI Approved :: MIT License',
1414
'Operating System :: OS Independent',
1515
'Programming Language :: Python',
16+
'Programming Language :: Python :: 2',
17+
'Programming Language :: Python :: 2.6',
18+
'Programming Language :: Python :: 2.7',
19+
'Programming Language :: Python :: 3',
20+
'Programming Language :: Python :: 3.2',
21+
'Programming Language :: Python :: 3.3',
22+
'Programming Language :: Python :: 3.4',
1623
],
1724
author='Staale Undheim',
1825
author_email='staale@staale.org',
1926
url='http://github.com/staale/python-xlsx',
27+
tests_require = ['six'],
2028
packages=[
2129
"xlsx"
2230
],

tox.ini

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
[tox]
2+
envlist = py26, py27, py32, py33, py34
3+
4+
5+
[testenv]
6+
usedevelop = True
7+
deps=
8+
six
9+
pytest
10+
11+
commands =
12+
py.test xlsx/tests
13+
14+
15+
# py34 env not available in tox <= 1.7, so create it ourselves.
16+
[testenv:py34]
17+
basepython = python3.4
18+
deps = {[testenv]deps}

xlsx/__init__.py

Lines changed: 34 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,15 @@
11
# -*- coding: utf-8 -*-
22
""" Small footprint xlsx reader """
3+
4+
from __future__ import unicode_literals
5+
36
__author__="Ståle Undheim <staale@staale.org>"
47

58
import re
69
import zipfile
7-
from xldate import xldate_as_tuple
8-
from formatting import is_date_format_string
10+
from xlsx.xldate import xldate_as_tuple
11+
from xlsx.formatting import is_date_format_string
12+
from xlsx.timemachine import UnicodeMixin
913

1014
try:
1115
from xml.etree import cElementTree as ET
@@ -26,6 +30,7 @@ def __init__(self, filename):
2630
2731
"""
2832

33+
self.ziphandle = None
2934
self.ziphandle = zipfile.ZipFile(filename, 'r')
3035

3136
def __getitem__(self, key):
@@ -41,7 +46,8 @@ def __getitem__(self, key):
4146
def __del__(self):
4247
"""Close the zip file when finished"""
4348

44-
self.ziphandle.close()
49+
if self.ziphandle:
50+
self.ziphandle.close()
4551

4652
class Workbook(object):
4753
"""Main class that contains sheets organized by name or by id.
@@ -127,17 +133,16 @@ def __init__(self, workbook, id, name):
127133
self.loaded = False
128134
self.addrPattern = re.compile("([a-zA-Z]*)(\d*)")
129135
self.__cells = {}
130-
self.__cols = {}
131-
self.__rows = {}
136+
self.__cols = None
137+
self.__rows = None
132138

133-
def __load(self):
139+
def rowsIter(self):
134140
sheetDoc = self.workbook.domzip["xl/worksheets/sheet%d.xml" % self.id]
135141
sheetData = sheetDoc.find("{http://schemas.openxmlformats.org/spreadsheetml/2006/main}sheetData")
136142
# @type sheetData Element
137-
rows = {}
138-
columns = {}
139143
for rowNode in sheetData:
140144
rowNum = int(rowNode.get("r"))
145+
rowCells = []
141146
for columnNode in rowNode:
142147
colType = columnNode.get("t")
143148
cellId = columnNode.get("r")
@@ -167,14 +172,22 @@ def __load(self):
167172
formula = columnNode.find("{http://schemas.openxmlformats.org/spreadsheetml/2006/main}f").text
168173
except Exception:
169174
raise #pass
170-
if not rowNum in rows:
171-
rows[rowNum] = []
175+
cell = Cell(rowNum, colNum, data, formula=formula)
176+
rowCells.append(cell)
177+
yield rowNum, rowCells
178+
179+
def __load(self):
180+
rows = {}
181+
columns = {}
182+
for rowNum, row in self.rowsIter():
183+
rows[rowNum] = row
184+
185+
for cell in row:
186+
colNum = cell.column
172187
if not colNum in columns:
173188
columns[colNum] = []
174-
cell = Cell(rowNum, colNum, data, formula=formula)
175-
rows[rowNum].append(cell)
189+
self.__cells[cell.id] = cell
176190
columns[colNum].append(cell)
177-
self.__cells[cellId] = cell
178191
self.__rows = rows
179192
self.__cols = columns
180193
self.loaded=True
@@ -207,10 +220,16 @@ def __iter__(self):
207220
self.__load()
208221
return self.__cells.__iter__()
209222

223+
<<<<<<< HEAD
210224
def __repr__(self):
211225
return "%r[%r]"%(self.workbook, self.name)
212226

213227
class Cell(object):
228+
=======
229+
230+
class Cell(UnicodeMixin):
231+
232+
>>>>>>> 5fa8fa8761d3bfcb3ce1b1b730913f6d4d0ab0c9
214233
def __init__(self, row, column, value, formula=None):
215234
self.row = int(row)
216235
self.column = column
@@ -248,5 +267,5 @@ def __ge__(self, other):
248267
return self.__cmp__(other) != -1
249268

250269
def __unicode__(self):
251-
return u"<Cell [%s] : \"%s\" (%s)>" % (self.id, self.value,
252-
self.formula, )
270+
return "<Cell [%s] : \"%s\" (%s)>" % (self.id, self.value,
271+
self.formula, )

xlsx/formatting.py

Lines changed: 19 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -26,32 +26,33 @@
2626
# 2007-09-08 SJM Work around corrupt STYLE record
2727
# 2007-07-11 SJM Allow for BIFF2/3-style FORMAT record in BIFF4/8 file
2828

29+
from __future__ import unicode_literals
2930
import re
3031

3132

32-
date_chars = u'ymdhs' # year, month/minute, day, hour, second
33+
date_chars = 'ymdhs' # year, month/minute, day, hour, second
3334
date_char_dict = {}
3435
for _c in date_chars + date_chars.upper():
3536
date_char_dict[_c] = 5
3637
del _c, date_chars
3738

3839
skip_char_dict = {}
39-
for _c in u'$-+/(): ':
40+
for _c in '$-+/(): ':
4041
skip_char_dict[_c] = 1
4142

4243
num_char_dict = {
43-
u'0': 5,
44-
u'#': 5,
45-
u'?': 5,
44+
'0': 5,
45+
'#': 5,
46+
'?': 5,
4647
}
4748

4849
non_date_formats = {
49-
u'0.00E+00':1,
50-
u'##0.0E+0':1,
51-
u'General' :1,
52-
u'GENERAL' :1, # OOo Calc 1.1.4 does this.
53-
u'general' :1, # pyExcelerator 0.6.3 does this.
54-
u'@' :1,
50+
'0.00E+00':1,
51+
'##0.0E+0':1,
52+
'General' :1,
53+
'GENERAL' :1, # OOo Calc 1.1.4 does this.
54+
'general' :1, # pyExcelerator 0.6.3 does this.
55+
'@' :1,
5556
}
5657

5758
fmt_bracketed_sub = re.compile(r'\[[^]]*\]').sub
@@ -69,35 +70,35 @@ def is_date_format_string(fmt):
6970
# TODO: u'[h]\\ \\h\\o\\u\\r\\s' ([h] means don't care about hours > 23)
7071
state = 0
7172
s = ''
72-
ignorable = skip_char_dict.has_key
73+
ignorable = lambda key: key in skip_char_dict
7374
for c in fmt:
7475
if state == 0:
75-
if c == u'"':
76+
if c == '"':
7677
state = 1
77-
elif c in ur"\_*":
78+
elif c in r"\_*":
7879
state = 2
7980
elif ignorable(c):
8081
pass
8182
else:
8283
s += c
8384
elif state == 1:
84-
if c == u'"':
85+
if c == '"':
8586
state = 0
8687
elif state == 2:
8788
# Ignore char after backslash, underscore or asterisk
8889
state = 0
8990
assert 0 <= state <= 2
9091
s = fmt_bracketed_sub('', s)
91-
if non_date_formats.has_key(s):
92+
if s in non_date_formats:
9293
return False
9394
state = 0
9495
separator = ";"
9596
got_sep = 0
9697
date_count = num_count = 0
9798
for c in s:
98-
if date_char_dict.has_key(c):
99+
if c in date_char_dict:
99100
date_count += date_char_dict[c]
100-
elif num_char_dict.has_key(c):
101+
elif c in num_char_dict:
101102
num_count += num_char_dict[c]
102103
elif c == separator:
103104
got_sep = 1

0 commit comments

Comments
 (0)