Merge pull request #184 from OpenDataServices/cove-954-update-openpyxl

Bjwebb · web-flow · commit a62361f74f82 · 2018-02-08T15:04:41.000Z
Update openpyxl
diff --git a/flattentool/input.py b/flattentool/input.py
@@ -15,7 +15,8 @@
 import traceback
 import datetime
 import pytz
-from openpyxl.utils import _get_column_letter, column_index_from_string
+from openpyxl.utils import column_index_from_string
+from openpyxl.utils.cell import _get_column_letter
 from flattentool.exceptions import DataErrorWarning
 from flattentool.lib import isint, parse_sheet_configuration
 
@@ -580,18 +581,18 @@ def get_sheet_headings(self, sheet_name):
             return []
 
         if self.vertical_orientation:
-            return [cell.value for cell in worksheet.columns[skip_rows][configuration_line:]]
+            return [cell.value for cell in worksheet[_get_column_letter(skip_rows + 1)][configuration_line:]]
 
         try:
-            return [cell.value for cell in worksheet.rows[skip_rows + configuration_line]]
+            return [cell.value for cell in worksheet[skip_rows + configuration_line + 1]]
         except IndexError:
             # If the heading line is after data in the spreadsheet. i.e when skipRows
             return []
 
     def get_sheet_configuration(self, sheet_name):
         worksheet = self.workbook[self.sheet_names_map[sheet_name]]
-        if worksheet.rows[0][0].value == '#':
-            return [cell.value for num, cell in enumerate(worksheet.rows[0]) if num != 0 and cell.value]
+        if worksheet['A1'].value == '#':
+            return [cell.value for num, cell in enumerate(worksheet[1]) if num != 0 and cell.value]
         else:
             return []
 
@@ -609,14 +610,14 @@ def get_sheet_lines(self, sheet_name):
 
         worksheet = self.workbook[self.sheet_names_map[sheet_name]]
         if self.vertical_orientation:
-            header_row = worksheet.columns[skip_rows]
-            remaining_rows = worksheet.columns[skip_rows + header_rows:]
+            header_row = worksheet[_get_column_letter(skip_rows + 1)]
+            remaining_rows = worksheet.iter_cols(min_col=skip_rows + header_rows + 1)
             if configuration_line:
                 header_row = header_row[1:]
-                remaining_rows = [row[1:] for row in remaining_rows]
+                remaining_rows = worksheet.iter_cols(min_col=skip_rows + header_rows + 1, min_row=2)
         else:
-            header_row = worksheet.rows[skip_rows + configuration_line]
-            remaining_rows = worksheet.rows[skip_rows + configuration_line + header_rows:]
+            header_row = worksheet[skip_rows + configuration_line + 1]
+            remaining_rows = worksheet.iter_rows(min_row=skip_rows + configuration_line + header_rows + 1)
 
         coli_to_header = {}
         for i, header in enumerate(header_row):
diff --git a/flattentool/tests/test_output.py b/flattentool/tests/test_output.py
@@ -33,9 +33,10 @@ def test_blank_sheets(tmpdir):
     # Check XLSX is empty
     wb = openpyxl.load_workbook(tmpdir.join('release.xlsx').strpath)
     assert wb.get_sheet_names() == ['release']
-    assert len(wb['release'].rows) == 1
-    assert len(wb['release'].rows[0]) == 1
-    assert wb['release'].rows[0][0].value == None
+    rows = list(wb['release'].rows)
+    assert len(rows) == 1
+    assert len(rows[0]) == 1
+    assert rows[0][0].value == None
     
     # Check CSV is Empty
     assert tmpdir.join('release').listdir() == [ tmpdir.join('release').join('release.csv') ]
@@ -55,10 +56,12 @@ def test_populated_header(tmpdir):
     # Check XLSX
     wb = openpyxl.load_workbook(tmpdir.join('release.xlsx').strpath)
     assert wb.get_sheet_names() == ['release', 'b']
-    assert len(wb['release'].rows) == 1
-    assert [ x.value for x in wb['release'].rows[0] ] == [ 'a', 'd' ]
-    assert len(wb['b'].rows) == 1
-    assert [ x.value for x in wb['b'].rows[0] ] == [ 'ocid', 'c' ]
+    rows = list(wb['release'].rows)
+    assert len(rows) == 1
+    assert [ x.value for x in rows[0] ] == [ 'a', 'd' ]
+    b_rows = list(wb['b'].rows)
+    assert len(b_rows) == 1
+    assert [ x.value for x in b_rows[0] ] == [ 'ocid', 'c' ]
 
     # Check CSV
     assert set(tmpdir.join('release').listdir()) == set([
@@ -84,10 +87,12 @@ def test_empty_lines(tmpdir):
     # Check XLSX
     wb = openpyxl.load_workbook(tmpdir.join('release.xlsx').strpath)
     assert wb.get_sheet_names() == ['release', 'b']
-    assert len(wb['release'].rows) == 1
-    assert [ x.value for x in wb['release'].rows[0] ] == [ 'a', 'd' ]
-    assert len(wb['b'].rows) == 1
-    assert [ x.value for x in wb['b'].rows[0] ] == [ 'ocid', 'c' ]
+    rows = list(wb['release'].rows)
+    assert len(rows) == 1
+    assert [ x.value for x in rows[0] ] == [ 'a', 'd' ]
+    b_rows = list(wb['b'].rows)
+    assert len(b_rows) == 1
+    assert [ x.value for x in b_rows[0] ] == [ 'ocid', 'c' ]
 
     # Check CSV
     assert set(tmpdir.join('release').listdir()) == set([
@@ -115,14 +120,16 @@ def test_populated_lines(tmpdir):
     # Check XLSX
     wb = openpyxl.load_workbook(tmpdir.join('release.xlsx').strpath)
     assert wb.get_sheet_names() == ['release', 'b']
-    assert len(wb['release'].rows) == 3
-    assert [ x.value for x in wb['release'].rows[0] ] == [ 'a' ]
-    assert [ x.value for x in wb['release'].rows[1] ] == [ 'cell1' ]
-    assert [ x.value for x in wb['release'].rows[2] ] == [ 'cell2' ]
-    assert len(wb['b'].rows) == 3
-    assert [ x.value for x in wb['b'].rows[0] ] == [ 'ocid', 'c' ]
-    assert [ x.value for x in wb['b'].rows[1] ] == [ None, 'cell3' ]
-    assert [ x.value for x in wb['b'].rows[2] ] == [ None, 'cell4' ]
+    rows = list(wb['release'].rows)
+    assert len(rows) == 3
+    assert [ x.value for x in rows[0] ] == [ 'a' ]
+    assert [ x.value for x in rows[1] ] == [ 'cell1' ]
+    assert [ x.value for x in rows[2] ] == [ 'cell2' ]
+    b_rows = list(wb['b'].rows)
+    assert len(b_rows) == 3
+    assert [ x.value for x in b_rows[0] ] == [ 'ocid', 'c' ]
+    assert [ x.value for x in b_rows[1] ] == [ None, 'cell3' ]
+    assert [ x.value for x in b_rows[2] ] == [ None, 'cell4' ]
 
     # Check CSV
     assert set(tmpdir.join('release').listdir()) == set([
@@ -146,10 +153,11 @@ def test_utf8(tmpdir):
     # Check XLSX
     wb = openpyxl.load_workbook(tmpdir.join('release.xlsx').strpath)
     assert wb.get_sheet_names() == ['release']
-    assert len(wb['release'].rows) == 3
-    assert [ x.value for x in wb['release'].rows[0] ] == [ 'é' ]
-    assert [ x.value for x in wb['release'].rows[1] ] == [ 'éαГ😼𝒞人' ]
-    assert [ x.value for x in wb['release'].rows[2] ] == [ 'cell2' ]
+    rows = list(wb['release'].rows)
+    assert len(rows) == 3
+    assert [ x.value for x in rows[0] ] == [ 'é' ]
+    assert [ x.value for x in rows[1] ] == [ 'éαГ😼𝒞人' ]
+    assert [ x.value for x in rows[2] ] == [ 'cell2' ]
 
     # Check CSV
     assert set(tmpdir.join('release').listdir()) == set([
diff --git a/setup.py b/setup.py
@@ -1,7 +1,7 @@
 from setuptools import setup
 import sys
 
-install_requires = ['jsonref', 'schema', 'openpyxl>=2,<2.4', 'six', 'pytz', 'xmltodict']
+install_requires = ['jsonref', 'schema', 'openpyxl>=2.5', 'six', 'pytz', 'xmltodict']
 
 if sys.version < '3':
     install_requires.append('unicodecsv')