|
6 | 6 | import subprocess |
7 | 7 | import sys |
8 | 8 | import warnings |
| 9 | +from typing import List, Iterable |
9 | 10 |
|
10 | 11 | from ast import literal_eval |
11 | 12 | from collections import OrderedDict, Counter, defaultdict |
|
26 | 27 |
|
27 | 28 | import xlrd |
28 | 29 | import xlsxwriter |
| 30 | +import openpyxl |
29 | 31 |
|
30 | 32 | from Orange.data import ( |
31 | 33 | _io, is_discrete_values, MISSING_VALUES, Table, Domain, Variable, |
@@ -405,15 +407,15 @@ def __init__(self, filename): |
405 | 407 | self.sheet = None |
406 | 408 |
|
407 | 409 | @property |
408 | | - def sheets(self): |
| 410 | + def sheets(self) -> List: |
409 | 411 | """FileFormats with a notion of sheets should override this property |
410 | 412 | to return a list of sheet names in the file. |
411 | 413 |
|
412 | 414 | Returns |
413 | 415 | ------- |
414 | 416 | a list of sheet names |
415 | 417 | """ |
416 | | - return () |
| 418 | + return [] |
417 | 419 |
|
418 | 420 | def select_sheet(self, sheet): |
419 | 421 | """Select sheet to be read |
@@ -1007,33 +1009,12 @@ def __init__(self, filename): |
1007 | 1009 | super().__init__(filename=filename) |
1008 | 1010 | self._workbook = None |
1009 | 1011 |
|
1010 | | - @property |
1011 | | - def workbook(self): |
1012 | | - if not self._workbook: |
1013 | | - self._workbook = xlrd.open_workbook(self.filename, on_demand=True) |
1014 | | - return self._workbook |
1015 | | - |
1016 | | - @property |
1017 | | - @lru_cache(1) |
1018 | | - def sheets(self): |
1019 | | - if self.workbook: |
1020 | | - return self.workbook.sheet_names() |
1021 | | - else: |
1022 | | - return () |
| 1012 | + def get_cells(self) -> Iterable: |
| 1013 | + raise NotImplementedError |
1023 | 1014 |
|
1024 | 1015 | def read(self): |
1025 | | - if self.sheet: |
1026 | | - ss = self.workbook.sheet_by_name(self.sheet) |
1027 | | - else: |
1028 | | - ss = self.workbook.sheet_by_index(0) |
1029 | 1016 | try: |
1030 | | - first_row = next(i for i in range(ss.nrows) if any(ss.row_values(i))) |
1031 | | - first_col = next(i for i in range(ss.ncols) if ss.cell_value(first_row, i)) |
1032 | | - row_len = ss.row_len(first_row) |
1033 | | - cells = filter(any, |
1034 | | - [[str(ss.cell_value(row, col)) if col < ss.row_len(row) else '' |
1035 | | - for col in range(first_col, row_len)] |
1036 | | - for row in range(first_row, ss.nrows)]) |
| 1017 | + cells = self.get_cells() |
1037 | 1018 | table = self.data_table(cells) |
1038 | 1019 | table.name = path.splitext(path.split(self.filename)[-1])[0] |
1039 | 1020 | if self.sheet: |
@@ -1069,12 +1050,68 @@ class ExcelReader(_BaseExcelReader): |
1069 | 1050 | EXTENSIONS = ('.xlsx',) |
1070 | 1051 | DESCRIPTION = 'Microsoft Excel spreadsheet' |
1071 | 1052 |
|
| 1053 | + @property |
| 1054 | + def workbook(self) -> openpyxl.Workbook: |
| 1055 | + if not self._workbook: |
| 1056 | + self._workbook = openpyxl.load_workbook(self.filename) |
| 1057 | + return self._workbook |
| 1058 | + |
| 1059 | + @property |
| 1060 | + @lru_cache(1) |
| 1061 | + def sheets(self) -> List: |
| 1062 | + return self.workbook.sheetnames if self.workbook else [] |
| 1063 | + |
| 1064 | + def get_cells(self) -> Iterable: |
| 1065 | + def str_(x): |
| 1066 | + return str(x) if x is not None else "" |
| 1067 | + |
| 1068 | + sheet = self._get_active_sheet() |
| 1069 | + cells = ([str_(sheet.cell(row, col).value) |
| 1070 | + for col in range(sheet.min_column, sheet.max_column + 1)] |
| 1071 | + for row in range(sheet.min_row, sheet.max_row + 1)) |
| 1072 | + return filter(any, cells) |
| 1073 | + |
| 1074 | + def _get_active_sheet(self) -> openpyxl.worksheet.worksheet.Worksheet: |
| 1075 | + if self.sheet: |
| 1076 | + return self.workbook[self.sheet] |
| 1077 | + else: |
| 1078 | + return self.workbook.active |
| 1079 | + |
1072 | 1080 |
|
1073 | 1081 | class XlsReader(_BaseExcelReader): |
1074 | 1082 | """Reader for .xls files""" |
1075 | 1083 | EXTENSIONS = ('.xls',) |
1076 | 1084 | DESCRIPTION = 'Microsoft Excel 97-2004 spreadsheet' |
1077 | 1085 |
|
| 1086 | + @property |
| 1087 | + def workbook(self) -> xlrd.Book: |
| 1088 | + if not self._workbook: |
| 1089 | + self._workbook = xlrd.open_workbook(self.filename, on_demand=True) |
| 1090 | + return self._workbook |
| 1091 | + |
| 1092 | + @property |
| 1093 | + @lru_cache(1) |
| 1094 | + def sheets(self) -> List: |
| 1095 | + return self.workbook.sheet_names() if self.workbook else [] |
| 1096 | + |
| 1097 | + def get_cells(self) -> Iterable: |
| 1098 | + sheet = self._get_active_sheet() |
| 1099 | + first_row = next(i for i in range(sheet.nrows) |
| 1100 | + if any(sheet.row_values(i))) |
| 1101 | + first_col = next(i for i in range(sheet.ncols) |
| 1102 | + if sheet.cell_value(first_row, i)) |
| 1103 | + row_len = sheet.row_len(first_row) |
| 1104 | + return filter(any, ([str(sheet.cell_value(row, col)) |
| 1105 | + if col < sheet.row_len(row) else '' |
| 1106 | + for col in range(first_col, row_len)] |
| 1107 | + for row in range(first_row, sheet.nrows))) |
| 1108 | + |
| 1109 | + def _get_active_sheet(self) -> xlrd.sheet.Sheet: |
| 1110 | + if self.sheet: |
| 1111 | + return self.workbook.sheet_by_name(self.sheet) |
| 1112 | + else: |
| 1113 | + return self.workbook.sheet_by_index(0) |
| 1114 | + |
1078 | 1115 |
|
1079 | 1116 | class DotReader(FileFormat): |
1080 | 1117 | """Writer for dot (graph) files""" |
|
0 commit comments