This repository was archived by the owner on Oct 22, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 10
Expand file tree
/
Copy pathxls_to_pdf.py
More file actions
57 lines (46 loc) · 1.72 KB
/
xls_to_pdf.py
File metadata and controls
57 lines (46 loc) · 1.72 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# Command-line script to read XLS files
# and convert them to plain text CSV
from argparse import ArgumentParser
from csv import DictWriter, DictReader
from glob import glob
from os.path import isdir, join as joinpath
from sys import stdout
from xlrd import open_workbook
# Column titles of the WH salary sheet, in sheet order, as a single
# comma-separated string.
HEADERS_TEXT = 'NAME,STATUS,SALARY,PAY BASIS,POSITION TITLE'
# The same titles as a list. It is compared against sheet rows to locate the
# header row, and is also used as the keys of each yielded row dict and as
# the field names of the CSV output.
HEADERS = HEADERS_TEXT.split(',')
def process_wh_salary_workbook(wbpath):
    """
    Yield one dict per salary row found in a WH salary workbook.

    A deliberately non-generalized reader: it expects the workbook to be in
    the WH salary format -- a single sheet containing a row exactly equal to
    HEADERS, followed by the data rows. Only the first sheet is read. No
    need to make it more flexible right now.

    Every row up to and including the header row is skipped. After the
    header, a row is kept only when its second cell contains 'Employee' or
    its third cell contains '$'; all other rows are ignored. If no row
    equals HEADERS, nothing is yielded.

    :param wbpath: path of the workbook file, handed to xlrd's open_workbook
    :return: generator of dicts mapping each HEADERS name to the cell value
             at the same position in the row (values are left exactly as
             xlrd returned them)
    """
    book = open_workbook(wbpath)
    sheet = book.sheets()[0]
    headers_found = False
    for n in range(sheet.nrows):
        cols = sheet.row_values(n)
        if not headers_found:
            # still looking for the row that exactly matches HEADERS;
            # the header row itself is never yielded
            headers_found = cols == HEADERS
            continue
        # xlrd hands numeric cells back as floats, and a substring test on
        # a float (`'$' in 1234.0`) raises TypeError. Coerce both cells to
        # text for the test only, so such a row is evaluated instead of
        # aborting the whole conversion.
        status_text = str(cols[1])
        salary_text = str(cols[2])
        if 'Employee' in status_text or '$' in salary_text:
            yield dict(zip(HEADERS, cols))
if __name__ == '__main__':
    # Command-line entry point: write every salary row found in the given
    # workbook (or in each matching workbook of the given directory) to
    # stdout as CSV, preceded by a single header line.

    # The text has to be passed as ``description``: ArgumentParser's first
    # positional parameter is ``prog``, so passing the sentence positionally
    # made it show up as the program name in the usage line.
    parser = ArgumentParser(description="Convert WH salary XLS page(s) to CSV")
    parser.add_argument('inpath', type=str, help="Path to a XLSX file, or directory of them")
    args = parser.parse_args()
    inpath = args.inpath
    if isdir(inpath):
        # NOTE: in a glob pattern '?' matches exactly one character, so this
        # selects four-letter extensions such as .xlsx but not plain .xls.
        # glob() returns names in arbitrary order; sort them so the CSV
        # output is reproducible from run to run.
        filenames = sorted(glob(joinpath(inpath, '*.xls?')))
    else:
        filenames = [inpath]
    # set up the CSV
    csvout = DictWriter(stdout, fieldnames=HEADERS)
    csvout.writeheader()
    for fname in filenames:
        csvout.writerows(process_wh_salary_workbook(fname))