6
6
import sys
7
7
import argparse
8
8
from contextlib import ExitStack
9
- from typing import Iterable , List , Tuple
9
+ from typing import Dict , Iterable , List , Tuple
10
+ from collections import Counter
10
11
from openpyxl import Workbook
11
12
from openpyxl .utils import get_column_letter
12
13
@@ -25,28 +26,33 @@ def is_blank_row(row: List[str]) -> bool:
25
26
26
27
27
28
def merge_tables(str_tables: Iterable[str]) -> str:
    """Merge several CSV renderings of the same table into one CSV string.

    The first table is the template: it decides which rows are written and in
    what order. Rows are keyed by their first cell. Because a key may occur
    more than once, the n-th non-blank row with a given key in any table is
    matched against the n-th slot the first table holds for that key. A slot
    that is still blank is filled by the first non-blank row matched to it.

    Args:
        str_tables: CSV-formatted strings, one per table.

    Returns:
        The merged table as CSV text, or an empty string when no tables are
        given.

    Raises:
        DuplicateDataError: Two tables provide different non-blank rows for
            the same slot.
    """
    tables = [list(csv.reader(table.splitlines())) for table in str_tables]
    if not tables:
        # Nothing to merge; avoids an IndexError on tables[0] below.
        return ''
    template = tables[0]

    # key -> one slot per occurrence of that key in the template table.
    data: Dict[str, List[List[str]]] = {}
    for row in template:
        if row:
            data.setdefault(row[0], []).append(row)

    for table in tables:
        # How many non-blank rows with each key this table has produced so far.
        # NOTE(review): blank rows do not advance this counter, whereas blank
        # rows of the template do occupy slots above -- confirm this is the
        # intended alignment.
        seen: Dict[str, int] = Counter()
        for row in table:
            if is_blank_row(row):
                continue
            key = row[0]
            index = seen[key]
            seen[key] += 1

            slots = data.get(key)
            if slots is None or index >= len(slots):
                # The template has no row for this key/occurrence. Rows with
                # unknown keys were always ignored; surplus occurrences are
                # ignored the same way instead of raising IndexError.
                continue

            current = slots[index]
            if not is_blank_row(current) and current != row:
                raise DuplicateDataError(current, row, f'Duplicate data for {row[0]}.')
            slots[index] = row

    out = StringIO()
    writer = csv.writer(out)
    # Walk the template again so the output keeps its row order, picking the
    # merged slot that corresponds to each occurrence of a key.
    emitted: Dict[str, int] = Counter()
    for row in template:
        if not row:
            continue
        key = row[0]
        writer.writerow(data[key][emitted[key]])
        emitted[key] += 1

    return out.getvalue()
52
58
@@ -64,7 +70,6 @@ def extract_tables(contents: str) -> Iterable[Tuple[str, str]]:
64
70
def main (files , out : str ):
65
71
wb = Workbook ()
66
72
files = [f .read () for f in files ]
67
- xy = list (extract_tables (files [0 ]))
68
73
tbls = map (extract_tables , files )
69
74
for tbl_group in zip (* tbls ):
70
75
assert len (set (name for name , _ in tbl_group )) == 1
0 commit comments