@@ -28,20 +28,20 @@ def deal_isolate_span(thead_part):
2828 """
2929 # 1. find out isolate span tokens.
3030 isolate_pattern = (
31- '<td></td> rowspan="(\d)+" colspan="(\d)+"></b></td>|'
32- '<td></td> colspan="(\d)+" rowspan="(\d)+"></b></td>|'
33- '<td></td> rowspan="(\d)+"></b></td>|'
34- '<td></td> colspan="(\d)+"></b></td>'
31+ r '<td></td> rowspan="(\d)+" colspan="(\d)+"></b></td>|'
32+ r '<td></td> colspan="(\d)+" rowspan="(\d)+"></b></td>|'
33+ r '<td></td> rowspan="(\d)+"></b></td>|'
34+ r '<td></td> colspan="(\d)+"></b></td>'
3535 )
3636 isolate_iter = re .finditer (isolate_pattern , thead_part )
3737 isolate_list = [i .group () for i in isolate_iter ]
3838
3939 # 2. find out span number, by step 1 results.
4040 span_pattern = (
41- ' rowspan="(\d)+" colspan="(\d)+"|'
42- ' colspan="(\d)+" rowspan="(\d)+"|'
43- ' rowspan="(\d)+"|'
44- ' colspan="(\d)+"'
41+ r ' rowspan="(\d)+" colspan="(\d)+"|'
42+ r ' colspan="(\d)+" rowspan="(\d)+"|'
43+ r ' rowspan="(\d)+"|'
44+ r ' colspan="(\d)+"'
4545 )
4646 corrected_list = []
4747 for isolate_item in isolate_list :
@@ -72,11 +72,11 @@ def deal_duplicate_bb(thead_part):
7272 """
7373 # 1. find out <td></td> in <thead></thead>.
7474 td_pattern = (
75- '<td rowspan="(\d)+" colspan="(\d)+">(.+?)</td>|'
76- '<td colspan="(\d)+" rowspan="(\d)+">(.+?)</td>|'
77- '<td rowspan="(\d)+">(.+?)</td>|'
78- '<td colspan="(\d)+">(.+?)</td>|'
79- " <td>(.*?)</td>"
75+ r '<td rowspan="(\d)+" colspan="(\d)+">(.+?)</td>|'
76+ r '<td colspan="(\d)+" rowspan="(\d)+">(.+?)</td>|'
77+ r '<td rowspan="(\d)+">(.+?)</td>|'
78+ r '<td colspan="(\d)+">(.+?)</td>|'
79+ r' <td>(.*?)</td>'
8080 )
8181 td_iter = re .finditer (td_pattern , thead_part )
8282 td_list = [t .group () for t in td_iter ]
@@ -115,7 +115,7 @@ def deal_bb(result_token):
115115 origin_thead_part = copy .deepcopy (thead_part )
116116
117117 # check "rowspan" or "colspan" occur in <thead></thead> parts or not .
118- span_pattern = '<td rowspan="(\d)+" colspan="(\d)+">|<td colspan="(\d)+" rowspan="(\d)+">|<td rowspan="(\d)+">|<td colspan="(\d)+">'
118+ span_pattern = r '<td rowspan="(\d)+" colspan="(\d)+">|<td colspan="(\d)+" rowspan="(\d)+">|<td rowspan="(\d)+">|<td colspan="(\d)+">'
119119 span_iter = re .finditer (span_pattern , thead_part )
120120 span_list = [s .group () for s in span_iter ]
121121 has_span_in_head = True if len (span_list ) > 0 else False
0 commit comments