1515from common .handle .base_split_handle import BaseSplitHandle
1616from common .utils .split_model import SplitModel
1717
# Default heading patterns used to split a text document into sections,
# ordered from level-1 ("# ") to level-6 ("###### ") Markdown-style titles.
#
# Every pattern only matches at the very start of the text or directly after
# a newline, and consumes the rest of that line. Levels 2-6 additionally
# require that the run of '#' is exactly the expected length (no '#'
# immediately before or after the marker).
default_pattern_list = [
    # Level 1: the negative lookahead keeps a source-encoding declaration
    # such as "# -*- coding: utf-8 -*-" from being treated as a title.
    re.compile(r'(?<=^)# (?!-\*- coding:).*|(?<=\n)# (?!-\*- coding:).*'),
    re.compile(r'(?<=\n)(?<!#)## (?!#).*|(?<=^)(?<!#)## (?!#).*'),
    re.compile(r"(?<=\n)(?<!#)### (?!#).*|(?<=^)(?<!#)### (?!#).*"),
    re.compile(r"(?<=\n)(?<!#)#### (?!#).*|(?<=^)(?<!#)#### (?!#).*"),
    re.compile(r"(?<=\n)(?<!#)##### (?!#).*|(?<=^)(?<!#)##### (?!#).*"),
    re.compile(r"(?<=\n)(?<!#)###### (?!#).*|(?<=^)(?<!#)###### (?!#).*"),
]
2426
2527
2628class TextSplitHandle (BaseSplitHandle ):
@@ -45,11 +47,8 @@ def handle(self, file, pattern_list: List, with_filter: bool, limit: int, get_bu
4547 try :
4648 content = buffer .decode (detect (buffer )['encoding' ])
4749 except BaseException as e :
48- return {'name' : file .name ,
49- 'content' : []}
50- return {'name' : file .name ,
51- 'content' : split_model .parse (content )
52- }
50+ return {'name' : file .name , 'content' : []}
51+ return {'name' : file .name , 'content' : split_model .parse (content )}
5352
5453 def get_content (self , file , save_image ):
5554 buffer = file .read ()
0 commit comments