File tree Expand file tree Collapse file tree 3 files changed +0
-11
lines changed
Expand file tree Collapse file tree 3 files changed +0
-11
lines changed Original file line number Diff line number Diff line change @@ -24,7 +24,6 @@ class InputArgs(BaseModel):
2424 # Resume settings
2525 start_index : int = 0
2626 end_index : int = - 1
27- interval_size : int = 1000
2827
2928 # Concurrent settings
3029 max_workers : int = 1
@@ -89,10 +88,6 @@ def check_args(self):
8988 if self .end_index >= 0 and self .end_index < self .start_index :
9089 raise ValueError ("if end_index is non negative, end_index must be greater than start_index" )
9190
92- # check interval size
93- if self .interval_size <= 0 :
94- raise ValueError ("interval_size must be positive." )
95-
9691 # check max workers
9792 if self .max_workers <= 0 :
9893 raise ValueError ("max_workers must be a positive integer." )
Original file line number Diff line number Diff line change @@ -30,8 +30,6 @@ def parse_args():
3030 default = None , help = "The number of data start to check." )
3131 parser .add_argument ("--end_index" , type = int ,
3232 default = None , help = "The number of data end to check." )
33- parser .add_argument ("--interval_size" , type = int ,
34- default = None , help = "The number of size to save while checking." )
3533 parser .add_argument ("--max_workers" , type = int ,
3634 default = None , help = "The number of max workers to concurrent check. " )
3735 parser .add_argument ("--batch_size" , type = int ,
@@ -112,8 +110,6 @@ def parse_args():
112110 input_data ['start_index' ] = args .start_index
113111 if args .end_index :
114112 input_data ['end_index' ] = args .end_index
115- if args .interval_size :
116- input_data ['interval_size' ] = args .interval_size
117113 if args .max_workers :
118114 input_data ['max_workers' ] = args .max_workers
119115 if args .batch_size :
Original file line number Diff line number Diff line change 1717| --save_raw | bool | False | No | whether to save raw data. |
1818| --start_index | int | 0 | No | the index of the data item at which checking starts. |
1919| --end_index | int | -1 | No | the index of the data item at which checking ends. If it is negative, all data from start_index to the end is included. |
20- | --interval_size | int | 1000 | No | the number of size to save while checking. |
2120| --max_workers | int | 1 | No | the maximum number of workers used for concurrent checking. |
2221| --batch_size | int | 1 | No | the maximum number of data items per batch for concurrent checking. |
2322| --dataset | str | "hugging_face" | Yes | dataset type, in [ 'hugging_face', 'local'] |
4645| save_raw | bool | False | No | whether to save raw data. |
4746| start_index | int | 0 | No | the index of the data item at which checking starts. |
4847| end_index | int | -1 | No | the index of the data item at which checking ends. If it is negative, all data from start_index to the end is included. |
49- | interval_size | int | 1000 | No | the number of size to save while checking. |
5048| max_workers | int | 1 | No | the maximum number of workers used for concurrent checking. |
5149| batch_size | int | 1 | No | the maximum number of data items per batch for concurrent checking. |
5250| dataset | str | "hugging_face" | Yes | dataset type, in [ 'hugging_face', 'local'] |
You can’t perform that action at this time.
0 commit comments