File tree Expand file tree Collapse file tree 1 file changed +5
-0
lines changed
Expand file tree Collapse file tree 1 file changed +5
-0
lines changed Original file line number Diff line number Diff line change @@ -53,6 +53,7 @@ def read(
5353 working_dir : Optional [str ] = "cache" ,
5454 parallelism : int = 4 ,
5555 recursive : bool = True ,
56+ read_nums : Optional [int ] = None ,
5657 ** reader_kwargs : Any ,
5758) -> ray .data .Dataset :
5859 """
@@ -63,6 +64,7 @@ def read(
6364 :param working_dir: Directory to cache intermediate files (PDF processing)
6465 :param parallelism: Number of parallel workers
6566 :param recursive: Whether to scan directories recursively
67+ :param read_nums: Maximum number of documents to return; None (the default) applies no limit
6668 :param reader_kwargs: Additional kwargs passed to readers
6769 :return: Ray Dataset containing all documents
6870 """
@@ -120,6 +122,9 @@ def read(
120122 }
121123 )
122124
125+ if read_nums is not None :
126+ combined_ds = combined_ds .limit (read_nums )
127+
123128 logger .info ("[READ] Successfully read files from %s" , input_path )
124129 return combined_ds
125130
You can’t perform that action at this time.
0 commit comments