Skip to content

Commit ecb6173

Browse files
feat: add param read_nums
1 parent b5d026e commit ecb6173

File tree

1 file changed

+4
-3
lines changed

1 file changed

+4
-3
lines changed

graphgen/operators/read/read.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -53,7 +53,7 @@ def read(
53 53
working_dir: Optional[str] = "cache",
54 54
parallelism: int = 4,
55 55
recursive: bool = True,
56-
read_num: Optional[int] = None,
56+
read_nums: Optional[int] = None,
57 57
**reader_kwargs: Any,
58 58
) -> ray.data.Dataset:
59 59
"""
@@ -64,6 +64,7 @@ def read(
64 64
:param working_dir: Directory to cache intermediate files (PDF processing)
65 65
:param parallelism: Number of parallel workers
66 66
:param recursive: Whether to scan directories recursively
67+
:param read_nums: Limit the number of documents to read
67 68
:param reader_kwargs: Additional kwargs passed to readers
68 69
:return: Ray Dataset containing all documents
69 70
"""
@@ -121,8 +122,8 @@ def read(
121 122
}
122 123
)
123 124

124-
if read_num is not None:
125-
combined_ds = combined_ds.limit(read_num)
125+
if read_nums is not None:
126+
combined_ds = combined_ds.limit(read_nums)
126 127

127 128
logger.info("[READ] Successfully read files from %s", input_path)
128 129
return combined_ds

0 commit comments

Comments
 (0)