Skip to content

Commit 76d3bee

Browse files
Red-Eyed, pre-commit-ci[bot], Copilot, bhimrazy
authored
Remove unnecessary fixed sleep by adding predicate-based path check (Lightning-AI#700)
* Remove unnecessary fixed sleep by adding predicate-based path check
* [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci
* Update dataset_utilities.py fixed checks
* wip
* Update src/litdata/utilities/dataset_utilities.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
* fix:last commit
---------
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: bhimrazy <bhimrajyadav977@gmail.com>
1 parent 8ab1975 commit 76d3bee

File tree

1 file changed

+25
-3
lines changed

1 file changed

+25
-3
lines changed

src/litdata/utilities/dataset_utilities.py

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import shutil
66
import tempfile
77
import time
8-
from typing import Any, Optional
8+
from typing import Any, Callable, Optional
99

1010
import numpy as np
1111

@@ -16,6 +16,27 @@
1616
from litdata.utilities.subsample import shuffle_lists_together, subsample_filenames_and_roi
1717

1818

19+
def wait_for_predicate(
    predicate: Callable[[], bool],
    timeout: float,
) -> bool:
    """Poll ``predicate`` until it returns a truthy value or ``timeout`` elapses.

    The predicate is always evaluated at least once, so an already-satisfied
    condition succeeds even with a zero or negative timeout. It is also
    evaluated one final time once the deadline is reached, so a condition that
    becomes true during the last sleep is not missed.

    Args:
        predicate: A zero-argument callable returning the condition to check.
        timeout: Maximum time (in seconds) to keep polling.

    Returns:
        True if the predicate returned a truthy value before the wait ended,
        else False.

    """
    poll_interval = 0.01
    # Use a monotonic clock so system clock adjustments cannot shorten or
    # extend the wait.
    deadline = time.monotonic() + timeout
    while True:
        if predicate():
            return True
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            return False
        # Never sleep past the deadline; the loop re-checks the predicate after waking.
        time.sleep(min(poll_interval, remaining))
38+
39+
1940
def subsample_streaming_dataset(
2041
input_dir: Dir,
2142
cache_dir: Optional[Dir] = None,
@@ -66,9 +87,10 @@ def subsample_streaming_dataset(
6687
downloader = get_downloader(input_dir.url, input_dir.path, [], storage_options, session_options)
6788
downloader.download_file(os.path.join(input_dir.url, _INDEX_FILENAME), cache_index_filepath)
6889

69-
time.sleep(0.5) # Give some time for the file to be available
90+
def path_exists(p: str) -> bool:
91+
return wait_for_predicate(lambda: os.path.exists(p), timeout=0.5)
7092

71-
if not os.path.exists(input_dir.path):
93+
if not path_exists(input_dir.path):
7294
raise FileNotFoundError(f"The provided dataset path `{input_dir.path}` does not exist.")
7395

7496
if os.path.exists(os.path.join(input_dir.path, _INDEX_FILENAME)):

0 commit comments

Comments
 (0)