Skip to content

Commit 91d12a0

Browse files
cauyxy authored and lexierule committed
bugfix: skip writing index.json if no data was written. (#19439)
(cherry picked from commit 47c8f4c)
1 parent 16bd042 commit 91d12a0

File tree

2 files changed

+5
-1
lines changed

2 files changed

+5
-1
lines changed

src/lightning/data/streaming/writer.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -322,6 +322,8 @@ def write_chunk_to_file(
322322

323323
def write_chunks_index(self) -> str:
324324
"""Write the chunks index to a JSON file."""
325+
if len(self._chunks_info) == 0:
326+
return ""
325327
filepath = os.path.join(self._cache_dir, f"{self.rank}.{_INDEX_FILENAME}")
326328
config = self.get_config()
327329
with open(filepath, "w") as out:
@@ -393,7 +395,6 @@ def _merge_no_wait(self, node_rank: Optional[int] = None) -> None:
393395
config = data["config"]
394396

395397
elif config != data["config"]:
396-
breakpoint()
397398
raise Exception("The config isn't consistent between chunks. This shouldn't have happened.")
398399

399400
chunks_info.extend(data["chunks"])

tests/tests_data/processing/test_data_processor.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -564,6 +564,9 @@ def test_data_processsor_nlp(tmpdir, monkeypatch):
564564
data_processor = DataProcessor(input_dir=str(tmpdir), num_workers=1, num_downloaders=1)
565565
data_processor.run(TextTokenizeRecipe(chunk_size=1024 * 11))
566566

567+
data_processor_more_wokers = DataProcessor(input_dir=str(tmpdir), num_workers=2, num_downloaders=1)
568+
data_processor_more_wokers.run(TextTokenizeRecipe(chunk_size=1024 * 11))
569+
567570

568571
class ImageResizeRecipe(DataTransformRecipe):
569572
def prepare_structure(self, input_dir: str):

0 commit comments

Comments
 (0)