Skip to content

Commit fc6768b

Browse files
jewettaijfcyaugenst-flex
authored and committed
added "replace_existing" arg to Batch.load(), Batch.download(), and BatchData.load(). Moved Batch.download() out of Batch.run() and into Batch.load(). BatchData.load_sim_data() invokes web.load() with replace_existing=True. (Previously it was False.)
1 parent 60f9714 commit fc6768b

File tree

3 files changed

+40
-17
lines changed

3 files changed

+40
-17
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10+
### Changed
11+
- By default, batch downloads will skip files that already exist locally. To force re-downloading and replace existing files, pass the `replace_existing=True` argument to `Batch.load()`, `Batch.download()`, or `BatchData.load()`.
12+
- The `BatchData.load_sim_data()` function now overwrites any previously downloaded simulation files (instead of skipping them).
13+
1014
### Fixed
1115
- Giving opposite boundaries different names no longer causes a symmetry validator failure.
1216

tidy3d/web/api/container.py

Lines changed: 35 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -431,12 +431,7 @@ def load_sim_data(self, task_name: str) -> SimulationDataType:
431431
task_id = self.task_ids[task_name]
432432
web.get_info(task_id)
433433

434-
return web.load(
435-
task_id=task_id,
436-
path=task_data_path,
437-
replace_existing=False,
438-
verbose=False,
439-
)
434+
return web.load(task_id=task_id, path=task_data_path, verbose=False)
440435

441436
def __getitem__(self, task_name: TaskName) -> SimulationDataType:
442437
"""Get the simulation data object for a given ``task_name``."""
@@ -451,14 +446,16 @@ def __len__(self):
451446
return len(self.task_paths)
452447

453448
@classmethod
454-
def load(cls, path_dir: str = DEFAULT_DATA_DIR) -> BatchData:
449+
def load(cls, path_dir: str = DEFAULT_DATA_DIR, replace_existing: bool = False) -> BatchData:
455450
"""Load :class:`Batch` from file, download results, and load them.
456451
457452
Parameters
458453
----------
459454
path_dir : str = './'
460455
Base directory where data will be downloaded, by default current working directory.
461456
A `batch.hdf5` file must be present in the directory.
457+
replace_existing : bool = False
458+
Downloads the data even if path exists (overwriting the existing).
462459
463460
Returns
464461
------
@@ -469,7 +466,7 @@ def load(cls, path_dir: str = DEFAULT_DATA_DIR) -> BatchData:
469466

470467
batch_file = Batch._batch_path(path_dir=path_dir)
471468
batch = Batch.from_file(batch_file)
472-
return batch.load(path_dir=path_dir)
469+
return batch.load(path_dir=path_dir, replace_existing=replace_existing)
473470

474471

475472
class Batch(WebContainer):
@@ -606,7 +603,6 @@ def run(self, path_dir: str = DEFAULT_DATA_DIR) -> BatchData:
606603
self.upload()
607604
self.start()
608605
self.monitor()
609-
self.download(path_dir=path_dir)
610606
return self.load(path_dir=path_dir)
611607

612608
@cached_property
@@ -900,13 +896,15 @@ def _batch_path(path_dir: str = DEFAULT_DATA_DIR):
900896
"""
901897
return os.path.join(path_dir, "batch.hdf5")
902898

903-
def download(self, path_dir: str = DEFAULT_DATA_DIR) -> None:
899+
def download(self, path_dir: str = DEFAULT_DATA_DIR, replace_existing: bool = False) -> None:
904900
"""Download results of each task.
905901
906902
Parameters
907903
----------
908904
path_dir : str = './'
909905
Base directory where data will be downloaded, by default the current working directory.
906+
replace_existing : bool = False
907+
Downloads the data even if path exists (overwriting the existing).
910908
911909
Note
912910
----
@@ -919,17 +917,36 @@ def download(self, path_dir: str = DEFAULT_DATA_DIR) -> None:
919917
self._check_path_dir(path_dir=path_dir)
920918
self.to_file(self._batch_path(path_dir=path_dir))
921919

920+
num_existing = 0
921+
for _, job in self.jobs.items():
922+
job_path_str = self._job_data_path(task_id=job.task_id, path_dir=path_dir)
923+
if os.path.exists(job_path_str):
924+
num_existing += 1
925+
if num_existing > 0:
926+
files_plural = "files have" if num_existing > 1 else "file has"
927+
log.warning(
928+
f"{num_existing} {files_plural} already been downloaded "
929+
f"and will be skipped. To forcibly overwrite existing files, invoke "
930+
"the load or download function with `replace_existing=True`.",
931+
log_once=True,
932+
)
933+
922934
with ThreadPoolExecutor(max_workers=self.num_workers) as executor:
923935
fns = []
924936
for task_name, job in self.jobs.items():
925-
job_path = self._job_data_path(task_id=job.task_id, path_dir=path_dir)
926-
937+
job_path_str = self._job_data_path(task_id=job.task_id, path_dir=path_dir)
938+
if os.path.exists(job_path_str):
939+
if replace_existing:
940+
log.info(f"File '{job_path_str}' already exists. Overwriting.")
941+
else:
942+
log.info(f"File '{job_path_str}' already exists. Skipping.")
943+
continue
927944
if "error" in job.status:
928945
log.warning(f"Not downloading '{task_name}' as the task errored.")
929946
continue
930947

931-
def fn(job=job, job_path=job_path) -> None:
932-
return job.download(path=job_path)
948+
def fn(job=job, job_path_str=job_path_str) -> None:
949+
return job.download(path=job_path_str)
933950

934951
fns.append(fn)
935952

@@ -951,13 +968,15 @@ def fn(job=job, job_path=job_path) -> None:
951968
completed += 1
952969
progress.update(pbar, completed=completed)
953970

954-
def load(self, path_dir: str = DEFAULT_DATA_DIR) -> BatchData:
971+
def load(self, path_dir: str = DEFAULT_DATA_DIR, replace_existing: bool = False) -> BatchData:
955972
"""Download results and load them into :class:`.BatchData` object.
956973
957974
Parameters
958975
----------
959976
path_dir : str = './'
960977
Base directory where data will be downloaded, by default current working directory.
978+
replace_existing : bool = False
979+
Downloads the data even if path exists (overwriting the existing).
961980
962981
Returns
963982
------
@@ -969,7 +988,7 @@ def load(self, path_dir: str = DEFAULT_DATA_DIR) -> BatchData:
969988
allowing one to load this :class:`Batch` later using ``batch = Batch.from_file()``.
970989
"""
971990
self._check_path_dir(path_dir=path_dir)
972-
self.to_file(self._batch_path(path_dir=path_dir))
991+
self.download(path_dir=path_dir, replace_existing=replace_existing)
973992

974993
if self.jobs is None:
975994
raise DataError("Can't load batch results, hasn't been uploaded.")

tidy3d/web/api/webapi.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -806,7 +806,7 @@ def load(
806806
Unique identifier of task on server. Returned by :meth:`upload`.
807807
path : str
808808
Download path to .hdf5 data file (including filename).
809-
replace_existing: bool = True
809+
replace_existing : bool = True
810810
Downloads the data even if path exists (overwriting the existing).
811811
verbose : bool = True
812812
If ``True``, will print progressbars and status, otherwise, will run silently.

0 commit comments

Comments
 (0)