-
Notifications
You must be signed in to change notification settings - Fork 5
Description
I'm hitting some weird issues in a couple of notebooks, but don't have the bandwidth now to dive into the details:
tutorials/roman_simulations/roman_hlss_number_density.md on CircleCI
⛔️ tutorials/roman_simulations/roman_hlss_number_density.md An exception occurred during code execution, halting further execution:
OSError Traceback (most recent call last)
Cell In[6], line 10
7 columns_to_keep = ['RA', 'DEC', 'redshift_observed', 'flux_Halpha6563']
8 columns_to_convert = ['redshift_observed', 'flux_Halpha6563']
---> 10 df = read_hdf5_to_pandas(file_path, columns_to_keep, columns_to_convert)
Cell In[5], line 71, in read_hdf5_to_pandas(file_path, columns_to_keep, columns_to_convert)
55 col_map = {
56 "RA": 0,
57 "DEC": 1,
(...) 67 "nodeIsIsolated": 11
68 }
70 # Open the HDF5 file and extract only the requested columns
📖 Built tutorials/roman_simulations/roman_hlss_number_density.md in 6.55 s.
---> 71 with h5py.File(file_path, "r") as file:
72 dataset = file["data"]
73 data = {}
File ~/project/.tox/py312-buildhtml/lib/python3.12/site-packages/h5py/_hl/files.py:564, in File.__init__(self, name, mode, driver, libver, userblock_size, swmr, rdcc_nslots, rdcc_nbytes, rdcc_w0, track_order, fs_strategy, fs_persist, fs_threshold, fs_page_size, page_buf_size, min_meta_keep, min_raw_keep, locking, alignment_threshold, alignment_interval, meta_block_size, **kwds)
555 fapl = make_fapl(driver, libver, rdcc_nslots, rdcc_nbytes, rdcc_w0,
556 locking, page_buf_size, min_meta_keep, min_raw_keep,
557 alignment_threshold=alignment_threshold,
558 alignment_interval=alignment_interval,
559 meta_block_size=meta_block_size,
560 **kwds)
561 fcpl = make_fcpl(track_order=track_order, fs_strategy=fs_strategy,
562 fs_persist=fs_persist, fs_threshold=fs_threshold,
563 fs_page_size=fs_page_size)
--> 564 fid = make_fid(name, mode, userblock_size, fapl, fcpl, swmr=swmr)
566 if isinstance(libver, tuple):
567 self._libver = libver
File ~/project/.tox/py312-buildhtml/lib/python3.12/site-packages/h5py/_hl/files.py:238, in make_fid(name, mode, userblock_size, fapl, fcpl, swmr)
236 if swmr and swmr_support:
237 flags |= h5f.ACC_SWMR_READ
--> 238 fid = h5f.open(name, flags, fapl=fapl)
239 elif mode == 'r+':
240 fid = h5f.open(name, h5f.ACC_RDWR, fapl=fapl)
File h5py/_objects.pyx:56, in h5py._objects.with_phil.wrapper()
File h5py/_objects.pyx:57, in h5py._objects.with_phil.wrapper()
File h5py/h5f.pyx:102, in h5py.h5f.open()
OSError: Unable to synchronously open file (truncated file: eof = 2678259712, sblock->base_addr = 0, stored_eof = 2820764576)
tutorials/parallelize/Parallelize_Convolution.md on GHA
⛔️ tutorials/parallelize/Parallelize_Convolution.md An exception occurred during code execution, halting further execution:
OutOfMemoryError Traceback (most recent call last)
Cell In[9], line 5
3 for _ in range(100):
4 image_id = ray.put(image)
----> 5 ray.get([fray.remote(image_id, filters[i]) for i in range(num_cpus)])
6 duration_ray = time.time() - start
7 print("Ray duration = {:.1f}, speedup = {:.2f}"
8 .format(duration_ray, duration_conv*num_cpus / duration_ray))
File ~/work/irsa-tutorials/irsa-tutorials/.tox/py312-buildhtml/lib/python3.12/site-packages/ray/_private/auto_init_hook.py:22, in wrap_auto_init.<locals>.auto_init_wrapper(*args, **kwargs)
19 @wraps(fn)
20 def auto_init_wrapper(*args, **kwargs):
21 auto_init_ray()
---> 22 return fn(*args, **kwargs)
File ~/work/irsa-tutorials/irsa-tutorials/.tox/py312-buildhtml/lib/python3.12/site-packages/ray/_private/client_mode_hook.py:104, in client_mode_hook.<locals>.wrapper(*args, **kwargs)
102 if func.__name__ != "init" or is_client_mode_enabled_by_default:
103 return getattr(ray, func.__name__)(*args, **kwargs)
--> 104 return func(*args, **kwargs)
File ~/work/irsa-tutorials/irsa-tutorials/.tox/py312-buildhtml/lib/python3.12/site-packages/ray/_private/worker.py:2882, in get(object_refs, timeout)
2876 if not isinstance(object_refs, list):
2877 raise ValueError(
2878 f"Invalid type of object refs, {type(object_refs)}, is given. "
2879 "'object_refs' must either be an ObjectRef or a list of ObjectRefs. "
2880 )
-> 2882 values, debugger_breakpoint = worker.get_objects(object_refs, timeout=timeout)
2883 for i, value in enumerate(values):
2884 if isinstance(value, RayError):
File ~/work/irsa-tutorials/irsa-tutorials/.tox/py312-buildhtml/lib/python3.12/site-packages/ray/_private/worker.py:970, in Worker.get_objects(self, object_refs, timeout, return_exceptions, skip_deserialization)
968 raise value.as_instanceof_cause()
969 else:
--> 970 raise value
972 return values, debugger_breakpoint
OutOfMemoryError: Task was killed due to the node running low on memory.
Memory on the node (IP: 10.1.0.226, ID: 6ad3f61bc4975d1d3694b7d0c40a638987a941ce67bb5a930a90c8e3) where the task (task ID: 0b072c6b5c6a237cf75780c55f388a7ede7b40ce01000000, name=fray, pid=2961, memory used=0.08GB) was running was 15.29GB / 15.62GB (0.97866), which exceeds the memory usage threshold of 0.95. Ray killed this worker (ID: 6996df0623c890fd8a43a95b707ab1d10a7b80d1aa161054e80ce114) because it was the most recently scheduled task; to see more information about memory usage on this node, use `ray logs raylet.out -ip 10.1.0.226`. To see the logs of the worker, use `ray logs worker-6996df0623c890fd8a43a95b707ab1d10a7b80d1aa161054e80ce114*out -ip 10.1.0.226`. Top 10 memory users:
PID MEM(GB) COMMAND