-
Notifications
You must be signed in to change notification settings - Fork 240
Description
Hello, I recently upgraded my GPU to an H100 and updated CUDA to version 12.2. I'm using PyTorch version 2.1. The setup for the irpe.py file completed successfully. However, when I try to run the training file, I encounter the following error:
Exception has occurred: ProcessRaisedException
-- Process 0 terminated with the following error:
Traceback (most recent call last):
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 75, in _wrap
fn(i, *args)
File "/mnt/data/hyounggyu/Mini-DeiT/main_pretrain_lsqslnet_imple_transformer_ptnopatch_MAWSh2_miniDeiTnorpetune_licls_lightoff_bp40_edf20_ho_tb_dist_hyper_wd025.py", line 105, in main
v_outputs = model(v_inputs)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/parallel/distributed.py", line 1593, in forward
else self._run_ddp_forward(*inputs, **kwargs)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/parallel/distributed.py", line 1411, in _run_ddp_forward
return self.module(*inputs, **kwargs) # type: ignore[index]
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/mnt/data/hyounggyu/Mini-DeiT/model_lsqslnet_implementation_transformer_ptnopatch_dropresv2_MAWSh2_seqMiniDeiT_licls.py", line 913, in forward
raw = self.ast_mdl_encoder(raw)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/mnt/data/hyounggyu/Mini-DeiT/model_lsqslnet_implementation_transformer_ptnopatch_dropresv2_MAWSh2_seqMiniDeiT_licls.py", line 525, in forward
x = blk(x)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/mnt/data/hyounggyu/Mini-DeiT/mini_vision_transformer.py", line 750, in forward
x = self.block(x)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/mnt/data/hyounggyu/Mini-DeiT/mini_vision_transformer.py", line 636, in forward
x = x + drop_path(self.attn(self.norm1(x)))
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/mnt/data/hyounggyu/Mini-DeiT/mini_vision_transformer.py", line 397, in forward
attn += self.rpe_k(q,1,29)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/mnt/data/hyounggyu/Mini-DeiT/mini_vision_transformer.py", line 29, in forward
return self.instances[r](*args, **kwargs)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/mnt/data/hyounggyu/Mini-DeiT/irpe.py", line 520, in forward
return self.forward_rpe_transpose(x, rp_bucket)
File "/mnt/data/hyounggyu/Mini-DeiT/irpe.py", line 644, in forward_rpe_transpose
return RPEIndexFunction.apply(lookup_table, rp_bucket)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/autograd/function.py", line 598, in apply
return super().apply(*args, **kwargs) # type: ignore[misc]
File "/mnt/data/hyounggyu/Mini-DeiT/rpe_ops/rpe_index.py", line 37, in forward
input.device.type == 'cpu' else rpe_index_cpp.forward_gpu
AttributeError: module 'rpe_index_cpp' has no attribute 'forward_gpu'
File "/mnt/data/hyounggyu/Mini-DeiT/main_pretrain_lsqslnet_imple_transformer_ptnopatch_MAWSh2_miniDeiTnorpetune_licls_lightoff_bp40_edf20_ho_tb_dist_hyper_wd025.py", line 373, in <module>
mp.spawn(main, args=(world_size, nprocs, args, cross_val_splits[fold], trial, fold), nprocs=nprocs)
torch.multiprocessing.spawn.ProcessRaisedException:
-- Process 0 terminated with the following error:
Traceback (most recent call last):
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 75, in _wrap
fn(i, *args)
File "/mnt/data/hyounggyu/Mini-DeiT/main_pretrain_lsqslnet_imple_transformer_ptnopatch_MAWSh2_miniDeiTnorpetune_licls_lightoff_bp40_edf20_ho_tb_dist_hyper_wd025.py", line 105, in main
v_outputs = model(v_inputs)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/parallel/distributed.py", line 1593, in forward
else self._run_ddp_forward(*inputs, **kwargs)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/parallel/distributed.py", line 1411, in _run_ddp_forward
return self.module(*inputs, **kwargs) # type: ignore[index]
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/mnt/data/hyounggyu/Mini-DeiT/model_lsqslnet_implementation_transformer_ptnopatch_dropresv2_MAWSh2_seqMiniDeiT_licls.py", line 913, in forward
raw = self.ast_mdl_encoder(raw)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/mnt/data/hyounggyu/Mini-DeiT/model_lsqslnet_implementation_transformer_ptnopatch_dropresv2_MAWSh2_seqMiniDeiT_licls.py", line 525, in forward
x = blk(x)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/mnt/data/hyounggyu/Mini-DeiT/mini_vision_transformer.py", line 750, in forward
x = self.block(x)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/mnt/data/hyounggyu/Mini-DeiT/mini_vision_transformer.py", line 636, in forward
x = x + drop_path(self.attn(self.norm1(x)))
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/mnt/data/hyounggyu/Mini-DeiT/mini_vision_transformer.py", line 397, in forward
attn += self.rpe_k(q,1,29)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/mnt/data/hyounggyu/Mini-DeiT/mini_vision_transformer.py", line 29, in forward
return self.instances[r](*args, **kwargs)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/mnt/data/hyounggyu/Mini-DeiT/irpe.py", line 520, in forward
return self.forward_rpe_transpose(x, rp_bucket)
File "/mnt/data/hyounggyu/Mini-DeiT/irpe.py", line 644, in forward_rpe_transpose
return RPEIndexFunction.apply(lookup_table, rp_bucket)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/autograd/function.py", line 598, in apply
return super().apply(*args, **kwargs) # type: ignore[misc]
File "/mnt/data/hyounggyu/Mini-DeiT/rpe_ops/rpe_index.py", line 37, in forward
input.device.type == 'cpu' else rpe_index_cpp.forward_gpu
AttributeError: module 'rpe_index_cpp' has no attribute 'forward_gpu'