Skip to content

Compatibility Issue with H100 GPU, CUDA 12.2, and PyTorch 2.1 - AttributeError: module 'rpe_index_cpp' has no attribute 'forward_gpu' #240

@gudrb

Description

@gudrb

Hello, I recently upgraded my GPU to an H100 and updated CUDA to version 12.2, and I am using PyTorch 2.1. The setup step for irpe.py completed successfully. However, when I run the training script, I get the following error:

Exception has occurred: ProcessRaisedException

-- Process 0 terminated with the following error:
Traceback (most recent call last):
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 75, in _wrap
fn(i, *args)
File "/mnt/data/hyounggyu/Mini-DeiT/main_pretrain_lsqslnet_imple_transformer_ptnopatch_MAWSh2_miniDeiTnorpetune_licls_lightoff_bp40_edf20_ho_tb_dist_hyper_wd025.py", line 105, in main
v_outputs = model(v_inputs)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/parallel/distributed.py", line 1593, in forward
else self._run_ddp_forward(*inputs, **kwargs)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/parallel/distributed.py", line 1411, in _run_ddp_forward
return self.module(*inputs, **kwargs) # type: ignore[index]
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/mnt/data/hyounggyu/Mini-DeiT/model_lsqslnet_implementation_transformer_ptnopatch_dropresv2_MAWSh2_seqMiniDeiT_licls.py", line 913, in forward
raw = self.ast_mdl_encoder(raw)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/mnt/data/hyounggyu/Mini-DeiT/model_lsqslnet_implementation_transformer_ptnopatch_dropresv2_MAWSh2_seqMiniDeiT_licls.py", line 525, in forward
x = blk(x)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/mnt/data/hyounggyu/Mini-DeiT/mini_vision_transformer.py", line 750, in forward
x = self.block(x)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/mnt/data/hyounggyu/Mini-DeiT/mini_vision_transformer.py", line 636, in forward
x = x + drop_path(self.attn(self.norm1(x)))
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/mnt/data/hyounggyu/Mini-DeiT/mini_vision_transformer.py", line 397, in forward
attn += self.rpe_k(q,1,29)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/mnt/data/hyounggyu/Mini-DeiT/mini_vision_transformer.py", line 29, in forward
return self.instances[r](*args, **kwargs)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/mnt/data/hyounggyu/Mini-DeiT/irpe.py", line 520, in forward
return self.forward_rpe_transpose(x, rp_bucket)
File "/mnt/data/hyounggyu/Mini-DeiT/irpe.py", line 644, in forward_rpe_transpose
return RPEIndexFunction.apply(lookup_table, rp_bucket)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/autograd/function.py", line 598, in apply
return super().apply(*args, **kwargs) # type: ignore[misc]
File "/mnt/data/hyounggyu/Mini-DeiT/rpe_ops/rpe_index.py", line 37, in forward
input.device.type == 'cpu' else rpe_index_cpp.forward_gpu
AttributeError: module 'rpe_index_cpp' has no attribute 'forward_gpu'
File "/mnt/data/hyounggyu/Mini-DeiT/main_pretrain_lsqslnet_imple_transformer_ptnopatch_MAWSh2_miniDeiTnorpetune_licls_lightoff_bp40_edf20_ho_tb_dist_hyper_wd025.py", line 373, in <module>
mp.spawn(main, args=(world_size, nprocs, args, cross_val_splits[fold], trial, fold), nprocs=nprocs)
torch.multiprocessing.spawn.ProcessRaisedException:

-- Process 0 terminated with the following error:
Traceback (most recent call last):
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 75, in _wrap
fn(i, *args)
File "/mnt/data/hyounggyu/Mini-DeiT/main_pretrain_lsqslnet_imple_transformer_ptnopatch_MAWSh2_miniDeiTnorpetune_licls_lightoff_bp40_edf20_ho_tb_dist_hyper_wd025.py", line 105, in main
v_outputs = model(v_inputs)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/parallel/distributed.py", line 1593, in forward
else self._run_ddp_forward(*inputs, **kwargs)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/parallel/distributed.py", line 1411, in _run_ddp_forward
return self.module(*inputs, **kwargs) # type: ignore[index]
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/mnt/data/hyounggyu/Mini-DeiT/model_lsqslnet_implementation_transformer_ptnopatch_dropresv2_MAWSh2_seqMiniDeiT_licls.py", line 913, in forward
raw = self.ast_mdl_encoder(raw)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/mnt/data/hyounggyu/Mini-DeiT/model_lsqslnet_implementation_transformer_ptnopatch_dropresv2_MAWSh2_seqMiniDeiT_licls.py", line 525, in forward
x = blk(x)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/mnt/data/hyounggyu/Mini-DeiT/mini_vision_transformer.py", line 750, in forward
x = self.block(x)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/mnt/data/hyounggyu/Mini-DeiT/mini_vision_transformer.py", line 636, in forward
x = x + drop_path(self.attn(self.norm1(x)))
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/mnt/data/hyounggyu/Mini-DeiT/mini_vision_transformer.py", line 397, in forward
attn += self.rpe_k(q,1,29)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/mnt/data/hyounggyu/Mini-DeiT/mini_vision_transformer.py", line 29, in forward
return self.instances[r](*args, **kwargs)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/mnt/data/hyounggyu/Mini-DeiT/irpe.py", line 520, in forward
return self.forward_rpe_transpose(x, rp_bucket)
File "/mnt/data/hyounggyu/Mini-DeiT/irpe.py", line 644, in forward_rpe_transpose
return RPEIndexFunction.apply(lookup_table, rp_bucket)
File "/mnt/data/hyounggyu/anaconda3/envs/tensorflow3v4/lib/python3.8/site-packages/torch/autograd/function.py", line 598, in apply
return super().apply(*args, **kwargs) # type: ignore[misc]
File "/mnt/data/hyounggyu/Mini-DeiT/rpe_ops/rpe_index.py", line 37, in forward
input.device.type == 'cpu' else rpe_index_cpp.forward_gpu
AttributeError: module 'rpe_index_cpp' has no attribute 'forward_gpu'

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions