Skip to content

RuntimeError #27

@rrrruuuiii

Description

@rrrruuuiii

(rog) root@p-a9d07e735595-ackcs-00gjelfe:~/shared-nvme/reasoning-on-graphs# python src/qa_prediction/gen_rule_path.py --model_name RoG --model_path rmanluo/RoG -d {RoG-webqsp,RoG-cwq} --split test --n_beam 3
[2025-05-12 10:45:19,519] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)
/root/.conda/envs/rog/lib/python3.9/site-packages/deepspeed/runtime/zero/linear.py:49: FutureWarning: torch.cuda.amp.custom_fwd(args...) is deprecated. Please use torch.amp.custom_fwd(args..., device_type='cuda') instead.
def forward(ctx, input, weight, bias=None):
/root/.conda/envs/rog/lib/python3.9/site-packages/deepspeed/runtime/zero/linear.py:67: FutureWarning: torch.cuda.amp.custom_bwd(args...) is deprecated. Please use torch.amp.custom_bwd(args..., device_type='cuda') instead.
def backward(ctx, grad_output):
Traceback (most recent call last):
  File "/root/.conda/envs/rog/lib/python3.9/site-packages/transformers/utils/import_utils.py", line 1130, in _get_module
    return importlib.import_module("." + module_name, self.__name__)
  File "/root/.conda/envs/rog/lib/python3.9/importlib/__init__.py", line 127, in import_module
    return _bootstrap._gcd_import(name[level:], package, level)
  File "<frozen importlib._bootstrap>", line 1030, in _gcd_import
  File "<frozen importlib._bootstrap>", line 1007, in _find_and_load
  File "<frozen importlib._bootstrap>", line 986, in _find_and_load_unlocked
  File "<frozen importlib._bootstrap>", line 680, in _load_unlocked
  File "<frozen importlib._bootstrap_external>", line 850, in exec_module
  File "<frozen importlib._bootstrap>", line 228, in _call_with_frames_removed
  File "/root/.conda/envs/rog/lib/python3.9/site-packages/transformers/modeling_utils.py", line 38, in <module>
    from .deepspeed import deepspeed_config, is_deepspeed_zero3_enabled
  File "/root/.conda/envs/rog/lib/python3.9/site-packages/transformers/deepspeed.py", line 37, in <module>
    from accelerate.utils.deepspeed import HfDeepSpeedConfig as DeepSpeedConfig
  File "/root/.conda/envs/rog/lib/python3.9/site-packages/accelerate/__init__.py", line 3, in <module>
    from .accelerator import Accelerator
  File "/root/.conda/envs/rog/lib/python3.9/site-packages/accelerate/accelerator.py", line 35, in <module>
    from .checkpointing import load_accelerator_state, load_custom_state, save_accelerator_state, save_custom_state
  File "/root/.conda/envs/rog/lib/python3.9/site-packages/accelerate/checkpointing.py", line 24, in <module>
    from .utils import (
  File "/root/.conda/envs/rog/lib/python3.9/site-packages/accelerate/utils/__init__.py", line 136, in <module>
    from .launch import (
  File "/root/.conda/envs/rog/lib/python3.9/site-packages/accelerate/utils/launch.py", line 33, in <module>
    from ..utils.other import is_port_in_use, merge_dicts
  File "/root/.conda/envs/rog/lib/python3.9/site-packages/accelerate/utils/other.py", line 32, in <module>
    from deepspeed import DeepSpeedEngine
  File "/root/.conda/envs/rog/lib/python3.9/site-packages/deepspeed/__init__.py", line 22, in <module>
    from . import module_inject
  File "/root/.conda/envs/rog/lib/python3.9/site-packages/deepspeed/module_inject/__init__.py", line 6, in <module>
    from .replace_module import replace_transformer_layer, revert_transformer_layer, ReplaceWithTensorSlicing, GroupQuantizer, generic_injection
  File "/root/.conda/envs/rog/lib/python3.9/site-packages/deepspeed/module_inject/replace_module.py", line 568, in <module>
    from ..pipe import PipelineModule
  File "/root/.conda/envs/rog/lib/python3.9/site-packages/deepspeed/pipe/__init__.py", line 6, in <module>
    from ..runtime.pipe import PipelineModule, LayerSpec, TiedLayerSpec
  File "/root/.conda/envs/rog/lib/python3.9/site-packages/deepspeed/runtime/pipe/__init__.py", line 6, in <module>
    from .module import PipelineModule, LayerSpec, TiedLayerSpec
  File "/root/.conda/envs/rog/lib/python3.9/site-packages/deepspeed/runtime/pipe/module.py", line 19, in <module>
    from ..activation_checkpointing import checkpointing
  File "/root/.conda/envs/rog/lib/python3.9/site-packages/deepspeed/runtime/activation_checkpointing/checkpointing.py", line 25, in <module>
    from deepspeed.runtime.config import DeepSpeedConfig
  File "/root/.conda/envs/rog/lib/python3.9/site-packages/deepspeed/runtime/config.py", line 40, in <module>
    from ..elasticity import (
  File "/root/.conda/envs/rog/lib/python3.9/site-packages/deepspeed/elasticity/__init__.py", line 10, in <module>
    from .elastic_agent import DSElasticAgent
  File "/root/.conda/envs/rog/lib/python3.9/site-packages/deepspeed/elasticity/elastic_agent.py", line 9, in <module>
    from torch.distributed.elastic.agent.server.api import log, _get_socket_with_port
ImportError: cannot import name 'log' from 'torch.distributed.elastic.agent.server.api' (/root/.conda/envs/rog/lib/python3.9/site-packages/torch/distributed/elastic/agent/server/api.py)

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/root/shared-nvme/reasoning-on-graphs/src/qa_prediction/gen_rule_path.py", line 7, in <module>
    import utils
  File "/root/shared-nvme/reasoning-on-graphs/src/qa_prediction/../utils/__init__.py", line 3, in <module>
    from .training_utils import *
  File "/root/shared-nvme/reasoning-on-graphs/src/qa_prediction/../utils/training_utils.py", line 8, in <module>
    model: transformers.PreTrainedModel,
  File "/root/.conda/envs/rog/lib/python3.9/site-packages/transformers/utils/import_utils.py", line 1120, in __getattr__
    module = self._get_module(self._class_to_module[name])
  File "/root/.conda/envs/rog/lib/python3.9/site-packages/transformers/utils/import_utils.py", line 1132, in _get_module
    raise RuntimeError(
RuntimeError: Failed to import transformers.modeling_utils because of the following error (look up to see its traceback):
cannot import name 'log' from 'torch.distributed.elastic.agent.server.api' (/root/.conda/envs/rog/lib/python3.9/site-packages/torch/distributed/elastic/agent/server/api.py)

The error is shown above. According to an AI-generated explanation, the root cause is an incompatibility between the installed DeepSpeed and PyTorch versions, which makes the import of `log` from `torch.distributed.elastic.agent.server.api` fail. I installed the dependencies according to requirements.txt; what is going on?

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions