Replies: 1 comment
- Try pulling the latest code and retrying.
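For context, the `srcIndex < srcSelectDimSize` assertion in the traceback below almost always means a token id is out of range for the embedding table, e.g. a fine-tuned checkpoint whose tokenizer vocabulary and `word_embeddings` rows no longer match. A minimal sanity check, assuming a standard `transformers` load of the checkpoint (the model directory and test prompt are placeholders):

```python
from transformers import AutoModel, AutoTokenizer

model_dir = "THUDM/chatglm3-6b"  # placeholder: point at your fine-tuned checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
model = AutoModel.from_pretrained(model_dir, trust_remote_code=True)

# Compare the embedding table size against the ids the tokenizer actually emits.
vocab_rows = model.get_input_embeddings().weight.shape[0]
ids = tokenizer("hello")["input_ids"]  # placeholder prompt
bad = [i for i in ids if not 0 <= i < vocab_rows]
print(f"embedding rows: {vocab_rows}, max id: {max(ids)}, out-of-range ids: {bad}")
```

If `bad` is non-empty, the tokenizer and model weights are out of sync, which matches the embedding lookup failing in the traceback below.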
-
../aten/src/ATen/native/cuda/Indexing.cu:1290: indexSelectLargeIndex: block: [206,0,0], thread: [61,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ /content/drive/MyDrive/data/finetune_demo/inference_hf.py:51 in main │
│ │
│ 48 │ │ prompt: Annotated[str, typer.Option(help='')], │
│ 49 ): │
│ 50 │ model, tokenizer = load_model_and_tokenizer(model_dir) │
│ ❱ 51 │ response, _ = model.chat(tokenizer, prompt) │
│ 52 │ print(response) │
│ 53 │
│ 54 │
│ │
│ /usr/local/lib/python3.10/dist-packages/torch/utils/_contextlib.py:115 in decorate_context │
│ │
│ 112 │ @functools.wraps(func) │
│ 113 │ def decorate_context(*args, **kwargs): │
│ 114 │ │ with ctx_factory(): │
│ ❱ 115 │ │ │ return func(*args, **kwargs) │
│ 116 │ │
│ 117 │ return decorate_context │
│ 118 │
│ │
│ /root/.cache/huggingface/modules/transformers_modules/THUDM/chatglm3-6b/a5ba5501eb873d40d48bd098 │
│ 3bd2a8dd006bb838/modeling_chatglm.py:1042 in chat │
│ │
│ 1039 │ │ inputs = inputs.to(self.device) │
│ 1040 │ │ eos_token_id = [tokenizer.eos_token_id, tokenizer.get_command("<|user|>"), │
│ 1041 │ │ │ │ │ │ tokenizer.get_command("<|observation|>")] │
│ ❱ 1042 │ │ outputs = self.generate(**inputs, **gen_kwargs, eos_token_id=eos_token_id) │
│ 1043 │ │ outputs = outputs.tolist()[0][len(inputs["input_ids"][0]):-1] │
│ 1044 │ │ response = tokenizer.decode(outputs) │
│ 1045 │ │ history.append({"role": role, "content": query}) │
│ │
│ /usr/local/lib/python3.10/dist-packages/torch/utils/_contextlib.py:115 in decorate_context │
│ │
│ 112 │ @functools.wraps(func) │
│ 113 │ def decorate_context(*args, **kwargs): │
│ 114 │ │ with ctx_factory(): │
│ ❱ 115 │ │ │ return func(*args, **kwargs) │
│ 116 │ │
│ 117 │ return decorate_context │
│ 118 │
│ │
│ /usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py:1592 in generate │
│ │
│ 1589 │ │ │ ) │
│ 1590 │ │ │ │
│ 1591 │ │ │ # 13. run sample │
│ ❱ 1592 │ │ │ return self.sample( │
│ 1593 │ │ │ │ input_ids, │
│ 1594 │ │ │ │ logits_processor=prepared_logits_processor, │
│ 1595 │ │ │ │ logits_warper=logits_warper, │
│ │
│ /usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py:2696 in sample │
│ │
│ 2693 │ │ │ model_inputs = self.prepare_inputs_for_generation(input_ids, **model_kwargs) │
│ 2694 │ │ │ │
│ 2695 │ │ │ # forward pass to get next token │
│ ❱ 2696 │ │ │ outputs = self( │
│ 2697 │ │ │ │ **model_inputs, │
│ 2698 │ │ │ │ return_dict=True, │
│ 2699 │ │ │ │ output_attentions=output_attentions, │
│ │
│ /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1511 in _wrapped_call_impl │
│ │
│ 1508 │ │ if self._compiled_call_impl is not None: │
│ 1509 │ │ │ return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc] │
│ 1510 │ │ else: │
│ ❱ 1511 │ │ │ return self._call_impl(*args, **kwargs) │
│ 1512 │ │
│ 1513 │ def _call_impl(self, *args, **kwargs): │
│ 1514 │ │ forward_call = (self._slow_forward if torch._C._get_tracing_state() else self.fo │
│ │
│ /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1520 in _call_impl │
│ │
│ 1517 │ │ if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks │
│ 1518 │ │ │ │ or _global_backward_pre_hooks or _global_backward_hooks │
│ 1519 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │
│ ❱ 1520 │ │ │ return forward_call(*args, **kwargs) │
│ 1521 │ │ │
│ 1522 │ │ try: │
│ 1523 │ │ │ result = None │
│ │
│ /root/.cache/huggingface/modules/transformers_modules/THUDM/chatglm3-6b/a5ba5501eb873d40d48bd098 │
│ 3bd2a8dd006bb838/modeling_chatglm.py:941 in forward │
│ │
│ 938 │ │ use_cache = use_cache if use_cache is not None else self.config.use_cache │
│ 939 │ │ return_dict = return_dict if return_dict is not None else self.config.use_return │
│ 940 │ │ │
│ ❱ 941 │ │ transformer_outputs = self.transformer( │
│ 942 │ │ │ input_ids=input_ids, │
│ 943 │ │ │ position_ids=position_ids, │
│ 944 │ │ │ attention_mask=attention_mask, │
│ │
│ /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1511 in _wrapped_call_impl │
│ │
│ 1508 │ │ if self._compiled_call_impl is not None: │
│ 1509 │ │ │ return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc] │
│ 1510 │ │ else: │
│ ❱ 1511 │ │ │ return self._call_impl(*args, **kwargs) │
│ 1512 │ │
│ 1513 │ def _call_impl(self, *args, **kwargs): │
│ 1514 │ │ forward_call = (self._slow_forward if torch._C._get_tracing_state() else self.fo │
│ │
│ /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1520 in _call_impl │
│ │
│ 1517 │ │ if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks │
│ 1518 │ │ │ │ or _global_backward_pre_hooks or _global_backward_hooks │
│ 1519 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │
│ ❱ 1520 │ │ │ return forward_call(*args, **kwargs) │
│ 1521 │ │ │
│ 1522 │ │ try: │
│ 1523 │ │ │ result = None │
│ │
│ /root/.cache/huggingface/modules/transformers_modules/THUDM/chatglm3-6b/a5ba5501eb873d40d48bd098 │
│ 3bd2a8dd006bb838/modeling_chatglm.py:811 in forward │
│ │
│ 808 │ │ batch_size, seq_length = input_ids.shape │
│ 809 │ │ │
│ 810 │ │ if inputs_embeds is None: │
│ ❱ 811 │ │ │ inputs_embeds = self.embedding(input_ids) │
│ 812 │ │ │
│ 813 │ │ if self.pre_seq_len is not None: │
│ 814 │ │ │ if past_key_values is None: │
│ │
│ /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1511 in _wrapped_call_impl │
│ │
│ 1508 │ │ if self._compiled_call_impl is not None: │
│ 1509 │ │ │ return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc] │
│ 1510 │ │ else: │
│ ❱ 1511 │ │ │ return self._call_impl(*args, **kwargs) │
│ 1512 │ │
│ 1513 │ def _call_impl(self, *args, **kwargs): │
│ 1514 │ │ forward_call = (self._slow_forward if torch._C._get_tracing_state() else self.fo │
│ │
│ /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1520 in _call_impl │
│ │
│ 1517 │ │ if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks │
│ 1518 │ │ │ │ or _global_backward_pre_hooks or _global_backward_hooks │
│ 1519 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │
│ ❱ 1520 │ │ │ return forward_call(*args, **kwargs) │
│ 1521 │ │ │
│ 1522 │ │ try: │
│ 1523 │ │ │ result = None │
│ │
│ /root/.cache/huggingface/modules/transformers_modules/THUDM/chatglm3-6b/a5ba5501eb873d40d48bd098 │
│ 3bd2a8dd006bb838/modeling_chatglm.py:724 in forward │
│ │
│ 721 │ │
│ 722 │ def forward(self, input_ids): │
│ 723 │ │ # Embeddings. │
│ ❱ 724 │ │ words_embeddings = self.word_embeddings(input_ids) │
│ 725 │ │ embeddings = words_embeddings │
│ 726 │ │ # Data format change to avoid explicit tranposes : [b s h] --> [s b h]. │
│ 727 │ │ embeddings = embeddings.transpose(0, 1).contiguous() │
│ │
│ /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1511 in _wrapped_call_impl │
│ │
│ 1508 │ │ if self._compiled_call_impl is not None: │
│ 1509 │ │ │ return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc] │
│ 1510 │ │ else: │
│ ❱ 1511 │ │ │ return self._call_impl(*args, **kwargs) │
│ 1512 │ │
│ 1513 │ def _call_impl(self, *args, **kwargs): │
│ 1514 │ │ forward_call = (self._slow_forward if torch._C._get_tracing_state() else self.fo │
│ │
│ /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1520 in _call_impl │
│ │
│ 1517 │ │ if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks │
│ 1518 │ │ │ │ or _global_backward_pre_hooks or _global_backward_hooks │
│ 1519 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │
│ ❱ 1520 │ │ │ return forward_call(*args, **kwargs) │
│ 1521 │ │ │
│ 1522 │ │ try: │
│ 1523 │ │ │ result = None │
│ │
│ /usr/local/lib/python3.10/dist-packages/torch/nn/modules/sparse.py:163 in forward │
│ │
│ 160 │ │ │ │ self.weight[self.padding_idx].fill_(0) │
│ 161 │ │
│ 162 │ def forward(self, input: Tensor) -> Tensor: │
│ ❱ 163 │ │ return F.embedding( │
│ 164 │ │ │ input, self.weight, self.padding_idx, self.max_norm, │
│ 165 │ │ │ self.norm_type, self.scale_grad_by_freq, self.sparse) │
│ 166 │
│ │
│ /usr/local/lib/python3.10/dist-packages/torch/nn/functional.py:2237 in embedding │
│ │
│ 2234 │ │ # torch.embedding_renorm │
│ 2235 │ │ # remove once script supports set_grad_enabled │
│ 2236 │ │ _no_grad_embedding_renorm_(weight, input, max_norm, norm_type) │
│ ❱ 2237 │ return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse) │
│ 2238 │
│ 2239 │
│ 2240 def embedding_bag( │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
RuntimeError: CUDA error: device-side assert triggered
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
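Because CUDA kernel failures are reported asynchronously, the Python frames above may not point at the real failure site. One way to get a synchronous, readable error is to force blocking kernel launches, or to replay the same call on CPU, where an out-of-range id raises an ordinary `IndexError` that names the offending index. A rough sketch, reusing `model` and `tokenizer` from the snippet above (the prompt is a placeholder):

```python
import os

# Must be set before CUDA is initialized, i.e. before the first .cuda() call,
# for kernel errors to be raised at the failing op instead of later.
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

# Replaying on CPU turns the device-side assert into a plain IndexError.
model_cpu = model.float().cpu()
response, _ = model_cpu.chat(tokenizer, "hello")  # placeholder prompt
print(response)
```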