Replies: 1 comment
- Try pulling the latest code and retrying.
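For context, the `srcIndex < srcSelectDimSize` assertion in the traceback below almost always means a token id is out of range for the embedding table, e.g. a fine-tuned checkpoint whose tokenizer vocabulary and `word_embeddings` rows no longer match. A minimal sanity check, assuming a standard `transformers` load of the checkpoint (the model directory and test prompt are placeholders):

```python
from transformers import AutoModel, AutoTokenizer

model_dir = "THUDM/chatglm3-6b"  # placeholder: point at your fine-tuned checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
model = AutoModel.from_pretrained(model_dir, trust_remote_code=True)

# Compare the embedding table size against the ids the tokenizer actually emits.
vocab_rows = model.get_input_embeddings().weight.shape[0]
ids = tokenizer("hello")["input_ids"]  # placeholder prompt
bad = [i for i in ids if not 0 <= i < vocab_rows]
print(f"embedding rows: {vocab_rows}, max id: {max(ids)}, out-of-range ids: {bad}")
```

If `bad` is non-empty, the tokenizer and model weights are out of sync, which matches the embedding lookup failing in the traceback below.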
-
../aten/src/ATen/native/cuda/Indexing.cu:1290: indexSelectLargeIndex: block: [206,0,0], thread: [61,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ /content/drive/MyDrive/data/finetune_demo/inference_hf.py:51 in main │
│ │
│ 48 │ │ prompt: Annotated[str, typer.Option(help='')], │
│ 49 ): │
│ 50 │ model, tokenizer = load_model_and_tokenizer(model_dir) │
│ ❱ 51 │ response, _ = model.chat(tokenizer, prompt) │
│ 52 │ print(response) │
│ 53 │
│ 54 │
│ │
│ /usr/local/lib/python3.10/dist-packages/torch/utils/_contextlib.py:115 in decorate_context │
│ │
│ 112 │ @functools.wraps(func) │
│ 113 │ def decorate_context(*args, **kwargs): │
│ 114 │ │ with ctx_factory(): │
│ ❱ 115 │ │ │ return func(*args, **kwargs) │
│ 116 │ │
│ 117 │ return decorate_context │
│ 118 │
│ │
│ /root/.cache/huggingface/modules/transformers_modules/THUDM/chatglm3-6b/a5ba5501eb873d40d48bd098 │
│ 3bd2a8dd006bb838/modeling_chatglm.py:1042 in chat │
│ │
│ 1039 │ │ inputs = inputs.to(self.device) │
│ 1040 │ │ eos_token_id = [tokenizer.eos_token_id, tokenizer.get_command("<|user|>"), │
│ 1041 │ │ │ │ │ │ tokenizer.get_command("<|observation|>")] │
│ ❱ 1042 │ │ outputs = self.generate(**inputs, **gen_kwargs, eos_token_id=eos_token_id) │
│ 1043 │ │ outputs = outputs.tolist()[0][len(inputs["input_ids"][0]):-1] │
│ 1044 │ │ response = tokenizer.decode(outputs) │
│ 1045 │ │ history.append({"role": role, "content": query}) │
│ │
│ /usr/local/lib/python3.10/dist-packages/torch/utils/_contextlib.py:115 in decorate_context │
│ │
│ 112 │ @functools.wraps(func) │
│ 113 │ def decorate_context(*args, **kwargs): │
│ 114 │ │ with ctx_factory(): │
│ ❱ 115 │ │ │ return func(*args, **kwargs) │
│ 116 │ │
│ 117 │ return decorate_context │
│ 118 │
│ │
│ /usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py:1592 in generate │
│ │
│ 1589 │ │ │ ) │
│ 1590 │ │ │ │
│ 1591 │ │ │ # 13. run sample │
│ ❱ 1592 │ │ │ return self.sample( │
│ 1593 │ │ │ │ input_ids, │
│ 1594 │ │ │ │ logits_processor=prepared_logits_processor, │
│ 1595 │ │ │ │ logits_warper=logits_warper, │
│ │
│ /usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py:2696 in sample │
│ │
│ 2693 │ │ │ model_inputs = self.prepare_inputs_for_generation(input_ids, **model_kwargs) │
│ 2694 │ │ │ │
│ 2695 │ │ │ # forward pass to get next token │
│ ❱ 2696 │ │ │ outputs = self( │
│ 2697 │ │ │ │ **model_inputs, │
│ 2698 │ │ │ │ return_dict=True, │
│ 2699 │ │ │ │ output_attentions=output_attentions, │
│ │
│ /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1511 in _wrapped_call_impl │
│ │
│ 1508 │ │ if self._compiled_call_impl is not None: │
│ 1509 │ │ │ return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc] │
│ 1510 │ │ else: │
│ ❱ 1511 │ │ │ return self._call_impl(*args, **kwargs) │
│ 1512 │ │
│ 1513 │ def _call_impl(self, *args, **kwargs): │
│ 1514 │ │ forward_call = (self._slow_forward if torch._C._get_tracing_state() else self.fo │
│ │
│ /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1520 in _call_impl │
│ │
│ 1517 │ │ if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks │
│ 1518 │ │ │ │ or _global_backward_pre_hooks or _global_backward_hooks │
│ 1519 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │
│ ❱ 1520 │ │ │ return forward_call(*args, **kwargs) │
│ 1521 │ │ │
│ 1522 │ │ try: │
│ 1523 │ │ │ result = None │
│ │
│ /root/.cache/huggingface/modules/transformers_modules/THUDM/chatglm3-6b/a5ba5501eb873d40d48bd098 │
│ 3bd2a8dd006bb838/modeling_chatglm.py:941 in forward │
│ │
│ 938 │ │ use_cache = use_cache if use_cache is not None else self.config.use_cache │
│ 939 │ │ return_dict = return_dict if return_dict is not None else self.config.use_return │
│ 940 │ │ │
│ ❱ 941 │ │ transformer_outputs = self.transformer( │
│ 942 │ │ │ input_ids=input_ids, │
│ 943 │ │ │ position_ids=position_ids, │
│ 944 │ │ │ attention_mask=attention_mask, │
│ │
│ /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1511 in _wrapped_call_impl │
│ │
│ 1508 │ │ if self._compiled_call_impl is not None: │
│ 1509 │ │ │ return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc] │
│ 1510 │ │ else: │
│ ❱ 1511 │ │ │ return self._call_impl(*args, **kwargs) │
│ 1512 │ │
│ 1513 │ def _call_impl(self, *args, **kwargs): │
│ 1514 │ │ forward_call = (self._slow_forward if torch._C._get_tracing_state() else self.fo │
│ │
│ /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1520 in _call_impl │
│ │
│ 1517 │ │ if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks │
│ 1518 │ │ │ │ or _global_backward_pre_hooks or _global_backward_hooks │
│ 1519 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │
│ ❱ 1520 │ │ │ return forward_call(*args, **kwargs) │
│ 1521 │ │ │
│ 1522 │ │ try: │
│ 1523 │ │ │ result = None │
│ │
│ /root/.cache/huggingface/modules/transformers_modules/THUDM/chatglm3-6b/a5ba5501eb873d40d48bd098 │
│ 3bd2a8dd006bb838/modeling_chatglm.py:811 in forward │
│ │
│ 808 │ │ batch_size, seq_length = input_ids.shape │
│ 809 │ │ │
│ 810 │ │ if inputs_embeds is None: │
│ ❱ 811 │ │ │ inputs_embeds = self.embedding(input_ids) │
│ 812 │ │ │
│ 813 │ │ if self.pre_seq_len is not None: │
│ 814 │ │ │ if past_key_values is None: │
│ │
│ /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1511 in _wrapped_call_impl │
│ │
│ 1508 │ │ if self._compiled_call_impl is not None: │
│ 1509 │ │ │ return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc] │
│ 1510 │ │ else: │
│ ❱ 1511 │ │ │ return self._call_impl(*args, **kwargs) │
│ 1512 │ │
│ 1513 │ def _call_impl(self, *args, **kwargs): │
│ 1514 │ │ forward_call = (self._slow_forward if torch._C._get_tracing_state() else self.fo │
│ │
│ /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1520 in _call_impl │
│ │
│ 1517 │ │ if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks │
│ 1518 │ │ │ │ or _global_backward_pre_hooks or _global_backward_hooks │
│ 1519 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │
│ ❱ 1520 │ │ │ return forward_call(*args, **kwargs) │
│ 1521 │ │ │
│ 1522 │ │ try: │
│ 1523 │ │ │ result = None │
│ │
│ /root/.cache/huggingface/modules/transformers_modules/THUDM/chatglm3-6b/a5ba5501eb873d40d48bd098 │
│ 3bd2a8dd006bb838/modeling_chatglm.py:724 in forward │
│ │
│ 721 │ │
│ 722 │ def forward(self, input_ids): │
│ 723 │ │ # Embeddings. │
│ ❱ 724 │ │ words_embeddings = self.word_embeddings(input_ids) │
│ 725 │ │ embeddings = words_embeddings │
│ 726 │ │ # Data format change to avoid explicit tranposes : [b s h] --> [s b h]. │
│ 727 │ │ embeddings = embeddings.transpose(0, 1).contiguous() │
│ │
│ /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1511 in _wrapped_call_impl │
│ │
│ 1508 │ │ if self._compiled_call_impl is not None: │
│ 1509 │ │ │ return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc] │
│ 1510 │ │ else: │
│ ❱ 1511 │ │ │ return self._call_impl(*args, **kwargs) │
│ 1512 │ │
│ 1513 │ def _call_impl(self, *args, **kwargs): │
│ 1514 │ │ forward_call = (self._slow_forward if torch._C._get_tracing_state() else self.fo │
│ │
│ /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1520 in _call_impl │
│ │
│ 1517 │ │ if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks │
│ 1518 │ │ │ │ or _global_backward_pre_hooks or _global_backward_hooks │
│ 1519 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │
│ ❱ 1520 │ │ │ return forward_call(*args, **kwargs) │
│ 1521 │ │ │
│ 1522 │ │ try: │
│ 1523 │ │ │ result = None │
│ │
│ /usr/local/lib/python3.10/dist-packages/torch/nn/modules/sparse.py:163 in forward │
│ │
│ 160 │ │ │ │ self.weight[self.padding_idx].fill_(0) │
│ 161 │ │
│ 162 │ def forward(self, input: Tensor) -> Tensor: │
│ ❱ 163 │ │ return F.embedding( │
│ 164 │ │ │ input, self.weight, self.padding_idx, self.max_norm, │
│ 165 │ │ │ self.norm_type, self.scale_grad_by_freq, self.sparse) │
│ 166 │
│ │
│ /usr/local/lib/python3.10/dist-packages/torch/nn/functional.py:2237 in embedding │
│ │
│ 2234 │ │ # torch.embedding_renorm │
│ 2235 │ │ # remove once script supports set_grad_enabled │
│ 2236 │ │ _no_grad_embedding_renorm_(weight, input, max_norm, norm_type) │
│ ❱ 2237 │ return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse) │
│ 2238 │
│ 2239 │
│ 2240 def embedding_bag( │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
RuntimeError: CUDA error: device-side assert triggered
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
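Because CUDA kernel failures are reported asynchronously, the Python frames above may not point at the real failure site. One way to get a synchronous, readable error is to force blocking kernel launches, or to replay the same call on CPU, where an out-of-range id raises an ordinary `IndexError` that names the offending index. A rough sketch, reusing `model` and `tokenizer` from the snippet above (the prompt is a placeholder):

```python
import os

# Must be set before CUDA is initialized, i.e. before the first .cuda() call,
# for kernel errors to be raised at the failing op instead of later.
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

# Replaying on CPU turns the device-side assert into a plain IndexError.
model_cpu = model.float().cpu()
response, _ = model_cpu.chat(tokenizer, "hello")  # placeholder prompt
print(response)
```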