执行官方微调demo遇到的问题 #1096
执行官方微调demo遇到的问题
#1096
Replies: 0 comments
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Uh oh!
There was an error while loading. Please reload this page.
-
执行命令 `python finetune_hf.py ../AdvertiseGen/ /data/cxk_home/ChatGLM3/chatglm3-6b configs/lora.yaml` 后出现下面的报错,看不出是什么原因引起的。
`Map (num_proc=16): 0%| | 0/114599 [00:00<?, ? examples/s]
RemoteTraceback:
"""
Traceback (most recent call last):
File "/home/jszx-02/anaconda3/envs/ChatGLM3-6b/lib/python3.11/site-packages/multiprocess/pool.py", line 125, in worker
result = (True, func(*args, **kwds))
^^^^^^^^^^^^^^^^^^^
File "/home/jszx-02/anaconda3/envs/ChatGLM3-6b/lib/python3.11/site-packages/datasets/utils/py_utils.py", line 623, in _write_generator_to_queue
for i, result in enumerate(func(**kwargs)):
File "/home/jszx-02/anaconda3/envs/ChatGLM3-6b/lib/python3.11/site-packages/datasets/arrow_dataset.py", line 3482, in _map_single
batch = apply_function_on_filtered_inputs(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/jszx-02/anaconda3/envs/ChatGLM3-6b/lib/python3.11/site-packages/datasets/arrow_dataset.py", line 3361, in apply_function_on_filtered_inputs
processed_inputs = function(*fn_args, *additional_args, **fn_kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/cxk_home/ChatGLM3/finetune_demo/finetune_hf.py", line 284, in process_batch
batched_conv = batch['conversations']
~~~~~^^^^^^^^^^^^^^^^^
File "/home/jszx-02/anaconda3/envs/ChatGLM3-6b/lib/python3.11/site-packages/datasets/formatting/formatting.py", line 270, in __getitem__
value = self.data[key]
~~~~~~~~~^^^^^
KeyError: 'conversations'
"""
The above exception was the direct cause of the following exception:
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ /data/cxk_home/ChatGLM3/finetune_demo/finetune_hf.py:464 in main │
│ │
│ 461 │ tokenizer, model = load_tokenizer_and_model(model_dir, peft_config=ft_config.peft_co │
│ 462 │ data_manager = DataManager(data_dir, ft_config.data_config) │
│ 463 │ │
│ ❱ 464 │ train_dataset = data_manager.get_dataset( │
│ 465 │ │ Split.TRAIN, │
│ 466 │ │ functools.partial( │
│ 467 │ │ │ process_batch, │
│ │
│ /data/cxk_home/ChatGLM3/finetune_demo/finetune_hf.py:263 in get_dataset │
│ │
│ 260 │ │ │ remove_columns = orig_dataset.column_names │
│ 261 │ │ else: │
│ 262 │ │ │ remove_columns = None │
│ ❱ 263 │ │ return orig_dataset.map( │
│ 264 │ │ │ process_fn, │
│ 265 │ │ │ batched=batched, │
│ 266 │ │ │ remove_columns=remove_columns, │
│ │
│ /home/jszx-02/anaconda3/envs/ChatGLM3-6b/lib/python3.11/site-packages/datasets/arrow_dataset.py: │
│ 593 in wrapper │
│ │
│ 590 │ │ else: │
│ 591 │ │ │ self: "Dataset" = kwargs.pop("self") │
│ 592 │ │ # apply actual function │
│ ❱ 593 │ │ out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) │
│ 594 │ │ datasets: List["Dataset"] = list(out.values()) if isinstance(out, dict) else [ou │
│ 595 │ │ for dataset in datasets: │
│ 596 │ │ │ # Remove task templates if a column mapping of the template is no longer val │
│ │
│ /home/jszx-02/anaconda3/envs/ChatGLM3-6b/lib/python3.11/site-packages/datasets/arrow_dataset.py: │
│ 558 in wrapper │
│ │
│ 555 │ │ │ "output_all_columns": self._output_all_columns, │
│ 556 │ │ } │
│ 557 │ │ # apply actual function │
│ ❱ 558 │ │ out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) │
│ 559 │ │ datasets: List["Dataset"] = list(out.values()) if isinstance(out, dict) else [ou │
│ 560 │ │ # re-apply format to the output │
│ 561 │ │ for dataset in datasets: │
│ │
│ /home/jszx-02/anaconda3/envs/ChatGLM3-6b/lib/python3.11/site-packages/datasets/arrow_dataset.py: │
│ 3197 in map │
│ │
│ 3194 │ │ │ │ │ │ total=pbar_total, │
│ 3195 │ │ │ │ │ │ desc=(desc or "Map") + f" (num_proc={num_proc})", │
│ 3196 │ │ │ │ │ ) as pbar: │
│ ❱ 3197 │ │ │ │ │ │ for rank, done, content in iflatmap_unordered( │
│ 3198 │ │ │ │ │ │ │ pool, Dataset._map_single, kwargs_iterable=kwargs_per_job │
│ 3199 │ │ │ │ │ │ ): │
│ 3200 │ │ │ │ │ │ │ if done: │
│ │
│ /home/jszx-02/anaconda3/envs/ChatGLM3-6b/lib/python3.11/site-packages/datasets/utils/py_utils.py │
│ :663 in iflatmap_unordered │
│ │
│ 660 │ │ finally: │
│ 661 │ │ │ if not pool_changed: │
│ 662 │ │ │ │ # we get the result in case there's an error to raise │
│ ❱ 663 │ │ │ │ [async_result.get(timeout=0.05) for async_result in async_results] │
│ 664 │
│ │
│ /home/jszx-02/anaconda3/envs/ChatGLM3-6b/lib/python3.11/site-packages/datasets/utils/py_utils.py │
│ :663 in <listcomp> │
│ │
│ 660 │ │ finally: │
│ 661 │ │ │ if not pool_changed: │
│ 662 │ │ │ │ # we get the result in case there's an error to raise │
│ ❱ 663 │ │ │ │ [async_result.get(timeout=0.05) for async_result in async_results] │
│ 664 │
│ │
│ /home/jszx-02/anaconda3/envs/ChatGLM3-6b/lib/python3.11/site-packages/multiprocess/pool.py:774 │
│ in get │
│ │
│ 771 │ │ if self._success: │
│ 772 │ │ │ return self._value │
│ 773 │ │ else: │
│ ❱ 774 │ │ │ raise self._value │
│ 775 │ │
│ 776 │ def _set(self, i, obj): │
│ 777 │ │ self._success, self._value = obj │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
KeyError: 'conversations'
`
Beta Was this translation helpful? Give feedback.
All reactions