-
Notifications
You must be signed in to change notification settings - Fork 3.1k
Open
Labels
question — Further information is requested
Description
请提出你的问题 (Please state your question)
commit a286abc (HEAD, tag: v3.0.0-beta4)
A800-SXM4-80GB
(Pdb) self._infer(tokenized_source)
*** KeyError: 'seq_len_encoder'
(Pdb) w
/work/PaddleNLP/llm/predict/flask_server.py(318)<module>()
-> server.start_flask_server()
/work/PaddleNLP/llm/predict/flask_server.py(282)start_flask_server()
-> app.run(host="0.0.0.0", port=self.port, threaded=False)
/usr/local/lib/python3.10/dist-packages/flask/app.py(662)run()
-> run_simple(t.cast(str, host), port, self, **options)
/usr/local/lib/python3.10/dist-packages/werkzeug/serving.py(1125)run_simple()
-> srv.serve_forever()
/usr/local/lib/python3.10/dist-packages/werkzeug/serving.py(819)serve_forever()
-> super().serve_forever(poll_interval=poll_interval)
/usr/lib/python3.10/socketserver.py(237)serve_forever()
-> self._handle_request_noblock()
/usr/lib/python3.10/socketserver.py(316)_handle_request_noblock()
-> self.process_request(request, client_address)
/usr/lib/python3.10/socketserver.py(347)process_request()
-> self.finish_request(request, client_address)
/usr/lib/python3.10/socketserver.py(360)finish_request()
-> self.RequestHandlerClass(request, client_address, self)
/usr/lib/python3.10/socketserver.py(747)__init__()
-> self.handle()
/usr/local/lib/python3.10/dist-packages/werkzeug/serving.py(398)handle()
-> super().handle()
/usr/lib/python3.10/http/server.py(433)handle()
-> self.handle_one_request()
/usr/lib/python3.10/http/server.py(421)handle_one_request()
-> method()
/usr/local/lib/python3.10/dist-packages/werkzeug/serving.py(370)run_wsgi()
-> execute(self.server.app)
/usr/local/lib/python3.10/dist-packages/werkzeug/serving.py(331)execute()
-> application_iter = app(environ, start_response)
/usr/local/lib/python3.10/dist-packages/flask/app.py(1536)__call__()
-> return self.wsgi_app(environ, start_response)
/usr/local/lib/python3.10/dist-packages/flask/app.py(1511)wsgi_app()
-> response = self.full_dispatch_request()
/usr/local/lib/python3.10/dist-packages/flask/app.py(917)full_dispatch_request()
-> rv = self.dispatch_request()
/usr/local/lib/python3.10/dist-packages/flask/app.py(902)dispatch_request()
-> return self.ensure_sync(self.view_functions[rule.endpoint])(**view_args) # type: ignore[no-any-return]
/work/PaddleNLP/llm/predict/flask_server.py(259)_server()
-> result = self.predict(query)
/work/PaddleNLP/llm/predict/flask_server.py(108)predict()
-> return self.predictor.predict(input_texts)
> /work/PaddleNLP/llm/predict/predictor.py(293)predict()
-> predictions = self._infer(tokenized_source)
(Pdb)
root@bddx-inf-sci-k8s-a800-00516:/work/PaddleNLP/llm# python ./predict/flask_server.py --model_name_or_path checkpoints/lora_ckpts/qwen2.5b_instruct/0924/static --mode static --port 8010 --flask_port 8011 --dtype "float16" --src_length 4000 --max_length 4000 --quant_type weight_only_int8 --total_max_length 8000
[1]- Killed python ./predict/flask_server.py --model_name_or_path checkpoints/lora_ckpts/qwen2.5b_instruct/0924/static --mode static --port 8010 --flask_port 8011 --dtype "float16" --src_length 4000 --max_length 4000 --quant_type weight_only_int8 --total_max_length 8000
[2]+ Killed python ./predict/flask_server.py --model_name_or_path checkpoints/lora_ckpts/qwen2.5b_instruct/0924/static --mode static --port 8010 --flask_port 8011 --dtype "float16" --src_length 4000 --max_length 4000 --quant_type weight_only_int8 --total_max_length 8000
/usr/local/lib/python3.10/dist-packages/_distutils_hack/__init__.py:30: UserWarning: Setuptools is replacing distutils. Support for replacing an already imported distutils is deprecated. In the future, this condition will fail. Register concerns at https://github.com/pypa/setuptools/issues/new?template=distutils-deprecation.yml
warnings.warn(
[2025-09-25 12:55:09,999] [ INFO] - The `unk_token` parameter needs to be defined: we use `eos_token` by default.
[2025-09-25 12:55:10,211] [ INFO] - Loading configuration file checkpoints/lora_ckpts/qwen2.5b_instruct/0924/static/config.json
[2025-09-25 12:55:10,211] [ INFO] - Loading configuration file checkpoints/lora_ckpts/qwen2.5b_instruct/0924/static/config.json
[2025-09-25 12:55:10,212] [ INFO] - Loading configuration file checkpoints/lora_ckpts/qwen2.5b_instruct/0924/static/generation_config.json
* Serving Flask app 'flask_server'
* Debug mode: off
[2025-09-25 12:55:17,156] [ INFO] _internal.py:97 - WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.
* Running on all addresses (0.0.0.0)
* Running on http://127.0.0.1:8012
* Running on http://10.206.115.206:8012
[2025-09-25 12:55:17,156] [ INFO] _internal.py:97 - Press CTRL+C to quit
/work/PaddleNLP/llm/predict/gradio_ui.py:271: UserWarning: You have not specified a value for the `type` parameter. Defaulting to the 'tuples' format for chatbot messages, but this is deprecated and will be removed in a future version of Gradio. Please set type='messages' instead, which uses openai-style dictionaries with 'role' and 'content' keys.
context_chatbot = gr.Chatbot(
* Running on local URL: http://0.0.0.0:8010
* To create a public link, set `share=True` in `launch()`.
[2025-09-25 12:56:06,127] [ INFO] - Request: {
"model": "qwen-max",
"messages": [
{
"role": "user",
"content": "# Question:\nDo a laminar simulation of flow between two parallel plates using pimpleFoam with moving wall velocity of (1 0 0) m/s"
}
],
"temperature": 0.95,
"max_tokens": 1024,
"top_p": 0.7,
"top_k": 0,
"stream": false,
"penalty_score": 1.0
}
> /work/PaddleNLP/llm/predict/predictor.py(293)predict()
-> predictions = self._infer(tokenized_source)
(Pdb) self._infer(tokenized_source)
*** KeyError: 'penalty_score'
(Pdb)
Metadata
Metadata
Assignees
Labels
question — Further information is requested