@@ -32,7 +32,7 @@ usage: export_model.py text_generation [-h]
                                       [--config_file_path CONFIG_FILE_PATH]
                                       [--overwrite_models]
                                       [--target_device TARGET_DEVICE]
-                                      [--pipeline_type PIPELINE_TYPE ]
+                                      [--pipeline_type {LM,LM_CB,VLM,VLM_CB,AUTO} ]
                                       [--kv_cache_precision {u8}]
                                       [--extra_quantization_params EXTRA_QUANTIZATION_PARAMS]
                                       [--enable_prefix_caching]
@@ -42,6 +42,7 @@ usage: export_model.py text_generation [-h]
                                       [--cache_size CACHE_SIZE]
                                       [--draft_source_model DRAFT_SOURCE_MODEL]
                                       [--draft_model_name DRAFT_MODEL_NAME]
+                                      [--max_prompt_len MAX_PROMPT_LEN]

options:
  -h, --help            show this help message and exit
@@ -60,10 +61,10 @@ options:
  --overwrite_models    Overwrite the model if it already exists in the models
                        repository
  --target_device TARGET_DEVICE
-                       CPU or GPU , default is CPU
-  --pipeline_type PIPELINE_TYPE
-                       Type of the pipeline to be used. Can be either TEXT_CB
-                       or VLM_CB. When undefined, it will be autodetected
+                       CPU, GPU, NPU or HETERO, default is CPU
+  --pipeline_type {LM,LM_CB,VLM,VLM_CB,AUTO}
+                       Type of the pipeline to be used. AUTO is used by
+                       default
  --kv_cache_precision {u8}
                        u8 or empty (model default). Reduced kv cache
                        precision to u8 lowers the cache size consumption.
@@ -94,6 +95,10 @@ options:
                        deployment. Equal to draft_source_model if HF model
                        name is used. Available only if draft_source_model has
                        been specified.
+  --max_prompt_len MAX_PROMPT_LEN
+                       Sets the NPU-specific property for the maximum number
+                       of tokens in the prompt. Not effective if the target
+                       device is not NPU
```
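For reference, an invocation exercising the new options might look like the sketch below. The model name and repository path are placeholders, and the `--source_model` and `--model_repository_path` flags are assumed from the portions of the help text elided above; only `--target_device`, `--pipeline_type`, and `--max_prompt_len` are confirmed by this excerpt.

```bash
# Export an LLM for NPU deployment, capping prompts at 2048 tokens.
# --pipeline_type defaults to AUTO; it is spelled out here for clarity.
# --max_prompt_len only takes effect because --target_device is NPU.
python export_model.py text_generation \
    --source_model meta-llama/Meta-Llama-3-8B-Instruct \
    --model_repository_path models \
    --target_device NPU \
    --pipeline_type AUTO \
    --max_prompt_len 2048
```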
## Examples of how models can be exported