Skip to content

Commit 899cb82

Browse files
authored
#552: Fix for uploading models with function_name instead of model_task (#553)
* Bug fix for uploading models with function_name instead of model_task_type Signed-off-by: Amanuel Aslan <[email protected]> * Add change in changelog; fix missing config file Signed-off-by: Amanuel Aslan <[email protected]> --------- Signed-off-by: Amanuel Aslan <[email protected]>
1 parent 5c28dc8 commit 899cb82

File tree

13 files changed

+12
-16
lines changed

13 files changed

+12
-16
lines changed

CHANGELOG.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
3333
- Update model upload history - opensearch-project/opensearch-semantic-highlighter-v1 (v.1.0.0)(TORCH_SCRIPT) by @nathaliellenaa ([#550](https://github.com/opensearch-project/opensearch-py-ml/pull/550))
3434

3535
### Fixed
36-
36+
- Fix for uploading models with function_name instead of model_task ([#553](https://github.com/opensearch-project/opensearch-py-ml/pull/553))
3737

3838
## [1.2.0]
3939

opensearch_py_ml/ml_commons/ml_common_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
MODEL_CONTENT_HASH_VALUE = "model_content_hash_value"
2626
MODEL_GROUP_ID = "model_group_id"
2727
MODEL_FUNCTION_NAME = "function_name"
28-
MODEL_TASK_TYPE = "model_task_type"
28+
2929
# URL of the license file for the OpenSearch project
3030
LICENSE_URL = "https://github.com/opensearch-project/opensearch-py-ml/raw/main/LICENSE"
3131
# Name of the function used for sparse encoding

opensearch_py_ml/ml_commons/model_uploader.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@
2626
MODEL_GROUP_ID,
2727
MODEL_MAX_SIZE,
2828
MODEL_NAME_FIELD,
29-
MODEL_TASK_TYPE,
3029
MODEL_TYPE,
3130
MODEL_VERSION_FIELD,
3231
TOTAL_CHUNKS_FIELD,
@@ -184,9 +183,6 @@ def _check_mandatory_field(self, model_meta: dict) -> bool:
184183
if model_meta.get(MODEL_FUNCTION_NAME) not in [
185184
"SPARSE_ENCODING",
186185
"SPARSE_TOKENIZE",
187-
] and model_meta.get(MODEL_TASK_TYPE) not in [
188-
"SPARSE_ENCODING",
189-
"SPARSE_TOKENIZE",
190186
]:
191187
raise ValueError(f"{MODEL_CONFIG_FIELD} can not be empty")
192188
else:

opensearch_py_ml/ml_models/sentencetransformermodel.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1436,7 +1436,7 @@ def make_model_config_json(
14361436
"version": version_number,
14371437
"description": description,
14381438
"model_format": model_format,
1439-
"model_task_type": "TEXT_EMBEDDING",
1439+
"function_name": "TEXT_EMBEDDING",
14401440
"model_config": {
14411441
"model_type": model_type,
14421442
"embedding_dimension": embedding_dimension,
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
{"name": "intfloat/e5-small-v2", "version": "1.0.1", "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 384 dimensional dense vector space.", "model_format": "ONNX", "model_task_type": "TEXT_EMBEDDING", "model_config": {"model_type": "bert", "embedding_dimension": 384, "framework_type": "sentence_transformers", "pooling_mode": "MEAN", "normalize_result": true, "all_config": "{\"_name_or_path\": \"/root/.cache/torch/sentence_transformers/intfloat_e5-small-v2/\", \"architectures\": [\"BertModel\"], \"attention_probs_dropout_prob\": 0.1, \"classifier_dropout\": null, \"hidden_act\": \"gelu\", \"hidden_dropout_prob\": 0.1, \"hidden_size\": 384, \"initializer_range\": 0.02, \"intermediate_size\": 1536, \"layer_norm_eps\": 1e-12, \"max_position_embeddings\": 512, \"model_type\": \"bert\", \"num_attention_heads\": 12, \"num_hidden_layers\": 12, \"pad_token_id\": 0, \"position_embedding_type\": \"absolute\", \"torch_dtype\": \"float32\", \"transformers_version\": \"4.31.0\", \"type_vocab_size\": 2, \"use_cache\": true, \"vocab_size\": 30522}"}}
1+
{"name": "intfloat/e5-small-v2", "version": "1.0.1", "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 384 dimensional dense vector space.", "model_format": "ONNX", "function_name": "TEXT_EMBEDDING", "model_config": {"model_type": "bert", "embedding_dimension": 384, "framework_type": "sentence_transformers", "pooling_mode": "MEAN", "normalize_result": true, "all_config": "{\"_name_or_path\": \"/root/.cache/torch/sentence_transformers/intfloat_e5-small-v2/\", \"architectures\": [\"BertModel\"], \"attention_probs_dropout_prob\": 0.1, \"classifier_dropout\": null, \"hidden_act\": \"gelu\", \"hidden_dropout_prob\": 0.1, \"hidden_size\": 384, \"initializer_range\": 0.02, \"intermediate_size\": 1536, \"layer_norm_eps\": 1e-12, \"max_position_embeddings\": 512, \"model_type\": \"bert\", \"num_attention_heads\": 12, \"num_hidden_layers\": 12, \"pad_token_id\": 0, \"position_embedding_type\": \"absolute\", \"torch_dtype\": \"float32\", \"transformers_version\": \"4.31.0\", \"type_vocab_size\": 2, \"use_cache\": true, \"vocab_size\": 30522}"}}
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
{"name": "sentence-transformers/clip-ViT-B-32-multilingual-v1", "version": "1.0.1", "description": "This is a multi-lingual version of the OpenAI CLIP-ViT-B32 model. You can map text and images to a common dense vector space such that images and the matching texts are close. This model can be used for image search and for multi-lingual zero-shot image classification .", "model_format": "TORCH_SCRIPT", "model_task_type": "TEXT_EMBEDDING", "model_config": {"model_type": "distilbert", "embedding_dimension": 512, "framework_type": "sentence_transformers", "pooling_mode": "MEAN", "normalize_result": false, "all_config": "{\"_name_or_path\": \"/root/.cache/torch/sentence_transformers/sentence-transformers_clip-ViT-B-32-multilingual-v1/\", \"activation\": \"gelu\", \"architectures\": [\"DistilBertModel\"], \"attention_dropout\": 0.1, \"dim\": 768, \"dropout\": 0.1, \"hidden_dim\": 3072, \"initializer_range\": 0.02, \"max_position_embeddings\": 512, \"model_type\": \"distilbert\", \"n_heads\": 12, \"n_layers\": 6, \"output_past\": true, \"pad_token_id\": 0, \"qa_dropout\": 0.1, \"seq_classif_dropout\": 0.2, \"sinusoidal_pos_embds\": false, \"tie_weights_\": true, \"torch_dtype\": \"float32\", \"transformers_version\": \"4.31.0\", \"vocab_size\": 119547}"}}
1+
{"name": "sentence-transformers/clip-ViT-B-32-multilingual-v1", "version": "1.0.1", "description": "This is a multi-lingual version of the OpenAI CLIP-ViT-B32 model. You can map text and images to a common dense vector space such that images and the matching texts are close. This model can be used for image search and for multi-lingual zero-shot image classification .", "model_format": "TORCH_SCRIPT", "function_name": "TEXT_EMBEDDING", "model_config": {"model_type": "distilbert", "embedding_dimension": 512, "framework_type": "sentence_transformers", "pooling_mode": "MEAN", "normalize_result": false, "all_config": "{\"_name_or_path\": \"/root/.cache/torch/sentence_transformers/sentence-transformers_clip-ViT-B-32-multilingual-v1/\", \"activation\": \"gelu\", \"architectures\": [\"DistilBertModel\"], \"attention_dropout\": 0.1, \"dim\": 768, \"dropout\": 0.1, \"hidden_dim\": 3072, \"initializer_range\": 0.02, \"max_position_embeddings\": 512, \"model_type\": \"distilbert\", \"n_heads\": 12, \"n_layers\": 6, \"output_past\": true, \"pad_token_id\": 0, \"qa_dropout\": 0.1, \"seq_classif_dropout\": 0.2, \"sinusoidal_pos_embds\": false, \"tie_weights_\": true, \"torch_dtype\": \"float32\", \"transformers_version\": \"4.31.0\", \"vocab_size\": 119547}"}}
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
{"name": "sentence-transformers/multi-qa-mpnet-base-dot-v1", "version": "1.0.0", "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 768 dimensional dense vector space and was designed for semantic search. It has been trained on 215M pairs from diverse sources.", "model_format": "ONNX", "model_task_type": "TEXT_EMBEDDING", "model_config": {"model_type": "mpnet", "embedding_dimension": 768, "framework_type": "sentence_transformers", "pooling_mode": "CLS", "normalize_result": false, "all_config": "{\"_name_or_path\": \"/root/.cache/torch/sentence_transformers/sentence-transformers_multi-qa-mpnet-base-dot-v1/\", \"architectures\": [\"MPNetModel\"], \"attention_probs_dropout_prob\": 0.1, \"bos_token_id\": 0, \"eos_token_id\": 2, \"hidden_act\": \"gelu\", \"hidden_dropout_prob\": 0.1, \"hidden_size\": 768, \"initializer_range\": 0.02, \"intermediate_size\": 3072, \"layer_norm_eps\": 1e-05, \"max_position_embeddings\": 514, \"model_type\": \"mpnet\", \"num_attention_heads\": 12, \"num_hidden_layers\": 12, \"pad_token_id\": 1, \"relative_attention_num_buckets\": 32, \"torch_dtype\": \"float32\", \"transformers_version\": \"4.30.2\", \"vocab_size\": 30527}"}}
1+
{"name": "sentence-transformers/multi-qa-mpnet-base-dot-v1", "version": "1.0.0", "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 768 dimensional dense vector space and was designed for semantic search. It has been trained on 215M pairs from diverse sources.", "model_format": "ONNX", "function_name": "TEXT_EMBEDDING", "model_config": {"model_type": "mpnet", "embedding_dimension": 768, "framework_type": "sentence_transformers", "pooling_mode": "CLS", "normalize_result": false, "all_config": "{\"_name_or_path\": \"/root/.cache/torch/sentence_transformers/sentence-transformers_multi-qa-mpnet-base-dot-v1/\", \"architectures\": [\"MPNetModel\"], \"attention_probs_dropout_prob\": 0.1, \"bos_token_id\": 0, \"eos_token_id\": 2, \"hidden_act\": \"gelu\", \"hidden_dropout_prob\": 0.1, \"hidden_size\": 768, \"initializer_range\": 0.02, \"intermediate_size\": 3072, \"layer_norm_eps\": 1e-05, \"max_position_embeddings\": 514, \"model_type\": \"mpnet\", \"num_attention_heads\": 12, \"num_hidden_layers\": 12, \"pad_token_id\": 1, \"relative_attention_num_buckets\": 32, \"torch_dtype\": \"float32\", \"transformers_version\": \"4.30.2\", \"vocab_size\": 30527}"}}
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
{"name": "sentence-transformers/multi-qa-mpnet-base-dot-v1", "version": "1.0.0", "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 768 dimensional dense vector space and was designed for semantic search. It has been trained on 215M pairs from diverse sources.", "model_format": "TORCH_SCRIPT", "model_task_type": "TEXT_EMBEDDING", "model_config": {"model_type": "mpnet", "embedding_dimension": 768, "framework_type": "sentence_transformers", "pooling_mode": "CLS", "normalize_result": false, "all_config": "{\"_name_or_path\": \"/root/.cache/torch/sentence_transformers/sentence-transformers_multi-qa-mpnet-base-dot-v1/\", \"architectures\": [\"MPNetModel\"], \"attention_probs_dropout_prob\": 0.1, \"bos_token_id\": 0, \"eos_token_id\": 2, \"hidden_act\": \"gelu\", \"hidden_dropout_prob\": 0.1, \"hidden_size\": 768, \"initializer_range\": 0.02, \"intermediate_size\": 3072, \"layer_norm_eps\": 1e-05, \"max_position_embeddings\": 514, \"model_type\": \"mpnet\", \"num_attention_heads\": 12, \"num_hidden_layers\": 12, \"pad_token_id\": 1, \"relative_attention_num_buckets\": 32, \"torch_dtype\": \"float32\", \"transformers_version\": \"4.30.2\", \"vocab_size\": 30527}"}}
1+
{"name": "sentence-transformers/multi-qa-mpnet-base-dot-v1", "version": "1.0.0", "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 768 dimensional dense vector space and was designed for semantic search. It has been trained on 215M pairs from diverse sources.", "model_format": "TORCH_SCRIPT", "function_name": "TEXT_EMBEDDING", "model_config": {"model_type": "mpnet", "embedding_dimension": 768, "framework_type": "sentence_transformers", "pooling_mode": "CLS", "normalize_result": false, "all_config": "{\"_name_or_path\": \"/root/.cache/torch/sentence_transformers/sentence-transformers_multi-qa-mpnet-base-dot-v1/\", \"architectures\": [\"MPNetModel\"], \"attention_probs_dropout_prob\": 0.1, \"bos_token_id\": 0, \"eos_token_id\": 2, \"hidden_act\": \"gelu\", \"hidden_dropout_prob\": 0.1, \"hidden_size\": 768, \"initializer_range\": 0.02, \"intermediate_size\": 3072, \"layer_norm_eps\": 1e-05, \"max_position_embeddings\": 514, \"model_type\": \"mpnet\", \"num_attention_heads\": 12, \"num_hidden_layers\": 12, \"pad_token_id\": 1, \"relative_attention_num_buckets\": 32, \"torch_dtype\": \"float32\", \"transformers_version\": \"4.30.2\", \"vocab_size\": 30527}"}}
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
{"name": "sentence-transformers/multi-qa-mpnet-base-dot-v1", "version": "2.0.0", "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 768 dimensional dense vector space and was designed for semantic search. It has been trained on 215M pairs from diverse sources. (v.2.0.0)", "model_format": "TORCH_SCRIPT", "model_task_type": "TEXT_EMBEDDING", "model_config": {"model_type": "mpnet", "embedding_dimension": 768, "framework_type": "sentence_transformers", "pooling_mode": "CLS", "normalize_result": false, "all_config": "{\"_name_or_path\": \"/root/.cache/torch/sentence_transformers/sentence-transformers_multi-qa-mpnet-base-dot-v1/\", \"architectures\": [\"MPNetModel\"], \"attention_probs_dropout_prob\": 0.1, \"bos_token_id\": 0, \"eos_token_id\": 2, \"hidden_act\": \"gelu\", \"hidden_dropout_prob\": 0.1, \"hidden_size\": 768, \"initializer_range\": 0.02, \"intermediate_size\": 3072, \"layer_norm_eps\": 1e-05, \"max_position_embeddings\": 514, \"model_type\": \"mpnet\", \"num_attention_heads\": 12, \"num_hidden_layers\": 12, \"pad_token_id\": 1, \"relative_attention_num_buckets\": 32, \"torch_dtype\": \"float32\", \"transformers_version\": \"4.30.2\", \"vocab_size\": 30527}"}}
1+
{"name": "sentence-transformers/multi-qa-mpnet-base-dot-v1", "version": "2.0.0", "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 768 dimensional dense vector space and was designed for semantic search. It has been trained on 215M pairs from diverse sources. (v.2.0.0)", "model_format": "TORCH_SCRIPT", "function_name": "TEXT_EMBEDDING", "model_config": {"model_type": "mpnet", "embedding_dimension": 768, "framework_type": "sentence_transformers", "pooling_mode": "CLS", "normalize_result": false, "all_config": "{\"_name_or_path\": \"/root/.cache/torch/sentence_transformers/sentence-transformers_multi-qa-mpnet-base-dot-v1/\", \"architectures\": [\"MPNetModel\"], \"attention_probs_dropout_prob\": 0.1, \"bos_token_id\": 0, \"eos_token_id\": 2, \"hidden_act\": \"gelu\", \"hidden_dropout_prob\": 0.1, \"hidden_size\": 768, \"initializer_range\": 0.02, \"intermediate_size\": 3072, \"layer_norm_eps\": 1e-05, \"max_position_embeddings\": 514, \"model_type\": \"mpnet\", \"num_attention_heads\": 12, \"num_hidden_layers\": 12, \"pad_token_id\": 1, \"relative_attention_num_buckets\": 32, \"torch_dtype\": \"float32\", \"transformers_version\": \"4.30.2\", \"vocab_size\": 30527}"}}
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
{"name": "sentence-transformers/multi-qa-mpnet-base-dot-v1", "version": "3.0.1", "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 768 dimensional dense vector space and was designed for semantic search. It has been trained on 215M pairs from diverse sources. (v.3.0.1)", "model_format": "ONNX", "model_task_type": "TEXT_EMBEDDING", "model_config": {"model_type": "mpnet", "embedding_dimension": 768, "framework_type": "sentence_transformers", "pooling_mode": "CLS", "normalize_result": false, "all_config": "{\"_name_or_path\": \"/root/.cache/torch/sentence_transformers/sentence-transformers_multi-qa-mpnet-base-dot-v1/\", \"architectures\": [\"MPNetModel\"], \"attention_probs_dropout_prob\": 0.1, \"bos_token_id\": 0, \"eos_token_id\": 2, \"hidden_act\": \"gelu\", \"hidden_dropout_prob\": 0.1, \"hidden_size\": 768, \"initializer_range\": 0.02, \"intermediate_size\": 3072, \"layer_norm_eps\": 1e-05, \"max_position_embeddings\": 514, \"model_type\": \"mpnet\", \"num_attention_heads\": 12, \"num_hidden_layers\": 12, \"pad_token_id\": 1, \"relative_attention_num_buckets\": 32, \"torch_dtype\": \"float32\", \"transformers_version\": \"4.30.2\", \"vocab_size\": 30527}"}}
1+
{"name": "sentence-transformers/multi-qa-mpnet-base-dot-v1", "version": "3.0.1", "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 768 dimensional dense vector space and was designed for semantic search. It has been trained on 215M pairs from diverse sources. (v.3.0.1)", "model_format": "ONNX", "function_name": "TEXT_EMBEDDING", "model_config": {"model_type": "mpnet", "embedding_dimension": 768, "framework_type": "sentence_transformers", "pooling_mode": "CLS", "normalize_result": false, "all_config": "{\"_name_or_path\": \"/root/.cache/torch/sentence_transformers/sentence-transformers_multi-qa-mpnet-base-dot-v1/\", \"architectures\": [\"MPNetModel\"], \"attention_probs_dropout_prob\": 0.1, \"bos_token_id\": 0, \"eos_token_id\": 2, \"hidden_act\": \"gelu\", \"hidden_dropout_prob\": 0.1, \"hidden_size\": 768, \"initializer_range\": 0.02, \"intermediate_size\": 3072, \"layer_norm_eps\": 1e-05, \"max_position_embeddings\": 514, \"model_type\": \"mpnet\", \"num_attention_heads\": 12, \"num_hidden_layers\": 12, \"pad_token_id\": 1, \"relative_attention_num_buckets\": 32, \"torch_dtype\": \"float32\", \"transformers_version\": \"4.30.2\", \"vocab_size\": 30527}"}}

0 commit comments

Comments
 (0)