diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8d87c6db..877aed30 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -69,6 +69,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 - Update model upload history - sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 (v.1.0.2)(BOTH) by @nathaliellenaa ([#541](https://github.com/opensearch-project/opensearch-py-ml/pull/541))
 - Update model upload history - sentence-transformers/paraphrase-mpnet-base-v2 (v.1.0.1)(BOTH) by @nathaliellenaa ([#543](https://github.com/opensearch-project/opensearch-py-ml/pull/543))
 - Update model upload history - sentence-transformers/distiluse-base-multilingual-cased-v1 (v.1.0.2)(TORCH_SCRIPT) by @nathaliellenaa ([#545](https://github.com/opensearch-project/opensearch-py-ml/pull/545))
+- Update pretrained_models_all_versions.json (2025-06-02 16:06:54) by @nathaliellenaa ([#546](https://github.com/opensearch-project/opensearch-py-ml/pull/546))
 
 ### Fixed
 - Fix the wrong final zip file name in model_uploader workflow, now will name it by the upload_prefix alse.([#413](https://github.com/opensearch-project/opensearch-py-ml/pull/413/files))
diff --git a/utils/model_uploader/model_listing/pretrained_models_all_versions.json b/utils/model_uploader/model_listing/pretrained_models_all_versions.json
index bfcb73eb..7ba7fab1 100644
--- a/utils/model_uploader/model_listing/pretrained_models_all_versions.json
+++ b/utils/model_uploader/model_listing/pretrained_models_all_versions.json
@@ -1,218 +1,267 @@
 [
     {
-        "name": "huggingface/sentence-transformers/all-MiniLM-L12-v2",
+        "name": "huggingface/cross-encoders/ms-marco-MiniLM-L-12-v2",
         "versions": {
-            "1.0.1": {
+            "1.0.0": {
                 "format": [
                     "onnx",
                     "torch_script"
                 ],
-                "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 384 dimensional dense vector space and can be used for tasks like clustering or semantic search."
-            }
-        }
-    },
-    {
-        "name": "huggingface/sentence-transformers/all-MiniLM-L6-v2",
-        "versions": {
+                "description": "The model can be used for Information Retrieval: Given a query, encode the query will all possible passages (e.g. retrieved with ElasticSearch). Then sort the passages in a decreasing order."
+            },
             "1.0.1": {
                 "format": [
                     "onnx",
                     "torch_script"
                 ],
-                "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 384 dimensional dense vector space and can be used for tasks like clustering or semantic search."
+                "description": "The model can be used for Information Retrieval: Given a query, encode the query will all possible passages (e.g. retrieved with ElasticSearch). Then sort the passages in a decreasing order."
+            },
+            "1.0.2": {
+                "format": [
+                    "onnx",
+                    "torch_script"
+                ],
+                "description": "The model can be used for Information Retrieval: Given a query, encode the query will all possible passages (e.g. retrieved with ElasticSearch). Then sort the passages in a decreasing order."
             }
         }
     },
     {
-        "name": "huggingface/sentence-transformers/all-distilroberta-v1",
+        "name": "huggingface/cross-encoders/ms-marco-MiniLM-L-6-v2",
         "versions": {
+            "1.0.0": {
+                "format": [
+                    "onnx",
+                    "torch_script"
+                ],
+                "description": "The model can be used for Information Retrieval: Given a query, encode the query will all possible passages (e.g. retrieved with ElasticSearch). Then sort the passages in a decreasing order."
+            },
             "1.0.1": {
                 "format": [
                     "onnx",
                     "torch_script"
                 ],
-                "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 768 dimensional dense vector space and can be used for tasks like clustering or semantic search."
+                "description": "The model can be used for Information Retrieval: Given a query, encode the query will all possible passages (e.g. retrieved with ElasticSearch). Then sort the passages in a decreasing order."
+            },
+            "1.0.2": {
+                "format": [
+                    "onnx",
+                    "torch_script"
+                ],
+                "description": "The model can be used for Information Retrieval: Given a query, encode the query will all possible passages (e.g. retrieved with ElasticSearch). Then sort the passages in a decreasing order."
             }
         }
     },
     {
-        "name": "huggingface/sentence-transformers/all-mpnet-base-v2",
+        "name": "huggingface/sentence-transformers/all-MiniLM-L12-v2",
         "versions": {
             "1.0.1": {
                 "format": [
                     "onnx",
                     "torch_script"
                 ],
-                "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 768 dimensional dense vector space and can be used for tasks like clustering or semantic search."
+                "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 384 dimensional dense vector space and can be used for tasks like clustering or semantic search."
+            },
+            "1.0.2": {
+                "format": [
+                    "onnx",
+                    "torch_script"
+                ],
+                "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 384 dimensional dense vector space and can be used for tasks like clustering or semantic search."
             }
         }
     },
     {
-        "name": "huggingface/sentence-transformers/distiluse-base-multilingual-cased-v1",
+        "name": "huggingface/sentence-transformers/all-MiniLM-L6-v2",
         "versions": {
             "1.0.1": {
                 "format": [
+                    "onnx",
+                    "torch_script"
+                ],
+                "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 384 dimensional dense vector space and can be used for tasks like clustering or semantic search."
+            },
+            "1.0.2": {
+                "format": [
+                    "onnx",
                     "torch_script"
                 ],
-                "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 512 dimensional dense vector space and can be used for tasks like clustering or semantic search."
+                "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 384 dimensional dense vector space and can be used for tasks like clustering or semantic search."
             }
         }
     },
     {
-        "name": "huggingface/sentence-transformers/msmarco-distilbert-base-tas-b",
+        "name": "huggingface/sentence-transformers/all-distilroberta-v1",
         "versions": {
             "1.0.1": {
                 "format": [
                     "onnx",
                     "torch_script"
                 ],
-                "description": "This is a port of the DistilBert TAS-B Model to sentence-transformers model: It maps sentences & paragraphs to a 768 dimensional dense vector space and is optimized for the task of semantic search."
+                "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 768 dimensional dense vector space and can be used for tasks like clustering or semantic search."
             },
             "1.0.2": {
                 "format": [
                     "onnx",
                     "torch_script"
                 ],
-                "description": "This is a port of the DistilBert TAS-B Model to sentence-transformers model: It maps sentences & paragraphs to a 768 dimensional dense vector space and is optimized for the task of semantic search. The model version automatically truncates input to a maximum of 512 tokens."
+                "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 768 dimensional dense vector space and can be used for tasks like clustering or semantic search."
             }
         }
     },
     {
-        "name": "huggingface/sentence-transformers/multi-qa-MiniLM-L6-cos-v1",
+        "name": "huggingface/sentence-transformers/all-mpnet-base-v2",
         "versions": {
             "1.0.1": {
                 "format": [
                     "onnx",
                     "torch_script"
                 ],
-                "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 384 dimensional dense vector space and was designed for semantic search. It has been trained on 215M (question, answer) pairs from diverse sources."
+                "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 768 dimensional dense vector space and can be used for tasks like clustering or semantic search."
+            },
+            "1.0.2": {
+                "format": [
+                    "onnx",
+                    "torch_script"
+                ],
+                "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 768 dimensional dense vector space and can be used for tasks like clustering or semantic search."
             }
         }
     },
     {
-        "name": "huggingface/sentence-transformers/multi-qa-mpnet-base-dot-v1",
+        "name": "huggingface/sentence-transformers/distiluse-base-multilingual-cased-v1",
         "versions": {
             "1.0.1": {
                 "format": [
-                    "onnx",
                     "torch_script"
                 ],
-                "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 768 dimensional dense vector space and was designed for semantic search. It has been trained on 215M (question, answer) pairs from diverse sources."
+                "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 512 dimensional dense vector space and can be used for tasks like clustering or semantic search."
+            },
+            "1.0.2": {
+                "format": [
+                    "torch_script"
+                ],
+                "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 512 dimensional dense vector space and can be used for tasks like clustering or semantic search."
             }
         }
     },
     {
-        "name": "huggingface/sentence-transformers/paraphrase-MiniLM-L3-v2",
+        "name": "huggingface/sentence-transformers/msmarco-distilbert-base-tas-b",
         "versions": {
             "1.0.1": {
                 "format": [
                     "onnx",
                     "torch_script"
                 ],
-                "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 384 dimensional dense vector space and can be used for tasks like clustering or semantic search."
-            }
-        }
-    },
-    {
-        "name": "huggingface/sentence-transformers/paraphrase-mpnet-base-v2",
-        "versions": {
-            "1.0.0": {
+                "description": "This is a port of the DistilBert TAS-B Model to sentence-transformers model: It maps sentences & paragraphs to a 768 dimensional dense vector space and is optimized for the task of semantic search."
+            },
+            "1.0.2": {
                 "format": [
                     "onnx",
                     "torch_script"
                 ],
-                "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 768 dimensional dense vector space and can be used for tasks like clustering or semantic search."
+                "description": "This is a port of the DistilBert TAS-B Model to sentence-transformers model: It maps sentences & paragraphs to a 768 dimensional dense vector space and is optimized for the task of semantic search. This model version automatically truncates to a maximum of 512 tokens."
+            },
+            "1.0.3": {
+                "format": [
+                    "onnx",
+                    "torch_script"
+                ],
+                "description": "This is a port of the DistilBert TAS-B Model to sentence-transformers model: It maps sentences & paragraphs to a 768 dimensional dense vector space and is optimized for the task of semantic search. This model version automatically truncates to a maximum of 512 tokens."
             }
         }
     },
     {
-        "name": "huggingface/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
+        "name": "huggingface/sentence-transformers/multi-qa-MiniLM-L6-cos-v1",
         "versions": {
             "1.0.1": {
                 "format": [
                     "onnx",
                     "torch_script"
                 ],
-                "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 384 dimensional dense vector space and can be used for tasks like clustering or semantic search."
+                "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 384 dimensional dense vector space and was designed for semantic search. It has been trained on 215M (question, answer) pairs from diverse sources."
+            },
+            "1.0.2": {
+                "format": [
+                    "onnx",
+                    "torch_script"
+                ],
+                "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 384 dimensional dense vector space and was designed for semantic search. It has been trained on 215M (question, answer) pairs from diverse sources."
             }
         }
     },
     {
-        "name": "amazon/neural-sparse/opensearch-neural-sparse-encoding-doc-v1",
+        "name": "huggingface/sentence-transformers/multi-qa-mpnet-base-dot-v1",
         "versions": {
             "1.0.1": {
                 "format": [
+                    "onnx",
                     "torch_script"
                 ],
-                "description": "This is a neural sparse encoding model: It transfers text into sparse vector, and then extract nonzero index and value to entry and weights. It serves only in ingestion and customer should use tokenizer model in query."
-            }
-        }
-    },
-    {
-        "name": "huggingface/cross-encoders/ms-marco-MiniLM-L-6-v2",
-        "versions": {
+                "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 768 dimensional dense vector space and was designed for semantic search. It has been trained on 215M (question, answer) pairs from diverse sources."
+            },
             "1.0.2": {
                 "format": [
                     "onnx",
                     "torch_script"
                 ],
-                "description": "The model can be used for Information Retrieval: Given a query, encode the query will all possible passages (e.g. retrieved with ElasticSearch). Then sort the passages in a decreasing order."
+                "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 768 dimensional dense vector space and was designed for semantic search. It has been trained on 215M (question, answer) pairs from diverse sources."
             }
         }
     },
     {
-        "name": "huggingface/cross-encoders/ms-marco-MiniLM-L-12-v2",
+        "name": "huggingface/sentence-transformers/paraphrase-MiniLM-L3-v2",
         "versions": {
+            "1.0.1": {
+                "format": [
+                    "onnx",
+                    "torch_script"
+                ],
+                "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 384 dimensional dense vector space and can be used for tasks like clustering or semantic search."
+            },
             "1.0.2": {
                 "format": [
                     "onnx",
                     "torch_script"
                 ],
-                "description": "The model can be used for Information Retrieval: Given a query, encode the query will all possible passages (e.g. retrieved with ElasticSearch). Then sort the passages in a decreasing order."
+                "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 384 dimensional dense vector space and can be used for tasks like clustering or semantic search."
             }
         }
     },
     {
-        "name": "amazon/neural-sparse/opensearch-neural-sparse-encoding-doc-v2-distill",
+        "name": "huggingface/sentence-transformers/paraphrase-mpnet-base-v2",
         "versions": {
             "1.0.0": {
                 "format": [
+                    "onnx",
                     "torch_script"
                 ],
-                "description": "This is a neural sparse encoding model: It transfers text into sparse vector, and then extract nonzero index and value to entry and weights. It serves only in ingestion and customer should use tokenizer model in query."
-            }
-        }
-    },
-    {
-        "name": "amazon/neural-sparse/opensearch-neural-sparse-encoding-doc-v2-mini",
-        "versions": {
-            "1.0.0": {
+                "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 768 dimensional dense vector space and can be used for tasks like clustering or semantic search."
+            },
+            "1.0.1": {
                 "format": [
+                    "onnx",
                     "torch_script"
                 ],
-                "description": "This is a neural sparse encoding model: It transfers text into sparse vector, and then extract nonzero index and value to entry and weights. It serves only in ingestion and customer should use tokenizer model in query."
+                "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 768 dimensional dense vector space and can be used for tasks like clustering or semantic search."
             }
         }
     },
     {
-        "name": "amazon/neural-sparse/opensearch-neural-sparse-encoding-v2-distill",
+        "name": "huggingface/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
         "versions": {
-            "1.0.0": {
+            "1.0.1": {
                 "format": [
+                    "onnx",
                     "torch_script"
                 ],
-                "description": "This is a neural sparse encoding model: It transfers text into sparse vector, and then extract nonzero index and value to entry and weights. It serves in both ingestion and search."
-            }
-        }
-    },
-    {
-        "name": "amazon/sentence-highlighting/opensearch-semantic-highlighter-v1",
-        "versions": {
-            "1.0.0": {
+                "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 384 dimensional dense vector space and can be used for tasks like clustering or semantic search."
+            },
+            "1.0.2": {
                 "format": [
+                    "onnx",
                     "torch_script"
                 ],
-                "description": "A semantic highlighter model that identifies and highlights relevant sentences in a document given a query."
+                "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 384 dimensional dense vector space and can be used for tasks like clustering or semantic search."
             }
         }
     }