diff --git a/CHANGELOG.md b/CHANGELOG.md index b083ebd7a..93e93bfc2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -58,6 +58,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) - Bump pandas from 1.5.3 to the latest stable version by @yerzhaisang ([#422](https://github.com/opensearch-project/opensearch-py-ml/pull/422)) - Upgrade mypy, sphinx, sphinx-rtd-theme, and multiple GitHub Actions (setup-python, backport, codecov-action, create-pull-request, get-pr-commits) by @yerzhaisang([#437](https://github.com/opensearch-project/opensearch-py-ml/pull/437)) - Increment version to 1.2.0 and update maintainer list in _version.py by @nathaliellenaa([#516](https://github.com/opensearch-project/opensearch-py-ml/pull/516)) +- Update pretrained_models_all_versions.json (2025-05-29 15:51:56) by @dhrubo-os ([#520](https://github.com/opensearch-project/opensearch-py-ml/pull/520)) ### Fixed - Fix the wrong final zip file name in model_uploader workflow, now will name it by the upload_prefix alse.([#413](https://github.com/opensearch-project/opensearch-py-ml/pull/413/files)) diff --git a/utils/model_uploader/model_listing/pretrained_models_all_versions.json b/utils/model_uploader/model_listing/pretrained_models_all_versions.json index bfcb73eb2..7b9ea0e58 100644 --- a/utils/model_uploader/model_listing/pretrained_models_all_versions.json +++ b/utils/model_uploader/model_listing/pretrained_models_all_versions.json @@ -1,122 +1,96 @@ [ { - "name": "huggingface/sentence-transformers/all-MiniLM-L12-v2", + "name": "huggingface/cross-encoders/ms-marco-MiniLM-L-12-v2", "versions": { - "1.0.1": { + "1.0.0": { "format": [ "onnx", "torch_script" ], - "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 384 dimensional dense vector space and can be used for tasks like clustering or semantic search." 
- } - } - }, - { - "name": "huggingface/sentence-transformers/all-MiniLM-L6-v2", - "versions": { + "description": "The model can be used for Information Retrieval: Given a query, encode the query with all possible passages (e.g. retrieved with ElasticSearch). Then sort the passages in a decreasing order." + }, "1.0.1": { "format": [ "onnx", "torch_script" ], - "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 384 dimensional dense vector space and can be used for tasks like clustering or semantic search." - } - } - }, - { - "name": "huggingface/sentence-transformers/all-distilroberta-v1", - "versions": { - "1.0.1": { + "description": "The model can be used for Information Retrieval: Given a query, encode the query with all possible passages (e.g. retrieved with ElasticSearch). Then sort the passages in a decreasing order." + }, + "1.0.2": { "format": [ "onnx", "torch_script" ], - "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 768 dimensional dense vector space and can be used for tasks like clustering or semantic search." + "description": "The model can be used for Information Retrieval: Given a query, encode the query with all possible passages (e.g. retrieved with ElasticSearch). Then sort the passages in a decreasing order." } } }, { - "name": "huggingface/sentence-transformers/all-mpnet-base-v2", + "name": "huggingface/cross-encoders/ms-marco-MiniLM-L-6-v2", "versions": { - "1.0.1": { + "1.0.0": { "format": [ "onnx", "torch_script" ], - "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 768 dimensional dense vector space and can be used for tasks like clustering or semantic search." 
- } - } - }, - { - "name": "huggingface/sentence-transformers/distiluse-base-multilingual-cased-v1", - "versions": { - "1.0.1": { - "format": [ - "torch_script" - ], - "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 512 dimensional dense vector space and can be used for tasks like clustering or semantic search." - } - } - }, - { - "name": "huggingface/sentence-transformers/msmarco-distilbert-base-tas-b", - "versions": { + "description": "The model can be used for Information Retrieval: Given a query, encode the query with all possible passages (e.g. retrieved with ElasticSearch). Then sort the passages in a decreasing order." + }, "1.0.1": { "format": [ "onnx", "torch_script" ], - "description": "This is a port of the DistilBert TAS-B Model to sentence-transformers model: It maps sentences & paragraphs to a 768 dimensional dense vector space and is optimized for the task of semantic search." + "description": "The model can be used for Information Retrieval: Given a query, encode the query with all possible passages (e.g. retrieved with ElasticSearch). Then sort the passages in a decreasing order." }, "1.0.2": { "format": [ "onnx", "torch_script" ], - "description": "This is a port of the DistilBert TAS-B Model to sentence-transformers model: It maps sentences & paragraphs to a 768 dimensional dense vector space and is optimized for the task of semantic search. The model version automatically truncates input to a maximum of 512 tokens." + "description": "The model can be used for Information Retrieval: Given a query, encode the query with all possible passages (e.g. retrieved with ElasticSearch). Then sort the passages in a decreasing order." 
} } }, { - "name": "huggingface/sentence-transformers/multi-qa-MiniLM-L6-cos-v1", + "name": "huggingface/sentence-transformers/all-MiniLM-L12-v2", "versions": { "1.0.1": { "format": [ "onnx", "torch_script" ], - "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 384 dimensional dense vector space and was designed for semantic search. It has been trained on 215M (question, answer) pairs from diverse sources." + "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 384 dimensional dense vector space and can be used for tasks like clustering or semantic search." } } }, { - "name": "huggingface/sentence-transformers/multi-qa-mpnet-base-dot-v1", + "name": "huggingface/sentence-transformers/all-MiniLM-L6-v2", "versions": { "1.0.1": { "format": [ "onnx", "torch_script" ], - "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 768 dimensional dense vector space and was designed for semantic search. It has been trained on 215M (question, answer) pairs from diverse sources." + "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 384 dimensional dense vector space and can be used for tasks like clustering or semantic search." } } }, { - "name": "huggingface/sentence-transformers/paraphrase-MiniLM-L3-v2", + "name": "huggingface/sentence-transformers/all-distilroberta-v1", "versions": { "1.0.1": { "format": [ "onnx", "torch_script" ], - "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 384 dimensional dense vector space and can be used for tasks like clustering or semantic search." + "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 768 dimensional dense vector space and can be used for tasks like clustering or semantic search." 
} } }, { - "name": "huggingface/sentence-transformers/paraphrase-mpnet-base-v2", + "name": "huggingface/sentence-transformers/all-mpnet-base-v2", "versions": { - "1.0.0": { + "1.0.1": { "format": [ "onnx", "torch_script" @@ -126,93 +100,92 @@ } }, { - "name": "huggingface/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", + "name": "huggingface/sentence-transformers/distiluse-base-multilingual-cased-v1", "versions": { "1.0.1": { "format": [ - "onnx", "torch_script" ], - "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 384 dimensional dense vector space and can be used for tasks like clustering or semantic search." + "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 512 dimensional dense vector space and can be used for tasks like clustering or semantic search." } } }, { - "name": "amazon/neural-sparse/opensearch-neural-sparse-encoding-doc-v1", + "name": "huggingface/sentence-transformers/msmarco-distilbert-base-tas-b", "versions": { "1.0.1": { "format": [ + "onnx", "torch_script" ], - "description": "This is a neural sparse encoding model: It transfers text into sparse vector, and then extract nonzero index and value to entry and weights. It serves only in ingestion and customer should use tokenizer model in query." - } - } - }, - { - "name": "huggingface/cross-encoders/ms-marco-MiniLM-L-6-v2", - "versions": { + "description": "This is a port of the DistilBert TAS-B Model to sentence-transformers model: It maps sentences & paragraphs to a 768 dimensional dense vector space and is optimized for the task of semantic search." + }, "1.0.2": { "format": [ "onnx", "torch_script" ], - "description": "The model can be used for Information Retrieval: Given a query, encode the query will all possible passages (e.g. retrieved with ElasticSearch). Then sort the passages in a decreasing order." 
+ "description": "This is a port of the DistilBert TAS-B Model to sentence-transformers model: It maps sentences & paragraphs to a 768 dimensional dense vector space and is optimized for the task of semantic search. This model version automatically truncates to a maximum of 512 tokens." } } }, { - "name": "huggingface/cross-encoders/ms-marco-MiniLM-L-12-v2", + "name": "huggingface/sentence-transformers/multi-qa-MiniLM-L6-cos-v1", "versions": { - "1.0.2": { + "1.0.1": { "format": [ "onnx", "torch_script" ], - "description": "The model can be used for Information Retrieval: Given a query, encode the query will all possible passages (e.g. retrieved with ElasticSearch). Then sort the passages in a decreasing order." + "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 384 dimensional dense vector space and was designed for semantic search. It has been trained on 215M (question, answer) pairs from diverse sources." } } }, { - "name": "amazon/neural-sparse/opensearch-neural-sparse-encoding-doc-v2-distill", + "name": "huggingface/sentence-transformers/multi-qa-mpnet-base-dot-v1", "versions": { - "1.0.0": { + "1.0.1": { "format": [ + "onnx", "torch_script" ], - "description": "This is a neural sparse encoding model: It transfers text into sparse vector, and then extract nonzero index and value to entry and weights. It serves only in ingestion and customer should use tokenizer model in query." + "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 768 dimensional dense vector space and was designed for semantic search. It has been trained on 215M (question, answer) pairs from diverse sources." 
} } }, { - "name": "amazon/neural-sparse/opensearch-neural-sparse-encoding-doc-v2-mini", + "name": "huggingface/sentence-transformers/paraphrase-MiniLM-L3-v2", "versions": { - "1.0.0": { + "1.0.1": { "format": [ + "onnx", "torch_script" ], - "description": "This is a neural sparse encoding model: It transfers text into sparse vector, and then extract nonzero index and value to entry and weights. It serves only in ingestion and customer should use tokenizer model in query." + "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 384 dimensional dense vector space and can be used for tasks like clustering or semantic search." } } }, { - "name": "amazon/neural-sparse/opensearch-neural-sparse-encoding-v2-distill", + "name": "huggingface/sentence-transformers/paraphrase-mpnet-base-v2", "versions": { "1.0.0": { "format": [ + "onnx", "torch_script" ], - "description": "This is a neural sparse encoding model: It transfers text into sparse vector, and then extract nonzero index and value to entry and weights. It serves in both ingestion and search." + "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 768 dimensional dense vector space and can be used for tasks like clustering or semantic search." } } }, { - "name": "amazon/sentence-highlighting/opensearch-semantic-highlighter-v1", + "name": "huggingface/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", "versions": { - "1.0.0": { + "1.0.1": { "format": [ + "onnx", "torch_script" ], - "description": "A semantic highlighter model that identifies and highlights relevant sentences in a document given a query." + "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 384 dimensional dense vector space and can be used for tasks like clustering or semantic search." } } }