diff --git a/docs/source/examples/asymmetric_e5_model/README.md b/docs/source/examples/asymmetric_e5_model/README.md new file mode 100644 index 00000000..88b75d75 --- /dev/null +++ b/docs/source/examples/asymmetric_e5_model/README.md @@ -0,0 +1,49 @@ +# Multilingual E5 Small Model - SageMaker & OpenSearch Integration + +Deploy the `intfloat/multilingual-e5-small` model to Amazon SageMaker and connect it to OpenSearch for semantic search. + +## Project Structure + +``` +asymmetric_e5_model/ +├── sagemaker_deployment/ # SageMaker model deployment +│ ├── deploy_cli.sh # Deploy to SageMaker +│ ├── validate_cli.sh # Validate SageMaker endpoint +│ ├── model-config.json # Model configuration +│ ├── inference.py # Custom inference code +│ └── README.md +├── opensearch_connector/ # OpenSearch integration +│ ├── setup_connector.sh # Setup connector (auto-detects local/managed) +│ ├── validate_connector.sh # Validate connector +│ └── README.md +└── README.md # This file +``` + +## Quick Start + +### 1. Deploy to SageMaker +```bash +cd sagemaker_deployment +./deploy_cli.sh +./validate_cli.sh +``` + +### 2. 
Setup OpenSearch Connector +```bash +cd opensearch_connector +./setup_connector.sh +./validate_connector.sh +``` + +## Prerequisites + +- AWS CLI configured with appropriate permissions +- SageMaker execution role with necessary permissions +- OpenSearch cluster with ML Commons plugin enabled +- `jq` installed for JSON parsing + +## Cost Considerations + +- ml.m5.large: ~$0.115/hour (instance type used by `deploy_cli.sh`) +- ml.t2.medium: ~$0.056/hour (lower-cost alternative for light testing) +- Use auto-scaling for production workloads diff --git a/docs/source/examples/asymmetric_e5_model/opensearch_connector/README.md b/docs/source/examples/asymmetric_e5_model/opensearch_connector/README.md new file mode 100644 index 00000000..eedc17b9 --- /dev/null +++ b/docs/source/examples/asymmetric_e5_model/opensearch_connector/README.md @@ -0,0 +1,41 @@ +# OpenSearch Remote Connector + +Connect deployed SageMaker endpoint to OpenSearch for ML inference. + +## Files + +- `setup_connector.sh` - Setup connector (auto-detects local vs managed OpenSearch) +- `validate_connector.sh` - Validate connector functionality + +## Prerequisites + +- Deployed SageMaker endpoint +- OpenSearch cluster with ML Commons plugin enabled +- `jq` installed for JSON parsing + +## Usage + +The setup script automatically detects whether you're using local or managed OpenSearch: + +- **Local OpenSearch** (localhost/127.0.0.1): Uses AWS access key credentials +- **Managed OpenSearch** (AWS domain): Uses IAM role credentials + +```bash +chmod +x setup_connector.sh validate_connector.sh +./setup_connector.sh +./validate_connector.sh +``` + +## Examples + +### Local OpenSearch +```bash +./setup_connector.sh http://localhost:9200 multilingual-e5-endpoint-1761349656 +./validate_connector.sh http://localhost:9200 hMW9GJoBeER1e719aVX6 +``` + +### Managed OpenSearch +```bash +./setup_connector.sh https://search-domain.us-east-1.es.amazonaws.com multilingual-e5-endpoint-1761349656 +./validate_connector.sh 
#!/bin/bash
#
# setup_connector.sh - Expose a SageMaker-hosted multilingual-e5-small endpoint
# to OpenSearch ML Commons as a remote, asymmetric text-embedding model.
#
# Steps:
#   1. Create an aws_sigv4 connector that POSTs to the endpoint's /invocations URL.
#   2. Register a remote model bound to that connector (384 dimensions,
#      "query: " / "passage: " prefixes).
#   3. Deploy the model and print its ID for validate_connector.sh.
#
# Usage:
#   ./setup_connector.sh <opensearch_endpoint> <sagemaker_endpoint_name>
#
# Environment:
#   REGION  AWS region of the SageMaker endpoint (default: us-east-1)
#
# Requires: aws, curl, jq.
#
# NOTE: credentials are always the static keys returned by `aws configure get`
# for the active profile; they are stored in the connector. Requests to
# OpenSearch itself are sent without authentication.

set -uo pipefail

if [ $# -ne 2 ]; then
  echo "Usage: $0 <opensearch_endpoint> <sagemaker_endpoint_name>"
  echo "Example: $0 http://localhost:9200 multilingual-e5-endpoint-1761349656"
  exit 1
fi

# Drop a trailing slash so request URLs never contain "//_plugins".
OPENSEARCH_ENDPOINT=${1%/}
SAGEMAKER_ENDPOINT=$2
REGION="${REGION:-us-east-1}"

# Polling budget for asynchronous ML Commons operations (30 x 2s = 60s).
readonly POLL_ATTEMPTS=30
readonly POLL_INTERVAL=2

# Print a message to stderr and abort.
die() {
  echo "$*" >&2
  exit 1
}

# POST a JSON document (read from stdin) to an ML Commons API path.
# Arguments: $1 - path below the OpenSearch endpoint
# Outputs:   response body on stdout
post_json() {
  # The body comes from stdin so secrets never show up in curl's argv.
  curl -s -X POST "${OPENSEARCH_ENDPOINT}$1" \
    -H "Content-Type: application/json" \
    --data-binary @-
}

# Poll a registration task until it completes and reports a model ID.
# Arguments: $1 - task ID
# Outputs:   model ID on stdout
# Returns:   0 on success, 1 if the task failed or the polling budget ran out
wait_for_model_id() {
  local task_id=$1
  local attempt response state model_id
  for ((attempt = 0; attempt < POLL_ATTEMPTS; attempt++)); do
    response=$(curl -s -X GET "${OPENSEARCH_ENDPOINT}/_plugins/_ml/tasks/${task_id}")
    state=$(jq -r '.state // empty' <<<"$response" 2>/dev/null)
    model_id=$(jq -r '.model_id // empty' <<<"$response" 2>/dev/null)
    if [[ "$state" == "FAILED" ]]; then
      echo "$response" >&2
      return 1
    fi
    if [[ "$state" == "COMPLETED" && -n "$model_id" ]]; then
      printf '%s\n' "$model_id"
      return 0
    fi
    sleep "$POLL_INTERVAL"
  done
  return 1
}

# Poll a model until ML Commons reports it as deployed.
# Arguments: $1 - model ID
# Returns:   0 once deployed, 1 on deploy failure or timeout
wait_for_deployed() {
  local model_id=$1
  local attempt state
  for ((attempt = 0; attempt < POLL_ATTEMPTS; attempt++)); do
    state=$(curl -s -X GET "${OPENSEARCH_ENDPOINT}/_plugins/_ml/models/${model_id}" \
      | jq -r '.model_state // empty' 2>/dev/null)
    case "$state" in
      DEPLOYED | PARTIALLY_DEPLOYED) return 0 ;;
      DEPLOY_FAILED) return 1 ;;
    esac
    sleep "$POLL_INTERVAL"
  done
  return 1
}

for tool in aws curl jq; do
  command -v "$tool" >/dev/null 2>&1 || die "Required tool not found: $tool"
done

echo "Setting up asymmetric E5 remote model connector with post-processing..."

# Get AWS credentials
AWS_ACCESS_KEY=$(aws configure get aws_access_key_id)
AWS_SECRET_KEY=$(aws configure get aws_secret_access_key)
# Only set for temporary credentials; empty otherwise.
AWS_SESSION_TOKEN=$(aws configure get aws_session_token)

if [[ -z "$AWS_ACCESS_KEY" || -z "$AWS_SECRET_KEY" ]]; then
  die "No access key / secret key found via 'aws configure get'; cannot create connector credentials"
fi

# Build the connector definition with jq so every value is JSON-escaped.
# Values are handed over through the environment rather than argv, and
# session_token is left out entirely when no token is configured. The
# request_body template forwards the texts and content_type parameters
# unchanged; no post_process_function is configured.
CONNECTOR_PAYLOAD=$(
  E5_REGION="$REGION" \
  E5_INVOKE_URL="https://runtime.sagemaker.${REGION}.amazonaws.com/endpoints/${SAGEMAKER_ENDPOINT}/invocations" \
  E5_ACCESS_KEY="$AWS_ACCESS_KEY" \
  E5_SECRET_KEY="$AWS_SECRET_KEY" \
  E5_SESSION_TOKEN="$AWS_SESSION_TOKEN" \
  jq -n '{
    name: "sagemaker-e5-asymmetric-connector",
    description: "Connector for multilingual-e5-small asymmetric model with flattened response",
    version: "1",
    protocol: "aws_sigv4",
    parameters: {
      region: (env.E5_REGION),
      service_name: "sagemaker"
    },
    credential: (
      {access_key: (env.E5_ACCESS_KEY), secret_key: (env.E5_SECRET_KEY)}
      + (if (env.E5_SESSION_TOKEN) == "" then {} else {session_token: (env.E5_SESSION_TOKEN)} end)
    ),
    actions: [
      {
        action_type: "predict",
        method: "POST",
        url: (env.E5_INVOKE_URL),
        headers: {"content-type": "application/json"},
        request_body: "{ \"texts\": ${parameters.texts}, \"content_type\": \"${parameters.content_type}\" }"
      }
    ]
  }'
) || die "Failed to build connector payload"

CONNECTOR_RESPONSE=$(printf '%s' "$CONNECTOR_PAYLOAD" | post_json "/_plugins/_ml/connectors/_create")
CONNECTOR_ID=$(jq -r '.connector_id // empty' <<<"$CONNECTOR_RESPONSE" 2>/dev/null)

if [[ -z "$CONNECTOR_ID" ]]; then
  echo "Failed to create connector:" >&2
  die "$CONNECTOR_RESPONSE"
fi

echo "✓ Connector created with post-processing: $CONNECTOR_ID"

# Register model with asymmetric identifiers
MODEL_PAYLOAD=$(jq -n --arg connector_id "$CONNECTOR_ID" '{
  name: "e5-asymmetric-remote",
  function_name: "remote",
  connector_id: $connector_id,
  model_config: {
    model_type: "text_embedding",
    embedding_dimension: 384,
    framework_type: "SENTENCE_TRANSFORMERS",
    additional_config: {
      space_type: "l2",
      is_asymmetric: true,
      model_family: "e5",
      query_prefix: "query: ",
      passage_prefix: "passage: "
    }
  }
}') || die "Failed to build model registration payload"

MODEL_RESPONSE=$(printf '%s' "$MODEL_PAYLOAD" | post_json "/_plugins/_ml/models/_register")
TASK_ID=$(jq -r '.task_id // empty' <<<"$MODEL_RESPONSE" 2>/dev/null)

if [[ -z "$TASK_ID" ]]; then
  echo "Failed to register model:" >&2
  die "$MODEL_RESPONSE"
fi

MODEL_ID=$(wait_for_model_id "$TASK_ID") \
  || die "Model registration task $TASK_ID did not complete successfully"

# Deploy model
DEPLOY_RESPONSE=$(curl -s -X POST "${OPENSEARCH_ENDPOINT}/_plugins/_ml/models/${MODEL_ID}/_deploy")
if [[ -z "$(jq -r '.task_id // empty' <<<"$DEPLOY_RESPONSE" 2>/dev/null)" ]]; then
  echo "Failed to start model deployment:" >&2
  die "$DEPLOY_RESPONSE"
fi

wait_for_deployed "$MODEL_ID" || die "Model $MODEL_ID did not reach the deployed state"

echo "✓ Model deployed: $MODEL_ID"
echo ""
echo "Run validation: ./validate_connector.sh $OPENSEARCH_ENDPOINT $MODEL_ID"
#!/bin/bash
#
# validate_connector.sh - Smoke-test a remote E5 model registered in OpenSearch
# ML Commons by requesting one query embedding and one passage embedding.
#
# Usage:
#   ./validate_connector.sh <opensearch_endpoint> <model_id>
#
# Exit status:
#   0  both predictions returned a non-empty vector of the same length (and,
#      when the model reports one, of its registered embedding_dimension)
#   1  otherwise; both raw responses are printed for diagnosis
#
# Requires: curl, jq.

set -uo pipefail

if [ $# -ne 2 ]; then
  echo "Usage: $0 <opensearch_endpoint> <model_id>"
  echo "Example: $0 http://localhost:9200 abc123"
  exit 1
fi

# Drop a trailing slash so request URLs never contain "//_plugins".
OPENSEARCH_ENDPOINT=${1%/}
MODEL_ID=$2

# Call the model's _predict API with a single text.
# Arguments: $1 - content_type parameter ("query" or "passage")
#            $2 - text to embed
# Outputs:   raw response body on stdout
predict() {
  local content_type=$1 text=$2 payload
  payload=$(jq -n --arg text "$text" --arg content_type "$content_type" \
    '{parameters: {texts: [$text], content_type: $content_type}}') || return
  curl -s -X POST "${OPENSEARCH_ENDPOINT}/_plugins/_ml/models/${MODEL_ID}/_predict" \
    -H "Content-Type: application/json" \
    -d "$payload"
}

# Extract the length of the first embedding from a _predict response.
# Arguments: $1 - response body
# Outputs:   a non-negative integer; 0 when no embedding array is present
embedding_dim() {
  local dim
  # The response is wrapped in an array, so the vector is response[0].
  # `length` is applied only to arrays: on a number jq would return its
  # absolute value, which is not a dimension.
  dim=$(jq -r '
    .inference_results[0].output[0].dataAsMap.response[0]
    | if type == "array" then length else 0 end
  ' <<<"$1" 2>/dev/null)
  [[ "$dim" =~ ^[0-9]+$ ]] || dim=0
  printf '%s\n' "$dim"
}

echo "Validating asymmetric remote model with OpenSearch ML Commons format..."

# Check model config
echo "Model configuration:"
MODEL_INFO=$(curl -s -X GET "${OPENSEARCH_ENDPOINT}/_plugins/_ml/models/${MODEL_ID}")
jq '.model_config.additional_config' <<<"$MODEL_INFO"
# Registered dimension, if the model document exposes one; used as a cross-check.
EXPECTED_DIM=$(jq -r '.model_config.embedding_dimension // empty' <<<"$MODEL_INFO" 2>/dev/null)

# Test query embedding
echo -e "\nTesting query embedding..."
QUERY_RESPONSE=$(predict "query" "What is machine learning?")
QUERY_DIM=$(embedding_dim "$QUERY_RESPONSE")

# Test passage embedding
echo "Testing passage embedding..."
PASSAGE_RESPONSE=$(predict "passage" "Machine learning is a subset of artificial intelligence.")
PASSAGE_DIM=$(embedding_dim "$PASSAGE_RESPONSE")

# Validation results
FAILURE=""
if ((QUERY_DIM == 0 || PASSAGE_DIM == 0)); then
  FAILURE="no embedding vector found in one or both responses"
elif ((QUERY_DIM != PASSAGE_DIM)); then
  FAILURE="query dimension ($QUERY_DIM) differs from passage dimension ($PASSAGE_DIM)"
elif [[ "$EXPECTED_DIM" =~ ^[0-9]+$ ]] && ((QUERY_DIM != EXPECTED_DIM)); then
  FAILURE="embedding dimension $QUERY_DIM does not match registered embedding_dimension $EXPECTED_DIM"
fi

if [[ -z "$FAILURE" ]]; then
  echo -e "\n✓ Validation successful with flattened response!"
  echo "✓ Query embedding dimension: $QUERY_DIM"
  echo "✓ Passage embedding dimension: $PASSAGE_DIM"
  echo "✓ Post-processing function working correctly (no processing needed)"
  echo "✓ Asymmetric remote model ready for neural-search"
else
  echo -e "\n✗ Validation failed"
  echo "Reason: $FAILURE"
  echo "Query response: $QUERY_RESPONSE"
  echo "Passage response: $PASSAGE_RESPONSE"
  exit 1
fi
0.46070632338523865, -0.07456876337528229, 0.19199377298355103, 0.12527324259281158, 0.2204572707414627, -0.19901631772518158, -0.21329979598522186, 0.3345649540424347, 0.3233471214771271, -0.2748561203479767, -0.16117562353610992, -0.5108640789985657, -0.044753868132829666, 0.05922229588031769, -0.3491182327270508, 0.40106308460235596, 0.4356435239315033, 0.1736554354429245, 0.27514031529426575, -0.07730219513177872, 0.16148294508457184, -0.295236736536026, -0.30202022194862366, -0.21761786937713623, 0.3350957930088043, -0.19338198006153107, 0.22834889590740204, 0.023187408223748207, -0.33283114433288574, -0.08014051616191864, 0.39514896273612976, -0.3215436041355133, 0.05863415077328682, 0.3681980073451996, 0.07294655591249466, 0.1881101131439209, 0.16668389737606049, 0.17604459822177887, -0.2039717435836792, 0.21589720249176025, 0.13771310448646545, -0.0919647291302681, -0.2667471766471863, 0.23983322083950043, 0.16578485071659088, 0.1979793757200241, 0.25132128596305847, -0.23382262885570526, -0.29370570182800293, -0.2551599144935608, -0.38960930705070496, -0.039839573204517365, 0.2916334271430969, -0.3014938533306122, -0.02439805120229721, 0.04658728465437889, 0.34110966324806213, 0.2605176866054535, 0.05444241687655449, 0.005997715052217245, -0.3516148626804352, -0.14189280569553375, -0.34010589122772217, 0.22528629004955292, 0.04097418859601021, -0.11068100482225418, 0.19737422466278076, -0.013633948750793934, 0.2216327041387558, -0.26179832220077515, -0.014621083624660969, -0.1630561202764511, -0.5414846539497375, -0.2445427030324936, -0.27291637659072876, 0.0536770336329937, -0.24048207700252533, 0.2793055474758148, 0.2719487249851227, -0.43395480513572693, 0.26769599318504333, -0.14198651909828186, 0.13415135443210602, 0.01332690566778183, -0.20169222354888916, -0.2481244057416916, -0.15707294642925262, -0.28073880076408386, -0.16542069613933563, 0.3177223205566406, 0.12545856833457947, -0.22436057031154633, 0.23595686256885529, -0.09793320298194885, 
0.26328244805336, -0.24652822315692902, -0.18275590240955353, 0.2157420665025711, 0.13654851913452148, -0.2494816780090332, 0.3006797134876251, -0.42382705211639404, -0.25198158621788025, 0.1707943230867386, 0.30590298771858215, 0.29904553294181824, -0.14574192464351654, 0.014917866326868534, -0.20398318767547607, -0.08696648478507996, 0.09606444090604782, -0.1966981738805771, 0.5671528577804565, -0.1208694577217102, -0.35229387879371643, -0.24308009445667267, -0.1536935418844223, -0.24380230903625488, -0.014862663112580776, 0.2006104737520218, -0.09023428708314896, 0.025913052260875702, 0.1221967414021492, 0.26889216899871826, 0.03368315473198891, 0.4405868947505951, -0.040425244718790054, 0.3578610122203827, -0.37594643235206604, -0.29370489716529846, -0.053275302052497864, -0.2264849692583084, -0.11250916123390198, 0.4001140296459198, -0.17820560932159424, 0.322318971157074, 0.4577997922897339, 0.18764644861221313, 0.4118485748767853, 0.09684164077043533, 0.4934040606021881, -0.15834948420524597, 0.2028166502714157, -0.20119380950927734, 0.0720454677939415, 0.12703478336334229, 0.1126365065574646, -0.14861132204532623, -0.17914055287837982, -0.23946310579776764, 0.23737357556819916, 0.06479687243700027, -0.11129844188690186, -0.25300133228302, -0.3461829125881195, -0.06625434756278992, -0.42629578709602356, -0.27556106448173523, 0.15835250914096832, 0.3716146945953369, -0.27669230103492737, -0.2905639111995697, -0.3384515047073364, 0.1337440311908722, -0.3453388512134552, 0.3853027820587158, -0.024352751672267914, 0.28315070271492004, -0.11186251044273376, 0.4832243025302887, 0.36072614789009094, 0.20030361413955688, -0.22898799180984497, -0.08367487788200378, -0.38701510429382324, -0.336569219827652, -0.202059805393219, -0.24110935628414154, -0.15747791528701782, 0.24959363043308258, 0.4309763014316559, -0.13504467904567719, 0.09007302671670914, 0.11907175928354263, -0.2552972435951233, -0.33779633045196533, -0.014909546822309494, 0.009533454664051533, 
-0.17467840015888214, 0.2066553831100464, 0.2415183037519455, 0.32507896423339844, 0.0551767461001873, -0.019714197143912315, 0.32838523387908936, 0.13890205323696136, 0.15370625257492065, -0.060844894498586655, -0.5572726130485535, 0.2607137858867645, -0.2913813591003418, 0.22394491732120514, 0.4403073787689209, -0.1638769805431366, -0.42188000679016113, 0.24273069202899933, -0.16349317133426666, -0.06703342497348785, -0.048404913395643234, 0.37468421459198, -0.15043775737285614, 0.0833420380949974, 0.15648812055587769, -0.12193048745393753, 0.10134152323007584, -0.27248358726501465, -0.10386957973241806, 0.5114609003067017, 0.12185240536928177, -0.33968737721443176, -0.026263641193509102, 0.17217589914798737, -0.33004799485206604, -0.20234578847885132, -0.21508872509002686, -0.19373148679733276, -0.44877055287361145, -0.34779202938079834, -0.05986733362078667, 0.23047924041748047, 0.32975468039512634, -0.3893015384674072, -0.3020569086074829, -0.420619398355484, 0.294269859790802, -0.1721120923757553, 0.26115286350250244, -0.24489040672779083, 0.025629950687289238, 0.09010639786720276, 0.12845200300216675, 0.005253212060779333, 0.18028999865055084, -0.037560563534498215, -0.1858745813369751, -0.1668788194656372, -0.11272313445806503, 0.18612472712993622, 0.24533651769161224, 0.14690570533275604, -0.17163293063640594, 0.02798748016357422, 0.2224833220243454, 0.04877327382564545, 0.5472219586372375, 0.2834973931312561, 0.2307766228914261, 0.38751471042633057, -0.33294329047203064, -0.27732014656066895, -0.38004937767982483, -0.35429421067237854, -0.25443363189697266, 0.14598841965198517, 0.20117329061031342, -0.248932883143425, -0.39225444197654724, -0.3302960693836212, 0.05097399652004242, 0.5835191607475281, -0.27149200439453125, -0.31387272477149963, 0.20353324711322784, -0.01086350530385971, 0.3398315906524658, 0.44693875312805176, 0.12450791150331497, -0.1576516479253769, -0.13163615763187408, 0.20554684102535248, -0.08420971035957336, -0.31771066784858704, 
-0.2595221698284149, -0.38886821269989014, 0.24555666744709015, -0.2375999242067337, 0.3370110094547272, -0.02006193995475769, 0.03084886632859707, 0.2547343373298645, -0.3171059787273407, 0.20870035886764526, -0.2208980917930603, -0.1673668622970581, 0.399099737405777, 0.47974348068237305, -0.1426638513803482, 0.10651883482933044, -0.002449837513267994, 0.06023525819182396, 0.024109214544296265, 0.04211589694023132, 0.43614622950553894, 0.46233388781547546, -0.2911700904369354, -0.2905098795890808, 0.28847837448120117, 0.29792922735214233, -0.08132367581129074, 0.18856371939182281, -0.3147868812084198, -0.24237661063671112, -0.23928521573543549, -0.16218937933444977, 0.1691829413175583, -0.2907077968120575, 0.1689167469739914, 0.24316604435443878, -0.22275085747241974, -0.18394792079925537, 0.24727721512317657, 0.023630857467651367, 0.24785809218883514, -0.1722990870475769, -0.18327836692333221, 0.05747579410672188, -0.13658569753170013, -0.1135367825627327, -0.11628109216690063, 0.13512854278087616, -0.47805070877075195, -0.3184349238872528, 0.2274090200662613, 0.28230905532836914, -0.30896490812301636, 0.3379778563976288, -0.1951751857995987, -0.2729782164096832, 0.4244639575481415, -0.3605709373950958, -0.10947370529174805, 0.018936078995466232, 0.3705735504627228, -0.44834375381469727, -0.05145061016082764, 0.1725330352783203, -0.1254437118768692, 0.19373680651187897, 0.0031741533894091845, -0.1664164662361145, 0.6147427558898926, 0.30551037192344666, -0.3317946791648865, -0.16258276998996735, 0.22298216819763184, 0.16252411901950836, 0.3319890797138214, 0.2190818041563034, -0.11792375892400742, 0.021054839715361595, 0.1761171668767929, -0.19753895699977875, 0.21852876245975494, 0.1764644831418991, -0.28975698351860046, 0.12632346153259277, -0.07575345784425735, -0.23592786490917206, -0.5268154740333557, 0.359723299741745, -0.31478092074394226, -0.10997563600540161, 0.11382555961608887, 0.12888725101947784, 0.15410704910755157, 0.24876701831817627], 
[0.0999775305390358, -0.05962957814335823, -0.05314614251255989, -0.22110094130039215, 0.29804155230522156, -0.09600219875574112, 0.16180293262004852, 0.3137102425098419, 0.3540261685848236, -0.10435628890991211, 0.12153515219688416, 0.3168567717075348, 0.17583322525024414, -0.081809401512146, -0.14675955474376678, 0.2234644740819931, 0.3271208703517914, -0.46848225593566895, -0.16604040563106537, -0.3707093894481659, -0.056119609624147415, 0.1399156004190445, -0.2275126576423645, 0.35508671402931213, 0.30831775069236755, 0.061047863215208054, 0.14893324673175812, -0.09008998423814774, 0.307680606842041, -0.26119551062583923, -0.17986786365509033, -0.2202831506729126, 0.17325378954410553, -0.37597647309303284, 0.16063110530376434, 0.14066070318222046, -0.2366756945848465, -0.20333564281463623, 0.35488152503967285, -0.45532646775245667, 0.016140786930918694, 0.05908472463488579, 0.3397364914417267, 0.21942560374736786, 0.0645652487874031, 0.18638436496257782, -0.21891887485980988, 0.3400842249393463, -0.10128239542245865, -0.192411407828331, -0.2508872449398041, 0.2586592137813568, 0.0972096249461174, 0.40641382336616516, 0.1537451446056366, -0.4014897346496582, -0.47765040397644043, -0.04193119704723358, -0.22169649600982666, 0.19180679321289062, 0.1106720045208931, -0.2949364483356476, 0.041381001472473145, -0.0033010642509907484, 0.30777978897094727, 0.2724064290523529, 0.3075733482837677, 0.11646413058042526, -0.40939071774482727, -0.07724467664957047, -0.2564137578010559, 0.25657784938812256, 0.15948046743869781, -0.29421523213386536, 0.07417980581521988, 0.33058837056159973, 0.06850316375494003, -0.3097342252731323, 0.05532395839691162, -0.33371496200561523, -0.41707345843315125, -0.10249681025743484, -0.2850114703178406, 0.02425839751958847, -0.1642463654279709, 0.2669559419155121, 0.28119543194770813, -0.36006367206573486, 0.18554823100566864, -0.11914009600877762, 0.11820033937692642, 0.07220099121332169, -0.44976258277893066, -0.07455027848482132, 
-0.5743167996406555, -0.35938146710395813, -0.1502276510000229, 0.4133526384830475, 0.208688423037529, -0.3234487473964691, 0.16572405397891998, -0.07336613535881042, 0.2027130275964737, -0.20683853328227997, -0.05755296349525452, 0.21743184328079224, 0.24799633026123047, -0.24218951165676117, 0.2869885563850403, -0.3310370147228241, -0.3119741976261139, 0.19341324269771576, 0.40679478645324707, 0.18745265901088715, -0.1600116491317749, -0.06751596182584763, -0.26762834191322327, -0.3364934027194977, 0.052301544696092606, -0.020201193168759346, 0.3623519837856293, -0.07055529206991196, -0.36489951610565186, -0.2606472671031952, -0.06259597837924957, -0.3433840274810791, -0.17276662588119507, 0.04686420038342476, 0.1255204826593399, 0.09052889794111252, 0.15087085962295532, 0.19376377761363983, 0.23156209290027618, 0.2586691379547119, 0.26549574732780457, 0.24658799171447754, -0.3584660291671753, -0.013808022253215313, -0.14893291890621185, -0.21473735570907593, -0.22672437131404877, 0.2873546779155731, -0.13052549958229065, 0.4592549800872803, 0.47907719016075134, 0.10419289022684097, 0.3992815315723419, -0.000414971262216568, 0.4175891578197479, 0.015588726848363876, 0.16431362926959991, -0.24509142339229584, 0.035721827298402786, 0.14677006006240845, 0.21790973842144012, -0.37622714042663574, -0.3036378026008606, -0.10968455672264099, 0.3697183430194855, 0.17959082126617432, -0.23829112946987152, -0.2258891463279724, -0.11233177036046982, 0.033231187611818314, -0.24526424705982208, -0.18468652665615082, 0.07754522562026978, 0.18503905832767487, -0.25625163316726685, -0.2821245491504669, -0.19739629328250885, 0.15540051460266113, 0.011029203422367573, 0.4576004445552826, -0.03167002275586128, 0.1839066594839096, -0.13269656896591187, 0.2853644788265228, 0.17323578894138336, 0.056499481201171875, -0.07934180647134781, -0.0442131869494915, -0.09283355623483658, -0.2837194800376892, -0.4546079635620117, -0.07493028789758682, -0.4364403188228607, 0.10592934489250183, 
0.35666272044181824, -0.29059988260269165, 0.16162462532520294, 0.10978130251169205, -0.3881567716598511, -0.24765944480895996, -0.11910507082939148, 0.002099737524986267, -0.18793095648288727, 0.34897756576538086, 0.25983917713165283, 0.13871581852436066, -0.05134670063853264, -0.10496500879526138, 0.13731543719768524, 0.2528955638408661, 0.12442677468061447, -0.19754667580127716, -0.18394164741039276, 0.27617505192756653, -0.11062923073768616, 0.11790475249290466, 0.23934467136859894, -0.45000720024108887, -0.35737869143486023, 0.16998620331287384, -0.24521297216415405, -0.32750412821769714, -0.04043450951576233, 0.3443875014781952, -0.36456096172332764, 0.023230841383337975, 0.28337833285331726, -0.17409296333789825, 0.045505523681640625, -0.18089663982391357, -0.0871027410030365, 0.5041818618774414, 0.14371177554130554, -0.23383335769176483, -0.19744056463241577, 0.07477328181266785, -0.31431275606155396, -0.29076075553894043, -0.12143679708242416, -0.2731487452983856, -0.22192007303237915, -0.37057650089263916, -0.21017402410507202, 0.21570779383182526, 0.4591344892978668, -0.20688830316066742, -0.2977985441684723, -0.2737971544265747, 0.2510002851486206, -0.4741344451904297, 0.05282985046505928, -0.34126773476600647, 0.05424635484814644, 0.11132615059614182, 0.04085609316825867, 0.1614711731672287, 0.2996865510940552, -0.3573276996612549, -0.24894772469997406, -0.15300805866718292, -0.23170264065265656, 0.08898083120584488, 0.33614206314086914, 0.4581672251224518, -0.22666113078594208, 0.12396245449781418, 0.09208368510007858, -0.17645208537578583, 0.3947051763534546, 0.44218897819519043, 0.24836820363998413, 0.05743562802672386, -0.0632951557636261, 0.03244832530617714, -0.3342568874359131, -0.2512114942073822, -0.30931350588798523, 0.11582446098327637, 0.055070310831069946, -0.181472048163414, -0.382169246673584, -0.07930385321378708, 0.08898039907217026, 0.3965209424495697, -0.22289277613162994, -0.26069459319114685, 0.25234320759773254, 
0.10677119344472885, 0.33697834610939026, 0.31145724654197693, -0.06006557121872902, -0.12636829912662506, -0.036787427961826324, 0.4509601294994354, -0.28750261664390564, -0.25745689868927, -0.19700175523757935, -0.15874789655208588, 0.027448534965515137, -0.10440892726182938, 0.4774073362350464, 0.1090899333357811, 0.015258627943694592, 0.270328551530838, -0.415364146232605, 0.2166261225938797, -0.019646139815449715, -0.2594728469848633, 0.29571059346199036, 0.29338476061820984, -0.2018430233001709, 0.18641340732574463, 0.17142783105373383, 0.14538727700710297, 0.2038717120885849, 0.24734704196453094, 0.12867455184459686, 0.3276388347148895, -0.3378933370113373, -0.4443845748901367, 0.2945193946361542, 0.37546053528785706, -6.011997811583569e-06, 0.03243950381875038, -0.13658766448497772, -0.13352642953395844, -0.14861811697483063, -0.17841118574142456, 0.0266250092536211, -0.18401485681533813, 0.29827460646629333, 0.3250564932823181, -0.19795231521129608, -0.14346472918987274, 0.19268321990966797, -0.17019665241241455, 0.4010949432849884, -0.34125950932502747, -0.13112229108810425, 0.26826944947242737, -0.23141928017139435, 0.022011326625943184, -0.21802572906017303, 0.18837393820285797, -0.3366226255893707, -0.22568346560001373, -0.13989879190921783, 0.2963242530822754, -0.33139768242836, 0.19788579642772675, -0.1854097992181778, -0.22195012867450714, 0.5122227072715759, -0.1508956402540207, -0.26574140787124634, 0.012114815413951874, 0.3514796495437622, -0.40194129943847656, 0.12128740549087524, 0.3384159803390503, -0.06486112624406815, 0.2952970266342163, -0.3582289516925812, -0.3696178197860718, 0.3684426248073578, 0.12229516357183456, -0.41774263978004456, -0.02816237509250641, 0.1566888988018036, 0.19370271265506744, 0.2849496304988861, 0.30712926387786865, -0.07631228119134903, 0.008451352827250957, 0.16767768561840057, -0.10540393739938736, 0.22906221449375153, 0.2233179360628128, -0.06165638566017151, 0.06022806093096733, -0.1854293793439865, 
#!/bin/bash
#
# deploy_cli.sh - Deploy intfloat/multilingual-e5-small to a SageMaker
# real-time endpoint using the custom handlers in inference.py.
#
# Steps: package inference.py, upload it to S3, create the SageMaker model
# from model-config.json, create an endpoint configuration, create the
# endpoint, and wait until it is in service.
#
# Usage:
#   ./deploy_cli.sh
#
# Environment (optional):
#   ROLE_ARN       execution role for the model
#                  (default: arn:aws:iam::<account>:role/Admin)
#   ENDPOINT_NAME  endpoint to create (default: multilingual-e5-endpoint)
#
# Exit status: 0 when the endpoint is in service, 1 on any failure.

set -uo pipefail

# Print a message to stderr and abort.
die() {
  echo "$*" >&2
  exit 1
}

# inference.py and model-config.json are read relative to this script.
cd "$(dirname "${BASH_SOURCE[0]}")" || die "Cannot change to the script directory"

# Set variables
TIMESTAMP=$(date +%s)
MODEL_NAME="multilingual-e5-small-$TIMESTAMP"
ENDPOINT_CONFIG_NAME="multilingual-e5-config-$TIMESTAMP"
ENDPOINT_NAME="${ENDPOINT_NAME:-multilingual-e5-endpoint}"
ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text) \
  || die "Failed to resolve AWS account ID (is the AWS CLI configured?)"
[[ -n "$ACCOUNT_ID" ]] || die "AWS account ID is empty"
ROLE_ARN="${ROLE_ARN:-arn:aws:iam::${ACCOUNT_ID}:role/Admin}"
# Kept fixed: model-config.json hard-codes us-east-1 in both the container
# image URI and the ModelDataUrl bucket name.
REGION="us-east-1"
BUCKET="sagemaker-$REGION-$ACCOUNT_ID"

# Build artifacts live in a private temp dir that is removed on every exit path.
WORK_DIR=$(mktemp -d) || die "Failed to create temporary directory"
trap 'rm -rf -- "$WORK_DIR"' EXIT

# Create code package
echo "Creating code package..."
tar -czf "$WORK_DIR/model.tar.gz" inference.py || die "Failed to create code package"

# Upload to S3
echo "Uploading code to S3..."
aws s3 cp "$WORK_DIR/model.tar.gz" "s3://$BUCKET/$MODEL_NAME/model.tar.gz" \
  || die "Failed to upload code to s3://$BUCKET (does the bucket exist?)"

# Create temporary model config
sed "s/MODEL_TIMESTAMP/$TIMESTAMP/g; s|ROLE_ARN_PLACEHOLDER|$ROLE_ARN|g; s/ACCOUNT_ID/$ACCOUNT_ID/g" \
  model-config.json >"$WORK_DIR/model-config.json" \
  || die "Failed to render model-config.json"

# Create model
aws sagemaker create-model \
  --region "$REGION" \
  --cli-input-json "file://$WORK_DIR/model-config.json" \
  || die "Failed to create model"

# Create endpoint configuration
aws sagemaker create-endpoint-config \
  --region "$REGION" \
  --endpoint-config-name "$ENDPOINT_CONFIG_NAME" \
  --production-variants "VariantName=primary,ModelName=$MODEL_NAME,InitialInstanceCount=1,InstanceType=ml.m5.large,InitialVariantWeight=1" \
  || die "Failed to create endpoint config"

# Create endpoint
aws sagemaker create-endpoint \
  --region "$REGION" \
  --endpoint-name "$ENDPOINT_NAME" \
  --endpoint-config-name "$ENDPOINT_CONFIG_NAME" \
  || die "Failed to create endpoint"

echo "Deployment initiated:"
echo "Model: $MODEL_NAME"
echo "Endpoint Config: $ENDPOINT_CONFIG_NAME"
echo "Endpoint: $ENDPOINT_NAME"

# Wait for endpoint to be in service
echo "Waiting for endpoint to be ready..."
if aws sagemaker wait endpoint-in-service --region "$REGION" --endpoint-name "$ENDPOINT_NAME"; then
  echo "Endpoint $ENDPOINT_NAME is ready!"
  echo "You can now validate with: ./validate_cli.sh $ENDPOINT_NAME"
else
  # Report the failure through the exit status so callers and CI can detect it.
  die "Endpoint deployment failed or timed out"
fi
# Constants
TEXTS_KEY = "texts"
CONTENT_TYPE_KEY = "content_type"
INPUTS_KEY = "inputs"
PARAMETERS_KEY = "parameters"
JSON_MIME_TYPE = "application/json"


def _is_json(mime_type):
    """Return True when a Content-Type/Accept value denotes JSON.

    Media-type parameters (e.g. ``application/json; charset=utf-8``) and
    letter case are ignored.
    """
    if not mime_type:
        return False
    return mime_type.split(";", 1)[0].strip().lower() == JSON_MIME_TYPE


def model_fn(model_dir):
    """
    Load the tokenizer and model.

    ``model_dir`` is not used: the weights are fetched from the HuggingFace Hub
    because the uploaded archive only contains this inference script.

    Returns:
        dict with keys ``"model"`` and ``"tokenizer"``.
    """
    # Load model from HuggingFace Hub since we're using custom inference
    model_name = "intfloat/multilingual-e5-small"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModel.from_pretrained(model_name)
    return {"model": model, "tokenizer": tokenizer}


def input_fn(request_body, request_content_type):
    """
    Parse input JSON and return list of texts for embedding generation.

    Expected input formats:
    1. Custom format: {"texts": ["text1", "text2"], "content_type": "query"}
    2. Custom with parameters: {"parameters": {"texts": ["text1"], "content_type": "passage"}}

    When ``content_type`` is present and non-empty, every text is prefixed with
    ``"<content_type>: "``. A bare string under ``texts`` is treated as a
    single text.

    Raises:
        ValueError: if the request is not JSON or contains no texts.
    """
    if not _is_json(request_content_type):
        raise ValueError(f"Unsupported content type: {request_content_type}")

    input_data = json.loads(request_body)

    # The OpenSearch remote connector format nests the fields under
    # "parameters"; the plain format keeps them at the top level.
    payload = input_data[PARAMETERS_KEY] if PARAMETERS_KEY in input_data else input_data

    texts = payload.get(TEXTS_KEY, [])
    if isinstance(texts, str):
        # Iterating a bare string would yield one "text" per character.
        texts = [texts]
    if not texts:
        raise ValueError(f"No input texts provided under '{TEXTS_KEY}'")

    content_type = payload.get(CONTENT_TYPE_KEY)
    if content_type:
        texts = [f"{content_type}: {text}" for text in texts]

    return texts


def predict_fn(input_data, model_dict):
    """
    Embed a list of texts.

    Args:
        input_data: list of strings as returned by ``input_fn``.
        model_dict: dict returned by ``model_fn``.

    Returns:
        numpy array with one row per input text.
    """
    model = model_dict["model"]
    tokenizer = model_dict["tokenizer"]

    # Tokenize
    inputs = tokenizer(input_data, padding=True, truncation=True, return_tensors="pt", max_length=512)

    # Generate embeddings
    with torch.no_grad():
        outputs = model(**inputs)
        hidden = outputs.last_hidden_state
        # Average only over real tokens. A plain mean over the sequence axis
        # also averages the padding positions added by padding=True, which
        # makes a text's embedding depend on the other texts in the batch.
        mask = inputs["attention_mask"].unsqueeze(-1).to(hidden.dtype)
        token_counts = mask.sum(dim=1).clamp(min=1)
        embeddings = (hidden * mask).sum(dim=1) / token_counts

    return embeddings.cpu().numpy()


def output_fn(prediction, content_type):
    """
    Format prediction output for OpenSearch connector compatibility.

    Expected output format (simple array for OpenSearch to wrap):
    Single embedding: [0.1, 0.2, 0.3, ...]
    Batch embeddings: [[0.1, 0.2, ...], [0.3, 0.4, ...]]

    Raises:
        ValueError: if the requested response type is not JSON.
    """
    if not _is_json(content_type):
        raise ValueError(f"Unsupported content type: {content_type}")

    # tolist() yields a flat list for a 1-D array and a list of lists for a
    # 2-D batch, so both shapes share one code path.
    return json.dumps(prediction.tolist())
0.40311747789382935, 0.36103829741477966, -0.24471746385097504, -0.023404372856020927, -0.5035589337348938, 0.0011254349956288934, -0.2521737813949585, -0.0987662747502327, 0.36402878165245056, 0.31543269753456116, 0.04576990008354187, 0.2558189928531647, 0.06138498708605766, 0.15948054194450378, -0.2958873510360718, -0.3098081052303314, -0.2696464955806732, 0.3122010827064514, -0.18361128866672516, 0.28596198558807373, -0.08014586567878723, -0.3751654624938965, -0.1966492086648941, 0.31819748878479004, -0.31918102502822876, 0.0436028316617012, 0.23668886721134186, 0.08069725334644318, 0.25577983260154724, 0.26807907223701477, 0.29041633009910583, -0.27665096521377563, 0.3198518753051758, 0.11747542023658752, -0.07459309697151184, -0.2189175933599472, 0.2825084328651428, 0.11416655033826828, 0.21050551533699036, 0.34006205201148987, -0.2512734830379486, -0.04095879942178726, -0.18994103372097015, -0.37156373262405396, -0.285979688167572, 0.24814310669898987, -0.2805616855621338, -0.07649344205856323, -0.013637318275868893, 0.3475215435028076, 0.36999252438545227, -0.11939886212348938, 0.07174855470657349, -0.335440456867218, -0.201027512550354, -0.3052011728286743, 0.2803651690483093, 0.11483081430196762, -0.165008544921875, 0.2583684027194977, -0.06624610722064972, 0.14310689270496368, -0.21716715395450592, -0.02444385178387165, -0.16776467859745026, -0.2616840600967407, -0.20785154402256012, -0.11347077786922455, 0.03937980532646179, -0.18899796903133392, 0.3354128301143646, 0.26316848397254944, -0.3115343749523163, 0.3872986435890198, -0.2844565808773041, 0.28245192766189575, -0.07061275094747543, -0.13174249231815338, -0.05954946577548981, -0.09561024606227875, -0.17320849001407623, -0.3158038556575775, 0.36221301555633545, 0.14794814586639404, -0.17559996247291565, 0.1651720255613327, -0.07632634788751602, 0.15976089239120483, -0.33826184272766113, -0.1968168020248413, 0.2169232964515686, 0.14244425296783447, -0.44272899627685547, 0.2936801314353943, 
-0.39589717984199524, -0.3189640939235687, 0.0555867962539196, 0.23146888613700867, 0.2640289068222046, -0.1528303325176239, -0.05648433044552803, -0.16183622181415558, -0.22885224223136902, 0.20441754162311554, -0.1859145611524582, 0.5601231455802917, 0.03943751007318497, -0.34162846207618713, -0.243055060505867, -0.2989421486854553, -0.13759872317314148, 0.06356555223464966, 0.09465593099594116, 0.00013165395648684353, -0.038511332124471664, 0.21544389426708221, 0.3175217807292938, 0.11656442284584045, 0.3347916007041931, -0.07805805653333664, 0.43718379735946655, -0.339921772480011, -0.257536381483078, -0.11757081001996994, -0.20674295723438263, -0.17733992636203766, 0.274488240480423, -0.31765443086624146, 0.16024187207221985, 0.509494960308075, 0.30123355984687805, 0.31286856532096863, 0.1076715737581253, 0.43430957198143005, -0.3013831675052643, 0.16817422211170197, -0.06739011406898499, 0.18829381465911865, 0.2778390347957611, 0.19482119381427765, -0.09391848742961884, -0.1904657781124115, -0.16857093572616577, 0.37587010860443115, 0.16312642395496368, -0.16161110997200012, -0.3724978566169739, -0.2632110118865967, -0.09416095167398453, -0.24818341434001923, -0.3430075943470001, -0.04060085117816925, 0.39944446086883545, -0.1437377631664276, -0.21401682496070862, -0.3111279606819153, 0.251720666885376, -0.1752186268568039, 0.4099709987640381, -0.0967257097363472, 0.19497527182102203, -0.1268170177936554, 0.5159304738044739, 0.2600120007991791, 0.15325511991977692, -0.2007351517677307, -0.18073146045207977, -0.16582244634628296, -0.23783506453037262, -0.17169995605945587, -0.21771280467510223, -0.16901123523712158, 0.11988545954227448, 0.36803799867630005, -0.2162386029958725, 0.23524442315101624, 0.16015920042991638, -0.2769145369529724, -0.3170761466026306, -0.12702547013759613, -0.04928090050816536, -0.19587549567222595, 0.34167587757110596, 0.34435489773750305, 0.2150527536869049, 0.07921148091554642, -0.05564895644783974, 0.230828195810318, 
0.096257783472538, -0.07677603513002396, -0.16163253784179688, -0.31614944338798523, 0.19280436635017395, -0.2663184106349945, 0.1432855874300003, 0.37558555603027344, -0.09154064208269119, -0.3777075409889221, 0.34669196605682373, -0.1300935447216034, -0.2785319685935974, -0.18849170207977295, 0.25823792815208435, -0.19100864231586456, 0.04001547023653984, 0.23445618152618408, 0.06814782321453094, 0.1454792469739914, -0.3630126416683197, -0.22373446822166443, 0.45002585649490356, 0.10075121372938156, -0.30611392855644226, -0.15134340524673462, 0.22886160016059875, -0.23688487708568573, -0.17506834864616394, -0.11598581075668335, -0.2599581480026245, -0.37528809905052185, -0.32019340991973877, -0.12195807695388794, 0.19797639548778534, 0.3890979290008545, -0.3311002850532532, -0.4607181251049042, -0.3293878138065338, 0.3683645725250244, -0.03711650148034096, 0.3221883773803711, -0.21482916176319122, -0.03487198427319527, 0.04206213355064392, 0.176036074757576, 0.08774565905332565, 0.011078901588916779, -0.252734899520874, -0.2881111204624176, -0.2607787549495697, -0.31813302636146545, 0.22619765996932983, 0.18091070652008057, 0.21137399971485138, -0.18626663088798523, 0.08287107199430466, 0.11039561778306961, 0.02738388068974018, 0.4838288128376007, 0.2339504510164261, 0.267976850271225, 0.44742560386657715, -0.2013837844133377, -0.31707921624183655, -0.2417682260274887, -0.44612953066825867, -0.2811526358127594, 0.10913200676441193, 0.25092068314552307, -0.3362553119659424, -0.4137056767940521, -0.2544061839580536, 0.1710471212863922, 0.5646557807922363, -0.436403751373291, -0.28590360283851624, 0.11409199982881546, -0.07289597392082214, 0.38267454504966736, 0.4924387037754059, 0.2871546447277069, -0.22234828770160675, -0.04962091147899628, 0.03846737742424011, -0.04286123812198639, -0.19473427534103394, -0.07160763442516327, -0.31920096278190613, 0.21849548816680908, -0.29110583662986755, 0.4807734787464142, -0.03269729018211365, 0.024805312976241112, 
0.27455583214759827, -0.33214566111564636, 0.08503855764865875, -0.27772948145866394, -0.09894666820764542, 0.3025405704975128, 0.4697811007499695, -0.13228991627693176, 0.06958076357841492, -0.16962741315364838, 0.0726604163646698, -0.02759638987481594, 0.1334376186132431, 0.4732311964035034, 0.41330140829086304, -0.35816970467567444, -0.38301411271095276, 0.2130235880613327, 0.09524665772914886, -0.051740117371082306, 0.24324360489845276, -0.18642885982990265, -0.18662481009960175, -0.14698784053325653, -0.11324547976255417, 0.013555294834077358, -0.21932671964168549, 0.2738673686981201, 0.12115935981273651, -0.42372649908065796, -0.1270364224910736, 0.26746484637260437, -0.13195015490055084, 0.17182192206382751, -0.09387104958295822, -0.10040216147899628, 0.020610330626368523, -0.15109913051128387, -0.05684611573815346, -0.0883018746972084, 0.08580126613378525, -0.44998738169670105, -0.12360361218452454, 0.24721094965934753, 0.28735730051994324, -0.22331896424293518, 0.3126091957092285, -0.07164841890335083, -0.23854337632656097, 0.4062841236591339, -0.4260595440864563, -0.11660507321357727, 0.23749519884586334, 0.43120643496513367, -0.3329983949661255, 0.058912262320518494, 0.2327670007944107, -0.08224952965974808, 0.18992094695568085, 0.034049488604068756, -0.28907471895217896, 0.6019108891487122, 0.21537038683891296, -0.4023866653442383, -0.24330615997314453, 0.17554479837417603, 0.23722676932811737, 0.44198986887931824, 0.45256394147872925, -0.2321983426809311, 0.0633951798081398, 0.2400585412979126, -0.20329488813877106, 0.17868520319461823, 0.3698062300682068, -0.16240307688713074, 0.02402404136955738, -0.0026248747017234564, -0.3107243478298187, -0.41555383801460266, 0.2991645336151123, -0.29420578479766846, -0.3316832482814789, 0.059223804622888565, 0.3327523469924927, 0.22718819975852966, 0.26585134863853455]] \ No newline at end of file diff --git a/docs/source/examples/asymmetric_e5_model/sagemaker_deployment/query_response.json 
b/docs/source/examples/asymmetric_e5_model/sagemaker_deployment/query_response.json new file mode 100644 index 00000000..bc50f117 --- /dev/null +++ b/docs/source/examples/asymmetric_e5_model/sagemaker_deployment/query_response.json @@ -0,0 +1 @@ +[[0.21125222742557526, -0.1941993236541748, -0.28705155849456787, -0.24281108379364014, 0.6063380837440491, -0.21867252886295319, 0.16924279928207397, 0.2812802195549011, 0.46070635318756104, -0.07456868886947632, 0.19199371337890625, 0.12527327239513397, 0.2204572558403015, -0.19901621341705322, -0.21329985558986664, 0.3345648944377899, 0.32334718108177185, -0.2748561203479767, -0.16117577254772186, -0.5108640789985657, -0.04475392401218414, 0.05922245606780052, -0.34911811351776123, 0.4010632038116455, 0.4356435537338257, 0.1736554503440857, 0.2751404047012329, -0.07730215042829514, 0.16148270666599274, -0.29523664712905884, -0.30202022194862366, -0.21761786937713623, 0.33509576320648193, -0.19338203966617584, 0.22834886610507965, 0.02318735420703888, -0.33283114433288574, -0.08014053851366043, 0.39514878392219543, -0.3215435743331909, 0.05863434076309204, 0.36819806694984436, 0.07294666767120361, 0.18811015784740448, 0.16668397188186646, 0.17604462802410126, -0.2039717435836792, 0.2158973067998886, 0.13771314918994904, -0.0919647142291069, -0.2667471468448639, 0.23983310163021088, 0.1657848209142685, 0.19797946512699127, 0.25132134556770325, -0.23382268846035004, -0.29370567202568054, -0.2551599442958832, -0.3896092176437378, -0.03983955830335617, 0.29163333773612976, -0.30149397253990173, -0.02439812570810318, 0.04658728465437889, 0.34110963344573975, 0.26051759719848633, 0.0544423945248127, 0.0059976838529109955, -0.3516148626804352, -0.14189280569553375, -0.34010589122772217, 0.2252863645553589, 0.04097441956400871, -0.11068090796470642, 0.1973743438720703, -0.013633756898343563, 0.22163261473178864, -0.26179829239845276, -0.014621115289628506, -0.16305606067180634, -0.5414846539497375, -0.24454271793365479, 
-0.27291640639305115, 0.053677063435316086, -0.2404821664094925, 0.27930545806884766, 0.27194875478744507, -0.4339548647403717, 0.2676958441734314, -0.14198653399944305, 0.13415144383907318, 0.013326878659427166, -0.20169229805469513, -0.24812443554401398, -0.1570730358362198, -0.280738890171051, -0.1654207557439804, 0.31772229075431824, 0.12545864284038544, -0.22436052560806274, 0.2359568327665329, -0.09793323278427124, 0.26328250765800476, -0.24652822315692902, -0.18275584280490875, 0.2157421112060547, 0.13654853403568268, -0.24948161840438843, 0.3006797730922699, -0.42382702231407166, -0.251981645822525, 0.17079441249370575, 0.3059029281139374, 0.29904550313949585, -0.14574183523654938, 0.014917999505996704, -0.20398323237895966, -0.08696652203798294, 0.09606447070837021, -0.1966981142759323, 0.5671530365943909, -0.12086940556764603, -0.35229384899139404, -0.24308030307292938, -0.15369351208209991, -0.2438025027513504, -0.014862753450870514, 0.20061050355434418, -0.09023427963256836, 0.025913110002875328, 0.12219677120447159, 0.2688921093940735, 0.03368305787444115, 0.4405869245529175, -0.04042518138885498, 0.3578610122203827, -0.3759465515613556, -0.29370489716529846, -0.05327538773417473, -0.22648507356643677, -0.11250925064086914, 0.40011414885520935, -0.1782057285308838, 0.3223189115524292, 0.4577997028827667, 0.18764644861221313, 0.4118485152721405, 0.09684163331985474, 0.4934040606021881, -0.15834955871105194, 0.20281660556793213, -0.2011936902999878, 0.07204549759626389, 0.12703467905521393, 0.11263646930456161, -0.148611381649971, -0.17914052307605743, -0.2394629865884781, 0.23737366497516632, 0.06479690223932266, -0.11129841953516006, -0.25300124287605286, -0.34618303179740906, -0.06625428795814514, -0.4262959659099579, -0.27556100487709045, 0.15835250914096832, 0.3716146945953369, -0.2766924500465393, -0.2905638515949249, -0.3384515047073364, 0.13374412059783936, -0.3453388214111328, 0.38530290126800537, -0.02435285598039627, 0.28315064311027527, 
-0.11186233907938004, 0.4832243025302887, 0.36072614789009094, 0.20030373334884644, -0.228987917304039, -0.08367487788200378, -0.38701513409614563, -0.336569219827652, -0.202059805393219, -0.241109237074852, -0.15747816860675812, 0.2495935708284378, 0.4309763014316559, -0.13504484295845032, 0.09007308632135391, 0.11907172203063965, -0.2552972733974457, -0.33779633045196533, -0.014909599907696247, 0.009533613920211792, -0.17467834055423737, 0.20665539801120758, 0.24151833355426788, 0.3250790238380432, 0.05517682060599327, -0.01971430517733097, 0.3283853232860565, 0.13890217244625092, 0.15370617806911469, -0.060844793915748596, -0.5572725534439087, 0.26071369647979736, -0.29138126969337463, 0.22394472360610962, 0.44030728936195374, -0.16387711465358734, -0.42187991738319397, 0.24273072183132172, -0.16349318623542786, -0.06703337281942368, -0.048404987901449203, 0.3746841847896576, -0.15043781697750092, 0.08334202319383621, 0.15648816525936127, -0.12193045765161514, 0.10134154558181763, -0.2724835276603699, -0.10386969894170761, 0.5114610195159912, 0.12185236066579819, -0.3396874666213989, -0.026263676583766937, 0.1721758395433426, -0.33004799485206604, -0.20234578847885132, -0.21508868038654327, -0.19373153150081635, -0.4487706124782562, -0.34779199957847595, -0.059867288917303085, 0.2304791957139969, 0.3297548294067383, -0.3893016278743744, -0.30205678939819336, -0.42061933875083923, 0.2942698895931244, -0.17211215198040009, 0.2611527740955353, -0.24489039182662964, 0.025629928335547447, 0.09010634571313858, 0.12845194339752197, 0.0052532316185534, 0.18029005825519562, -0.037560585886240005, -0.18587444722652435, -0.16687877476215363, -0.11272308230400085, 0.18612472712993622, 0.24533653259277344, 0.1469055712223053, -0.17163296043872833, 0.02798759751021862, 0.22248335182666779, 0.04877319931983948, 0.5472219586372375, 0.28349733352661133, 0.23077648878097534, 0.3875148594379425, -0.3329434096813202, -0.27732011675834656, -0.38004937767982483, -0.3542943000793457, 
-0.2544335424900055, 0.1459885537624359, 0.20117320120334625, -0.24893301725387573, -0.3922543525695801, -0.33029603958129883, 0.05097396299242973, 0.5835190415382385, -0.2714919149875641, -0.3138727843761444, 0.20353317260742188, -0.010863554663956165, 0.3398316204547882, 0.44693875312805176, 0.12450795620679855, -0.1576516181230545, -0.13163618743419647, 0.20554696023464203, -0.08420965820550919, -0.3177105784416199, -0.2595221698284149, -0.3888682425022125, 0.24555659294128418, -0.23760002851486206, 0.33701106905937195, -0.02006196230649948, 0.030848823487758636, 0.2547343671321869, -0.3171060383319855, 0.20870031416416168, -0.2208980917930603, -0.1673668473958969, 0.399099737405777, 0.4797435700893402, -0.14266376197338104, 0.10651887208223343, -0.0024498135317116976, 0.06023525819182396, 0.024109160527586937, 0.04211573675274849, 0.43614622950553894, 0.4623338282108307, -0.2911700904369354, -0.2905098497867584, 0.28847846388816833, 0.2979293763637543, -0.08132363110780716, 0.1885635107755661, -0.3147869408130646, -0.24237678945064545, -0.23928521573543549, -0.16218934953212738, 0.1691829115152359, -0.2907077372074127, 0.16891682147979736, 0.24316616356372833, -0.22275082767009735, -0.18394790589809418, 0.24727730453014374, 0.023630766198039055, 0.24785806238651276, -0.17229902744293213, -0.18327821791172028, 0.05747581645846367, -0.13658566772937775, -0.11353683471679688, -0.11628123372793198, 0.1351284384727478, -0.47805073857307434, -0.31843504309654236, 0.2274089902639389, 0.282308965921402, -0.3089647889137268, 0.3379780352115631, -0.19517511129379272, -0.2729782164096832, 0.4244639575481415, -0.3605709373950958, -0.10947375744581223, 0.018936071544885635, 0.370573490858078, -0.44834375381469727, -0.05145057663321495, 0.17253297567367554, -0.12544380128383636, 0.193736732006073, 0.00317421555519104, -0.16641651093959808, 0.6147428750991821, 0.3055102527141571, -0.3317945897579193, -0.1625828742980957, 0.22298216819763184, 0.16252408921718597, 
#!/bin/bash
#
# validate_cli.sh - smoke-test a deployed SageMaker endpoint.
#
# Sends three JSON payloads (a single "query" text, a single "passage" text,
# and a two-text "query" batch) to the endpoint through
# `aws sagemaker-runtime invoke-endpoint` and stores each response body in the
# current directory.
#
# Usage:
#   ./validate_cli.sh <endpoint-name>
#
# Files written to the current directory:
#   query_response.json, passage_response.json, batch_response.json
#   query_payload.json, passage_payload.json, batch_payload.json
#     (payload files are deleted only when all three requests succeed)
#
# Exit status:
#   0 - all three invocations succeeded
#   1 - wrong argument count, or at least one invocation failed

if [ $# -ne 1 ]; then
  echo "Usage: $0 <endpoint-name>" >&2
  exit 1
fi

ENDPOINT_NAME=$1

# Test payloads with custom format.
# NOTE(review): "content_type" here is a field of the request body, separate
# from the HTTP --content-type flag; presumably the endpoint's inference code
# reads it to distinguish queries from passages - confirm against inference.py.
QUERY_PAYLOAD='{
  "texts": ["how much protein should a female eat"],
  "content_type": "query"
}'

PASSAGE_PAYLOAD='{
  "texts": ["As a general guideline, the CDC'\''s average requirement of protein for women ages 19 to 70 is 46 grams per day."],
  "content_type": "passage"
}'

BATCH_PAYLOAD='{
  "texts": ["how much protein should a female eat", "what are the benefits of exercise"],
  "content_type": "query"
}'

#######################################
# Write one payload to disk and invoke the endpoint with it.
# Globals:   ENDPOINT_NAME (read)
# Arguments: $1 - label, used for the log line and as the file-name prefix
#                 (<label>_payload.json / <label>_response.json)
#            $2 - JSON request body
# Outputs:   progress line on stdout; whatever the AWS CLI prints
# Returns:   0 if the payload was written and the AWS CLI call succeeded,
#            non-zero otherwise
#######################################
invoke_endpoint() {
  local label=$1
  local payload=$2
  local payload_file="${label}_payload.json"
  local response_file="${label}_response.json"

  echo "Testing ${label} request..."
  printf '%s\n' "$payload" > "$payload_file" || return 1

  # The function's status is the status of this call.
  aws sagemaker-runtime invoke-endpoint \
    --endpoint-name "$ENDPOINT_NAME" \
    --content-type application/json \
    --body "fileb://${payload_file}" \
    "$response_file"
}

echo "Testing endpoint: $ENDPOINT_NAME"

# Run all three requests even if an earlier one fails, but remember every
# failure: checking $? once at the end would only reflect the last call.
FAILED=0
for request in query passage batch; do
  case "$request" in
    query)   body=$QUERY_PAYLOAD ;;
    passage) body=$PASSAGE_PAYLOAD ;;
    batch)   body=$BATCH_PAYLOAD ;;
  esac

  if ! invoke_endpoint "$request" "$body"; then
    echo "✗ ${request} request failed" >&2
    FAILED=1
  fi
done

if [ "$FAILED" -ne 0 ]; then
  # Payload files are intentionally left in place on failure, as before.
  echo "✗ Endpoint invocation failed!" >&2
  exit 1
fi

echo "✓ All requests successful!"
echo "✓ Query response saved to query_response.json"
echo "✓ Passage response saved to passage_response.json"
echo "✓ Batch response saved to batch_response.json"

# Show response sizes
QUERY_SIZE=$(wc -c < query_response.json)
PASSAGE_SIZE=$(wc -c < passage_response.json)
BATCH_SIZE=$(wc -c < batch_response.json)
echo "✓ Query response size: $QUERY_SIZE bytes"
echo "✓ Passage response size: $PASSAGE_SIZE bytes"
echo "✓ Batch response size: $BATCH_SIZE bytes"

# Clean up payload files only, keep response files
rm -f query_payload.json passage_payload.json batch_payload.json