From 68cd5c62eaf577c3637d4f296dcfeb02efe9efc9 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Tue, 19 Nov 2024 09:56:59 -0600 Subject: [PATCH 01/56] INTPYTHON-309 & INTPYTHON-417 Use new cluster and schedule on interval --- .evergreen/config.yml | 94 +++++++++++++++++++++------ .evergreen/fetch-secrets.sh | 46 +++++++++++++ .evergreen/provision-atlas.sh | 3 + .gitignore | 4 ++ chatgpt-retrieval-plugin/run.sh | 9 +-- llama-index-python-kvstore/run.sh | 9 +-- llama-index-python-vectorstore/run.sh | 9 +-- semantic-kernel-csharp/run.sh | 6 +- semantic-kernel-python/run.sh | 15 +++-- 9 files changed, 156 insertions(+), 39 deletions(-) create mode 100644 .evergreen/fetch-secrets.sh diff --git a/.evergreen/config.yml b/.evergreen/config.yml index 133faab..60f996c 100644 --- a/.evergreen/config.yml +++ b/.evergreen/config.yml @@ -28,6 +28,13 @@ functions: params: directory: "src" + "fetch secrets": + - command: subprocess.exec + type: setup + working_dir: "src" + binary: bash + args: [.evergreen/fetch-secrets.sh] + "fetch repo": - command: shell.exec type: setup @@ -54,12 +61,10 @@ functions: add_expansions_to_env: true working_dir: "src/${DIR}/${REPO_NAME}" binary: bash - env: - atlas: ${workdir}/src/atlas/bin/atlas args: - ../run.sh - "setup atlas cli": + "setup local atlas": - command: subprocess.exec type: setup retry_on_failure: true @@ -67,43 +72,78 @@ functions: add_expansions_to_env: true working_dir: "src" binary: bash - env: - atlas: ${workdir}/src/atlas/bin/atlas args: - .evergreen/provision-atlas.sh pre_error_fails_task: true pre: - func: "fetch source" - - func: "setup atlas cli" + - func: "fetch secrets" tasks: - - name: test-semantic-kernel-python + - name: test-semantic-kernel-python-local + commands: + - func: "fetch repo" + - func: "setup local atlas" + - func: "execute tests" + + - name: test-semantic-kernel-python-remote commands: - func: "fetch repo" - func: "execute tests" - - name: test-semantic-kernel-csharp + - name: test-semantic-kernel-csharp-local commands: - func: "fetch repo" + - func: "setup local atlas" - func: "execute tests" - - name: test-langchain-python + - name: test-semantic-kernel-csharp-remote commands: - func: "fetch repo" - func: "execute tests" - - name: test-chatgpt-retrieval-plugin + - name: test-langchain-python-local commands: - func: "fetch repo" + - func: "setup local atlas" - func: "execute tests" - - name: test-llama-index + - name: test-langchain-python-remote commands: - func: "fetch repo" - func: "execute tests" - - name: test-docarray + - name: test-chatgpt-retrieval-plugin-local + commands: + - func: "fetch repo" + - func: "setup local atlas" + - func: "execute tests" + + - name: test-chatgpt-retrieval-plugin-remote + commands: + - func: "fetch repo" + - func: "execute tests" + + - name: test-llama-index-local + commands: + - func: "fetch repo" + - func: "setup local atlas" + - func: "execute tests" + + - name: test-llama-index-remote + commands: + - func: "fetch repo" + - func: "execute tests" + + # TODO: INTPYTHON-313 + # - name: test-docarray-local + # commands: + # - func: "fetch repo" + # - func: "setup local atlas" + # - func: "execute tests" + + - name: test-docarray-remote commands: - func: "fetch repo" - func: "execute tests" @@ -121,7 +161,10 @@ buildvariants: run_on: - rhel87-small tasks: - - name: test-llama-index + - name: test-llama-index-local + - name: test-llama-index-remote + batchtime: 10080 # 1 week + - name: test-semantic-kernel-python-rhel display_name: Semantic-Kernel RHEL Python expansions: @@ -132,7 +175,9 @@ buildvariants: run_on: - rhel87-small tasks: - - name: test-semantic-kernel-python + - name: test-semantic-kernel-python-local + - name: test-semantic-kernel-python-remote + batchtime: 10080 # 1 week - name: test-semantic-kernel-csharp-rhel display_name: Semantic-Kernel RHEL CSharp @@ -144,7 +189,9 @@ buildvariants: run_on: - rhel87-small tasks: - - name: test-semantic-kernel-csharp + - name: test-semantic-kernel-csharp-local + - name: test-semantic-kernel-csharp-remote + batchtime: 10080 # 1 week - name: test-langchain-python-rhel display_name: Langchain RHEL Python @@ -156,7 +203,9 @@ buildvariants: run_on: - rhel87-small tasks: - - name: test-langchain-python + - name: test-langchain-python-local + - name: test-langchain-python-remote + batchtime: 10080 # 1 week - name: test-chatgpt-retrieval-plugin-rhel display_name: ChatGPT Retrieval Plugin @@ -168,7 +217,9 @@ buildvariants: run_on: - rhel87-small tasks: - - name: test-chatgpt-retrieval-plugin + - name: test-chatgpt-retrieval-plugin-local + - name: test-chatgpt-retrieval-plugin-remote + batchtime: 10080 # 1 week - name: test-llama-index-vectorstore-rhel display_name: LlamaIndex RHEL Vector Store @@ -181,7 +232,9 @@ buildvariants: run_on: - rhel87-small tasks: - - name: test-llama-index + - name: test-llama-index-local + - name: test-llama-index-remote + batchtime: 10080 # 1 week - name: test-docarray-rhel display_name: DocArray RHEL @@ -193,4 +246,7 @@ buildvariants: run_on: - rhel87-small tasks: - - name: test-docarray + # TODO: INTPYTHON-313 + # - name: test-docarray-local + - name: test-docarray-remote + # batchtime: 10080 # 1 week diff --git a/.evergreen/fetch-secrets.sh b/.evergreen/fetch-secrets.sh new file mode 100644 index 0000000..44426f4 --- /dev/null +++ b/.evergreen/fetch-secrets.sh @@ -0,0 +1,46 @@ +#!/bin/bash + +set -eu + +if [ -z "${DIR:-}" ]; then + echo "Must give a target dir!" + exit 1 +fi + +# Clone drivers-evergeen-tools. +git clone https://github.com/mongodb-labs/drivers-evergreen-tools + +# Get the secrets for drivers/ai-ml-pipeline-testing. +. drivers-evergreen-tools/.evergreen/secrets_handling/setup-secrets.sh drivers/ai-ml-pipeline-testing + +# Get the correct remote URI. +case $DIR in + llama-index-python-kvstore) + MONGODB_URI=$LLAMA_INDEX_MONGODB_URI + ;; + semantic-kernel-python) + MONGODB_URI=$SEMANTIC_KERNEL_MONGODB_URI + ;; + semantic-kernel-csharp) + MONGODB_URI=$SEMANTIC_KERNEL_MONGODB_URI + ;; + langchain-python) + MONGODB_URI=$LANGCHAIN_MONGODB_URI + ;; + chatgpt-retrieval-plugin) + MONGODB_URI=$CHATGPT_RETRIEVAL_PLUGIN_MONGODB_URI + ;; + llama-index-python-vectorstore) + MONGODB_URI=$LLAMA_INDEX_MONGODB_URI + ;; + docarray) + MONGODB_URI=$DOCARRAY_MONGODB_URI + ;; + *) + echo "Missing config in fetch-secrets.sh for DIR: $DIR" + exit 1 + ;; +esac + +# Export the URI +echo "MONGODB_URI=$MONGODB_URI" >> secrets-export.sh diff --git a/.evergreen/provision-atlas.sh b/.evergreen/provision-atlas.sh index 30dba7a..7c88802 100644 --- a/.evergreen/provision-atlas.sh +++ b/.evergreen/provision-atlas.sh @@ -31,3 +31,6 @@ DATABASE=$DATABASE \ DIR=$DIR \ TARGET_DIR=$TARGET_DIR \ $PYTHON_BINARY $SCAFFOLD_SCRIPT + +# Export the URI +echo "MONGODB_URI=$CONN_STRING" >> secrets-export.sh diff --git a/.gitignore b/.gitignore index fabf148..59d832a 100644 --- a/.gitignore +++ b/.gitignore @@ -49,3 +49,7 @@ xunit-results/ # Miscellaneous .DS_Store +drivers-evergreen-tools + +# Secrets +secrets-export.sh diff --git a/chatgpt-retrieval-plugin/run.sh b/chatgpt-retrieval-plugin/run.sh index dc1cba0..eb95132 100644 --- a/chatgpt-retrieval-plugin/run.sh +++ b/chatgpt-retrieval-plugin/run.sh @@ -4,6 +4,10 @@ set -x +# Get the MONGODB_URI and OPENAI_API_KEY. +# shellcheck disable=SC2154 +source $workdir/src/secrets-export.sh + # shellcheck disable=SC2154 . $workdir/src/.evergreen/utils.sh @@ -24,12 +28,9 @@ $PYTHON_BINARY -m poetry lock --no-update # Install from pyproject.toml into package specific environment $PYTHON_BINARY -m poetry install --with dev -# Run tests. Sensitive variables in Evergreen come from Evergeen project: ai-ml-pipeline-testing/ -# shellcheck disable=SC2154 -OPENAI_API_KEY=$openai_api_key \ +# Run tests. DATASTORE="mongodb" \ BEARER_TOKEN="staylowandkeepmoving" \ -MONGODB_URI=$(fetch_local_atlas_uri) \ MONGODB_DATABASE="chatgpt_retrieval_plugin_test_db" \ MONGODB_COLLECTION="chatgpt_retrieval_plugin_test_vectorstore" \ MONGODB_INDEX="vector_index" \ diff --git a/llama-index-python-kvstore/run.sh b/llama-index-python-kvstore/run.sh index 70691bc..0a73ce3 100644 --- a/llama-index-python-kvstore/run.sh +++ b/llama-index-python-kvstore/run.sh @@ -2,6 +2,10 @@ set -x +# Get the MONGODB_URI and OPENAI_API_KEY. +# shellcheck disable=SC2154 +source $workdir/src/secrets-export.sh + # shellcheck disable=SC2154 . $workdir/src/.evergreen/utils.sh @@ -29,10 +33,7 @@ $PYTHON_BINARY -m poetry lock --no-update # Install from pyproject.toml into package specific environment $PYTHON_BINARY -m poetry install --with dev -# Run tests. Sensitive variables in Evergreen come from Evergreen project: ai-ml-pipeline-testing/ -# shellcheck disable=SC2154 -OPENAI_API_KEY=$openai_api_key \ -MONGODB_URI=$CONN_STRING \ +# Run tests. MONGODB_DATABASE="llama_index_test_db" \ MONGODB_COLLECTION="llama_index_test_kvstore" \ $PYTHON_BINARY -m poetry run pytest -v tests diff --git a/llama-index-python-vectorstore/run.sh b/llama-index-python-vectorstore/run.sh index e3eaf02..b15a194 100644 --- a/llama-index-python-vectorstore/run.sh +++ b/llama-index-python-vectorstore/run.sh @@ -2,6 +2,10 @@ set -x +# Get the MONGODB_URI and OPENAI_API_KEY. +# shellcheck disable=SC2154 +source $workdir/src/secrets-export.sh + # shellcheck disable=SC2154 . $workdir/src/.evergreen/utils.sh @@ -26,10 +30,7 @@ $PYTHON_BINARY -m poetry lock --no-update # Install from pyproject.toml into package specific environment $PYTHON_BINARY -m poetry install --with dev -# Run tests. Sensitive variables in Evergreen come from Evergreen project: ai-ml-pipeline-testing/ -# shellcheck disable=SC2154 -MONGODB_URI=$(fetch_local_atlas_uri) \ -OPENAI_API_KEY=$openai_api_key \ +# Run tests. MONGODB_DATABASE="llama_index_test_db" \ MONGODB_COLLECTION="llama_index_test_vectorstore" \ MONGODB_INDEX="vector_index" \ diff --git a/semantic-kernel-csharp/run.sh b/semantic-kernel-csharp/run.sh index 96fed2c..07da2fe 100644 --- a/semantic-kernel-csharp/run.sh +++ b/semantic-kernel-csharp/run.sh @@ -2,6 +2,10 @@ set -x +# Get the MONGODB_URI. +# shellcheck disable=SC2154 +source $workdir/src/secrets-export.sh + # shellcheck disable=SC2154 . $workdir/src/.evergreen/utils.sh # WORKING_DIR = src/semantic-kernel-csharp/semantic-kernel @@ -20,5 +24,5 @@ sed -i -e 's/"MongoDB Atlas cluster is required"/null/g' dotnet/src/IntegrationT # Run tests echo "Running MongoDBMemoryStoreTests" -MongoDB__ConnectionString=$(fetch_local_atlas_uri) \ +MongoDB__ConnectionString=$MONGODB_URI \ $DOTNET_SDK_PATH/dotnet test dotnet/src/IntegrationTests/IntegrationTests.csproj --filter SemanticKernel.IntegrationTests.Connectors.MongoDB.MongoDBMemoryStoreTests diff --git a/semantic-kernel-python/run.sh b/semantic-kernel-python/run.sh index 4958719..2125b14 100644 --- a/semantic-kernel-python/run.sh +++ b/semantic-kernel-python/run.sh @@ -2,10 +2,13 @@ set -x +# Get the MONGODB_URI and OPENAI_API_KEY. +# shellcheck disable=SC2154 +source $workdir/src/secrets-export.sh + # shellcheck disable=SC2154 . $workdir/src/.evergreen/utils.sh -CONN_STRING=$(fetch_local_atlas_uri) PYTHON_BINARY=$(find_python3) # WORKING_DIR = src/semantic-kernel-python/semantic-kernel @@ -22,21 +25,19 @@ make install-sk make install-pre-commit # shellcheck disable=SC2154 -OPENAI_API_KEY=$openai_api_key \ - OPENAI_ORG_ID="" \ +OPENAI_ORG_ID="" \ AZURE_OPENAI_DEPLOYMENT_NAME="" \ AZURE_OPENAI_ENDPOINT="" \ AZURE_OPENAI_API_KEY="" \ - MONGODB_ATLAS_CONNECTION_STRING=$CONN_STRING \ + MONGODB_ATLAS_CONNECTION_STRING=$MONGODB_URI \ Python_Integration_Tests=1 \ uv run pytest tests/integration/memory/memory_stores/test_mongodb_atlas_memory_store.py -k test_collection_knn # shellcheck disable=SC2154 -OPENAI_API_KEY=$openai_api_key \ - OPENAI_ORG_ID="" \ +OPENAI_ORG_ID="" \ AZURE_OPENAI_DEPLOYMENT_NAME="" \ AZURE_OPENAI_ENDPOINT="" \ AZURE_OPENAI_API_KEY="" \ - MONGODB_ATLAS_CONNECTION_STRING=$CONN_STRING \ + MONGODB_ATLAS_CONNECTION_STRING=$MONGODB_URI \ Python_Integration_Tests=1 \ uv run pytest tests/integration/memory/memory_stores/test_mongodb_atlas_memory_store.py From 5b17bb3434a0a4266b1580e6071610a63fa9c67f Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Tue, 19 Nov 2024 10:01:57 -0600 Subject: [PATCH 02/56] fix config --- .evergreen/config.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.evergreen/config.yml b/.evergreen/config.yml index 60f996c..f2fec30 100644 --- a/.evergreen/config.yml +++ b/.evergreen/config.yml @@ -31,9 +31,10 @@ functions: "fetch secrets": - command: subprocess.exec type: setup - working_dir: "src" - binary: bash - args: [.evergreen/fetch-secrets.sh] + params: + working_dir: "src" + binary: bash + args: [.evergreen/fetch-secrets.sh] "fetch repo": - command: shell.exec From 111ced24930c5d954b926a1fbe3b1f53291628ff Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Tue, 19 Nov 2024 10:03:42 -0600 Subject: [PATCH 03/56] fix config --- .evergreen/config.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.evergreen/config.yml b/.evergreen/config.yml index f2fec30..964b1c1 100644 --- a/.evergreen/config.yml +++ b/.evergreen/config.yml @@ -34,6 +34,7 @@ functions: params: working_dir: "src" binary: bash + include_expansions_in_env: [DIR] args: [.evergreen/fetch-secrets.sh] "fetch repo": From ed04fc4c01231a18e1e8e82c2df01c218a7dc49e Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Tue, 19 Nov 2024 17:36:28 -0600 Subject: [PATCH 04/56] debug --- .evergreen/fetch-secrets.sh | 2 ++ .evergreen/provision-atlas.sh | 1 + chatgpt-retrieval-plugin/run.sh | 4 +++- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/.evergreen/fetch-secrets.sh b/.evergreen/fetch-secrets.sh index 44426f4..92e1b89 100644 --- a/.evergreen/fetch-secrets.sh +++ b/.evergreen/fetch-secrets.sh @@ -44,3 +44,5 @@ esac # Export the URI echo "MONGODB_URI=$MONGODB_URI" >> secrets-export.sh + +echo ${MONGODB_URI:0:10} diff --git a/.evergreen/provision-atlas.sh b/.evergreen/provision-atlas.sh index 7c88802..c6617fc 100644 --- a/.evergreen/provision-atlas.sh +++ b/.evergreen/provision-atlas.sh @@ -34,3 +34,4 @@ DATABASE=$DATABASE \ # Export the URI echo "MONGODB_URI=$CONN_STRING" >> secrets-export.sh +echo ${CONN_STRING:0:10} diff --git a/chatgpt-retrieval-plugin/run.sh b/chatgpt-retrieval-plugin/run.sh index eb95132..f85580d 100644 --- a/chatgpt-retrieval-plugin/run.sh +++ b/chatgpt-retrieval-plugin/run.sh @@ -2,12 +2,14 @@ # chat-gpt-retrieval-plugin is a poetry run project -set -x +set -eux # Get the MONGODB_URI and OPENAI_API_KEY. # shellcheck disable=SC2154 source $workdir/src/secrets-export.sh +echo ${MONGODB_URI:0:10} + # shellcheck disable=SC2154 . $workdir/src/.evergreen/utils.sh From ea81019b97998efd5b56289f24a763e90292402f Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Tue, 19 Nov 2024 17:39:19 -0600 Subject: [PATCH 05/56] bug --- .evergreen/utils.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.evergreen/utils.sh b/.evergreen/utils.sh index a14b063..5727ae1 100644 --- a/.evergreen/utils.sh +++ b/.evergreen/utils.sh @@ -17,7 +17,7 @@ find_python3() { elif [ -d "/Library/Frameworks/Python.Framework/Versions/3.7" ]; then PYTHON="/Library/Frameworks/Python.Framework/Versions/3.7/bin/python3" fi - elif [ "Windows_NT" = "$OS" ]; then # Magic variable in cygwin + elif [ "Windows_NT" = "${OS:-}" ]; then # Magic variable in cygwin PYTHON="C:/python/Python37/python.exe" else # Prefer our own toolchain, fall back to mongodb toolchain if it has Python 3.7+. From 2443a55ae1e03c3b71a555abe07823359c5f9c96 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Tue, 19 Nov 2024 17:49:18 -0600 Subject: [PATCH 06/56] try again --- .evergreen/fetch-secrets.sh | 2 -- .evergreen/provision-atlas.sh | 1 - chatgpt-retrieval-plugin/run.sh | 4 ++-- 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/.evergreen/fetch-secrets.sh b/.evergreen/fetch-secrets.sh index 92e1b89..44426f4 100644 --- a/.evergreen/fetch-secrets.sh +++ b/.evergreen/fetch-secrets.sh @@ -44,5 +44,3 @@ esac # Export the URI echo "MONGODB_URI=$MONGODB_URI" >> secrets-export.sh - -echo ${MONGODB_URI:0:10} diff --git a/.evergreen/provision-atlas.sh b/.evergreen/provision-atlas.sh index c6617fc..7c88802 100644 --- a/.evergreen/provision-atlas.sh +++ b/.evergreen/provision-atlas.sh @@ -34,4 +34,3 @@ DATABASE=$DATABASE \ # Export the URI echo "MONGODB_URI=$CONN_STRING" >> secrets-export.sh -echo ${CONN_STRING:0:10} diff --git a/chatgpt-retrieval-plugin/run.sh b/chatgpt-retrieval-plugin/run.sh index f85580d..bb964eb 100644 --- a/chatgpt-retrieval-plugin/run.sh +++ b/chatgpt-retrieval-plugin/run.sh @@ -8,8 +8,6 @@ set -eux # shellcheck disable=SC2154 source $workdir/src/secrets-export.sh -echo ${MONGODB_URI:0:10} - # shellcheck disable=SC2154 . $workdir/src/.evergreen/utils.sh @@ -31,6 +29,8 @@ $PYTHON_BINARY -m poetry lock --no-update $PYTHON_BINARY -m poetry install --with dev # Run tests. +MONGODB_URI="$MONGODB_URI" \ +OPENAI_API_KEY="$OPENAI_API_KEY" \ DATASTORE="mongodb" \ BEARER_TOKEN="staylowandkeepmoving" \ MONGODB_DATABASE="chatgpt_retrieval_plugin_test_db" \ From f260636aa54761cc34c93fc569fe2e83a9e6149d Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Tue, 19 Nov 2024 21:22:56 -0600 Subject: [PATCH 07/56] try again --- chatgpt-retrieval-plugin/run.sh | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/chatgpt-retrieval-plugin/run.sh b/chatgpt-retrieval-plugin/run.sh index bb964eb..58eff06 100644 --- a/chatgpt-retrieval-plugin/run.sh +++ b/chatgpt-retrieval-plugin/run.sh @@ -6,7 +6,7 @@ set -eux # Get the MONGODB_URI and OPENAI_API_KEY. # shellcheck disable=SC2154 -source $workdir/src/secrets-export.sh +. $workdir/src/secrets-export.sh # shellcheck disable=SC2154 . $workdir/src/.evergreen/utils.sh @@ -29,8 +29,6 @@ $PYTHON_BINARY -m poetry lock --no-update $PYTHON_BINARY -m poetry install --with dev # Run tests. -MONGODB_URI="$MONGODB_URI" \ -OPENAI_API_KEY="$OPENAI_API_KEY" \ DATASTORE="mongodb" \ BEARER_TOKEN="staylowandkeepmoving" \ MONGODB_DATABASE="chatgpt_retrieval_plugin_test_db" \ From 749b416a618b14cf4f9f1c394e9ae3143cb737b9 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Tue, 19 Nov 2024 21:32:28 -0600 Subject: [PATCH 08/56] try again --- chatgpt-retrieval-plugin/run.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/chatgpt-retrieval-plugin/run.sh b/chatgpt-retrieval-plugin/run.sh index 58eff06..7be84d2 100644 --- a/chatgpt-retrieval-plugin/run.sh +++ b/chatgpt-retrieval-plugin/run.sh @@ -2,7 +2,7 @@ # chat-gpt-retrieval-plugin is a poetry run project -set -eux +set -eu # Get the MONGODB_URI and OPENAI_API_KEY. # shellcheck disable=SC2154 @@ -29,6 +29,8 @@ $PYTHON_BINARY -m poetry lock --no-update $PYTHON_BINARY -m poetry install --with dev # Run tests. +MONGODB_URI="$MONGODB_URI" \ +OPENAI_API_KEY="$OPENAI_API_KEY" \ DATASTORE="mongodb" \ BEARER_TOKEN="staylowandkeepmoving" \ MONGODB_DATABASE="chatgpt_retrieval_plugin_test_db" \ From 3bda392f568ee4f6d4d0e2ceae6c3ad70a954174 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Tue, 19 Nov 2024 21:44:40 -0600 Subject: [PATCH 09/56] try again --- .evergreen/provision-atlas.sh | 2 +- docarray/run.sh | 8 ++++++-- langchain-python/run.sh | 13 +++++++------ llama-index-python-kvstore/run.sh | 6 ++++-- llama-index-python-vectorstore/run.sh | 6 ++++-- semantic-kernel-csharp/run.sh | 4 ++-- semantic-kernel-python/run.sh | 10 ++++++---- 7 files changed, 30 insertions(+), 19 deletions(-) diff --git a/.evergreen/provision-atlas.sh b/.evergreen/provision-atlas.sh index 7c88802..13d6990 100644 --- a/.evergreen/provision-atlas.sh +++ b/.evergreen/provision-atlas.sh @@ -1,4 +1,5 @@ #!/bin/bash +set -eu . .evergreen/utils.sh @@ -9,7 +10,6 @@ EVERGREEN_PATH=$(pwd)/.evergreen TARGET_DIR=$(pwd)/$DIR SCAFFOLD_SCRIPT=$EVERGREEN_PATH/scaffold_atlas.py -set -ex mkdir atlas setup_local_atlas diff --git a/docarray/run.sh b/docarray/run.sh index 315adb4..f1a1cf9 100644 --- a/docarray/run.sh +++ b/docarray/run.sh @@ -3,7 +3,11 @@ # Sets up a virtual environment (poetry) # Runs the mongodb tests of the upstream repo -set -x +set -eu + +# Get the MONGODB_URI. +# shellcheck disable=SC2154 +. $workdir/src/secrets-export.sh # shellcheck disable=SC2154 . $workdir/src/.evergreen/utils.sh @@ -23,6 +27,6 @@ poetry install --with dev --extras mongo # Run tests. Sensitive variables in Evergreen come from Evergeen project: ai-ml-pipeline-testing/ # shellcheck disable=SC2154 -MONGODB_URI=$docarray_mongodb_uri \ +MONGODB_URI="$MONGODB_URI" \ MONGODB_DATABASE="docarray_test_db" \ pytest -v tests/index/mongo_atlas diff --git a/langchain-python/run.sh b/langchain-python/run.sh index 568f4e4..9d3f52d 100644 --- a/langchain-python/run.sh +++ b/langchain-python/run.sh @@ -1,7 +1,11 @@ #!/bin/bash # WORKING_DIR = src/langchain-python/langchain -set -x +set -eu + +# Get the MONGODB_URI and OPENAI_API_KEY. +# shellcheck disable=SC2154 +. $workdir/src/secrets-export.sh # shellcheck disable=SC2154 . $workdir/src/.evergreen/utils.sh @@ -20,11 +24,8 @@ poetry lock --no-update poetry install --with test --with test_integration -MONGODB_ATLAS_URI=$(fetch_local_atlas_uri) - -export MONGODB_ATLAS_URI -# shellcheck disable=SC2154 -export OPENAI_API_KEY=$openai_api_key +export MONGODB_ATLAS_URI=$MONGODB_ATLAS_URI +export OPENAI_API_KEY=$OPENAI_API_KEY make test diff --git a/llama-index-python-kvstore/run.sh b/llama-index-python-kvstore/run.sh index 0a73ce3..c680edf 100644 --- a/llama-index-python-kvstore/run.sh +++ b/llama-index-python-kvstore/run.sh @@ -1,10 +1,10 @@ #!/bin/sh -set -x +set -eu # Get the MONGODB_URI and OPENAI_API_KEY. # shellcheck disable=SC2154 -source $workdir/src/secrets-export.sh +. $workdir/src/secrets-export.sh # shellcheck disable=SC2154 . $workdir/src/.evergreen/utils.sh @@ -34,6 +34,8 @@ $PYTHON_BINARY -m poetry lock --no-update $PYTHON_BINARY -m poetry install --with dev # Run tests. +MONGODB_URI="$MONGODB_URI" \ +OPENAI_API_KEY="$OPENAI_API_KEY" \ MONGODB_DATABASE="llama_index_test_db" \ MONGODB_COLLECTION="llama_index_test_kvstore" \ $PYTHON_BINARY -m poetry run pytest -v tests diff --git a/llama-index-python-vectorstore/run.sh b/llama-index-python-vectorstore/run.sh index b15a194..d9ee2e9 100644 --- a/llama-index-python-vectorstore/run.sh +++ b/llama-index-python-vectorstore/run.sh @@ -1,10 +1,10 @@ #!/bin/sh -set -x +set -eu # Get the MONGODB_URI and OPENAI_API_KEY. # shellcheck disable=SC2154 -source $workdir/src/secrets-export.sh +. $workdir/src/secrets-export.sh # shellcheck disable=SC2154 . $workdir/src/.evergreen/utils.sh @@ -31,6 +31,8 @@ $PYTHON_BINARY -m poetry lock --no-update $PYTHON_BINARY -m poetry install --with dev # Run tests. +MONGODB_URI="$MONGODB_URI" \ +OPENAI_API_KEY="$OPENAI_API_KEY" \ MONGODB_DATABASE="llama_index_test_db" \ MONGODB_COLLECTION="llama_index_test_vectorstore" \ MONGODB_INDEX="vector_index" \ diff --git a/semantic-kernel-csharp/run.sh b/semantic-kernel-csharp/run.sh index 07da2fe..086c537 100644 --- a/semantic-kernel-csharp/run.sh +++ b/semantic-kernel-csharp/run.sh @@ -1,10 +1,10 @@ #!/bin/bash -set -x +set -eu # Get the MONGODB_URI. # shellcheck disable=SC2154 -source $workdir/src/secrets-export.sh +. $workdir/src/secrets-export.sh # shellcheck disable=SC2154 . $workdir/src/.evergreen/utils.sh diff --git a/semantic-kernel-python/run.sh b/semantic-kernel-python/run.sh index 2125b14..38589cf 100644 --- a/semantic-kernel-python/run.sh +++ b/semantic-kernel-python/run.sh @@ -1,10 +1,10 @@ #!/bin/bash -set -x +set -eu # Get the MONGODB_URI and OPENAI_API_KEY. # shellcheck disable=SC2154 -source $workdir/src/secrets-export.sh +. $workdir/src/secrets-export.sh # shellcheck disable=SC2154 . $workdir/src/.evergreen/utils.sh @@ -25,7 +25,8 @@ make install-sk make install-pre-commit # shellcheck disable=SC2154 -OPENAI_ORG_ID="" \ +OPENAI_API_KEY="$OPENAI_API_KEY" \ + OPENAI_ORG_ID="" \ AZURE_OPENAI_DEPLOYMENT_NAME="" \ AZURE_OPENAI_ENDPOINT="" \ AZURE_OPENAI_API_KEY="" \ @@ -34,7 +35,8 @@ OPENAI_ORG_ID="" \ uv run pytest tests/integration/memory/memory_stores/test_mongodb_atlas_memory_store.py -k test_collection_knn # shellcheck disable=SC2154 -OPENAI_ORG_ID="" \ +OPENAI_API_KEY="$OPENAI_API_KEY" \ + OPENAI_ORG_ID="" \ AZURE_OPENAI_DEPLOYMENT_NAME="" \ AZURE_OPENAI_ENDPOINT="" \ AZURE_OPENAI_API_KEY="" \ From 343401783117f4a31ee16bda3ce4798ad1988bf0 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Tue, 19 Nov 2024 22:00:36 -0600 Subject: [PATCH 10/56] try again --- .evergreen/config.yml | 3 +-- langchain-python/run.sh | 2 +- llama-index-python-kvstore/run.sh | 1 - 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/.evergreen/config.yml b/.evergreen/config.yml index 964b1c1..62a5141 100644 --- a/.evergreen/config.yml +++ b/.evergreen/config.yml @@ -228,8 +228,7 @@ buildvariants: expansions: DIR: llama-index-python-vectorstore REPO_NAME: llama_index - # TODO: Update INTPYTHON-414 - CLONE_URL: -b INTPYTHON-414 --single-branch https://github.com/blink1073/llama_index.git + CLONE_URL: https://github.com/blink1073/llama_index.git DATABASE: llama_index_test_db run_on: - rhel87-small diff --git a/langchain-python/run.sh b/langchain-python/run.sh index 9d3f52d..3bd4bbc 100644 --- a/langchain-python/run.sh +++ b/langchain-python/run.sh @@ -24,7 +24,7 @@ poetry lock --no-update poetry install --with test --with test_integration -export MONGODB_ATLAS_URI=$MONGODB_ATLAS_URI +export MONGODB_ATLAS_URI=$MONGODB_URI export OPENAI_API_KEY=$OPENAI_API_KEY make test diff --git a/llama-index-python-kvstore/run.sh b/llama-index-python-kvstore/run.sh index c680edf..a576c69 100644 --- a/llama-index-python-kvstore/run.sh +++ b/llama-index-python-kvstore/run.sh @@ -9,7 +9,6 @@ set -eu # shellcheck disable=SC2154 . $workdir/src/.evergreen/utils.sh -CONN_STRING=$(fetch_local_atlas_uri) PYTHON_BINARY=$(find_python3) $PYTHON_BINARY -c "import sys; print(f'Python version found: {sys.version_info}')" From af656e3cea1c8c37bf02ee07c029d8208234ce19 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Tue, 19 Nov 2024 22:14:46 -0600 Subject: [PATCH 11/56] fix secrets handling --- .evergreen/fetch-secrets.sh | 2 +- .evergreen/provision-atlas.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.evergreen/fetch-secrets.sh b/.evergreen/fetch-secrets.sh index 44426f4..a935b6e 100644 --- a/.evergreen/fetch-secrets.sh +++ b/.evergreen/fetch-secrets.sh @@ -43,4 +43,4 @@ case $DIR in esac # Export the URI -echo "MONGODB_URI=$MONGODB_URI" >> secrets-export.sh +echo "export MONGODB_URI=$MONGODB_URI" >> secrets-export.sh diff --git a/.evergreen/provision-atlas.sh b/.evergreen/provision-atlas.sh index 13d6990..ab07e56 100644 --- a/.evergreen/provision-atlas.sh +++ b/.evergreen/provision-atlas.sh @@ -33,4 +33,4 @@ DATABASE=$DATABASE \ $PYTHON_BINARY $SCAFFOLD_SCRIPT # Export the URI -echo "MONGODB_URI=$CONN_STRING" >> secrets-export.sh +echo "export MONGODB_URI=$CONN_STRING" >> secrets-export.sh From 9a2093c1bf995ce3237b6f3e60fdadad7ce41d76 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Wed, 20 Nov 2024 06:19:55 -0600 Subject: [PATCH 12/56] fix secrets handling --- .evergreen/config.yml | 16 +++++++++++- .evergreen/fetch-secrets.sh | 37 --------------------------- .evergreen/provision-atlas.sh | 8 ++++-- .gitignore | 1 + chatgpt-retrieval-plugin/run.sh | 2 +- docarray/run.sh | 2 +- langchain-python/run.sh | 2 +- llama-index-python-kvstore/run.sh | 2 +- llama-index-python-vectorstore/run.sh | 2 +- semantic-kernel-csharp/run.sh | 2 +- semantic-kernel-python/run.sh | 2 +- 11 files changed, 29 insertions(+), 47 deletions(-) diff --git a/.evergreen/config.yml b/.evergreen/config.yml index 62a5141..b4e4d74 100644 --- a/.evergreen/config.yml +++ b/.evergreen/config.yml @@ -34,7 +34,6 @@ functions: params: working_dir: "src" binary: bash - include_expansions_in_env: [DIR] args: [.evergreen/fetch-secrets.sh] "fetch repo": @@ -77,6 +76,15 @@ functions: args: - .evergreen/provision-atlas.sh + "setup remote atlas": + - command: subprocess.exec + type: setup + params: + include_expansions_in_env: [DIR] + working_dir: "src" + binary: bash + args: [.evergreen/setup-remote.sh] + pre_error_fails_task: true pre: - func: "fetch source" @@ -92,6 +100,7 @@ tasks: - name: test-semantic-kernel-python-remote commands: - func: "fetch repo" + - func: "setup remote atlas" - func: "execute tests" - name: test-semantic-kernel-csharp-local @@ -103,6 +112,7 @@ tasks: - name: test-semantic-kernel-csharp-remote commands: - func: "fetch repo" + - func: "setup remote atlas" - func: "execute tests" - name: test-langchain-python-local @@ -114,6 +124,7 @@ tasks: - name: test-langchain-python-remote commands: - func: "fetch repo" + - func: "setup remote atlas" - func: "execute tests" - name: test-chatgpt-retrieval-plugin-local @@ -125,6 +136,7 @@ tasks: - name: test-chatgpt-retrieval-plugin-remote commands: - func: "fetch repo" + - func: "setup remote atlas" - func: "execute tests" - name: test-llama-index-local @@ -136,6 +148,7 @@ tasks: - name: test-llama-index-remote commands: - func: "fetch repo" + - func: "setup remote atlas" - func: "execute tests" # TODO: INTPYTHON-313 @@ -148,6 +161,7 @@ tasks: - name: test-docarray-remote commands: - func: "fetch repo" + - func: "setup remote atlas" - func: "execute tests" buildvariants: diff --git a/.evergreen/fetch-secrets.sh b/.evergreen/fetch-secrets.sh index a935b6e..2ca2980 100644 --- a/.evergreen/fetch-secrets.sh +++ b/.evergreen/fetch-secrets.sh @@ -2,45 +2,8 @@ set -eu -if [ -z "${DIR:-}" ]; then - echo "Must give a target dir!" - exit 1 -fi - # Clone drivers-evergeen-tools. git clone https://github.com/mongodb-labs/drivers-evergreen-tools # Get the secrets for drivers/ai-ml-pipeline-testing. . drivers-evergreen-tools/.evergreen/secrets_handling/setup-secrets.sh drivers/ai-ml-pipeline-testing - -# Get the correct remote URI. -case $DIR in - llama-index-python-kvstore) - MONGODB_URI=$LLAMA_INDEX_MONGODB_URI - ;; - semantic-kernel-python) - MONGODB_URI=$SEMANTIC_KERNEL_MONGODB_URI - ;; - semantic-kernel-csharp) - MONGODB_URI=$SEMANTIC_KERNEL_MONGODB_URI - ;; - langchain-python) - MONGODB_URI=$LANGCHAIN_MONGODB_URI - ;; - chatgpt-retrieval-plugin) - MONGODB_URI=$CHATGPT_RETRIEVAL_PLUGIN_MONGODB_URI - ;; - llama-index-python-vectorstore) - MONGODB_URI=$LLAMA_INDEX_MONGODB_URI - ;; - docarray) - MONGODB_URI=$DOCARRAY_MONGODB_URI - ;; - *) - echo "Missing config in fetch-secrets.sh for DIR: $DIR" - exit 1 - ;; -esac - -# Export the URI -echo "export MONGODB_URI=$MONGODB_URI" >> secrets-export.sh diff --git a/.evergreen/provision-atlas.sh b/.evergreen/provision-atlas.sh index ab07e56..a7b2699 100644 --- a/.evergreen/provision-atlas.sh +++ b/.evergreen/provision-atlas.sh @@ -32,5 +32,9 @@ DATABASE=$DATABASE \ TARGET_DIR=$TARGET_DIR \ $PYTHON_BINARY $SCAFFOLD_SCRIPT -# Export the URI -echo "export MONGODB_URI=$CONN_STRING" >> secrets-export.sh +# Export the URI and OPEN +source secrets-export.sh + +# Create the env file +echo "export OPENAI_API_KEY=$OPENAI_API_KEY" >> env.sh +echo "export MONGODB_URI=$CONN_STRING" >> env.sh diff --git a/.gitignore b/.gitignore index 59d832a..af6cdc2 100644 --- a/.gitignore +++ b/.gitignore @@ -53,3 +53,4 @@ drivers-evergreen-tools # Secrets secrets-export.sh +env.sh diff --git a/chatgpt-retrieval-plugin/run.sh b/chatgpt-retrieval-plugin/run.sh index 7be84d2..9927cc5 100644 --- a/chatgpt-retrieval-plugin/run.sh +++ b/chatgpt-retrieval-plugin/run.sh @@ -6,7 +6,7 @@ set -eu # Get the MONGODB_URI and OPENAI_API_KEY. # shellcheck disable=SC2154 -. $workdir/src/secrets-export.sh +. $workdir/src/env.sh # shellcheck disable=SC2154 . $workdir/src/.evergreen/utils.sh diff --git a/docarray/run.sh b/docarray/run.sh index f1a1cf9..3477b5a 100644 --- a/docarray/run.sh +++ b/docarray/run.sh @@ -7,7 +7,7 @@ set -eu # Get the MONGODB_URI. # shellcheck disable=SC2154 -. $workdir/src/secrets-export.sh +. $workdir/src/env.sh # shellcheck disable=SC2154 . $workdir/src/.evergreen/utils.sh diff --git a/langchain-python/run.sh b/langchain-python/run.sh index 3bd4bbc..3bba90d 100644 --- a/langchain-python/run.sh +++ b/langchain-python/run.sh @@ -5,7 +5,7 @@ set -eu # Get the MONGODB_URI and OPENAI_API_KEY. # shellcheck disable=SC2154 -. $workdir/src/secrets-export.sh +. $workdir/src/env.sh # shellcheck disable=SC2154 . $workdir/src/.evergreen/utils.sh diff --git a/llama-index-python-kvstore/run.sh b/llama-index-python-kvstore/run.sh index a576c69..ca5e2db 100644 --- a/llama-index-python-kvstore/run.sh +++ b/llama-index-python-kvstore/run.sh @@ -4,7 +4,7 @@ set -eu # Get the MONGODB_URI and OPENAI_API_KEY. # shellcheck disable=SC2154 -. $workdir/src/secrets-export.sh +. $workdir/src/env.sh # shellcheck disable=SC2154 . $workdir/src/.evergreen/utils.sh diff --git a/llama-index-python-vectorstore/run.sh b/llama-index-python-vectorstore/run.sh index d9ee2e9..1a517eb 100644 --- a/llama-index-python-vectorstore/run.sh +++ b/llama-index-python-vectorstore/run.sh @@ -4,7 +4,7 @@ set -eu # Get the MONGODB_URI and OPENAI_API_KEY. # shellcheck disable=SC2154 -. $workdir/src/secrets-export.sh +. $workdir/src/env.sh # shellcheck disable=SC2154 . $workdir/src/.evergreen/utils.sh diff --git a/semantic-kernel-csharp/run.sh b/semantic-kernel-csharp/run.sh index 086c537..c88b9a9 100644 --- a/semantic-kernel-csharp/run.sh +++ b/semantic-kernel-csharp/run.sh @@ -4,7 +4,7 @@ set -eu # Get the MONGODB_URI. # shellcheck disable=SC2154 -. $workdir/src/secrets-export.sh +. $workdir/src/env.sh # shellcheck disable=SC2154 . $workdir/src/.evergreen/utils.sh diff --git a/semantic-kernel-python/run.sh b/semantic-kernel-python/run.sh index 38589cf..fa5a16e 100644 --- a/semantic-kernel-python/run.sh +++ b/semantic-kernel-python/run.sh @@ -4,7 +4,7 @@ set -eu # Get the MONGODB_URI and OPENAI_API_KEY. # shellcheck disable=SC2154 -. $workdir/src/secrets-export.sh +. $workdir/src/env.sh # shellcheck disable=SC2154 . $workdir/src/.evergreen/utils.sh From f50115ff953e68fb4f39492e1848222bacc5904b Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Wed, 20 Nov 2024 06:23:10 -0600 Subject: [PATCH 13/56] add missing file --- .evergreen/setup-remote.sh | 42 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 .evergreen/setup-remote.sh diff --git a/.evergreen/setup-remote.sh b/.evergreen/setup-remote.sh new file mode 100644 index 0000000..ec11609 --- /dev/null +++ b/.evergreen/setup-remote.sh @@ -0,0 +1,42 @@ +#!/bin/bash +set -eu + +source secrets-export.sh + +if [ -z "${DIR:-}" ]; then + echo "Must give a target dir!" + exit 1 +fi + +# Get the correct remote URI. +case $DIR in + llama-index-python-kvstore) + MONGODB_URI=$LLAMA_INDEX_MONGODB_URI + ;; + semantic-kernel-python) + MONGODB_URI=$SEMANTIC_KERNEL_MONGODB_URI + ;; + semantic-kernel-csharp) + MONGODB_URI=$SEMANTIC_KERNEL_MONGODB_URI + ;; + langchain-python) + MONGODB_URI=$LANGCHAIN_MONGODB_URI + ;; + chatgpt-retrieval-plugin) + MONGODB_URI=$CHATGPT_RETRIEVAL_PLUGIN_MONGODB_URI + ;; + llama-index-python-vectorstore) + MONGODB_URI=$LLAMA_INDEX_MONGODB_URI + ;; + docarray) + MONGODB_URI=$DOCARRAY_MONGODB_URI + ;; + *) + echo "Missing config in fetch-secrets.sh for DIR: $DIR" + exit 1 + ;; +esac + +# Create the env file +echo "export OPENAI_API_KEY=$OPENAI_API_KEY" >> env.sh +echo "export MONGODB_URI=$MONGODB_URI" >> env.sh From 2b69ba3a8c756dfc1462eaded7c084d8cc9541fc Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Wed, 20 Nov 2024 06:24:05 -0600 Subject: [PATCH 14/56] fix llama_index --- .evergreen/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.evergreen/config.yml b/.evergreen/config.yml index b4e4d74..3eecd4a 100644 --- a/.evergreen/config.yml +++ b/.evergreen/config.yml @@ -242,7 +242,7 @@ buildvariants: expansions: DIR: llama-index-python-vectorstore REPO_NAME: llama_index - CLONE_URL: https://github.com/blink1073/llama_index.git + CLONE_URL: https://github.com/run-llama/llama_index.git DATABASE: llama_index_test_db run_on: - rhel87-small From 7ae343ece662a1e1447092f83c9972acdf53c56e Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Wed, 20 Nov 2024 06:34:18 -0600 Subject: [PATCH 15/56] fix secrets handling --- .evergreen/provision-atlas.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.evergreen/provision-atlas.sh b/.evergreen/provision-atlas.sh index a7b2699..398de8f 100644 --- a/.evergreen/provision-atlas.sh +++ b/.evergreen/provision-atlas.sh @@ -14,10 +14,11 @@ mkdir atlas setup_local_atlas -cd atlas +pushd atlas $PYTHON_BINARY -m venv . source ./bin/activate +popd # Test server is up $PYTHON_BINARY -m pip install pymongo @@ -32,7 +33,7 @@ DATABASE=$DATABASE \ TARGET_DIR=$TARGET_DIR \ $PYTHON_BINARY $SCAFFOLD_SCRIPT -# Export the URI and OPEN +# Get the secrets. source secrets-export.sh # Create the env file From 06a91d1b9ca2f0357407edeab49ab5a83d125aef Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Wed, 20 Nov 2024 08:26:54 -0600 Subject: [PATCH 16/56] try old remote urls --- .evergreen/config.yml | 3 ++- .evergreen/setup-remote.sh | 14 +++++++------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/.evergreen/config.yml b/.evergreen/config.yml index 3eecd4a..b0a4bfb 100644 --- a/.evergreen/config.yml +++ b/.evergreen/config.yml @@ -80,7 +80,8 @@ functions: - command: subprocess.exec type: setup params: - include_expansions_in_env: [DIR] + #include_expansions_in_env: [DIR] + add_expansions_to_env: true working_dir: "src" binary: bash args: [.evergreen/setup-remote.sh] diff --git a/.evergreen/setup-remote.sh b/.evergreen/setup-remote.sh index ec11609..c7744ed 100644 --- a/.evergreen/setup-remote.sh +++ b/.evergreen/setup-remote.sh @@ -11,25 +11,25 @@ fi # Get the correct remote URI. case $DIR in llama-index-python-kvstore) - MONGODB_URI=$LLAMA_INDEX_MONGODB_URI + MONGODB_URI=$llama_index_mongodb_uri ;; semantic-kernel-python) - MONGODB_URI=$SEMANTIC_KERNEL_MONGODB_URI + MONGODB_URI=$semantic_kernel_mongodb_uri ;; semantic-kernel-csharp) - MONGODB_URI=$SEMANTIC_KERNEL_MONGODB_URI + MONGODB_URI=$semantic_kernel_mongodb_uri ;; langchain-python) - MONGODB_URI=$LANGCHAIN_MONGODB_URI + MONGODB_URI=$semantic_kernel_mongodb_uri ;; chatgpt-retrieval-plugin) - MONGODB_URI=$CHATGPT_RETRIEVAL_PLUGIN_MONGODB_URI + MONGODB_URI=$semantic_kernel_mongodb_uri ;; llama-index-python-vectorstore) - MONGODB_URI=$LLAMA_INDEX_MONGODB_URI + MONGODB_URI=$llama_index_mongodb_uri ;; docarray) - MONGODB_URI=$DOCARRAY_MONGODB_URI + MONGODB_URI=$semantic_kernel_mongodb_uri ;; *) echo "Missing config in fetch-secrets.sh for DIR: $DIR" From 19da278cab8f386e6f5919f44a704e7413726cc6 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Wed, 20 Nov 2024 09:19:40 -0600 Subject: [PATCH 17/56] try old remote urls --- .evergreen/setup-remote.sh | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.evergreen/setup-remote.sh b/.evergreen/setup-remote.sh index c7744ed..160b179 100644 --- a/.evergreen/setup-remote.sh +++ b/.evergreen/setup-remote.sh @@ -11,25 +11,25 @@ fi # Get the correct remote URI. case $DIR in llama-index-python-kvstore) - MONGODB_URI=$llama_index_mongodb_uri + MONGODB_URI=$DOCARRAY_MONGODB_URI ;; semantic-kernel-python) - MONGODB_URI=$semantic_kernel_mongodb_uri + MONGODB_URI=$DOCARRAY_MONGODB_URI ;; semantic-kernel-csharp) - MONGODB_URI=$semantic_kernel_mongodb_uri + MONGODB_URI=$DOCARRAY_MONGODB_URI ;; langchain-python) - MONGODB_URI=$semantic_kernel_mongodb_uri + MONGODB_URI=$DOCARRAY_MONGODB_URI ;; chatgpt-retrieval-plugin) - MONGODB_URI=$semantic_kernel_mongodb_uri + MONGODB_URI=$DOCARRAY_MONGODB_URI ;; llama-index-python-vectorstore) - MONGODB_URI=$llama_index_mongodb_uri + MONGODB_URI=$DOCARRAY_MONGODB_URI ;; docarray) - MONGODB_URI=$semantic_kernel_mongodb_uri + MONGODB_URI=$DOCARRAY_MONGODB_URI ;; *) echo "Missing config in fetch-secrets.sh for DIR: $DIR" From 0cd1e0c9e4b891be02a07f3684de9e9a601c0a98 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Wed, 20 Nov 2024 12:01:30 -0600 Subject: [PATCH 18/56] try old remote urls --- .evergreen/setup-remote.sh | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/.evergreen/setup-remote.sh b/.evergreen/setup-remote.sh index 160b179..4d7953b 100644 --- a/.evergreen/setup-remote.sh +++ b/.evergreen/setup-remote.sh @@ -1,7 +1,7 @@ #!/bin/bash set -eu -source secrets-export.sh +# source secrets-export.sh if [ -z "${DIR:-}" ]; then echo "Must give a target dir!" @@ -11,25 +11,25 @@ fi # Get the correct remote URI. case $DIR in llama-index-python-kvstore) - MONGODB_URI=$DOCARRAY_MONGODB_URI + MONGODB_URI=$llama_index_mongodb_uri ;; semantic-kernel-python) - MONGODB_URI=$DOCARRAY_MONGODB_URI + MONGODB_URI=$semantic_kernel_mongodb_uri ;; semantic-kernel-csharp) - MONGODB_URI=$DOCARRAY_MONGODB_URI + MONGODB_URI=$semantic_kernel_mongodb_uri ;; langchain-python) - MONGODB_URI=$DOCARRAY_MONGODB_URI + MONGODB_URI=$docarray_mongodb_uri ;; chatgpt-retrieval-plugin) - MONGODB_URI=$DOCARRAY_MONGODB_URI + MONGODB_URI=$chatgpt_retrieval_plugin_mongodb_uri ;; llama-index-python-vectorstore) - MONGODB_URI=$DOCARRAY_MONGODB_URI + MONGODB_URI=$llama_index_mongodb_uri ;; docarray) - MONGODB_URI=$DOCARRAY_MONGODB_URI + MONGODB_URI=$docarray_mongodb_uri ;; *) echo "Missing config in fetch-secrets.sh for DIR: $DIR" @@ -38,5 +38,5 @@ case $DIR in esac # Create the env file -echo "export OPENAI_API_KEY=$OPENAI_API_KEY" >> env.sh +echo "export OPENAI_API_KEY=$openai_api_key" >> env.sh echo "export MONGODB_URI=$MONGODB_URI" >> env.sh From c4dcdd30431b9b83e9122c90d1c93762dfc56a98 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Wed, 20 Nov 2024 12:12:59 -0600 Subject: [PATCH 19/56] debug --- .evergreen/setup-remote.sh | 3 +++ .evergreen/utils.sh | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.evergreen/setup-remote.sh b/.evergreen/setup-remote.sh index 4d7953b..965674b 100644 --- a/.evergreen/setup-remote.sh +++ b/.evergreen/setup-remote.sh @@ -40,3 +40,6 @@ esac # Create the env file echo "export OPENAI_API_KEY=$openai_api_key" >> env.sh echo "export MONGODB_URI=$MONGODB_URI" >> env.sh + +echo "set MONGODB_URI=$MONGODB_URI" +exit 1 diff --git a/.evergreen/utils.sh b/.evergreen/utils.sh index 5727ae1..c82683a 100644 --- a/.evergreen/utils.sh +++ b/.evergreen/utils.sh @@ -1,6 +1,6 @@ -#!/bin/bash -ex +#!/bin/bash -set -o xtrace +set -eu find_python3() { PYTHON="" From efba00eb61bd711cf3f649e0ebc11150a278adcc Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Wed, 20 Nov 2024 12:16:35 -0600 Subject: [PATCH 20/56] try again with new cluster --- .evergreen/setup-remote.sh | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/.evergreen/setup-remote.sh b/.evergreen/setup-remote.sh index 965674b..6385a4d 100644 --- a/.evergreen/setup-remote.sh +++ b/.evergreen/setup-remote.sh @@ -11,25 +11,25 @@ fi # Get the correct remote URI. case $DIR in llama-index-python-kvstore) - MONGODB_URI=$llama_index_mongodb_uri + MONGODB_URI=$LLAMA_INDEX_MONGODB_URI ;; semantic-kernel-python) - MONGODB_URI=$semantic_kernel_mongodb_uri + MONGODB_URI=$SEMANTIC_KERNEL_MONGODB_URI ;; semantic-kernel-csharp) - MONGODB_URI=$semantic_kernel_mongodb_uri + MONGODB_URI=$SEMANTIC_KERNEL_MONGODB_URI ;; langchain-python) - MONGODB_URI=$docarray_mongodb_uri + MONGODB_URI=$LANGCHAIN_MONGODB_URI ;; chatgpt-retrieval-plugin) - MONGODB_URI=$chatgpt_retrieval_plugin_mongodb_uri + MONGODB_URI=$CHATGPT_RETRIEVAL_PLUGIN_MONGODB_URI ;; llama-index-python-vectorstore) - MONGODB_URI=$llama_index_mongodb_uri + MONGODB_URI=$LLAMA_INDEX_MONGODB_URI ;; docarray) - MONGODB_URI=$docarray_mongodb_uri + MONGODB_URI=$DOCARRAY_MONGODB_URI ;; *) echo "Missing config in fetch-secrets.sh for DIR: $DIR" @@ -40,6 +40,3 @@ esac # Create the env file echo "export OPENAI_API_KEY=$openai_api_key" >> env.sh echo "export MONGODB_URI=$MONGODB_URI" >> env.sh - -echo "set MONGODB_URI=$MONGODB_URI" -exit 1 From ab11405148c3fd350e253dc3ba7a7f645d68a6e4 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Wed, 20 Nov 2024 12:52:35 -0600 Subject: [PATCH 21/56] set up cluster at startup --- .evergreen/scaffold_atlas.py | 1 + .evergreen/setup-remote.sh | 34 +++++++++++++++++++++++++++++++++- 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/.evergreen/scaffold_atlas.py b/.evergreen/scaffold_atlas.py index 6e66250..4f06422 100644 --- a/.evergreen/scaffold_atlas.py +++ b/.evergreen/scaffold_atlas.py @@ -122,6 +122,7 @@ def generate_indexes(client: MongoClient, index_jsons: list[Path]) -> None: def main() -> None: client = MongoClient(CONN_STRING) database = client[DATABASE_NAME] + client.drop_database(DATABASE_NAME) collection_jsons = walk_directory(DB_PATH) index_jsons = walk_directory(INDEX_PATH) generate_collections(database, collection_jsons) diff --git a/.evergreen/setup-remote.sh b/.evergreen/setup-remote.sh index 6385a4d..fa228ec 100644 --- a/.evergreen/setup-remote.sh +++ b/.evergreen/setup-remote.sh @@ -1,7 +1,7 @@ #!/bin/bash set -eu -# source secrets-export.sh +source secrets-export.sh if [ -z "${DIR:-}" ]; then echo "Must give a target dir!" @@ -36,7 +36,39 @@ case $DIR in exit 1 ;; esac +export MONGODB_URI # Create the env file echo "export OPENAI_API_KEY=$openai_api_key" >> env.sh echo "export MONGODB_URI=$MONGODB_URI" >> env.sh + + +. .evergreen/utils.sh + +PYTHON_BINARY=$(find_python3) + +# Should be called from src +EVERGREEN_PATH=$(pwd)/.evergreen +TARGET_DIR=$(pwd)/$DIR +SCAFFOLD_SCRIPT=$EVERGREEN_PATH/scaffold_atlas.py + +mkdir atlas + +pushd atlas + +$PYTHON_BINARY -m venv . +source ./bin/activate +popd + +# Test server is up +$PYTHON_BINARY -m pip install pymongo +CONN_STRING=$CONN_STRING \ + $PYTHON_BINARY -c "from pymongo import MongoClient; import os; MongoClient(os.environ['MONGODB_URI']).db.command('ping')" + +# Add database and index configurations +DATABASE=$DATABASE \ + CONN_STRING=$MONGODB_URI \ + REPO_NAME=$REPO_NAME \ + DIR=$DIR \ + TARGET_DIR=$TARGET_DIR \ + $PYTHON_BINARY $SCAFFOLD_SCRIPT From ca14f1670d2331ffa3b2dad6c73374673412ecb9 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Wed, 20 Nov 2024 13:03:58 -0600 Subject: [PATCH 22/56] fix remote setup --- .evergreen/setup-remote.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.evergreen/setup-remote.sh b/.evergreen/setup-remote.sh index fa228ec..a14108f 100644 --- a/.evergreen/setup-remote.sh +++ b/.evergreen/setup-remote.sh @@ -62,7 +62,7 @@ popd # Test server is up $PYTHON_BINARY -m pip install pymongo -CONN_STRING=$CONN_STRING \ +CONN_STRING=$MONGODB_URI \ $PYTHON_BINARY -c "from pymongo import MongoClient; import os; MongoClient(os.environ['MONGODB_URI']).db.command('ping')" # Add database and index configurations From c356168feb304924b010ba89b52f257c6c387539 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Wed, 20 Nov 2024 13:29:38 -0600 Subject: [PATCH 23/56] fix remote setup --- .evergreen/scaffold_atlas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.evergreen/scaffold_atlas.py b/.evergreen/scaffold_atlas.py index 4f06422..0e32853 100644 --- a/.evergreen/scaffold_atlas.py +++ b/.evergreen/scaffold_atlas.py @@ -41,6 +41,7 @@ def upload_data(db: Database, filename: Path) -> None: db.name, collection_name, ) + db.drop_collection(collection_name) if not isinstance(loaded_collection, list): loaded_collection = [loaded_collection] if loaded_collection: @@ -122,7 +123,6 @@ def generate_indexes(client: MongoClient, index_jsons: list[Path]) -> None: def main() -> None: client = MongoClient(CONN_STRING) database = client[DATABASE_NAME] - client.drop_database(DATABASE_NAME) collection_jsons = walk_directory(DB_PATH) index_jsons = walk_directory(INDEX_PATH) generate_collections(database, collection_jsons) From ff80a59861bbc12ba62f005e6c96f6666a802d97 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Wed, 20 Nov 2024 14:58:22 -0600 Subject: [PATCH 24/56] fix remote setup --- .evergreen/scaffold_atlas.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.evergreen/scaffold_atlas.py b/.evergreen/scaffold_atlas.py index 0e32853..28f6012 100644 --- a/.evergreen/scaffold_atlas.py +++ b/.evergreen/scaffold_atlas.py @@ -72,7 +72,11 @@ def create_index(client: MongoClient, filename: Path) -> None: search_index = SearchIndexModel( loaded_index_configuration, name=index_name, type=index_type ) - collection.create_search_index(search_index) + indexes = [index["name"] for index in collection.list_search_indexes()] + if index_name in indexes: + collection.update_search_index(index_name, loaded_index_configuration) + else: + collection.create_search_index(search_index) def walk_directory(filepath) -> list[str]: From 11eef1c19ed212592fd9cb7a61f4a32932359049 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Thu, 21 Nov 2024 05:42:25 -0600 Subject: [PATCH 25/56] patch min llama-index-core version --- .../patches/pin-llama-index-core.patch | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 llama-index-python-vectorstore/patches/pin-llama-index-core.patch diff --git a/llama-index-python-vectorstore/patches/pin-llama-index-core.patch b/llama-index-python-vectorstore/patches/pin-llama-index-core.patch new file mode 100644 index 0000000..75bd176 --- /dev/null +++ b/llama-index-python-vectorstore/patches/pin-llama-index-core.patch @@ -0,0 +1,12 @@ +diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-mongodb/pyproject.toml b/llama-index-integrations/vector_stores/llama-index-vector-stores-mongodb/pyproject.toml +index ba22a585c..f7b6c5647 100644 +--- a/llama-index-integrations/vector_stores/llama-index-vector-stores-mongodb/pyproject.toml ++++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-mongodb/pyproject.toml +@@ -41,6 +41,7 @@ jupyter = "^1.0.0" + llama-index-embeddings-openai = "^0.3.0" + llama-index-llms-openai = "^0.3.0" + llama-index-readers-file = "^0.4.0" ++llama-index-core = "^0.12.1" + mypy = "0.991" + pre-commit = "3.2.0" + pylint = "2.15.10" From 688f5ca1789d6a9cc15310e09b0980a6c6ad4915 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Thu, 21 Nov 2024 05:59:09 -0600 Subject: [PATCH 26/56] see if all is working --- .evergreen/config.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.evergreen/config.yml b/.evergreen/config.yml index b0a4bfb..b7ffbd8 100644 --- a/.evergreen/config.yml +++ b/.evergreen/config.yml @@ -193,8 +193,8 @@ buildvariants: - rhel87-small tasks: - name: test-semantic-kernel-python-local - - name: test-semantic-kernel-python-remote - batchtime: 10080 # 1 week + # - name: test-semantic-kernel-python-remote + # batchtime: 10080 # 1 week - name: test-semantic-kernel-csharp-rhel display_name: Semantic-Kernel RHEL CSharp @@ -207,8 +207,8 @@ buildvariants: - rhel87-small tasks: - name: test-semantic-kernel-csharp-local - - name: test-semantic-kernel-csharp-remote - batchtime: 10080 # 1 week + # - name: test-semantic-kernel-csharp-remote + # batchtime: 10080 # 1 week - name: test-langchain-python-rhel display_name: Langchain RHEL Python From 8e2e257f6c8187477e5f40feb7d6f6f1db5ed31f Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Thu, 21 Nov 2024 09:23:48 -0600 Subject: [PATCH 27/56] add waits --- .evergreen/config.yml | 12 ++++----- .evergreen/scaffold_atlas.py | 51 +++++++++++++++++++++++++++++++++++- 2 files changed, 55 insertions(+), 8 deletions(-) diff --git a/.evergreen/config.yml b/.evergreen/config.yml index b7ffbd8..c286811 100644 --- a/.evergreen/config.yml +++ b/.evergreen/config.yml @@ -80,7 +80,6 @@ functions: - command: subprocess.exec type: setup params: - #include_expansions_in_env: [DIR] add_expansions_to_env: true working_dir: "src" binary: bash @@ -152,12 +151,11 @@ tasks: - func: "setup remote atlas" - func: "execute tests" - # TODO: INTPYTHON-313 - # - name: test-docarray-local - # commands: - # - func: "fetch repo" - # - func: "setup local atlas" - # - func: "execute tests" + - name: test-docarray-local + commands: + - func: "fetch repo" + - func: "setup local atlas" + - func: "execute tests" - name: test-docarray-remote commands: diff --git a/.evergreen/scaffold_atlas.py b/.evergreen/scaffold_atlas.py index 28f6012..2b967dc 100644 --- a/.evergreen/scaffold_atlas.py +++ b/.evergreen/scaffold_atlas.py @@ -4,9 +4,11 @@ import logging import os from pathlib import Path -from typing import Any, Union +from time import sleep, monotonic +from typing import Any, Callable, Union from pymongo import MongoClient +from pymongo.collection import Collection from pymongo.database import Database from pymongo.operations import SearchIndexModel from pymongo.results import InsertManyResult @@ -78,6 +80,53 @@ def create_index(client: MongoClient, filename: Path) -> None: else: collection.create_search_index(search_index) + _wait_for_predicate( + predicate=lambda: _is_index_ready(collection, index_name), + err=f"{index_name=} did not complete in {10}!", + timeout=10, + ) + + +def _is_index_ready(collection: Collection, index_name: str) -> bool: + """Check for the index name in the list of available search indexes. + + This confirms that the specified index is of status READY. + + Args: + collection (Collection): MongoDB Collection to for the search indexes + index_name (str): Vector Search Index name + + Returns: + bool : True if the index is present and READY false otherwise + """ + search_indexes = collection.list_search_indexes(index_name) + + for index in search_indexes: + if index["status"] == "READY": + return True + return False + + +def _wait_for_predicate( + predicate: Callable, err: str, timeout: float = 120, interval: float = 0.5 +) -> None: + """Generic to block until the predicate returns true. + + Args: + predicate (Callable[, bool]): A function that returns a boolean value + err (str): Error message to raise if nothing occurs + timeout (float, optional): Wait time for predicate. Defaults to TIMEOUT. + interval (float, optional): Interval to check predicate. Defaults to DELAY. + + Raises: + TimeoutError: _description_ + """ + start = monotonic() + while not predicate(): + if monotonic() - start > timeout: + raise TimeoutError(err) + sleep(interval) + def walk_directory(filepath) -> list[str]: """Return all *.json filenames in the DB_PATH directory""" From 04bef34e3102a62a4c82883bee285c609a3941b3 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Thu, 21 Nov 2024 09:26:18 -0600 Subject: [PATCH 28/56] try 20sec --- .evergreen/scaffold_atlas.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.evergreen/scaffold_atlas.py b/.evergreen/scaffold_atlas.py index 2b967dc..0465382 100644 --- a/.evergreen/scaffold_atlas.py +++ b/.evergreen/scaffold_atlas.py @@ -80,10 +80,11 @@ def create_index(client: MongoClient, filename: Path) -> None: else: collection.create_search_index(search_index) + timeout = 20 _wait_for_predicate( predicate=lambda: _is_index_ready(collection, index_name), - err=f"{index_name=} did not complete in {10}!", - timeout=10, + err=f"{index_name=} did not complete in {timeout}!", + timeout=timeout, ) From 77969cfdb67d9245c2178996a4dde057ff1b59d9 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Thu, 21 Nov 2024 09:37:43 -0600 Subject: [PATCH 29/56] only create if it does not exist --- .evergreen/scaffold_atlas.py | 56 ++---------------------------------- 1 file changed, 2 insertions(+), 54 deletions(-) diff --git a/.evergreen/scaffold_atlas.py b/.evergreen/scaffold_atlas.py index 0465382..d7dd6df 100644 --- a/.evergreen/scaffold_atlas.py +++ b/.evergreen/scaffold_atlas.py @@ -4,11 +4,9 @@ import logging import os from pathlib import Path -from time import sleep, monotonic -from typing import Any, Callable, Union +from typing import Any, Union from pymongo import MongoClient -from pymongo.collection import Collection from pymongo.database import Database from pymongo.operations import SearchIndexModel from pymongo.results import InsertManyResult @@ -75,59 +73,9 @@ def create_index(client: MongoClient, filename: Path) -> None: loaded_index_configuration, name=index_name, type=index_type ) indexes = [index["name"] for index in collection.list_search_indexes()] - if index_name in indexes: - collection.update_search_index(index_name, loaded_index_configuration) - else: + if index_name not in indexes: collection.create_search_index(search_index) - timeout = 20 - _wait_for_predicate( - predicate=lambda: _is_index_ready(collection, index_name), - err=f"{index_name=} did not complete in {timeout}!", - timeout=timeout, - ) - - -def _is_index_ready(collection: Collection, index_name: str) -> bool: - """Check for the index name in the list of available search indexes. - - This confirms that the specified index is of status READY. - - Args: - collection (Collection): MongoDB Collection to for the search indexes - index_name (str): Vector Search Index name - - Returns: - bool : True if the index is present and READY false otherwise - """ - search_indexes = collection.list_search_indexes(index_name) - - for index in search_indexes: - if index["status"] == "READY": - return True - return False - - -def _wait_for_predicate( - predicate: Callable, err: str, timeout: float = 120, interval: float = 0.5 -) -> None: - """Generic to block until the predicate returns true. - - Args: - predicate (Callable[, bool]): A function that returns a boolean value - err (str): Error message to raise if nothing occurs - timeout (float, optional): Wait time for predicate. Defaults to TIMEOUT. - interval (float, optional): Interval to check predicate. Defaults to DELAY. - - Raises: - TimeoutError: _description_ - """ - start = monotonic() - while not predicate(): - if monotonic() - start > timeout: - raise TimeoutError(err) - sleep(interval) - def walk_directory(filepath) -> list[str]: """Return all *.json filenames in the DB_PATH directory""" From e9ada753378554d46e8e37632c269c2abfa5c70f Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Thu, 21 Nov 2024 11:05:03 -0600 Subject: [PATCH 30/56] add debugging --- .evergreen/provision-atlas.sh | 1 + .evergreen/scaffold_atlas.py | 72 ++++++++++++++++++++++++++++++++++- 2 files changed, 72 insertions(+), 1 deletion(-) diff --git a/.evergreen/provision-atlas.sh b/.evergreen/provision-atlas.sh index 398de8f..6f2f0c3 100644 --- a/.evergreen/provision-atlas.sh +++ b/.evergreen/provision-atlas.sh @@ -30,6 +30,7 @@ DATABASE=$DATABASE \ CONN_STRING=$CONN_STRING \ REPO_NAME=$REPO_NAME \ DIR=$DIR \ + DEBUG="${DEBUG:-1}" \ TARGET_DIR=$TARGET_DIR \ $PYTHON_BINARY $SCAFFOLD_SCRIPT diff --git a/.evergreen/scaffold_atlas.py b/.evergreen/scaffold_atlas.py index d7dd6df..e831123 100644 --- a/.evergreen/scaffold_atlas.py +++ b/.evergreen/scaffold_atlas.py @@ -4,7 +4,8 @@ import logging import os from pathlib import Path -from typing import Any, Union +from time import sleep, monotonic +from typing import Any, Callable, Union from pymongo import MongoClient from pymongo.database import Database @@ -67,6 +68,13 @@ def create_index(client: MongoClient, filename: Path) -> None: index_name = loaded_index_configuration.pop("name") index_type = loaded_index_configuration.pop("type", None) + logger.debug( + "creating search index: %s on %s.%s...", + index_name, + collection_name, + database_name, + ) + collection = client[database_name][collection_name] search_index = SearchIndexModel( @@ -75,6 +83,68 @@ def create_index(client: MongoClient, filename: Path) -> None: indexes = [index["name"] for index in collection.list_search_indexes()] if index_name not in indexes: collection.create_search_index(search_index) + logger.debug("waiting for search index to be queryable...") + wait_until_complete = 10 + _wait_for_predicate( + predicate=lambda: _is_index_ready(collection, index_name), + err=f"Index {index_name} update did not complete in {wait_until_complete}!", + timeout=wait_until_complete, + ) + logger.debug("waiting for search index to be queryable... done.") + else: + logger.debug( + "search index already exists!: %s on %s.%s", + index_name, + collection_name, + database_name, + ) + logger.debug( + "creating search: %s on %s.%s... done", + index_name, + collection_name, + database_name, + ) + + +def _is_index_ready(collection: Any, index_name: str) -> bool: + """Check for the index name in the list of available search indexes. + + This confirms that the specified index is of status READY. + + Args: + collection (Collection): MongoDB Collection to for the search indexes + index_name (str): Vector Search Index name + + Returns: + bool : True if the index is present and READY false otherwise + """ + search_indexes = collection.list_search_indexes(index_name) + + for index in search_indexes: + if index["status"] == "READY": + return True + return False + + +def _wait_for_predicate( + predicate: Callable, err: str, timeout: float = 120, interval: float = 0.5 +) -> None: + """Generic to block until the predicate returns true. + + Args: + predicate (Callable[, bool]): A function that returns a boolean value + err (str): Error message to raise if nothing occurs + timeout (float, optional): Wait time for predicate. Defaults to TIMEOUT. + interval (float, optional): Interval to check predicate. Defaults to DELAY. + + Raises: + TimeoutError: _description_ + """ + start = monotonic() + while not predicate(): + if monotonic() - start > timeout: + raise TimeoutError(err) + sleep(interval) def walk_directory(filepath) -> list[str]: From 019be0600a6da15114276067182629f72db046f9 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Thu, 21 Nov 2024 13:00:05 -0600 Subject: [PATCH 31/56] force debug logging --- .evergreen/scaffold_atlas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.evergreen/scaffold_atlas.py b/.evergreen/scaffold_atlas.py index e831123..678409a 100644 --- a/.evergreen/scaffold_atlas.py +++ b/.evergreen/scaffold_atlas.py @@ -14,7 +14,7 @@ logging.basicConfig() logger = logging.getLogger(__file__) -logger.setLevel(logging.DEBUG if os.environ.get("DEBUG") else logging.INFO) +logger.setLevel(logging.DEBUG) DATABASE_NAME = os.environ.get("DATABASE") CONN_STRING = os.environ.get("CONN_STRING") From ee2fc6c3da87dde9d2a8fe6606712b483444d272 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Thu, 21 Nov 2024 13:00:25 -0600 Subject: [PATCH 32/56] run all variants --- .evergreen/config.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.evergreen/config.yml b/.evergreen/config.yml index c286811..b98a68b 100644 --- a/.evergreen/config.yml +++ b/.evergreen/config.yml @@ -191,8 +191,8 @@ buildvariants: - rhel87-small tasks: - name: test-semantic-kernel-python-local - # - name: test-semantic-kernel-python-remote - # batchtime: 10080 # 1 week + - name: test-semantic-kernel-python-remote + batchtime: 10080 # 1 week - name: test-semantic-kernel-csharp-rhel display_name: Semantic-Kernel RHEL CSharp @@ -205,8 +205,8 @@ buildvariants: - rhel87-small tasks: - name: test-semantic-kernel-csharp-local - # - name: test-semantic-kernel-csharp-remote - # batchtime: 10080 # 1 week + - name: test-semantic-kernel-csharp-remote + batchtime: 10080 # 1 week - name: test-langchain-python-rhel display_name: Langchain RHEL Python From 94a7700df01ef44b93384b5b72f6977da579a487 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Thu, 21 Nov 2024 13:26:48 -0600 Subject: [PATCH 33/56] increase timeout --- .evergreen/scaffold_atlas.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.evergreen/scaffold_atlas.py b/.evergreen/scaffold_atlas.py index 678409a..c4b3006 100644 --- a/.evergreen/scaffold_atlas.py +++ b/.evergreen/scaffold_atlas.py @@ -84,7 +84,7 @@ def create_index(client: MongoClient, filename: Path) -> None: if index_name not in indexes: collection.create_search_index(search_index) logger.debug("waiting for search index to be queryable...") - wait_until_complete = 10 + wait_until_complete = 60 _wait_for_predicate( predicate=lambda: _is_index_ready(collection, index_name), err=f"Index {index_name} update did not complete in {wait_until_complete}!", @@ -99,7 +99,7 @@ def create_index(client: MongoClient, filename: Path) -> None: database_name, ) logger.debug( - "creating search: %s on %s.%s... done", + "creating search index: %s on %s.%s... done", index_name, collection_name, database_name, From 1034e6867c70f82d037d211b82035274a653f519 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Thu, 21 Nov 2024 14:03:25 -0600 Subject: [PATCH 34/56] debug chatgpt --- .evergreen/scaffold_atlas.py | 6 +++--- chatgpt-retrieval-plugin/run.sh | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.evergreen/scaffold_atlas.py b/.evergreen/scaffold_atlas.py index c4b3006..2998f78 100644 --- a/.evergreen/scaffold_atlas.py +++ b/.evergreen/scaffold_atlas.py @@ -71,8 +71,8 @@ def create_index(client: MongoClient, filename: Path) -> None: logger.debug( "creating search index: %s on %s.%s...", index_name, - collection_name, database_name, + collection_name, ) collection = client[database_name][collection_name] @@ -95,14 +95,14 @@ def create_index(client: MongoClient, filename: Path) -> None: logger.debug( "search index already exists!: %s on %s.%s", index_name, - collection_name, database_name, + collection_name, ) logger.debug( "creating search index: %s on %s.%s... done", index_name, - collection_name, database_name, + collection_name, ) diff --git a/chatgpt-retrieval-plugin/run.sh b/chatgpt-retrieval-plugin/run.sh index 9927cc5..fa3fc74 100644 --- a/chatgpt-retrieval-plugin/run.sh +++ b/chatgpt-retrieval-plugin/run.sh @@ -4,6 +4,8 @@ set -eu +exit 1 + # Get the MONGODB_URI and OPENAI_API_KEY. # shellcheck disable=SC2154 . $workdir/src/env.sh From 4eb03bafad2026d2c954dfd360321c2190d75e23 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Thu, 21 Nov 2024 14:10:34 -0600 Subject: [PATCH 35/56] debug chatgpt --- chatgpt-retrieval-plugin/run.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/chatgpt-retrieval-plugin/run.sh b/chatgpt-retrieval-plugin/run.sh index fa3fc74..9927cc5 100644 --- a/chatgpt-retrieval-plugin/run.sh +++ b/chatgpt-retrieval-plugin/run.sh @@ -4,8 +4,6 @@ set -eu -exit 1 - # Get the MONGODB_URI and OPENAI_API_KEY. # shellcheck disable=SC2154 . $workdir/src/env.sh From 42dcbd2f5f4ae3eed814f5ee87368af2e59748b4 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Thu, 21 Nov 2024 20:45:10 -0600 Subject: [PATCH 36/56] fix for python msk --- .../indexes/nearestSearch_default.json | 21 +++++++++---------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/semantic-kernel-python/indexes/nearestSearch_default.json b/semantic-kernel-python/indexes/nearestSearch_default.json index 30a6347..53cc9f6 100644 --- a/semantic-kernel-python/indexes/nearestSearch_default.json +++ b/semantic-kernel-python/indexes/nearestSearch_default.json @@ -1,15 +1,14 @@ { + "fields": [ + { + "numDimensions": 3, + "path": "embedding", + "similarity": "cosine", + "type": "vector" + } + ], + "name": "default", + "type": "vectorSearch", "collectionName": "nearestSearch", "database": "pyMSKTest", - "mappings": { - "dynamic": true, - "fields": { - "embedding": { - "dimensions": 3, - "similarity": "cosine", - "type": "knnVector" - } - } - }, - "name": "default" } \ No newline at end of file From 09f265ff44849e08f9ee0c5eb10b958c7c1f6a59 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Thu, 21 Nov 2024 20:48:23 -0600 Subject: [PATCH 37/56] update msk --- .../indexes/nearestSearch_default.json | 14 +++++--------- .../indexes/nearestSearch_default.json | 2 +- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/semantic-kernel-csharp/indexes/nearestSearch_default.json b/semantic-kernel-csharp/indexes/nearestSearch_default.json index f0ca670..210d23c 100644 --- a/semantic-kernel-csharp/indexes/nearestSearch_default.json +++ b/semantic-kernel-csharp/indexes/nearestSearch_default.json @@ -1,15 +1,11 @@ { "collectionName": "nearestSearch", "database": "dotnetMSKNearestTest", - "mappings": { - "dynamic": true, - "fields": { - "embedding": { - "dimensions": 3, + "fields": [{ + "path": "embedding", + "numDimensions": 3, "similarity": "cosine", - "type": "knnVector" - } - } - }, + "type": "vector" + }], "name": "default" } \ No newline at end of file diff --git a/semantic-kernel-python/indexes/nearestSearch_default.json b/semantic-kernel-python/indexes/nearestSearch_default.json index 53cc9f6..096e153 100644 --- a/semantic-kernel-python/indexes/nearestSearch_default.json +++ b/semantic-kernel-python/indexes/nearestSearch_default.json @@ -10,5 +10,5 @@ "name": "default", "type": "vectorSearch", "collectionName": "nearestSearch", - "database": "pyMSKTest", + "database": "pyMSKTest" } \ No newline at end of file From eaf685f97444e350da3f5c58e842c4ae9d2aec70 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Thu, 21 Nov 2024 20:50:08 -0600 Subject: [PATCH 38/56] update msk --- semantic-kernel-csharp/indexes/nearestSearch_default.json | 1 + 1 file changed, 1 insertion(+) diff --git a/semantic-kernel-csharp/indexes/nearestSearch_default.json b/semantic-kernel-csharp/indexes/nearestSearch_default.json index 210d23c..aa932ff 100644 --- a/semantic-kernel-csharp/indexes/nearestSearch_default.json +++ b/semantic-kernel-csharp/indexes/nearestSearch_default.json @@ -1,6 +1,7 @@ { "collectionName": "nearestSearch", "database": "dotnetMSKNearestTest", + "type": "vectorSearch", "fields": [{ "path": "embedding", "numDimensions": 3, From b75ffbe591f71f81ae9fc270976af344938ad3d4 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Thu, 21 Nov 2024 21:38:27 -0600 Subject: [PATCH 39/56] update docarray --- .evergreen/config.yml | 4 +--- .evergreen/scaffold_atlas.py | 4 ++-- docarray/indexes/vector_index_mydoc__list_docs__docs.json | 2 -- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/.evergreen/config.yml b/.evergreen/config.yml index b98a68b..62b0391 100644 --- a/.evergreen/config.yml +++ b/.evergreen/config.yml @@ -260,7 +260,5 @@ buildvariants: run_on: - rhel87-small tasks: - # TODO: INTPYTHON-313 - # - name: test-docarray-local + - name: test-docarray-local - name: test-docarray-remote - # batchtime: 10080 # 1 week diff --git a/.evergreen/scaffold_atlas.py b/.evergreen/scaffold_atlas.py index 2998f78..259d8cb 100644 --- a/.evergreen/scaffold_atlas.py +++ b/.evergreen/scaffold_atlas.py @@ -83,14 +83,14 @@ def create_index(client: MongoClient, filename: Path) -> None: indexes = [index["name"] for index in collection.list_search_indexes()] if index_name not in indexes: collection.create_search_index(search_index) - logger.debug("waiting for search index to be queryable...") + logger.debug("waiting for search index to be ready...") wait_until_complete = 60 _wait_for_predicate( predicate=lambda: _is_index_ready(collection, index_name), err=f"Index {index_name} update did not complete in {wait_until_complete}!", timeout=wait_until_complete, ) - logger.debug("waiting for search index to be queryable... done.") + logger.debug("waiting for search index to be ready... done.") else: logger.debug( "search index already exists!: %s on %s.%s", diff --git a/docarray/indexes/vector_index_mydoc__list_docs__docs.json b/docarray/indexes/vector_index_mydoc__list_docs__docs.json index a4f986d..6e0fdf4 100644 --- a/docarray/indexes/vector_index_mydoc__list_docs__docs.json +++ b/docarray/indexes/vector_index_mydoc__list_docs__docs.json @@ -12,5 +12,3 @@ "database": "docarray_test_db", "collectionName": "mydoc__list_docs__docs" } - - From 29b2493097e85231fb9ac74acd38633f24b6c945 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Thu, 21 Nov 2024 21:38:37 -0600 Subject: [PATCH 40/56] update docarray --- docarray/indexes/text_index_bespoke_name.json | 16 +++++++++++++ .../indexes/vector_index_bespoke_name.json | 23 +++++++++++++++++++ 2 files changed, 39 insertions(+) create mode 100644 docarray/indexes/text_index_bespoke_name.json create mode 100644 docarray/indexes/vector_index_bespoke_name.json diff --git a/docarray/indexes/text_index_bespoke_name.json b/docarray/indexes/text_index_bespoke_name.json new file mode 100644 index 0000000..b65e8c2 --- /dev/null +++ b/docarray/indexes/text_index_bespoke_name.json @@ -0,0 +1,16 @@ + { + "mappings": { + "dynamic": false, + "fields": { + "text": [ + { + "type": "string" + } + ] + } + }, + "name": "text_index", + "type": "search", + "database": "docarray_test_db", + "collectionName": "bespoke_name" + } \ No newline at end of file diff --git a/docarray/indexes/vector_index_bespoke_name.json b/docarray/indexes/vector_index_bespoke_name.json new file mode 100644 index 0000000..74410f3 --- /dev/null +++ b/docarray/indexes/vector_index_bespoke_name.json @@ -0,0 +1,23 @@ +{ + "fields": [ + { + "numDimensions": 10, + "path": "embedding", + "similarity": "cosine", + "type": "vector" + }, + { + "path": "number", + "type": "filter" + }, + { + "path": "text", + "type": "filter" + } + ], + "name": "vector_index", + "type": "vectorSearch", + "database": "docarray_test_db", + "collectionName": "bespoke_name" + } + \ No newline at end of file From b88b8621602bdc9d1fa35f1b0271a567d5541add Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Thu, 21 Nov 2024 21:46:05 -0600 Subject: [PATCH 41/56] update langchain --- ...langchain_test_retrievers_fulltext_index.json | 16 ++++++++-------- .../langchain_test_retrievers_vector_index.json | 14 +++++--------- 2 files changed, 13 insertions(+), 17 deletions(-) diff --git a/langchain-python/indexes/langchain_test_retrievers_fulltext_index.json b/langchain-python/indexes/langchain_test_retrievers_fulltext_index.json index 51ac3a7..f9f458b 100644 --- a/langchain-python/indexes/langchain_test_retrievers_fulltext_index.json +++ b/langchain-python/indexes/langchain_test_retrievers_fulltext_index.json @@ -1,13 +1,13 @@ { "mappings": { - "dynamic": false, - "fields": { - "text": [ - { - "type": "string" - } - ] - } + "dynamic": false, + "fields": { + "text": [ + { + "type": "string" + } + ] + } }, "name": "text_index", "type": "search", diff --git a/langchain-python/indexes/langchain_test_retrievers_vector_index.json b/langchain-python/indexes/langchain_test_retrievers_vector_index.json index c13d4cf..747ef92 100644 --- a/langchain-python/indexes/langchain_test_retrievers_vector_index.json +++ b/langchain-python/indexes/langchain_test_retrievers_vector_index.json @@ -1,18 +1,14 @@ { "fields": [ { - "numDimensions": 1536, - "path": "embedding", - "similarity": "dotProduct", - "type": "vector" - }, - { - "path": "c", - "type": "filter" + "numDimensions": 1536, + "path": "embedding", + "similarity": "cosine", + "type": "vector" } ], "name": "vector_index", "type": "vectorSearch", "database": "langchain_test_db", "collectionName": "langchain_test_retrievers" -} \ No newline at end of file +} From fa9ccb0f19e6a25a8e0fd374f996b9b0e4802cc5 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Thu, 21 Nov 2024 21:47:12 -0600 Subject: [PATCH 42/56] fix docarray --- docarray/database/bespoke_name.json | 1 + 1 file changed, 1 insertion(+) create mode 100644 docarray/database/bespoke_name.json diff --git a/docarray/database/bespoke_name.json b/docarray/database/bespoke_name.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/docarray/database/bespoke_name.json @@ -0,0 +1 @@ +[] \ No newline at end of file From 4c7549e4e66e8c310d077b8b1989ae7b22b3b5ea Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Fri, 22 Nov 2024 07:24:46 -0600 Subject: [PATCH 43/56] try with clean db --- .evergreen/scaffold_atlas.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/.evergreen/scaffold_atlas.py b/.evergreen/scaffold_atlas.py index 259d8cb..a1c7821 100644 --- a/.evergreen/scaffold_atlas.py +++ b/.evergreen/scaffold_atlas.py @@ -83,21 +83,25 @@ def create_index(client: MongoClient, filename: Path) -> None: indexes = [index["name"] for index in collection.list_search_indexes()] if index_name not in indexes: collection.create_search_index(search_index) - logger.debug("waiting for search index to be ready...") - wait_until_complete = 60 - _wait_for_predicate( - predicate=lambda: _is_index_ready(collection, index_name), - err=f"Index {index_name} update did not complete in {wait_until_complete}!", - timeout=wait_until_complete, - ) - logger.debug("waiting for search index to be ready... done.") + else: logger.debug( - "search index already exists!: %s on %s.%s", + "search index already exists, updating: %s on %s.%s", index_name, database_name, collection_name, ) + collection.update_search_index(index_name, loaded_index_configuration) + + logger.debug("waiting for search index to be ready...") + wait_until_complete = 60 + _wait_for_predicate( + predicate=lambda: _is_index_ready(collection, index_name), + err=f"Index {index_name} update did not complete in {wait_until_complete}!", + timeout=wait_until_complete, + ) + logger.debug("waiting for search index to be ready... done.") + logger.debug( "creating search index: %s on %s.%s... done", index_name, From 2ac1f313815fcc3e52bdc781d3ebf25dc682f9c9 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Fri, 22 Nov 2024 07:25:06 -0600 Subject: [PATCH 44/56] increase timeout --- .evergreen/scaffold_atlas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.evergreen/scaffold_atlas.py b/.evergreen/scaffold_atlas.py index a1c7821..2c7c382 100644 --- a/.evergreen/scaffold_atlas.py +++ b/.evergreen/scaffold_atlas.py @@ -94,7 +94,7 @@ def create_index(client: MongoClient, filename: Path) -> None: collection.update_search_index(index_name, loaded_index_configuration) logger.debug("waiting for search index to be ready...") - wait_until_complete = 60 + wait_until_complete = 120 _wait_for_predicate( predicate=lambda: _is_index_ready(collection, index_name), err=f"Index {index_name} update did not complete in {wait_until_complete}!", From ea5ffe5ab5e067320ecef9af8501d0792db6e0dd Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Fri, 22 Nov 2024 08:39:35 -0600 Subject: [PATCH 45/56] try not removing collections --- .evergreen/scaffold_atlas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.evergreen/scaffold_atlas.py b/.evergreen/scaffold_atlas.py index 2c7c382..2172c5b 100644 --- a/.evergreen/scaffold_atlas.py +++ b/.evergreen/scaffold_atlas.py @@ -42,7 +42,7 @@ def upload_data(db: Database, filename: Path) -> None: db.name, collection_name, ) - db.drop_collection(collection_name) + db[collection_name].delete_many({}) if not isinstance(loaded_collection, list): loaded_collection = [loaded_collection] if loaded_collection: From 0e97128cee2ff99129f69cb29c3d5a5ec2cfe784 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Fri, 22 Nov 2024 13:07:34 -0600 Subject: [PATCH 46/56] fix collection handling --- .evergreen/scaffold_atlas.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/.evergreen/scaffold_atlas.py b/.evergreen/scaffold_atlas.py index 2172c5b..645ec44 100644 --- a/.evergreen/scaffold_atlas.py +++ b/.evergreen/scaffold_atlas.py @@ -42,13 +42,17 @@ def upload_data(db: Database, filename: Path) -> None: db.name, collection_name, ) - db[collection_name].delete_many({}) + collections = [c["name"] for c in db.list_collections()] + if collection_name in collections: + logger.debug("Clearing existing collection", collection_name) + db[collection_name].delete_many({}) + if not isinstance(loaded_collection, list): loaded_collection = [loaded_collection] if loaded_collection: result: InsertManyResult = db[collection_name].insert_many(loaded_collection) logger.debug("Uploaded results for %s: %s", filename.name, result.inserted_ids) - else: + elif collection_name not in collections: logger.debug("Empty collection named %s created", collection_name) db.create_collection(collection_name) From 1618a15ccc4b9befca0953bad779cc89a72edb29 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Fri, 22 Nov 2024 17:44:35 -0600 Subject: [PATCH 47/56] skip python semantic kernel --- .evergreen/config.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.evergreen/config.yml b/.evergreen/config.yml index 62b0391..6a05dfc 100644 --- a/.evergreen/config.yml +++ b/.evergreen/config.yml @@ -191,8 +191,9 @@ buildvariants: - rhel87-small tasks: - name: test-semantic-kernel-python-local - - name: test-semantic-kernel-python-remote - batchtime: 10080 # 1 week + # TODO: INTPYTHON-430 + # - name: test-semantic-kernel-python-remote + # batchtime: 10080 # 1 week - name: test-semantic-kernel-csharp-rhel display_name: Semantic-Kernel RHEL CSharp From 58099461ca7a578442b14227998ac34481908a14 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Fri, 22 Nov 2024 17:48:10 -0600 Subject: [PATCH 48/56] lint --- .evergreen/setup-remote.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.evergreen/setup-remote.sh b/.evergreen/setup-remote.sh index a14108f..102caf8 100644 --- a/.evergreen/setup-remote.sh +++ b/.evergreen/setup-remote.sh @@ -39,10 +39,11 @@ esac export MONGODB_URI # Create the env file -echo "export OPENAI_API_KEY=$openai_api_key" >> env.sh +echo "export OPENAI_API_KEY=$OPENAI_API_KEY" >> env.sh echo "export MONGODB_URI=$MONGODB_URI" >> env.sh +# Ensure the remote database is populated. . .evergreen/utils.sh PYTHON_BINARY=$(find_python3) From 83af1856c1849c1cd92125767a242caf66fad3a4 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Sun, 1 Dec 2024 20:51:26 -0600 Subject: [PATCH 49/56] address review --- .evergreen/config.yml | 11 +++++++++++ .evergreen/provision-atlas.sh | 32 ++------------------------------ .evergreen/setup-remote.sh | 29 +---------------------------- .evergreen/utils.sh | 31 +++++++++++++++++++++++++++++++ 4 files changed, 45 insertions(+), 58 deletions(-) diff --git a/.evergreen/config.yml b/.evergreen/config.yml index 6a05dfc..a950622 100644 --- a/.evergreen/config.yml +++ b/.evergreen/config.yml @@ -92,54 +92,63 @@ pre: tasks: - name: test-semantic-kernel-python-local + tags: [local] commands: - func: "fetch repo" - func: "setup local atlas" - func: "execute tests" - name: test-semantic-kernel-python-remote + tags: [remote] commands: - func: "fetch repo" - func: "setup remote atlas" - func: "execute tests" - name: test-semantic-kernel-csharp-local + tags: [local] commands: - func: "fetch repo" - func: "setup local atlas" - func: "execute tests" - name: test-semantic-kernel-csharp-remote + tags: [remote] commands: - func: "fetch repo" - func: "setup remote atlas" - func: "execute tests" - name: test-langchain-python-local + tags: [local] commands: - func: "fetch repo" - func: "setup local atlas" - func: "execute tests" - name: test-langchain-python-remote + tags: [remote] commands: - func: "fetch repo" - func: "setup remote atlas" - func: "execute tests" - name: test-chatgpt-retrieval-plugin-local + tags: [local] commands: - func: "fetch repo" - func: "setup local atlas" - func: "execute tests" - name: test-chatgpt-retrieval-plugin-remote + tags: [remote] commands: - func: "fetch repo" - func: "setup remote atlas" - func: "execute tests" - name: test-llama-index-local + tags: [local] commands: - func: "fetch repo" - func: "setup local atlas" @@ -152,12 +161,14 @@ tasks: - func: "execute tests" - name: test-docarray-local + tags: [local] commands: - func: "fetch repo" - func: "setup local atlas" - func: "execute tests" - name: test-docarray-remote + tags: [remote] commands: - func: "fetch repo" - func: "setup remote atlas" diff --git a/.evergreen/provision-atlas.sh b/.evergreen/provision-atlas.sh index 6f2f0c3..a6a4e5b 100644 --- a/.evergreen/provision-atlas.sh +++ b/.evergreen/provision-atlas.sh @@ -3,36 +3,8 @@ set -eu . .evergreen/utils.sh -PYTHON_BINARY=$(find_python3) - -# Should be called from src -EVERGREEN_PATH=$(pwd)/.evergreen -TARGET_DIR=$(pwd)/$DIR -SCAFFOLD_SCRIPT=$EVERGREEN_PATH/scaffold_atlas.py - -mkdir atlas - -setup_local_atlas - -pushd atlas - -$PYTHON_BINARY -m venv . -source ./bin/activate -popd - -# Test server is up -$PYTHON_BINARY -m pip install pymongo -CONN_STRING=$CONN_STRING \ - $PYTHON_BINARY -c "from pymongo import MongoClient; import os; MongoClient(os.environ['CONN_STRING']).db.command('ping')" - -# Add database and index configurations -DATABASE=$DATABASE \ - CONN_STRING=$CONN_STRING \ - REPO_NAME=$REPO_NAME \ - DIR=$DIR \ - DEBUG="${DEBUG:-1}" \ - TARGET_DIR=$TARGET_DIR \ - $PYTHON_BINARY $SCAFFOLD_SCRIPT +CONN_STRING=$(setup_local_atlas) +CONN_STRING=$CONN_STRING provision_atlas # Get the secrets. source secrets-export.sh diff --git a/.evergreen/setup-remote.sh b/.evergreen/setup-remote.sh index 102caf8..9c417e1 100644 --- a/.evergreen/setup-remote.sh +++ b/.evergreen/setup-remote.sh @@ -42,34 +42,7 @@ export MONGODB_URI echo "export OPENAI_API_KEY=$OPENAI_API_KEY" >> env.sh echo "export MONGODB_URI=$MONGODB_URI" >> env.sh - # Ensure the remote database is populated. . .evergreen/utils.sh -PYTHON_BINARY=$(find_python3) - -# Should be called from src -EVERGREEN_PATH=$(pwd)/.evergreen -TARGET_DIR=$(pwd)/$DIR -SCAFFOLD_SCRIPT=$EVERGREEN_PATH/scaffold_atlas.py - -mkdir atlas - -pushd atlas - -$PYTHON_BINARY -m venv . -source ./bin/activate -popd - -# Test server is up -$PYTHON_BINARY -m pip install pymongo -CONN_STRING=$MONGODB_URI \ - $PYTHON_BINARY -c "from pymongo import MongoClient; import os; MongoClient(os.environ['MONGODB_URI']).db.command('ping')" - -# Add database and index configurations -DATABASE=$DATABASE \ - CONN_STRING=$MONGODB_URI \ - REPO_NAME=$REPO_NAME \ - DIR=$DIR \ - TARGET_DIR=$TARGET_DIR \ - $PYTHON_BINARY $SCAFFOLD_SCRIPT +CONN_STRING=MONGODB_URI provision_atlas diff --git a/.evergreen/utils.sh b/.evergreen/utils.sh index c82683a..885d4f6 100644 --- a/.evergreen/utils.sh +++ b/.evergreen/utils.sh @@ -115,3 +115,34 @@ fetch_local_atlas_uri() { export CONN_STRING=$CONN_STRING echo "$CONN_STRING" } + + +scaffold_atlas() { + PYTHON_BINARY=$(find_python3) + + # Should be called from src + EVERGREEN_PATH=$(pwd)/.evergreen + TARGET_DIR=$(pwd)/$DIR + SCAFFOLD_SCRIPT=$EVERGREEN_PATH/scaffold_atlas.py + + mkdir -p atlas + pushd atlas + + $PYTHON_BINARY -m venv . + source ./bin/activate + popd + + # Test server is up + $PYTHON_BINARY -m pip install pymongo + CONN_STRING=$CONN_STRING \ + $PYTHON_BINARY -c "from pymongo import MongoClient; import os; MongoClient(os.environ['CONN_STRING']).db.command('ping')" + + # Add database and index configurations + DATABASE=$DATABASE \ + CONN_STRING=$CONN_STRING \ + REPO_NAME=$REPO_NAME \ + DIR=$DIR \ + DEBUG="${DEBUG:-1}" \ + TARGET_DIR=$TARGET_DIR \ + $PYTHON_BINARY $SCAFFOLD_SCRIPT +} From 8757c48364a4bed612c71cf604ce837c5628dc80 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Sun, 1 Dec 2024 21:04:27 -0600 Subject: [PATCH 50/56] cleanup --- .evergreen/provision-atlas.sh | 2 +- .evergreen/setup-remote.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.evergreen/provision-atlas.sh b/.evergreen/provision-atlas.sh index a6a4e5b..c419d35 100644 --- a/.evergreen/provision-atlas.sh +++ b/.evergreen/provision-atlas.sh @@ -4,7 +4,7 @@ set -eu . .evergreen/utils.sh CONN_STRING=$(setup_local_atlas) -CONN_STRING=$CONN_STRING provision_atlas +CONN_STRING=$CONN_STRING scaffold_atlas # Get the secrets. source secrets-export.sh diff --git a/.evergreen/setup-remote.sh b/.evergreen/setup-remote.sh index 9c417e1..f43844f 100644 --- a/.evergreen/setup-remote.sh +++ b/.evergreen/setup-remote.sh @@ -45,4 +45,4 @@ echo "export MONGODB_URI=$MONGODB_URI" >> env.sh # Ensure the remote database is populated. . .evergreen/utils.sh -CONN_STRING=MONGODB_URI provision_atlas +CONN_STRING=MONGODB_URI scaffold_atlas From 04926fb4f58511a64d1c34f4db787064f23b8b54 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Sun, 1 Dec 2024 21:15:47 -0600 Subject: [PATCH 51/56] fix remote handling --- .evergreen/setup-remote.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.evergreen/setup-remote.sh b/.evergreen/setup-remote.sh index f43844f..551f548 100644 --- a/.evergreen/setup-remote.sh +++ b/.evergreen/setup-remote.sh @@ -45,4 +45,4 @@ echo "export MONGODB_URI=$MONGODB_URI" >> env.sh # Ensure the remote database is populated. . .evergreen/utils.sh -CONN_STRING=MONGODB_URI scaffold_atlas +CONN_STRING=$MONGODB_URI scaffold_atlas From c650983268e16b9d0b47d9b5b79a37cd3035187a Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Sun, 1 Dec 2024 21:20:21 -0600 Subject: [PATCH 52/56] fix local --- .evergreen/provision-atlas.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.evergreen/provision-atlas.sh b/.evergreen/provision-atlas.sh index c419d35..dc1e010 100644 --- a/.evergreen/provision-atlas.sh +++ b/.evergreen/provision-atlas.sh @@ -3,8 +3,8 @@ set -eu . .evergreen/utils.sh -CONN_STRING=$(setup_local_atlas) -CONN_STRING=$CONN_STRING scaffold_atlas +setup_local_atlas +scaffold_atlas # Get the secrets. source secrets-export.sh From cf3c1544411464fcaf7f0df34ca043dae91b6a0a Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Mon, 2 Dec 2024 07:48:03 -0600 Subject: [PATCH 53/56] add more llama_index filters --- .../indexes/filters_metadata_year.json | 16 ++++++++++++++++ .../indexes/filters_vector_index.json | 18 ++++++++++++++++++ 2 files changed, 34 insertions(+) create mode 100644 llama-index-python-vectorstore/indexes/filters_metadata_year.json create mode 100644 llama-index-python-vectorstore/indexes/filters_vector_index.json diff --git a/llama-index-python-vectorstore/indexes/filters_metadata_year.json b/llama-index-python-vectorstore/indexes/filters_metadata_year.json new file mode 100644 index 0000000..4f70c3e --- /dev/null +++ b/llama-index-python-vectorstore/indexes/filters_metadata_year.json @@ -0,0 +1,16 @@ +{ + "mappings": { + "dynamic": false, + "fields": { + "metadata.year": [ + { + "type": "number" + } + ] + } + }, + "name": "metadata_year", + "type": "search", + "database": "llama_index_test_db", + "collectionName": "llama_index_test_filters" +} diff --git a/llama-index-python-vectorstore/indexes/filters_vector_index.json b/llama-index-python-vectorstore/indexes/filters_vector_index.json new file mode 100644 index 0000000..9ea597d --- /dev/null +++ b/llama-index-python-vectorstore/indexes/filters_vector_index.json @@ -0,0 +1,18 @@ +{ + "fields": [ + { + "numDimensions": 10, + "path": "embedding", + "similarity": "cosine", + "type": "vector" + }, + { + "path": "metadata.year", + "type": "filter" + } + ], + "name": "vector_index", + "type": "vectorSearch", + "database": "llama_index_test_db", + "collectionName": "llama_index_test_filters" +} \ No newline at end of file From aa418b6d5fd66bb9ab1db2dd442fea5a3f6e6f5f Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Mon, 2 Dec 2024 16:53:54 -0600 Subject: [PATCH 54/56] skip remote llama index --- .../database/llama_index_test_filters.json | 1 + 1 file changed, 1 insertion(+) create mode 100644 llama-index-python-vectorstore/database/llama_index_test_filters.json diff --git a/llama-index-python-vectorstore/database/llama_index_test_filters.json b/llama-index-python-vectorstore/database/llama_index_test_filters.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/llama-index-python-vectorstore/database/llama_index_test_filters.json @@ -0,0 +1 @@ +[] \ No newline at end of file From 620845f1e1d7547f28cc25bec6479a99c71fcfe2 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Mon, 2 Dec 2024 19:00:57 -0600 Subject: [PATCH 55/56] skip remote llama index --- .evergreen/config.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.evergreen/config.yml b/.evergreen/config.yml index a950622..c1ffbf6 100644 --- a/.evergreen/config.yml +++ b/.evergreen/config.yml @@ -259,8 +259,9 @@ buildvariants: - rhel87-small tasks: - name: test-llama-index-local - - name: test-llama-index-remote - batchtime: 10080 # 1 week + # TODO: INTPYTHON-440 + # - name: test-llama-index-remote + # batchtime: 10080 # 1 week - name: test-docarray-rhel display_name: DocArray RHEL From c4ad06e8d600c0d83e3625933d08186bb312de28 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Tue, 3 Dec 2024 08:05:13 -0600 Subject: [PATCH 56/56] remove new filters --- .../indexes/filters_metadata_year.json | 16 ---------------- .../indexes/filters_vector_index.json | 18 ------------------ 2 files changed, 34 deletions(-) delete mode 100644 llama-index-python-vectorstore/indexes/filters_metadata_year.json delete mode 100644 llama-index-python-vectorstore/indexes/filters_vector_index.json diff --git a/llama-index-python-vectorstore/indexes/filters_metadata_year.json b/llama-index-python-vectorstore/indexes/filters_metadata_year.json deleted file mode 100644 index 4f70c3e..0000000 --- a/llama-index-python-vectorstore/indexes/filters_metadata_year.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "mappings": { - "dynamic": false, - "fields": { - "metadata.year": [ - { - "type": "number" - } - ] - } - }, - "name": "metadata_year", - "type": "search", - "database": "llama_index_test_db", - "collectionName": "llama_index_test_filters" -} diff --git a/llama-index-python-vectorstore/indexes/filters_vector_index.json b/llama-index-python-vectorstore/indexes/filters_vector_index.json deleted file mode 100644 index 9ea597d..0000000 --- a/llama-index-python-vectorstore/indexes/filters_vector_index.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "fields": [ - { - "numDimensions": 10, - "path": "embedding", - "similarity": "cosine", - "type": "vector" - }, - { - "path": "metadata.year", - "type": "filter" - } - ], - "name": "vector_index", - "type": "vectorSearch", - "database": "llama_index_test_db", - "collectionName": "llama_index_test_filters" -} \ No newline at end of file