diff --git a/.evergreen/config.yml b/.evergreen/config.yml index 876d562..a9bb287 100644 --- a/.evergreen/config.yml +++ b/.evergreen/config.yml @@ -44,7 +44,7 @@ functions: - command: subprocess.exec type: test params: - include_expansions_in_env: [DIR] + include_expansions_in_env: [DIR, REPO_ORG, REPO_BRANCH] working_dir: "src" binary: bash args: [.evergreen/fetch-repo.sh] @@ -96,6 +96,13 @@ post: working_dir: "src" binary: bash args: [drivers-evergreen-tools/.evergreen/teardown.sh] + - command: subprocess.exec + type: setup + params: + include_expansions_in_env: [DIR, REPO_ORG, REPO_BRANCH] + working_dir: "src" + binary: bash + args: [.evergreen/teardown.sh] tasks: - name: test-semantic-kernel-python-local diff --git a/.evergreen/fetch-repo.sh b/.evergreen/fetch-repo.sh index d7e52d5..253df4f 100644 --- a/.evergreen/fetch-repo.sh +++ b/.evergreen/fetch-repo.sh @@ -9,13 +9,32 @@ fi cd ${DIR} +# Allow overrides from the patch build. +REPO_ORG_OVERRIDE=${REPO_ORG:-} +REPO_BRANCH_OVERRIDE=${REPO_BRANCH:-} + # Source the configuration. set -a source config.env set +a +if [ -n "${REPO_ORG_OVERRIDE}" ]; then + REPO_ORG="${REPO_ORG_OVERRIDE}" +fi +if [ -n "${REPO_BRANCH_OVERRIDE}" ]; then + REPO_BRANCH="${REPO_BRANCH_OVERRIDE}" +fi + rm -rf ${REPO_NAME} -git clone ${CLONE_URL} + +ARGS="https://github.com/${REPO_ORG}/${REPO_NAME}" +if [ -n "${REPO_BRANCH:-}" ]; then + ARGS="-b ${REPO_BRANCH} ${ARGS}" +fi + +echo "Cloning repo $ARGS..." +git clone --depth=1 ${ARGS} +echo "Cloning repo $ARGS... done." # Apply patches to upstream repo if desired. 
if [ -d "patches" ]; then diff --git a/.evergreen/teardown.sh b/.evergreen/teardown.sh new file mode 100644 index 0000000..62af497 --- /dev/null +++ b/.evergreen/teardown.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +set -eu + +OVERRIDES= +if [ -n "${REPO_ORG:-}" ]; then + echo "REPO_ORG=$REPO_ORG" + OVERRIDES=1 +fi +if [ -n "${REPO_BRANCH:-}" ]; then + echo "REPO_BRANCH=$REPO_BRANCH" + OVERRIDES=1 +fi + +if [ -z "${OVERRIDES}" ]; then + echo "No overrides" +fi diff --git a/README.md b/README.md index 7c6d949..f384b81 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,8 @@ Within each subdirectory you should expect to have: - `run.sh` -- A script that should handle any additional library installations and steps for executing the test suite. This script should not populate the Atlas database with any required test data. - `config.env` - A file that defines the following environment variables: - `REPO_NAME` -- The name of the AI/ML framework repository that will get cloned - - `CLONE_URL` -- The Github URL to clone into the specified `DIR` + - `REPO_ORG` -- The Github org of the repository + - `REPO_BRANCH` -- The optional branch to clone - `DATABASE` -- The optional database where the Atlas CLI will load your index configs - `database/` -- An optional directory used by `.evergreen/scaffold_atlas.py` to populate a MongoDB database with test data. Only provide this if your tests require pre-populated data. - `database/{collection}.json` -- An optional JSON file containing one or more MongoDB documents that will be uploaded to `$DATABASE.{collection}` in the local Atlas instance. Only provide this if your tests require pre-populated data. @@ -117,7 +118,7 @@ Test execution flow is defined in `.evergreen/config.yml`. The test pipeline's c **[Functions](https://docs.devprod.prod.corp.mongodb.com/evergreen/Project-Configuration/Project-Configuration-Files#functions)** -- We've defined some common functions that will be used. See the `.evergreen/config.yml` for example cases. 
The standard procedure is to fetch the repository, provision Atlas as needed, and then execute the tests specified in the `run.sh` script you create. Ensure that the expansions are provided for these functions, otherwise the tests will run improperly and most likely fail. -- [`fetch repo`](https://github.com/mongodb-labs/ai-ml-pipeline-testing/blob/main/.evergreen/config.yml#L30) -- Clones the library's git repository; make sure to provide the expansion CLONE_URL +- [`fetch repo`](https://github.com/mongodb-labs/ai-ml-pipeline-testing/blob/main/.evergreen/config.yml#L30) -- Clones the library's git repository; make sure to provide the expansion REPO_ORG/REPO_NAME and REPO_BRANCH (optional) - [`execute tests`](https://github.com/mongodb-labs/ai-ml-pipeline-testing/blob/main/.evergreen/config.yml#L51) -- Uses [subprocess.exec](https://docs.devprod.prod.corp.mongodb.com/evergreen/Project-Configuration/Project-Commands#subprocessexec) to run the provided `run.sh` file. `run.sh` must be within the specified `DIR` path. - `fetch source` -- Retrieves the current (`ai-ml-pipeline-testing`) repo - `setup atlas cli` -- Sets up the local Atlas deployment @@ -137,8 +138,7 @@ At the start, we will hopefully add the integration tests themselves. The bad news is that the maintainers of the AI/ML packages may take considerable time to review and merge our changes. The good news is that we can begin testing without pointing to the main branch of the upstream repo. -The parameter value of the `CLONE_URL` is very flexible. -We literally just call `git clone $CLONE_URL`. +We can use `REPO_ORG`, `REPO_NAME`, and an optional `REPO_BRANCH` to define which repo to clone. As such, we can point to an arbitrary branch on an arbitrary repo. 
While developing, we encourage developers to point to a feature branch on their own fork, and add a TODO with the JIRA ticket to update the url @@ -169,3 +169,11 @@ We realized that we could easily get this working without changing the upstream simply by applying a git patch file. This is a standard practice used by `conda package` maintainers, as they often have to build for a more broad set of scenarios than the original authors intended. + +### Running a patch build of a given PR + +Rather than making a new branch and modifying a `config.env` file, you can run a patch build as follows: + +```bash +evergreen patch -p ai-ml-pipeline-testing --param REPO_ORG="" --param REPO_BRANCH="" -y "" +``` diff --git a/chatgpt-retrieval-plugin/config.env b/chatgpt-retrieval-plugin/config.env index d45bb7d..85c8719 100644 --- a/chatgpt-retrieval-plugin/config.env +++ b/chatgpt-retrieval-plugin/config.env @@ -1,3 +1,3 @@ REPO_NAME=chatgpt-retrieval-plugin -CLONE_URL="https://github.com/openai/chatgpt-retrieval-plugin.git" +REPO_ORG=openai DATABASE=chatgpt_retrieval_plugin_test_db diff --git a/docarray/config.env b/docarray/config.env index c18451c..72f016a 100644 --- a/docarray/config.env +++ b/docarray/config.env @@ -1,3 +1,3 @@ REPO_NAME=docarray -CLONE_URL="https://github.com/docarray/docarray.git" +REPO_ORG=docarray DATABASE=docarray_test_db diff --git a/haystack-embeddings/config.env b/haystack-embeddings/config.env index c4aefd7..4394e22 100644 --- a/haystack-embeddings/config.env +++ b/haystack-embeddings/config.env @@ -1,3 +1,3 @@ REPO_NAME=haystack-core-integrations -CLONE_URL="https://github.com/deepset-ai/haystack-core-integrations.git" +REPO_ORG=deepset-ai DATABASE=haystack_integration_test diff --git a/haystack-fulltext/config.env b/haystack-fulltext/config.env index 4b84856..3ac0d41 100644 --- a/haystack-fulltext/config.env +++ b/haystack-fulltext/config.env @@ -1,3 +1,3 @@ REPO_NAME=haystack-core-integrations 
-CLONE_URL="https://github.com/deepset-ai/haystack-core-integrations.git" +REPO_ORG=deepset-ai DATABASE=haystack_test diff --git a/langchain-python/config.env b/langchain-python/config.env index 5470511..18baa54 100644 --- a/langchain-python/config.env +++ b/langchain-python/config.env @@ -1,3 +1,3 @@ REPO_NAME=langchain-mongodb -CLONE_URL="https://github.com/langchain-ai/langchain-mongodb.git" +REPO_ORG=langchain-ai DATABASE=langchain_test_db diff --git a/langchaingo-golang/config.env b/langchaingo-golang/config.env index aed3f3a..4d93390 100644 --- a/langchaingo-golang/config.env +++ b/langchaingo-golang/config.env @@ -1,2 +1,2 @@ REPO_NAME=langchaingo -CLONE_URL="https://github.com/tmc/langchaingo.git" +REPO_ORG=tmc diff --git a/langgraph-python/config.env b/langgraph-python/config.env index 8ebc827..568abf1 100644 --- a/langgraph-python/config.env +++ b/langgraph-python/config.env @@ -1,3 +1,3 @@ REPO_NAME=langchain-mongodb -CLONE_URL="https://github.com/langchain-ai/langchain-mongodb.git" +REPO_ORG=langchain-ai DATABASE=langgraph-test diff --git a/llama-index-python-vectorstore/config.env b/llama-index-python-vectorstore/config.env index e4e5c83..00d312b 100644 --- a/llama-index-python-vectorstore/config.env +++ b/llama-index-python-vectorstore/config.env @@ -1,3 +1,3 @@ REPO_NAME=llama_index -CLONE_URL="https://github.com/run-llama/llama_index.git" +REPO_ORG=run-llama DATABASE=llama_index_test_db diff --git a/pymongo-voyageai/config.env b/pymongo-voyageai/config.env index 0cff80f..44e0eff 100644 --- a/pymongo-voyageai/config.env +++ b/pymongo-voyageai/config.env @@ -1,3 +1,3 @@ REPO_NAME=pymongo-voyageai -CLONE_URL="https://github.com/mongodb-labs/pymongo-voyageai.git" +REPO_ORG=mongodb-labs DATABASE="pymongo_voyageai_test_db" diff --git a/semantic-kernel-csharp/config.env b/semantic-kernel-csharp/config.env index a784ac4..2b036d5 100644 --- a/semantic-kernel-csharp/config.env +++ b/semantic-kernel-csharp/config.env @@ -1,3 +1,3 @@ REPO_NAME=semantic-kernel 
-CLONE_URL="https://github.com/microsoft/semantic-kernel.git" +REPO_ORG=microsoft DATABASE=dotnetMSKNearestTest diff --git a/semantic-kernel-python/config.env b/semantic-kernel-python/config.env index 0487af5..efda3c4 100644 --- a/semantic-kernel-python/config.env +++ b/semantic-kernel-python/config.env @@ -1,3 +1,3 @@ REPO_NAME=semantic-kernel -CLONE_URL="https://github.com/microsoft/semantic-kernel.git" +REPO_ORG=microsoft DATABASE=pyMSKTest