Commit 22e15f9

feat/break up int tests for CI (#333)

* break up int tests for CI
* rename tests
* make sure all tests are tagged
* add check for untagged int tests
* add back in discord channels secret
* remove default container name
* comment out non-existing motherduck and snowflake secrets

1 parent 41f5b19 commit 22e15f9

35 files changed: +380 −175 lines
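The mechanics of the change: every connector integration test carries a @pytest.mark.tags(...) marker naming its connector type, direction, and now a storage category, and CI shards the suite on those categories. A minimal sketch of a tagged test, assuming the tags marker and --tags option come from a tagging plugin such as pytest-tagging (the tag names and test name mirror the diff below; the CONNECTOR_TYPE value and test body are illustrative):

    import pytest

    from test.integration.connectors.utils.constants import BLOB_STORAGE_TAG, SOURCE_TAG

    CONNECTOR_TYPE = "databricks_volumes"  # assumed value; defined per test module


    @pytest.mark.asyncio
    @pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG, BLOB_STORAGE_TAG)
    async def test_volumes_native_source(tmp_path):
        # Collected by `pytest --tags blob_storage` (the blob-storage CI shard)
        # and deselected by every other category's shard.
        ...

Run the shard the way the new Make targets do: PYTHONPATH=. pytest --tags blob_storage -sv test/integration/connectors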

.github/workflows/e2e.yml

Lines changed: 146 additions & 43 deletions

@@ -26,7 +26,15 @@ jobs:
       python-version: ${{ matrix.python-version }}
       check-only: 'true'

-  api_based_integration_test:
+  check_untagged_tests:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: run check
+        run: |
+          make check-untagged-tests
+
+  api_based_int_test:
     strategy:
       matrix:
         test: ["partitioners", "chunkers"]

@@ -48,7 +56,7 @@ jobs:
           make integration-test-${{ matrix.test }}
           make parse-skipped-tests

-  embedders_integration_test:
+  embedders_int_test:
     runs-on: ubuntu-latest
     steps:
       - uses: 'actions/checkout@v4'

@@ -74,9 +82,9 @@ jobs:
           make integration-test-embedders
           make parse-skipped-tests

-  source_connectors_integration_test:
+  blob_storage_connectors_int_test:
     runs-on: ubuntu-latest-m
-    needs: [ setup ]
+    needs: [ setup, check_untagged_tests ]
     steps:
       - uses: 'actions/checkout@v4'
       - name: Set up Python ${{ matrix.python-version }}

@@ -96,31 +104,30 @@ jobs:
           docker compose version
       - name: Run Integration Tests
         env:
+          # Databricks
           DATABRICKS_HOST: ${{secrets.DATABRICKS_HOST}}
           DATABRICKS_CATALOG: ${{secrets.DATABRICKS_CATALOG}}
           DATABRICKS_CLIENT_ID: ${{secrets.DATABRICKS_CLIENT_ID}}
           DATABRICKS_CLIENT_SECRET: ${{secrets.DATABRICKS_CLIENT_SECRET}}
-          DISCORD_TOKEN: ${{ secrets.DISCORD_TOKEN }}
-          DISCORD_CHANNELS: ${{ secrets.DISCORD_CHANNELS }}
-          CONFLUENCE_USER_EMAIL: ${{secrets.CONFLUENCE_USER_EMAIL}}
-          CONFLUENCE_API_TOKEN: ${{secrets.CONFLUENCE_API_TOKEN}}
-          ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }}
-          ASTRA_DB_API_ENDPOINT: ${{ secrets.ASTRA_DB_ENDPOINT }}
-          MONGODB_URI: ${{ secrets.MONGODB_URI }}
-          MONGODB_DATABASE: ${{ secrets.MONGODB_DATABASE_NAME }}
-          KAFKA_API_KEY: ${{ secrets.KAFKA_API_KEY }}
-          KAFKA_SECRET: ${{ secrets.KAFKA_SECRET }}
-          KAFKA_BOOTSTRAP_SERVER: ${{ secrets.KAFKA_BOOTSTRAP_SERVER }}
           DATABRICKS_PAT: ${{ secrets.DATABRICKS_PAT }}
+          # Onedrive
+          MS_CLIENT_CRED: ${{ secrets.MS_CLIENT_CRED }}
+          MS_CLIENT_ID: ${{ secrets.MS_CLIENT_ID }}
+          MS_TENANT_ID: ${{ secrets.MS_TENANT_ID }}
+          MS_USER_EMAIL: ${{ secrets.MS_USER_EMAIL }}
+          MS_USER_PNAME: ${{ secrets.MS_USER_PNAME }}
+          # S3
+          S3_INGEST_TEST_ACCESS_KEY: ${{ secrets.S3_INGEST_TEST_ACCESS_KEY }}
+          S3_INGEST_TEST_SECRET_KEY: ${{ secrets.S3_INGEST_TEST_SECRET_KEY }}
         run : |
           source .venv/bin/activate
           make install-test
-          make integration-test-connectors-src
+          make integration-test-connectors-blob-storage
           make parse-skipped-tests

-  destination_connectors_integration_test:
+  sql_connectors_int_test:
     runs-on: ubuntu-latest-m
-    needs: [ setup ]
+    needs: [ setup, check_untagged_tests ]
     steps:
       - uses: 'actions/checkout@v4'
       - name: Set up Python ${{ matrix.python-version }}

@@ -140,39 +147,137 @@ jobs:
           docker compose version
       - name: Run Integration Tests
         env:
-          DATABRICKS_HOST: ${{secrets.DATABRICKS_HOST}}
-          DATABRICKS_CATALOG: ${{secrets.DATABRICKS_CATALOG}}
-          DATABRICKS_CLIENT_ID: ${{secrets.DATABRICKS_CLIENT_ID}}
-          DATABRICKS_CLIENT_SECRET: ${{secrets.DATABRICKS_CLIENT_SECRET}}
+          # Motherduck
+          # MOTHERDUCK_TOKEN: ${{ secrets.MOTHERDUCK_TOKEN }}
+          # Snowflake
+          # LOCALSTACK_AUTH_TOKEN: ${{ secrets.LOCALSTACK_AUTH_TOKEN }}
+          # Delta Tables
           S3_INGEST_TEST_ACCESS_KEY: ${{ secrets.S3_INGEST_TEST_ACCESS_KEY }}
           S3_INGEST_TEST_SECRET_KEY: ${{ secrets.S3_INGEST_TEST_SECRET_KEY }}
-          GCP_INGEST_SERVICE_KEY: ${{ secrets.GCP_INGEST_SERVICE_KEY }}
-          AZURE_DEST_CONNECTION_STR: ${{ secrets.AZURE_DEST_CONNECTION_STR }}
-          MS_CLIENT_CRED: ${{ secrets.MS_CLIENT_CRED }}
-          MS_CLIENT_ID: ${{ secrets.MS_CLIENT_ID }}
-          MS_TENANT_ID: ${{ secrets.MS_TENANT_ID }}
-          MS_USER_EMAIL: ${{ secrets.MS_USER_EMAIL }}
-          MS_USER_PNAME: ${{ secrets.MS_USER_PNAME }}
-          PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }}
+        run : |
+          source .venv/bin/activate
+          make install-test
+          make integration-test-connectors-sql
+          make parse-skipped-tests
+
+  nosql_connectors_int_test:
+    runs-on: ubuntu-latest-m
+    needs: [ setup, check_untagged_tests ]
+    steps:
+      - uses: 'actions/checkout@v4'
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.10"
+      - name: Get full Python version
+        id: full-python-version
+        run: echo version=$(python -c "import sys; print('-'.join(str(v) for v in sys.version_info))") >> $GITHUB_OUTPUT
+      - name: Setup virtual environment
+        uses: ./.github/actions/base-cache
+        with:
+          python-version: "3.10"
+      - name: Setup up docker
+        run: |
+          sudo make install-docker-compose
+          docker compose version
+      - name: Run Integration Tests
+        env:
+          # MongoDB
+          MONGODB_URI: ${{ secrets.MONGODB_URI }}
+          MONGODB_DATABASE: ${{ secrets.MONGODB_DATABASE_NAME }}
+          # Redis
+          AZURE_REDIS_INGEST_TEST_PASSWORD: ${{ secrets.AZURE_REDIS_INGEST_TEST_PASSWORD }}
+          # Vectara
+          VECTARA_OAUTH_CLIENT_ID: ${{secrets.VECTARA_OAUTH_CLIENT_ID}}
+          VECTARA_OAUTH_SECRET: ${{secrets.VECTARA_OAUTH_SECRET}}
+          VECTARA_CUSTOMER_ID: ${{secrets.VECTARA_CUSTOMER_ID}}
+        run : |
+          source .venv/bin/activate
+          make install-test
+          make integration-test-connectors-nosql
+          make parse-skipped-tests
+
+  vector_db_connectors_int_test:
+    runs-on: ubuntu-latest-m
+    needs: [ setup, check_untagged_tests ]
+    steps:
+      - uses: 'actions/checkout@v4'
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.10"
+      - name: Get full Python version
+        id: full-python-version
+        run: echo version=$(python -c "import sys; print('-'.join(str(v) for v in sys.version_info))") >> $GITHUB_OUTPUT
+      - name: Setup virtual environment
+        uses: ./.github/actions/base-cache
+        with:
+          python-version: "3.10"
+      - name: Setup up docker
+        run: |
+          sudo make install-docker-compose
+          docker compose version
+      - name: Run Integration Tests
+        env:
+          # AstraDB
           ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }}
           ASTRA_DB_API_ENDPOINT: ${{ secrets.ASTRA_DB_ENDPOINT }}
-          AZURE_SEARCH_ENDPOINT: ${{ secrets.AZURE_SEARCH_ENDPOINT }}
+          # Azure AI Search
           AZURE_SEARCH_API_KEY: ${{ secrets.AZURE_SEARCH_API_KEY }}
-          AZURE_REDIS_INGEST_TEST_PASSWORD: ${{ secrets.AZURE_REDIS_INGEST_TEST_PASSWORD }}
-          MONGODB_URI: ${{ secrets.MONGODB_URI }}
-          MONGODB_DATABASE: ${{ secrets.MONGODB_DATABASE_NAME }}
-          QDRANT_API_KEY: ${{ secrets.QDRANT_API_KEY }}
-          QDRANT_SERVER_URL: ${{ secrets.QDRANT_SERVER_URL }}
+          # LanceDB
+          AZURE_DEST_CONNECTION_STR: ${{ secrets.AZURE_DEST_CONNECTION_STR }}
+          S3_INGEST_TEST_ACCESS_KEY: ${{ secrets.S3_INGEST_TEST_ACCESS_KEY }}
+          S3_INGEST_TEST_SECRET_KEY: ${{ secrets.S3_INGEST_TEST_SECRET_KEY }}
+          GCP_INGEST_SERVICE_KEY: ${{ secrets.GCP_INGEST_SERVICE_KEY }}
+          # Pinecone
+          PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }}
+        run : |
+          source .venv/bin/activate
+          make install-test
+          make integration-test-connectors-vector-db
+          make parse-skipped-tests
+
+  uncategorized_connectors_int_test:
+    runs-on: ubuntu-latest-m
+    needs: [ setup, check_untagged_tests ]
+    steps:
+      - uses: 'actions/checkout@v4'
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.10"
+      - name: Get full Python version
+        id: full-python-version
+        run: echo version=$(python -c "import sys; print('-'.join(str(v) for v in sys.version_info))") >> $GITHUB_OUTPUT
+      - name: Setup virtual environment
+        uses: ./.github/actions/base-cache
+        with:
+          python-version: "3.10"
+      - name: Setup up docker
+        run: |
+          sudo make install-docker-compose
+          docker compose version
+      - name: Run Integration Tests
+        env:
+          # Discord
+          DISCORD_TOKEN: ${{ secrets.DISCORD_TOKEN }}
+          DISCORD_CHANNELS: ${{ secrets.DISCORD_CHANNELS }}
+          # Kafka
           KAFKA_API_KEY: ${{ secrets.KAFKA_API_KEY }}
           KAFKA_SECRET: ${{ secrets.KAFKA_SECRET }}
           KAFKA_BOOTSTRAP_SERVER: ${{ secrets.KAFKA_BOOTSTRAP_SERVER }}
+          # Confluence
+          CONFLUENCE_USER_EMAIL: ${{secrets.CONFLUENCE_USER_EMAIL}}
+          CONFLUENCE_API_TOKEN: ${{secrets.CONFLUENCE_API_TOKEN}}
+          # Notion
+          NOTION_API_KEY: ${{ secrets.NOTION_API_KEY }}
         run : |
           source .venv/bin/activate
           make install-test
-          make integration-test-connectors-dest
+          make integration-test-connectors-uncategorized
           make parse-skipped-tests

-  test_src:
+  src_e2e_test:
     strategy:
       matrix:
         python-version: ["3.9","3.10"]

@@ -243,8 +348,8 @@ jobs:
           pip freeze
           ./test_e2e/test-src.sh

-  test_src_api:
-    runs-on: ubuntu-latest-m
+  src_api_test:
+    runs-on: ubuntu-latest
     needs: [ setup ]
     steps:
       # actions/checkout MUST come before auth

@@ -266,9 +371,7 @@ jobs:
         run: |
           ./test_e2e/src/against-api.sh

-
-
-  test_dest:
+  dest_e2e_test:
     environment: ci
     strategy:
       matrix:
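Two structural points in the workflow above: each connector shard now declares needs: [ setup, check_untagged_tests ], so an untagged test fails the cheap ubuntu-latest check before any of the heavier ubuntu-latest-m connector jobs start, and each shard's env block carries only the secrets its own connector category uses instead of the previous catch-all list. The Motherduck and Snowflake entries are commented out because, per the commit message, those secrets do not exist in the repository settings yet.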

Makefile

Lines changed: 25 additions & 6 deletions

@@ -141,16 +141,35 @@ integration-test-chunkers:
 integration-test-embedders:
 	PYTHONPATH=. pytest -sv test/integration/embedders --json-report

-.PHONY: integration-test-connectors-src
-integration-test-connectors-src:
-	PYTHONPATH=. pytest --tags source -sv test/integration/connectors --cov-report=json --json-report
+.PHONY: integration-test-connectors-blob-storage
+integration-test-connectors-blob-storage:
+	PYTHONPATH=. pytest --tags blob_storage -sv test/integration/connectors --json-report

+.PHONY: integration-test-connectors-sql
+integration-test-connectors-sql:
+	PYTHONPATH=. pytest --tags sql -sv test/integration/connectors --json-report

-.PHONY: integration-test-connectors-dest
-integration-test-connectors-dest:
-	PYTHONPATH=. pytest --tags destination -sv test/integration/connectors --json-report
+.PHONY: integration-test-connectors-nosql
+integration-test-connectors-nosql:
+	PYTHONPATH=. pytest --tags nosql -sv test/integration/connectors --json-report
+
+.PHONY: integration-test-connectors-vector-db
+integration-test-connectors-vector-db:
+	PYTHONPATH=. pytest --tags vector_db -sv test/integration/connectors --json-report
+
+.PHONY: integration-test-connectors-graph-db
+integration-test-connectors-graph-db:
+	PYTHONPATH=. pytest --tags graph_db -sv test/integration/connectors --json-report
+
+.PHONY: integration-test-connectors-uncategorized
+integration-test-connectors-uncategorized:
+	PYTHONPATH=. pytest --tags uncategorized -sv test/integration/connectors --json-report

 .PHONY: parse-skipped-tests
 parse-skipped-tests:
 	PYTHONPATH=. python ./scripts/parse_pytest_report.py

+.PHONY: check-untagged-tests
+check-untagged-tests:
+	./scripts/check_untagged_tests.sh
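The Make targets above rely on tag-based selection through pytest's --tags/--exclude-tags options. Those options are not pytest built-ins; they presumably come from a tagging plugin such as pytest-tagging. For readers unfamiliar with the mechanism, here is a minimal, hypothetical conftest.py reimplementation of the same selection behavior — an illustrative sketch, not this project's actual code:

    # conftest.py — hypothetical sketch of --tags/--exclude-tags selection.
    import pytest


    def pytest_addoption(parser):
        # Repeatable flags, e.g. `pytest --tags sql --tags nosql`.
        parser.addoption("--tags", action="append", default=[],
                         help="only run tests carrying one of these tags")
        parser.addoption("--exclude-tags", action="append", default=[],
                         help="deselect tests carrying one of these tags")


    def pytest_collection_modifyitems(config, items):
        wanted = set(config.getoption("--tags"))
        excluded = set(config.getoption("--exclude-tags"))
        if not wanted and not excluded:
            return
        selected, deselected = [], []
        for item in items:
            # Gather every argument passed to @pytest.mark.tags(...).
            tags = {arg for marker in item.iter_markers(name="tags") for arg in marker.args}
            keep = (not wanted or tags & wanted) and not (tags & excluded)
            (selected if keep else deselected).append(item)
        if deselected:
            # Deselect rather than skip, so `--collect-only` output shrinks too —
            # which is what check_untagged_tests.sh below depends on.
            config.hook.pytest_deselected(items=deselected)
            items[:] = selected

With selection wired up this way, make integration-test-connectors-sql expands to a pytest run that collects only tests carrying the sql tag.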

scripts/check_untagged_tests.sh

Lines changed: 16 additions & 0 deletions

@@ -0,0 +1,16 @@
+#!/usr/bin/env bash
+
+function create_tags {
+  echo "--exclude-tags "{blob_storage,sql,nosql,vector_db,graph_db,uncategorized}
+}
+
+tags=$(create_tags)
+# shellcheck disable=SC2086
+missing_tags=$(PYTHONPATH=. pytest --collect-only test/integration/connectors $tags | grep -Ec "<Function|<Coroutine")
+echo "$missing_tags"
+if [ "$missing_tags" -gt 0 ]; then
+  echo "Missing tags in integration tests: "
+  # shellcheck disable=SC2086
+  PYTHONPATH=. pytest --collect-only test/integration/connectors $tags | grep -E "<Function|<Coroutine"
+  exit 1
+fi
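Two details of the script's mechanics: the quoted-prefix brace expansion in create_tags emits one flag pair per category (--exclude-tags blob_storage --exclude-tags sql ... and so on), so the collect-only run sees only tests carrying none of the known category tags. Any surviving <Function ...> or <Coroutine ...> collection node therefore means an untagged test, and the exit 1 fails the check_untagged_tests CI job. Note that grep needs -E here: without extended regex the | in "<Function|<Coroutine" is matched literally and the count would always be 0.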

test/integration/connectors/databricks/test_volumes_native.py

Lines changed: 10 additions & 6 deletions

@@ -10,7 +10,11 @@
 from databricks.sdk import WorkspaceClient
 from databricks.sdk.errors.platform import NotFound

-from test.integration.connectors.utils.constants import DESTINATION_TAG, SOURCE_TAG
+from test.integration.connectors.utils.constants import (
+    BLOB_STORAGE_TAG,
+    DESTINATION_TAG,
+    SOURCE_TAG,
+)
 from test.integration.connectors.utils.validation.source import (
     SourceValidationConfigs,
     source_connector_validation,

@@ -83,7 +87,7 @@ def get_pat_env_data() -> PATEnvData:

 @pytest.mark.asyncio
-@pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG)
+@pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG, BLOB_STORAGE_TAG)
 @requires_env(
     "DATABRICKS_HOST", "DATABRICKS_CLIENT_ID", "DATABRICKS_CLIENT_SECRET", "DATABRICKS_CATALOG"
 )

@@ -115,7 +119,7 @@ async def test_volumes_native_source(tmp_path: Path):

 @pytest.mark.asyncio
-@pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG)
+@pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG, BLOB_STORAGE_TAG)
 @requires_env("DATABRICKS_HOST", "DATABRICKS_PAT", "DATABRICKS_CATALOG")
 async def test_volumes_native_source_pat(tmp_path: Path):
     env_data = get_pat_env_data()

@@ -144,7 +148,7 @@ async def test_volumes_native_source_pat(tmp_path: Path):
 )

-@pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG)
+@pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG, BLOB_STORAGE_TAG)
 @requires_env("DATABRICKS_HOST", "DATABRICKS_PAT", "DATABRICKS_CATALOG")
 def test_volumes_native_source_pat_invalid_catalog():
     env_data = get_pat_env_data()

@@ -162,7 +166,7 @@ def test_volumes_native_source_pat_invalid_catalog():
     _ = list(indexer.run())

-@pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG)
+@pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG, BLOB_STORAGE_TAG)
 @requires_env("DATABRICKS_HOST")
 def test_volumes_native_source_pat_invalid_pat():
     host = os.environ["DATABRICKS_HOST"]

@@ -231,7 +235,7 @@ def validate_upload(client: WorkspaceClient, catalog: str, volume: str, volume_p

 @pytest.mark.asyncio
-@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG)
+@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG, BLOB_STORAGE_TAG)
 @requires_env(
     "DATABRICKS_HOST", "DATABRICKS_CLIENT_ID", "DATABRICKS_CLIENT_SECRET", "DATABRICKS_CATALOG"
 )
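The imports above reference tag constants from test/integration/connectors/utils/constants, a module this commit does not show. A plausible sketch, inferring the values from the tag strings the Make targets pass to --tags (the BLOB_STORAGE_TAG/DESTINATION_TAG/SOURCE_TAG names come from the diff; the remaining names and all values are assumptions):

    # test/integration/connectors/utils/constants.py — hypothetical sketch.
    SOURCE_TAG = "source"
    DESTINATION_TAG = "destination"

    # Category tags, assumed to match the strings used by `pytest --tags ...`.
    BLOB_STORAGE_TAG = "blob_storage"
    SQL_TAG = "sql"
    NOSQL_TAG = "nosql"
    VECTOR_DB_TAG = "vector_db"
    GRAPH_DB_TAG = "graph_db"
    UNCATEGORIZED_TAG = "uncategorized"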
