Skip to content

Commit 4e6b53b

Browse files
chore: add CloudFetch call verification to proxy tests (#111)
## 🥞 Stacked PR Use this [link](https://github.com/adbc-drivers/databricks/pull/111/files) to review incremental changes. - [**stack/e.wang/add-thrift-call-verification**](#111) [[Files changed](https://github.com/adbc-drivers/databricks/pull/111/files)] - [stack/e.wang/fix-cloudfetch-timeout](#112) [[Files changed](https://github.com/adbc-drivers/databricks/pull/112/files/fbd12dc00750ec405c2130138ce59f4d1db9c46d..277c10fc5ba030ae3f3f18d4026c38186fc8941f)] --------- ## What's Changed This PR adds comprehensive call verification to CloudFetch proxy tests using dynamic baseline measurement. ### Changes - **Dynamic baseline verification**: Tests now establish baselines by running queries without failure scenarios, then compare actual vs expected call counts - **CloudFetch failure scenarios**: Added verification for: - cloudfetch_expired_link: Verifies FetchResults is called again to refresh expired links - cloudfetch_403: Verifies FetchResults is called again after 403 Forbidden - cloudfetch_timeout: Verifies retry behavior with configurable timeout - cloudfetch_connection_reset: Verifies retry behavior on connection reset - **Helper methods**: Added CreateProxiedConnectionWithParameters for tests requiring custom configuration - **Test infrastructure**: Uses ProxyControlClient to count Thrift method calls and cloud downloads ### Testing The tests validate that the driver correctly: 1. Refreshes CloudFetch download links by calling FetchResults again when links expire or return 403 2. Retries CloudFetch downloads with exponential backoff on timeout or connection reset 3. Uses dynamic baselines to account for different result set sizes and prefetch behavior Note: The CloudFetchTimeout test currently fails because the Thrift driver does not apply the configured CloudFetch timeout to the HttpClient. This will be addressed in a follow-up PR. Co-authored-by: Claude Sonnet 4.5 <noreply@anthropic.com>
1 parent 7c88c8b commit 4e6b53b

File tree

6 files changed

+488
-73
lines changed

6 files changed

+488
-73
lines changed

.github/workflows/proxy-tests.yml

Lines changed: 138 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,138 @@
1+
# Copyright (c) 2025 ADBC Drivers Contributors
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
name: Proxy Tests
16+
17+
on:
18+
push:
19+
branches:
20+
- main
21+
paths:
22+
- '.github/workflows/proxy-tests.yml'
23+
- 'test-infrastructure/proxy-server/**'
24+
- 'test-infrastructure/tests/csharp/**'
25+
- 'csharp/src/**'
26+
pull_request:
27+
# Only runs on PRs from the repo itself, not forks
28+
paths:
29+
- '.github/workflows/proxy-tests.yml'
30+
- 'test-infrastructure/proxy-server/**'
31+
- 'test-infrastructure/tests/csharp/**'
32+
- 'csharp/src/**'
33+
workflow_dispatch:
34+
inputs:
35+
run_all_tests:
36+
description: 'Run all tests (ignores path filters)'
37+
required: false
38+
default: 'true'
39+
type: boolean
40+
41+
concurrency:
42+
group: ${{ github.repository }}-${{ github.ref }}-${{ github.workflow }}
43+
cancel-in-progress: true
44+
45+
permissions:
46+
contents: read
47+
48+
jobs:
49+
proxy-tests:
50+
name: "Proxy Tests"
51+
runs-on: ubuntu-latest
52+
if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
53+
timeout-minutes: 20
54+
steps:
55+
- name: Checkout repository
56+
uses: actions/checkout@v4
57+
with:
58+
ref: ${{ github.event.pull_request.head.sha || github.sha }}
59+
fetch-depth: 0
60+
submodules: recursive
61+
persist-credentials: false
62+
63+
- name: Setup .NET 8.0
64+
uses: actions/setup-dotnet@v4
65+
with:
66+
dotnet-version: '8.0.x'
67+
68+
- name: Setup Python
69+
uses: actions/setup-python@v5
70+
with:
71+
python-version: '3.11'
72+
73+
- name: Install Python dependencies
74+
run: |
75+
python -m pip install --upgrade pip
76+
pip install -r test-infrastructure/proxy-server/requirements.txt
77+
78+
- name: Generate mitmproxy certificates
79+
run: |
80+
# Start mitmdump in background to generate certificates
81+
mitmdump &
82+
MITM_PID=$!
83+
# Wait for certificate generation (happens on first run)
84+
sleep 5
85+
# Stop mitmdump forcefully
86+
kill -9 $MITM_PID || true
87+
wait $MITM_PID 2>/dev/null || true
88+
# Ensure no mitmdump processes are running
89+
pkill -9 mitmdump || true
90+
sleep 2
91+
# Verify certificate was created
92+
ls -la ~/.mitmproxy/
93+
cat ~/.mitmproxy/mitmproxy-ca-cert.pem
94+
95+
- name: Trust mitmproxy certificate
96+
run: |
97+
sudo cp ~/.mitmproxy/mitmproxy-ca-cert.pem /usr/local/share/ca-certificates/mitmproxy.crt
98+
sudo update-ca-certificates
99+
100+
- name: Build test project
101+
run: |
102+
cd test-infrastructure/tests/csharp
103+
dotnet build
104+
105+
- name: Create test configuration
106+
env:
107+
DATABRICKS_SERVER_HOSTNAME: ${{ secrets.DATABRICKS_HOST }}
108+
DATABRICKS_TOKEN: ${{ secrets.DATABRICKS_TOKEN }}
109+
DATABRICKS_HTTP_PATH: ${{ secrets.TEST_PECO_WAREHOUSE_HTTP_PATH }}
110+
run: |
111+
cat > databricks-test-config.json << EOF
112+
{
113+
"uri": "https://${{ env.DATABRICKS_SERVER_HOSTNAME }}${{ env.DATABRICKS_HTTP_PATH }}",
114+
"auth_type": "token",
115+
"token": "${{ env.DATABRICKS_TOKEN }}"
116+
}
117+
EOF
118+
echo "Configuration file created:"
119+
echo "URI: https://${{ env.DATABRICKS_SERVER_HOSTNAME }}${{ env.DATABRICKS_HTTP_PATH }}"
120+
echo "Auth type: token"
121+
122+
- name: Run proxy infrastructure tests
123+
env:
124+
DATABRICKS_TEST_CONFIG_FILE: ${{ github.workspace }}/databricks-test-config.json
125+
run: |
126+
cd test-infrastructure/tests/csharp
127+
dotnet test --filter "FullyQualifiedName~ProxyInfrastructureTests" \
128+
--logger "console;verbosity=normal" \
129+
--no-build
130+
131+
- name: Run CloudFetch proxy tests
132+
env:
133+
DATABRICKS_TEST_CONFIG_FILE: ${{ github.workspace }}/databricks-test-config.json
134+
run: |
135+
cd test-infrastructure/tests/csharp
136+
dotnet test --filter "FullyQualifiedName~CloudFetchTests" \
137+
--logger "console;verbosity=normal" \
138+
--no-build

test-infrastructure/proxy-server/mitmproxy_addon.py

Lines changed: 25 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -337,20 +337,35 @@ def __init__(self):
337337

338338
# Start Flask control API in background thread
339339
def run_api():
340-
app.run(host='0.0.0.0', port=18081, threaded=True)
340+
# Disable reloader and use production mode for faster startup
341+
app.run(host='0.0.0.0', port=18081, threaded=True, use_reloader=False, debug=False)
341342

342343
api_thread = threading.Thread(target=run_api, daemon=True, name="ControlAPI")
343344
api_thread.start()
344345
ctx.log.info("Control API started on http://0.0.0.0:18081")
345346

346-
def request(self, flow: http.HTTPFlow) -> None:
347+
async def request(self, flow: http.HTTPFlow) -> None:
347348
"""
348349
Intercept requests and inject failures based on enabled scenarios.
349350
Called by mitmproxy for each HTTP request.
351+
Made async to support non-blocking delays.
350352
"""
351353
# Detect request type
352354
if self._is_cloudfetch_download(flow.request):
353-
self._handle_cloudfetch_request(flow)
355+
# Track cloud fetch download
356+
with state_lock:
357+
call_record = {
358+
"timestamp": time.time(),
359+
"type": "cloud_download",
360+
"url": flow.request.pretty_url,
361+
}
362+
call_history.append(call_record)
363+
364+
# Enforce max history limit
365+
if len(call_history) > MAX_CALL_HISTORY:
366+
del call_history[:len(call_history) - MAX_CALL_HISTORY]
367+
368+
await self._handle_cloudfetch_request(flow)
354369
elif self._is_thrift_request(flow.request):
355370
self._handle_thrift_request(flow)
356371

@@ -381,7 +396,7 @@ def _is_thrift_request(self, request: http.Request) -> bool:
381396
# SEA requests use /api/2.0/sql/statements path
382397
return "/sql/1.0/warehouses/" in request.path or "/sql/1.0/endpoints/" in request.path
383398

384-
def _handle_cloudfetch_request(self, flow: http.HTTPFlow) -> None:
399+
async def _handle_cloudfetch_request(self, flow: http.HTTPFlow) -> None:
385400
"""Handle CloudFetch requests and inject failures if scenario is enabled."""
386401
with state_lock:
387402
# Find first enabled CloudFetch scenario
@@ -425,10 +440,12 @@ def _handle_cloudfetch_request(self, flow: http.HTTPFlow) -> None:
425440
self._disable_scenario(scenario_name)
426441

427442
elif action == "delay":
428-
# Inject delay (simulates timeout) - now supports configurable duration
443+
# Inject delay using asyncio.sleep() to avoid blocking the event loop
444+
import asyncio
429445
duration_seconds = scenario_config.get("duration_seconds", 5)
430446
ctx.log.info(f"[INJECT] Delaying {duration_seconds}s for scenario: {scenario_name}")
431-
time.sleep(duration_seconds)
447+
await asyncio.sleep(duration_seconds)
448+
ctx.log.info(f"[INJECT] Delay complete, auto-disabled scenario: {scenario_name}")
432449
self._disable_scenario(scenario_name)
433450
# Let request continue after delay
434451

@@ -455,9 +472,11 @@ def _handle_thrift_request(self, flow: http.HTTPFlow) -> None:
455472
with state_lock:
456473
call_record = {
457474
"timestamp": time.time(),
475+
"type": "thrift",
458476
"method": decoded.get("method", "unknown"),
459477
"message_type": decoded.get("message_type", "unknown"),
460478
"sequence_id": decoded.get("sequence_id", 0),
479+
"fields": decoded.get("fields", {}),
461480
}
462481
call_history.append(call_record)
463482

0 commit comments

Comments (0)