import tempfile
from pathlib import Path

import pandas as pd
import pytest
from fastmcp import Client

from pointblank_mcp_server.pointblank_server import mcp


@pytest.fixture(scope="module")
def mcp_server():
    """Provides the FastMCP server instance."""
    return mcp


@pytest.fixture(scope="module")
def sample_data():
    """Provides test data for validation scenarios."""
    return pd.DataFrame(
        {
            "id": [1, 2, 3, 4, 5],
            "email": [
                "valid@test.com",
                "invalid-email",
                "another@test.com",
                "",
                "test@domain.org",
            ],
            "age": [25, -5, 35, 999, 40],  # Mix of valid/invalid ages
            "score": [85.5, 92.0, 78.5, 88.0, 95.0],
        }
    )


@pytest.fixture
def temp_csv_file(sample_data):
    """Creates a temporary CSV file for testing."""
    with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as f:
        sample_data.to_csv(f.name, index=False)
    yield f.name
    Path(f.name).unlink(missing_ok=True)


@pytest.mark.asyncio
async def test_server_initialization(mcp_server):
    """Test that the MCP server initializes correctly."""
    async with Client(mcp_server) as client:
        tools = await client.list_tools()
        expected_tools = [
            "load_dataframe",
            "create_validator",
            "add_validation_step",
            "interrogate_validator",
            "get_validation_step_output",
        ]
        tool_names = [tool.name for tool in tools]
        for expected_tool in expected_tools:
            assert expected_tool in tool_names, f"Missing tool: {expected_tool}"


@pytest.mark.asyncio
async def test_data_loading_success(mcp_server, temp_csv_file):
    """Test successful data loading."""
    async with Client(mcp_server) as client:
        result = await client.call_tool("load_dataframe", {"input_path": temp_csv_file})

        assert not result.is_error, f"Data loading failed: {result.error}"
        assert hasattr(result.data, "df_id")
        # Shape is returned as a list [rows, cols] instead of a tuple
        assert result.data.shape == [5, 4]


@pytest.mark.asyncio
async def test_data_loading_failure(mcp_server):
    """Test data loading with an invalid file."""
    async with Client(mcp_server) as client:
        try:
            result = await client.call_tool(
                "load_dataframe", {"input_path": "/nonexistent/file.csv"}
            )
            # If we get here, the tool didn't raise an exception; check whether it returned an error
            assert result.is_error
            assert "not found" in str(result.error).lower()
        except Exception as e:
            # Tool raised an exception as expected
            assert "not found" in str(e).lower()


@pytest.mark.asyncio
async def test_validation_workflow_success(mcp_server, temp_csv_file):
    """Test a complete successful validation workflow."""
    async with Client(mcp_server) as client:
        # Load data
        load_result = await client.call_tool("load_dataframe", {"input_path": temp_csv_file})
        assert not load_result.is_error
        df_id = load_result.data.df_id

        # Create validator
        validator_result = await client.call_tool("create_validator", {"df_id": df_id})
        assert not validator_result.is_error
        validator_id = validator_result.data.validator_id

        # Add validation steps
        steps = [
            {"validation_type": "col_vals_not_null", "params": {"columns": "id"}},
            {"validation_type": "col_vals_gt", "params": {"columns": "age", "value": 0}},
            {
                "validation_type": "col_vals_between",
                "params": {"columns": "score", "left": 0, "right": 100},
            },
        ]

        for step in steps:
            step_result = await client.call_tool(
                "add_validation_step", {"validator_id": validator_id, **step}
            )
            assert not step_result.is_error

        # Interrogate
        interrogate_result = await client.call_tool(
            "interrogate_validator", {"validator_id": validator_id}
        )
        assert not interrogate_result.is_error

        summary = interrogate_result.data["validation_summary"]
        assert len(summary) == 3  # Three validation steps


@pytest.mark.asyncio
async def test_validation_with_failures(mcp_server, temp_csv_file):
    """Test validation that detects data quality issues."""
    async with Client(mcp_server) as client:
        # Load data
        load_result = await client.call_tool("load_dataframe", {"input_path": temp_csv_file})
        df_id = load_result.data.df_id

        # Create validator
        validator_result = await client.call_tool("create_validator", {"df_id": df_id})
        validator_id = validator_result.data.validator_id

        # Add validation that should fail for some rows
        step_result = await client.call_tool(
            "add_validation_step",
            {
                "validator_id": validator_id,
                "validation_type": "col_vals_between",
                "params": {"columns": "age", "left": 18, "right": 65},
            },
        )
        assert not step_result.is_error

        # Interrogate
        interrogate_result = await client.call_tool(
            "interrogate_validator", {"validator_id": validator_id}
        )
        assert not interrogate_result.is_error

        summary = interrogate_result.data["validation_summary"]
        assert summary[0]["f_passed"] < 1.0  # Some failures expected


@pytest.mark.asyncio
async def test_export_functionality(mcp_server, temp_csv_file):
    """Test exporting validation results."""
    with tempfile.TemporaryDirectory() as temp_dir:
        async with Client(mcp_server) as client:
            # Setup validation with failures
            load_result = await client.call_tool("load_dataframe", {"input_path": temp_csv_file})
            df_id = load_result.data.df_id

            validator_result = await client.call_tool("create_validator", {"df_id": df_id})
            validator_id = validator_result.data.validator_id

            # Add validation that will fail
            await client.call_tool(
                "add_validation_step",
                {
                    "validator_id": validator_id,
                    "validation_type": "col_vals_gt",
                    "params": {"columns": "age", "value": 100},  # Most ages will fail this
                },
            )

            # Interrogate
            await client.call_tool("interrogate_validator", {"validator_id": validator_id})

            # Export failed rows
            output_path = str(Path(temp_dir) / "failed_rows.csv")
            export_result = await client.call_tool(
                "get_validation_step_output",
                {"validator_id": validator_id, "output_path": output_path, "step_index": 1},
            )

            assert not export_result.is_error
            assert Path(output_path).exists()

            # Verify exported data
            exported_df = pd.read_csv(output_path)
            assert len(exported_df) > 0  # Should have some failed rows


@pytest.mark.asyncio
async def test_concurrent_validators(mcp_server, temp_csv_file):
    """Test that multiple validators can work concurrently."""
    async with Client(mcp_server) as client:
        # Load same data twice
        load_result1 = await client.call_tool("load_dataframe", {"input_path": temp_csv_file})
        load_result2 = await client.call_tool("load_dataframe", {"input_path": temp_csv_file})

        df_id1 = load_result1.data.df_id
        df_id2 = load_result2.data.df_id

        # Create two validators
        validator_result1 = await client.call_tool("create_validator", {"df_id": df_id1})
        validator_result2 = await client.call_tool("create_validator", {"df_id": df_id2})

        validator_id1 = validator_result1.data.validator_id
        validator_id2 = validator_result2.data.validator_id

        # They should be different
        assert validator_id1 != validator_id2


@pytest.mark.asyncio
async def test_memory_cleanup(mcp_server, temp_csv_file):
    """Test that newly created dataframes and validators are tracked by the server."""
    async with Client(mcp_server) as client:
        # Get initial counts using the list tools
        initial_dfs = await client.call_tool("list_loaded_dataframes")
        initial_validators = await client.call_tool("list_active_validators")

        initial_df_count = len(initial_dfs.data["loaded_dataframes"])
        initial_validator_count = len(initial_validators.data["active_validators"])

        # Perform operations that create resources
        load_result = await client.call_tool("load_dataframe", {"input_path": temp_csv_file})
        validator_result = await client.call_tool(
            "create_validator", {"df_id": load_result.data.df_id}
        )

        # Check that resources were created
        current_dfs = await client.call_tool("list_loaded_dataframes")
        current_validators = await client.call_tool("list_active_validators")

        current_df_count = len(current_dfs.data["loaded_dataframes"])
        current_validator_count = len(current_validators.data["active_validators"])

        assert current_df_count > initial_df_count
        assert current_validator_count > initial_validator_count