|
1112 | 1112 | "relatedFiles": ["tests/test_cross_reference.py"], |
1113 | 1113 | "codeExample": "# Wrong:\nresponse = client.post('/api/endpoint', json={'params': {'crate': 'serde'}})\n\n# Correct:\nresponse = client.post('/api/endpoint', json={'crate': 'serde'})" |
1114 | 1114 | }, |
| 1115 | + { |
| 1116 | + "error": "MMR tests failing due to incorrect embedding dimensions", |
| 1117 | + "rootCause": "Test mock embeddings don't match expected model dimensions, causing shape mismatches in cosine similarity calculations", |
| 1118 | + "solution": "MMR tests need mock embeddings matching expected dimensions (384 for bge-small-en-v1.5). Use numpy arrays with correct shape for test embedding vectors.", |
| 1119 | + "context": "Testing MMR diversification algorithm with semantic similarity", |
| 1120 | + "implementation": [ |
| 1121 | + "Mock embeddings must match production model dimensions (384-dimensional vectors)", |
| 1122 | + "Use np.random.rand(384) or predefined test vectors with correct shape", |
| 1123 | + "Ensure embedding alignment is maintained during test result sorting" |
| 1124 | + ], |
| 1125 | + "pattern": "Match test data dimensions to production model requirements", |
| 1126 | + "dateEncountered": "2025-08-10", |
| 1127 | + "relatedFiles": ["tests/test_database.py", "src/docsrs_mcp/database.py"], |
| 1128 | + "codeExample": "# Correct test embedding setup:\nmock_embeddings = [np.random.rand(384) for _ in range(len(results))]\n# NOT: mock_embeddings = [np.array([0.1, 0.2, 0.3])] # Wrong dimensions" |
| 1129 | + }, |
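A minimal test-fixture sketch of the dimension rule in the entry above, assuming pytest-style tests; `make_mock_embeddings` is a hypothetical helper name, not an existing function in the test suite.

```python
import numpy as np

# bge-small-en-v1.5 produces 384-dimensional embeddings, so test mocks must match.
EMBEDDING_DIM = 384

def make_mock_embeddings(n_results: int, dim: int = EMBEDDING_DIM) -> list[np.ndarray]:
    """Build one unit-length random embedding per search result."""
    rng = np.random.default_rng(seed=42)  # seeded so the test stays deterministic
    vectors = [rng.random(dim) for _ in range(n_results)]
    return [v / np.linalg.norm(v) for v in vectors]

def test_mock_embeddings_have_production_shape():
    results = ["result_a", "result_b", "result_c"]  # stand-ins for search hits
    embeddings = make_mock_embeddings(len(results))
    assert all(e.shape == (EMBEDDING_DIM,) for e in embeddings)
```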
| 1130 | + { |
| 1131 | + "error": "Function signature changes breaking existing test calls", |
| 1132 | + "rootCause": "When adding new parameters to functions (like embeddings to MMR), existing test calls fail with missing argument errors", |
| 1133 | + "solution": "When changing function signatures, systematically update all test calls. Use grep to find all test invocations before making signature changes.", |
| 1134 | + "context": "Adding embeddings parameter to MMR diversification functions", |
| 1135 | + "implementation": [ |
| 1136 | + "Search codebase for function calls before signature changes: grep -r 'function_name(' tests/", |
| 1137 | + "Update all test invocations with new required parameters", |
| 1138 | + "Consider backward compatibility with default parameters when possible" |
| 1139 | + ], |
| 1140 | + "pattern": "Comprehensive test update when changing function signatures", |
| 1141 | + "dateEncountered": "2025-08-10", |
| 1142 | + "relatedFiles": ["tests/", "src/docsrs_mcp/database.py"], |
| 1143 | + "codeExample": "# Before signature change, find all calls:\n# grep -r '_apply_mmr_diversification(' tests/\n# Then update each call with new embeddings parameter" |
| 1144 | + }, |
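A sketch of the backward-compatibility suggestion from the entry above (new arguments added with defaults). The function name `_apply_mmr_diversification` comes from the grep example; the parameters and body here are illustrative, not the project's actual implementation.

```python
import numpy as np

def _apply_mmr_diversification(
    results: list[dict],
    lambda_param: float = 0.7,                    # assumed pre-existing parameter
    embeddings: list[np.ndarray] | None = None,   # new argument, optional so old call sites keep working
) -> list[dict]:
    """Return diversified results; skip the semantic term when no embeddings are supplied."""
    if embeddings is None:
        # Legacy callers (and not-yet-updated tests) behave exactly as before.
        return results
    if len(embeddings) != len(results):
        raise ValueError("embeddings must stay 1:1 aligned with results")
    # ... MMR selection using lambda_param and the embeddings would go here ...
    return results
```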
| 1145 | + { |
| 1146 | + "error": "Production testing confusion with server ports and API paths", |
| 1147 | + "rootCause": "Requests were sent to the wrong port or path because the REST-mode server listens on port 8000 and exposes tools under the /mcp/tools/ prefix rather than at the API root",
| 1148 | + "solution": "Production testing with --mode rest flag starts server on port 8000. API endpoints are under /mcp/tools/ path (e.g., /mcp/tools/search_items). Use curl or HTTP clients with correct base URL.", |
| 1149 | + "context": "Testing MMR and other features in production-like environment", |
| 1150 | + "implementation": [ |
| 1151 | + "Start server: uv run docsrs-mcp --mode rest (listens on port 8000)", |
| 1152 | + "API endpoints: http://localhost:8000/mcp/tools/{tool_name}", |
| 1153 | + "Example: curl -X POST http://localhost:8000/mcp/tools/search_items -H 'Content-Type: application/json' -d '{...}'" |
| 1154 | + ], |
| 1155 | + "pattern": "Use correct port and path structure for production API testing", |
| 1156 | + "dateEncountered": "2025-08-10", |
| 1157 | + "relatedFiles": ["src/docsrs_mcp/app.py"], |
| 1158 | + "codeExample": "# Correct production test URL:\ncurl -X POST http://localhost:8000/mcp/tools/search_items -H 'Content-Type: application/json' -d '{\"query\": \"test\", \"k\": 5}'" |
| 1159 | + }, |
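The same request as the curl example above, expressed as a small Python client sketch using `requests`; the URL and payload mirror the values quoted in the entry and assume the server was started with `uv run docsrs-mcp --mode rest`.

```python
import requests

BASE_URL = "http://localhost:8000/mcp/tools"  # REST mode listens on port 8000

def call_tool(tool_name: str, payload: dict) -> dict:
    """POST a tool invocation to the /mcp/tools/{tool_name} endpoint."""
    resp = requests.post(f"{BASE_URL}/{tool_name}", json=payload, timeout=30)
    resp.raise_for_status()
    return resp.json()

if __name__ == "__main__":
    print(call_tool("search_items", {"query": "test", "k": 5}))
```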
1115 | 1160 | { |
1116 | 1161 | "error": "Database unique constraint violation - composite key needed for cross-references", |
1117 | 1162 | "rootCause": "Original UNIQUE constraint on (crate_id, alias_path) was insufficient for cross-references where the same alias can point to multiple actual paths with different link types", |
|
1929 | 1974 | "scalability": "Performance remains consistent with larger path datasets" |
1930 | 1975 | } |
1931 | 1976 | }, |
| 1977 | + { |
| 1978 | + "insight": "MMR enhancement requires embedding alignment during result sorting", |
| 1979 | + "context": "Implementing semantic similarity in MMR diversification algorithm", |
| 1980 | + "details": "When adding semantic similarity to MMR, embeddings must be passed alongside results and kept aligned during sorting operations. Use zip/unzip pattern to maintain correspondence between results and their embeddings. Cosine similarity calculation: np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))", |
| 1981 | + "impact": "Prevents misaligned embeddings that would corrupt similarity calculations and result ranking", |
| 1982 | + "dateMeasured": "2025-08-10", |
| 1983 | + "relatedFiles": ["src/docsrs_mcp/database.py"], |
| 1984 | + "metrics": { |
| 1985 | + "alignmentPattern": "zip(results, embeddings) → sort → unzip back to separate lists", |
| 1986 | + "similarityFormula": "np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))", |
| 1987 | + "integrityRequirement": "Embeddings must remain 1:1 aligned with search results", |
| 1988 | + "performanceNote": "Alignment operations add minimal overhead to MMR calculations" |
| 1989 | + } |
| 1990 | + }, |
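A sketch of the alignment pattern and similarity formula recorded in the insight above; `sort_results_with_embeddings` and its score-based ordering are illustrative, only the zip/unzip idea and the cosine formula come from the insight itself.

```python
import numpy as np

def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
    # Formula quoted in the insight: dot product over the product of norms.
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

def sort_results_with_embeddings(
    results: list[dict], embeddings: list[np.ndarray], scores: list[float]
) -> tuple[list[dict], list[np.ndarray]]:
    """Sort results by score while keeping each embedding paired with its result."""
    if not results:
        return [], []
    paired = sorted(zip(scores, results, embeddings), key=lambda t: t[0], reverse=True)
    _, sorted_results, sorted_embeddings = zip(*paired)  # "unzip" back into parallel lists
    return list(sorted_results), list(sorted_embeddings)
```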
| 1991 | + { |
| 1992 | + "insight": "Query preprocessing with term expansion benefits from order-preserving deduplication", |
| 1993 | + "context": "Implementing British-to-American spelling normalization and term expansion", |
| 1994 | + "details": "Term expansion should preserve the original terms while appending expansions. Deduplicate with a seen-set while keeping insertion order, as sketched below. Configure expansion rules in config.py for maintainability rather than hardcoding them in the preprocessing logic",
| 1995 | + "impact": "Maintains query intent while expanding coverage, prevents duplicate terms from affecting relevance scoring", |
| 1996 | + "dateMeasured": "2025-08-10", |
| 1997 | + "relatedFiles": ["src/docsrs_mcp/app.py", "src/docsrs_mcp/config.py"], |
| 1998 | + "metrics": { |
| 1999 | + "expansionPattern": "Original terms preserved + normalized variants added", |
| 2000 | + "deduplicationMethod": "Set-based with order preservation", |
| 2001 | + "configurationLocation": "config.py for maintainable expansion rules", |
| 2002 | + "queryIntegrityMaintenance": "Original intent preserved while expanding coverage" |
| 2003 | + } |
| 2004 | + }, |
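A minimal sketch of order-preserving expansion with deduplication; the `SPELLING_VARIANTS` entries are hypothetical stand-ins for the real rules, which the entry above says belong in config.py.

```python
# Hypothetical British-to-American normalization table; real rules live in config.py.
SPELLING_VARIANTS = {"serialise": "serialize", "behaviour": "behavior"}

def expand_query_terms(terms: list[str]) -> list[str]:
    """Keep original terms in order and append normalized variants, without duplicates."""
    expanded: list[str] = []
    seen: set[str] = set()
    for term in terms:
        for candidate in (term, SPELLING_VARIANTS.get(term.lower())):
            if candidate and candidate not in seen:
                seen.add(candidate)
                expanded.append(candidate)
    return expanded

# expand_query_terms(["serialise", "data"]) -> ["serialise", "serialize", "data"]
```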
1932 | 2005 | { |
1933 | 2006 | "insight": "Path caching with 5-minute TTL crucial for fuzzy search performance", |
1934 | 2007 | "context": "Caching strategy optimization for repeated fuzzy path lookups", |
|
2160 | 2233 | "Can be extended with additional spelling patterns as needed" |
2161 | 2234 | ], |
2162 | 2235 | "dateAdded": "2025-08-10" |
| 2236 | + }, |
| 2237 | + { |
| 2238 | + "pattern": "Adaptive TTL caching", |
| 2239 | + "implementation": "Implement complexity-based TTL with popularity tracking", |
| 2240 | + "description": "Cache TTL should adapt to query complexity and usage patterns for optimal performance", |
| 2241 | + "details": { |
| 2242 | + "simpleQueries": "Low complexity queries → longer TTL (1 hour) for better cache utilization", |
| 2243 | + "complexQueries": "High complexity queries with many filters → shorter TTL (15 minutes) to maintain freshness", |
| 2244 | + "popularityTracking": "Track hit counts for popularity-based TTL extension", |
| 2245 | + "storagePattern": "Store TTL with cache entries: (timestamp, results, ttl) for per-entry control" |
| 2246 | + } |
2163 | 2247 | } |
2164 | 2248 | ] |
2165 | 2249 | }, |
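A sketch of the complexity-based TTL rule described in the adaptive-TTL pattern above; the filter-count threshold is an assumption, only the 1-hour and 15-minute values come from the entry.

```python
SIMPLE_QUERY_TTL = 60 * 60   # 1 hour for low-complexity queries
COMPLEX_QUERY_TTL = 15 * 60  # 15 minutes for queries with many filters

def ttl_for_query(filter_count: int, complex_threshold: int = 3) -> int:
    """Pick a cache TTL from query complexity, approximated by filter count."""
    return COMPLEX_QUERY_TTL if filter_count >= complex_threshold else SIMPLE_QUERY_TTL
```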
|
2187 | 2271 | "tradeoff": "Slight staleness acceptable for 90% cache hit rate", |
2188 | 2272 | "fallbackBehavior": "Direct database lookup on cache miss" |
2189 | 2273 | } |
| 2274 | + }, |
| 2275 | + { |
| 2276 | + "issue": "Adaptive TTL caching implementation for query complexity optimization", |
| 2277 | + "solution": "Implement complexity-based TTL calculation with hit count tracking. Simple queries (low filter count) get longer TTL (1 hour), complex queries get shorter TTL (15 minutes). Track popularity for TTL extension opportunities.", |
| 2278 | + "lesson": "Cache TTL should adapt to query complexity - simple queries benefit from longer caching while complex queries need fresher data. Popularity tracking enables intelligent TTL extension for frequently accessed content.", |
| 2279 | + "context": "Search result caching optimization to balance performance with data freshness", |
| 2280 | + "dateEncountered": "2025-08-10", |
| 2281 | + "relatedFiles": ["src/docsrs_mcp/database.py", "src/docsrs_mcp/app.py"], |
| 2282 | + "performanceImpact": { |
| 2283 | + "simpleQueryTTL": "1 hour for low complexity queries", |
| 2284 | + "complexQueryTTL": "15 minutes for high filter count queries", |
| 2285 | + "popularityBonus": "Hit count tracking enables TTL extension for popular queries", |
| 2286 | + "storageOverhead": "Minimal - store (timestamp, results, ttl) tuple per cache entry" |
| 2287 | + } |
2190 | 2288 | } |
2191 | 2289 | ] |
2192 | 2290 | }, |
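A sketch of the per-entry storage and popularity tracking described above, using the (timestamp, results, ttl) tuple layout from the performanceImpact notes; the module-level dicts, hit threshold, and TTL-doubling rule are illustrative assumptions.

```python
import time

_cache: dict[str, tuple[float, list, float]] = {}  # key -> (timestamp, results, ttl)
_hit_counts: dict[str, int] = {}

def cache_put(key: str, results: list, ttl: float) -> None:
    _cache[key] = (time.time(), results, ttl)

def cache_get(key: str) -> list | None:
    entry = _cache.get(key)
    if entry is None:
        return None                              # miss -> caller falls back to the database
    timestamp, results, ttl = entry
    if time.time() - timestamp > ttl:
        del _cache[key]                          # expired entry
        return None
    _hit_counts[key] = _hit_counts.get(key, 0) + 1
    if _hit_counts[key] == 10:                   # illustrative popularity threshold
        _cache[key] = (timestamp, results, ttl * 2)  # extend TTL once for popular queries
    return results
```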
|
2519 | 2617 | ], |
2520 | 2618 | "ignorableWarnings": [ |
2521 | 2619 | "PLR0912 (too-many-branches): Can be ignored for documentation processing tasks", |
2522 | | - "PLR0915 (too-many-statements): Can be ignored for documentation processing tasks" |
| 2620 | + "PLR0915 (too-many-statements): Can be ignored for documentation processing tasks and complex algorithms like MMR diversification" |
2523 | 2621 | ], |
| 2622 | + "lintingBestPractices": { |
| 2623 | + "formatCommand": "uv run ruff format", |
| 2624 | + "lintCommand": "uv run ruff check --fix", |
| 2625 | + "acceptableWarnings": "PLR0915 (too many statements) may be acceptable in complex algorithms like MMR diversification with multiple calculation steps", |
| 2626 | + "workflow": "Run formatting before linting to avoid style-related lint errors" |
| 2627 | + }, |
2524 | 2628 | "performanceNotes": [ |
2525 | 2629 | "10-100x faster than traditional Python tools", |
2526 | 2630 | "Processes entire codebase in milliseconds", |
|