feat(frontend): Refine Gap Analysis link strength categorization (Weak threshold 20 -> 7) (#717)

PRAteek-singHWY · web-flow · commit ba4b27d2dc54 · 2026-02-14T20:21:37.000Z
* Adjust gap analysis strength categorization for weak links

* feat: implement feature toggle GAP_ANALYSIS_OPTIMIZED per feedback

* style: fix black formatting in db.py
diff --git a/application/config.py b/application/config.py
@@ -8,6 +8,10 @@ class Config:
     SQLALCHEMY_RECORD_QUERIES = False
     ITEMS_PER_PAGE = 20
     SLOW_DB_QUERY_TIME = 0.5
+    # Optimized gap analysis is opt-in: on only if the env var is "true" (any case)
+    GAP_ANALYSIS_OPTIMIZED = (
+        os.environ.get("GAP_ANALYSIS_OPTIMIZED", "False").lower() == "true"
+    )
 
 
 class DevelopmentConfig(Config):
diff --git a/application/database/db.py b/application/database/db.py
@@ -561,6 +561,175 @@ def link_CRE_to_Node(self, CRE_id, node_id, link_type):
         raise Exception(f"Unknown relation type {link_type} for Nodes to CREs")
 
     @classmethod
+    def gap_analysis(self, name_1, name_2):
+        """
+        Dispatch gap analysis by Config.GAP_ANALYSIS_OPTIMIZED (optimized vs original).
+
+        NOTE(review): another `def gap_analysis` follows later in this hunk; confirm
+        it does not shadow this dispatcher or lose the @classmethod decorator.
+        """
+        from application.config import Config
+
+        if Config.GAP_ANALYSIS_OPTIMIZED:
+            logger.info(
+                f"Gap Analysis: Using OPTIMIZED tiered pruning for {name_1}>>{name_2}"
+            )
+            return self._gap_analysis_optimized(name_1, name_2)
+        else:
+            logger.info(
+                f"Gap Analysis: Using ORIGINAL exhaustive traversal for {name_1}>>{name_2}"
+            )
+            return self._gap_analysis_original(name_1, name_2)
+
+    @classmethod
+    def _gap_analysis_optimized(self, name_1, name_2):
+        """
+        OPTIMIZED: tiered allShortestPaths queries that stop at the first non-empty tier.
+        NOTE(review): _format_gap_analysis_response is not added by this hunk; verify.
+        Tier 1: Strong links only (LINKED_TO, SAME, AUTOMATICALLY_LINKED_TO)
+        Tier 2: Tier 1 types plus CONTAINS, run only if Tier 1 returned no paths
+        Tier 3: any relationship type (wildcard), run only if Tiers 1 and 2 are empty
+        """
+        logger.info(
+            f"Performing OPTIMIZED GraphDB queries for gap analysis {name_1}>>{name_2}"
+        )
+        base_standard = NeoStandard.nodes.filter(name=name_1)
+        denylist = ["Cross-cutting concerns"]
+
+        # Tier 1: Strong Links (LINKED_TO, SAME, AUTOMATICALLY_LINKED_TO)
+        path_records, _ = db.cypher_query(
+            """
+         MATCH (BaseStandard:NeoStandard {name: $name1})
+         MATCH (CompareStandard:NeoStandard {name: $name2})
+         MATCH p = allShortestPaths((BaseStandard)-[:(LINKED_TO|AUTOMATICALLY_LINKED_TO|SAME)*..20]-(CompareStandard))
+         WITH p
+         WHERE length(p) > 1 AND ALL(n in NODES(p) WHERE (n:NeoCRE or n = BaseStandard or n = CompareStandard) AND NOT n.name in $denylist)
+         RETURN p
+            """,
+            {"name1": name_1, "name2": name_2, "denylist": denylist},
+            resolve_objects=True,
+        )
+
+        # Any Tier 1 path ends the search here; Tiers 2 and 3 are then never queried
+        if path_records and len(path_records) > 0:
+            logger.info(
+                f"Gap Analysis: Tier 1 (Strong) found {len(path_records)} paths. Pruning remainder."
+            )
+            return self._format_gap_analysis_response(base_standard, path_records)
+
+        # Tier 2: Medium Links (the Tier 1 relationship types plus CONTAINS)
+        path_records, _ = db.cypher_query(
+            """
+         MATCH (BaseStandard:NeoStandard {name: $name1})
+         MATCH (CompareStandard:NeoStandard {name: $name2})
+         MATCH p = allShortestPaths((BaseStandard)-[:(LINKED_TO|AUTOMATICALLY_LINKED_TO|SAME|CONTAINS)*..20]-(CompareStandard))
+         WITH p
+         WHERE length(p) > 1 AND ALL(n in NODES(p) WHERE (n:NeoCRE or n = BaseStandard or n = CompareStandard) AND NOT n.name in $denylist)
+         RETURN p
+            """,
+            {"name1": name_1, "name2": name_2, "denylist": denylist},
+            resolve_objects=True,
+        )
+
+        if path_records and len(path_records) > 0:
+            logger.info(
+                f"Gap Analysis: Tier 2 (Medium) found {len(path_records)} paths. Pruning remainder."
+            )
+            return self._format_gap_analysis_response(base_standard, path_records)
+
+        # Tier 3: Weak/All Links (wildcard over every relationship type, up to 20 hops)
+        logger.info(
+            "Gap Analysis: Tiers 1 & 2 empty. Executing Tier 3 (Wildcard search)."
+        )
+        path_records_all, _ = db.cypher_query(
+            """
+         MATCH (BaseStandard:NeoStandard {name: $name1})
+         MATCH (CompareStandard:NeoStandard {name: $name2})
+         MATCH p = allShortestPaths((BaseStandard)-[*..20]-(CompareStandard))
+         WITH p
+         WHERE length(p) > 1 AND ALL (n in NODES(p) where (n:NeoCRE or n = BaseStandard or n = CompareStandard) AND NOT n.name in $denylist) 
+         RETURN p
+            """,
+            {"name1": name_1, "name2": name_2, "denylist": denylist},
+            resolve_objects=True,
+        )
+
+        return self._format_gap_analysis_response(base_standard, path_records_all)
+
+    @classmethod
+    def _gap_analysis_original(self, name_1, name_2):
+        """
+        ORIGINAL: always runs both the wildcard and the typed allShortestPaths queries.
+        Returns (parsed base-standard nodes, typed paths followed by wildcard paths).
+        NOTE(review): the `datetime` import below is unused in this method.
+        """
+        logger.info(
+            f"Performing ORIGINAL GraphDB queries for gap analysis {name_1}>>{name_2}"
+        )
+        base_standard = NeoStandard.nodes.filter(name=name_1)
+        denylist = ["Cross-cutting concerns"]
+        from datetime import datetime
+
+        # Query 1: Wildcard (any relationship type, paths of up to 20 hops)
+        path_records_all, _ = db.cypher_query(
+            """
+         MATCH (BaseStandard:NeoStandard {name: $name1})
+         MATCH (CompareStandard:NeoStandard {name: $name2})
+         MATCH p = allShortestPaths((BaseStandard)-[*..20]-(CompareStandard))
+         WITH p
+         WHERE length(p) > 1 AND ALL (n in NODES(p) where (n:NeoCRE or n = BaseStandard or n = CompareStandard) AND NOT n.name in $denylist) 
+         RETURN p
+            """,
+            {"name1": name_1, "name2": name_2, "denylist": denylist},
+            resolve_objects=True,
+        )
+
+        # Query 2: Filtered (LINKED_TO, AUTOMATICALLY_LINKED_TO, CONTAINS only)
+        path_records, _ = db.cypher_query(
+            """
+         MATCH (BaseStandard:NeoStandard {name: $name1})
+         MATCH (CompareStandard:NeoStandard {name: $name2})
+         MATCH p = allShortestPaths((BaseStandard)-[:(LINKED_TO|AUTOMATICALLY_LINKED_TO|CONTAINS)*..20]-(CompareStandard))
+         WITH p
+         WHERE length(p) > 1 AND ALL(n in NODES(p) WHERE (n:NeoCRE or n = BaseStandard or n = CompareStandard) AND NOT n.name in $denylist)
+         RETURN p
+            """,
+            {"name1": name_1, "name2": name_2, "denylist": denylist},
+            resolve_objects=True,
+        )
+
+        # Path serializers; relation_map has no fallback, so unmapped types raise KeyError
+        def format_segment(seg: StructuredRel, nodes):
+            relation_map = {
+                RelatedRel: "RELATED",
+                ContainsRel: "CONTAINS",
+                LinkedToRel: "LINKED_TO",
+                AutoLinkedToRel: "AUTOMATICALLY_LINKED_TO",
+            }
+            start_node = [
+                node for node in nodes if node.element_id == seg._start_node_element_id
+            ][0]
+            end_node = [
+                node for node in nodes if node.element_id == seg._end_node_element_id
+            ][0]
+
+            return {
+                "start": NEO_DB.parse_node_no_links(start_node),
+                "end": NEO_DB.parse_node_no_links(end_node),
+                "relationship": relation_map[type(seg)],
+            }
+
+        def format_path_record(rec):
+            return {
+                "start": NEO_DB.parse_node_no_links(rec.start_node),
+                "end": NEO_DB.parse_node_no_links(rec.end_node),
+                "path": [format_segment(seg, rec.nodes) for seg in rec.relationships],
+            }
+
+        return [NEO_DB.parse_node_no_links(rec) for rec in base_standard], [
+            format_path_record(rec[0]) for rec in (path_records + path_records_all)
+        ]
+
     def gap_analysis(self, name_1, name_2):
         logger.info(f"Performing GraphDB queries for gap analysis {name_1}>>{name_2}")
         base_standard = NeoStandard.nodes.filter(name=name_1)
diff --git a/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx b/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx
@@ -44,14 +44,14 @@ function useQuery() {
 const GetStrength = (score) => {
   if (score == 0) return 'Direct';
   if (score <= GA_STRONG_UPPER_LIMIT) return 'Strong';
-  if (score >= 20) return 'Weak';
+  if (score >= 7) return 'Weak'; // Weak cutoff lowered from 20 to 7 by this change
   return 'Average';
 };
 
 const GetStrengthColor = (score) => {
   if (score === 0) return 'darkgreen';
   if (score <= GA_STRONG_UPPER_LIMIT) return '#93C54B';
-  if (score >= 20) return 'Red';
+  if (score >= 7) return 'Red'; // same cutoff that GetStrength uses for 'Weak'
   return 'Orange';
 };
 
@@ -102,7 +102,7 @@ const GetResultLine = (path, gapAnalysis, key) => {
             <b style={{ color: GetStrengthColor(6) }}>{GetStrength(6)}</b>: Connected likely to have partial
             overlap
             <br />
-            <b style={{ color: GetStrengthColor(22) }}>{GetStrength(22)}</b>: Weakly connected likely to have
+            <b style={{ color: GetStrengthColor(7) }}>{GetStrength(7)}</b>: Weakly connected likely to have
             small or no overlap
           </Popup.Content>
         </Popup>