@@ -561,6 +561,175 @@ def link_CRE_to_Node(self, CRE_id, node_id, link_type):
561561 raise Exception (f"Unknown relation type { link_type } for Nodes to CREs" )
562562
@classmethod
def gap_analysis(self, name_1, name_2):
    """Dispatch a gap analysis to the implementation chosen by a feature flag.

    When ``Config.GAP_ANALYSIS_OPTIMIZED`` is truthy the tiered-pruning
    implementation is used; otherwise the exhaustive implementation is used.
    (The flag is presumably populated from an environment variable of the
    same name -- confirm in ``application.config``.)

    Args:
        name_1: Name of the base standard.
        name_2: Name of the standard to compare against.

    Returns:
        The result of ``_gap_analysis_optimized`` or
        ``_gap_analysis_original``, unchanged.
    """
    # NOTE(review): another ``def gap_analysis`` is visible further down in
    # this file. If both definitions live in the same class body, the later
    # one replaces this dispatcher and the toggle is never consulted --
    # confirm and rename/remove one of them.

    # Imported at call time rather than at module load; presumably to avoid
    # an import cycle -- TODO confirm.
    from application.config import Config

    if not Config.GAP_ANALYSIS_OPTIMIZED:
        logger.info(
            f"Gap Analysis: Using ORIGINAL exhaustive traversal for {name_1}>>{name_2}"
        )
        return self._gap_analysis_original(name_1, name_2)

    logger.info(
        f"Gap Analysis: Using OPTIMIZED tiered pruning for {name_1}>>{name_2}"
    )
    return self._gap_analysis_optimized(name_1, name_2)
583+
@classmethod
def _gap_analysis_optimized(self, name_1, name_2):
    """Run gap analysis using tiered relationship pruning with early exit.

    Queries are issued from the most restrictive relationship set to the
    least restrictive one. The first tier that yields any path is formatted
    and returned; later tiers are then never executed.

    Tier 1: LINKED_TO, AUTOMATICALLY_LINKED_TO and SAME relationships.
    Tier 2: Tier 1 plus CONTAINS.
    Tier 3: any relationship type (wildcard). Its result is returned even
        when it is empty.

    Every query keeps only paths with more than one relationship whose
    nodes are ``NeoCRE`` nodes or one of the two standards, and whose node
    names are not in the denylist.

    Args:
        name_1: Name of the base standard.
        name_2: Name of the standard to compare against.

    Returns:
        Whatever ``_format_gap_analysis_response`` returns for the base
        standard node set and the path records of the selected tier.
    """
    # NOTE(review): ``_format_gap_analysis_response`` is not defined in the
    # part of the file visible here -- confirm it exists on this class,
    # otherwise every return below raises AttributeError.
    logger.info(
        f"Performing OPTIMIZED GraphDB queries for gap analysis {name_1}>>{name_2}"
    )
    base_standard = NeoStandard.nodes.filter(name=name_1)
    denylist = ["Cross-cutting concerns"]

    def run_query(query):
        # All tiers share the same parameters; only the query text differs.
        records, _ = db.cypher_query(
            query,
            {"name1": name_1, "name2": name_2, "denylist": denylist},
            resolve_objects=True,
        )
        return records

    # Restrictive tiers, tried in order; the label is only used for logging.
    pruned_tiers = (
        (
            "Tier 1 (Strong)",
            """
            MATCH (BaseStandard:NeoStandard {name: $name1})
            MATCH (CompareStandard:NeoStandard {name: $name2})
            MATCH p = allShortestPaths((BaseStandard)-[:(LINKED_TO|AUTOMATICALLY_LINKED_TO|SAME)*..20]-(CompareStandard))
            WITH p
            WHERE length(p) > 1 AND ALL(n in NODES(p) WHERE (n:NeoCRE or n = BaseStandard or n = CompareStandard) AND NOT n.name in $denylist)
            RETURN p
            """,
        ),
        (
            "Tier 2 (Medium)",
            """
            MATCH (BaseStandard:NeoStandard {name: $name1})
            MATCH (CompareStandard:NeoStandard {name: $name2})
            MATCH p = allShortestPaths((BaseStandard)-[:(LINKED_TO|AUTOMATICALLY_LINKED_TO|SAME|CONTAINS)*..20]-(CompareStandard))
            WITH p
            WHERE length(p) > 1 AND ALL(n in NODES(p) WHERE (n:NeoCRE or n = BaseStandard or n = CompareStandard) AND NOT n.name in $denylist)
            RETURN p
            """,
        ),
    )

    for tier_label, tier_query in pruned_tiers:
        path_records = run_query(tier_query)
        if path_records:
            # Early exit: a non-empty restrictive tier makes the broader
            # (more expensive) queries unnecessary.
            logger.info(
                f"Gap Analysis: {tier_label} found {len(path_records)} paths. Pruning remainder."
            )
            return self._format_gap_analysis_response(base_standard, path_records)

    # Tier 3: wildcard over every relationship type (the expensive query).
    logger.info(
        "Gap Analysis: Tiers 1 & 2 empty. Executing Tier 3 (Wildcard search)."
    )
    path_records_all = run_query(
        """
        MATCH (BaseStandard:NeoStandard {name: $name1})
        MATCH (CompareStandard:NeoStandard {name: $name2})
        MATCH p = allShortestPaths((BaseStandard)-[*..20]-(CompareStandard))
        WITH p
        WHERE length(p) > 1 AND ALL (n in NODES(p) where (n:NeoCRE or n = BaseStandard or n = CompareStandard) AND NOT n.name in $denylist)
        RETURN p
        """
    )

    return self._format_gap_analysis_response(base_standard, path_records_all)
658+
@classmethod
def _gap_analysis_original(self, name_1, name_2):
    """Run gap analysis with the exhaustive two-query strategy.

    Both queries are always executed: a wildcard query over every
    relationship type, and a query restricted to LINKED_TO,
    AUTOMATICALLY_LINKED_TO and CONTAINS. Each keeps only paths with more
    than one relationship whose nodes are ``NeoCRE`` nodes or one of the two
    standards, and whose node names are not in the denylist.

    Args:
        name_1: Name of the base standard.
        name_2: Name of the standard to compare against.

    Returns:
        A 2-tuple ``(base_nodes, paths)``. ``base_nodes`` is the list of
        parsed ``NeoStandard`` nodes named ``name_1``. ``paths`` is a list
        of dicts with keys ``"start"``, ``"end"`` and ``"path"``, holding
        the restricted-query paths followed by the wildcard-query paths.
        No de-duplication is performed between the two result sets.

    Raises:
        KeyError: If a path contains a relationship whose class is not in
            the relation map below.
        IndexError: If a relationship endpoint is not among the nodes of
            its path.
    """
    logger.info(
        f"Performing ORIGINAL GraphDB queries for gap analysis {name_1}>>{name_2}"
    )
    base_standard = NeoStandard.nodes.filter(name=name_1)
    denylist = ["Cross-cutting concerns"]

    # Query 1: Wildcard (all relationships)
    path_records_all, _ = db.cypher_query(
        """
        MATCH (BaseStandard:NeoStandard {name: $name1})
        MATCH (CompareStandard:NeoStandard {name: $name2})
        MATCH p = allShortestPaths((BaseStandard)-[*..20]-(CompareStandard))
        WITH p
        WHERE length(p) > 1 AND ALL (n in NODES(p) where (n:NeoCRE or n = BaseStandard or n = CompareStandard) AND NOT n.name in $denylist)
        RETURN p
        """,
        {"name1": name_1, "name2": name_2, "denylist": denylist},
        resolve_objects=True,
    )

    # Query 2: Filtered (LINKED_TO, AUTOMATICALLY_LINKED_TO, CONTAINS)
    path_records, _ = db.cypher_query(
        """
        MATCH (BaseStandard:NeoStandard {name: $name1})
        MATCH (CompareStandard:NeoStandard {name: $name2})
        MATCH p = allShortestPaths((BaseStandard)-[:(LINKED_TO|AUTOMATICALLY_LINKED_TO|CONTAINS)*..20]-(CompareStandard))
        WITH p
        WHERE length(p) > 1 AND ALL(n in NODES(p) WHERE (n:NeoCRE or n = BaseStandard or n = CompareStandard) AND NOT n.name in $denylist)
        RETURN p
        """,
        {"name1": name_1, "name2": name_2, "denylist": denylist},
        resolve_objects=True,
    )

    # Built once per call rather than once per segment.
    # NOTE(review): the wildcard query can traverse any relationship type; a
    # relationship class missing from this map raises KeyError in
    # format_segment -- confirm every type in the graph is covered.
    relation_map = {
        RelatedRel: "RELATED",
        ContainsRel: "CONTAINS",
        LinkedToRel: "LINKED_TO",
        AutoLinkedToRel: "AUTOMATICALLY_LINKED_TO",
    }

    def find_node(nodes, element_id):
        # First node of the path with the given element id; IndexError if
        # there is none.
        return [node for node in nodes if node.element_id == element_id][0]

    def format_segment(seg: StructuredRel, nodes):
        # Resolve both endpoints before parsing, as a missing endpoint
        # should fail before any node is parsed.
        start_node = find_node(nodes, seg._start_node_element_id)
        end_node = find_node(nodes, seg._end_node_element_id)
        return {
            "start": NEO_DB.parse_node_no_links(start_node),
            "end": NEO_DB.parse_node_no_links(end_node),
            "relationship": relation_map[type(seg)],
        }

    def format_path_record(rec):
        return {
            "start": NEO_DB.parse_node_no_links(rec.start_node),
            "end": NEO_DB.parse_node_no_links(rec.end_node),
            "path": [format_segment(seg, rec.nodes) for seg in rec.relationships],
        }

    # Combine results (original behavior): filtered paths first, then the
    # wildcard paths. Each record is a row whose first column is the path.
    parsed_base = [NEO_DB.parse_node_no_links(rec) for rec in base_standard]
    formatted_paths = [
        format_path_record(rec[0]) for rec in (path_records + path_records_all)
    ]
    return parsed_base, formatted_paths
732+
564733 def gap_analysis (self , name_1 , name_2 ):
565734 logger .info (f"Performing GraphDB queries for gap analysis { name_1 } >>{ name_2 } " )
566735 base_standard = NeoStandard .nodes .filter (name = name_1 )
0 commit comments