SynaLinks
diff --git a/‎coverage-badge.svg‎
Lines changed: 1 addition & 1 deletion b/‎coverage-badge.svg‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎docs/Code Examples/Advanced/Knowledge Augmented Generation.md‎
Lines changed: 11 additions & 11 deletions b/‎docs/Code Examples/Advanced/Knowledge Augmented Generation.md‎
Lines changed: 11 additions & 11 deletions
diff --git a/‎docs/Code Examples/Advanced/Knowledge Extraction.md‎
Lines changed: 20 additions & 3 deletions b/‎docs/Code Examples/Advanced/Knowledge Extraction.md‎
Lines changed: 20 additions & 3 deletions
diff --git a/‎docs/assets/multi_stage_graph.png‎
69.8 KB b/‎docs/assets/multi_stage_graph.png‎
69.8 KB
diff --git a/‎docs/assets/one_stage_graph.png‎
34.9 KB b/‎docs/assets/one_stage_graph.png‎
34.9 KB
diff --git a/‎docs/assets/relations_only_multi_stage_graph.png‎
124 KB b/‎docs/assets/relations_only_multi_stage_graph.png‎
124 KB
diff --git a/‎docs/assets/relations_only_one_stage_graph.png‎
66.3 KB b/‎docs/assets/relations_only_one_stage_graph.png‎
66.3 KB
diff --git a/‎docs/assets/two_stage_graph.png‎
42.7 KB b/‎docs/assets/two_stage_graph.png‎
42.7 KB
diff --git a/‎docs/index.md‎
Lines changed: 4 additions & 4 deletions b/‎docs/index.md‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎synalinks/src/knowledge_bases/database_adapters/neo4j_adapter.py‎
Lines changed: 5 additions & 10 deletions b/‎synalinks/src/knowledge_bases/database_adapters/neo4j_adapter.py‎
Lines changed: 5 additions & 10 deletions
@@ -107,7 +107,7 @@ if __name__ == "__main__":
         "name": "Vatican City",
         "label": "Country"
       },
-      "combined_score": 0.8975120754014027
+      "score": 0.8975120754014027
     },
     {
       "subj": {
@@ -119,7 +119,7 @@ if __name__ == "__main__":
         "name": "Vatican City",
         "label": "Country"
       },
-      "combined_score": 0.8975120754014027
+      "score": 0.8975120754014027
     },
     {
       "subj": {
@@ -131,7 +131,7 @@ if __name__ == "__main__":
         "name": "United Kingdom",
         "label": "Country"
       },
-      "combined_score": 0.9030690106522803
+      "score": 0.9030690106522803
     },
     {
       "subj": {
@@ -143,7 +143,7 @@ if __name__ == "__main__":
         "name": "United Kingdom",
         "label": "Country"
       },
-      "combined_score": 0.9030690106522803
+      "score": 0.9030690106522803
     },
     {
       "subj": {
@@ -155,7 +155,7 @@ if __name__ == "__main__":
         "name": "Italy",
         "label": "Country"
       },
-      "combined_score": 0.9245654142023485
+      "score": 0.9245654142023485
     },
     {
       "subj": {
@@ -167,7 +167,7 @@ if __name__ == "__main__":
         "name": "Germany",
         "label": "Country"
       },
-      "combined_score": 0.9306481791338741
+      "score": 0.9306481791338741
     },
     {
       "subj": {
@@ -179,7 +179,7 @@ if __name__ == "__main__":
         "name": "Europe",
         "label": "Country"
       },
-      "combined_score": 0.941624026613126
+      "score": 0.941624026613126
     },
     {
       "subj": {
@@ -191,7 +191,7 @@ if __name__ == "__main__":
         "name": "Europe",
         "label": "Country"
       },
-      "combined_score": 0.941624026613126
+      "score": 0.941624026613126
     },
     {
       "subj": {
@@ -203,7 +203,7 @@ if __name__ == "__main__":
         "name": "Europe",
         "label": "Country"
       },
-      "combined_score": 0.941624026613126
+      "score": 0.941624026613126
     },
     {
       "subj": {
@@ -215,7 +215,7 @@ if __name__ == "__main__":
         "name": "France",
         "label": "Country"
       },
-      "combined_score": 0.9998149700645786
+      "score": 0.9998149700645786
     }
   ],
   "answer": "The capital of France is Paris."
@@ -230,7 +230,7 @@ The relationship models (IsCapitalOf, IsLocatedIn, IsCityOf, TookPlaceIn) define
 The `return_inputs=True` parameter in both retriever and generator components ensures that information flows through your pipeline without loss. This allows downstream components to access both the original query and any intermediate results, enabling more sophisticated processing strategies.
 The instruction set for the generator provides crucial guidance for response generation. The instruction to acknowledge when search results aren't relevant prevents hallucination and maintains system reliability. You can customize these instructions based on your specific use case requirements.
 
-Don't forget that these instructions can be optimized to enhance the reasoning capabilities of your RAGs.
+Don't forget that these instructions can be optimized to enhance the reasoning capabilities of your KAGs.
 
 ## Key Takeaways
 
 
@@ -1,10 +1,12 @@
 # Knowledge Extraction
 
-Knowledge extraction from unstructured data is a cornerstone of neuro-symbolic AI applications, enabling systems to transform raw text into structured, logically queryable information. Synalinks provides a sophisticated framework that supports constrained property graph extraction and querying, offering unprecedented flexibility in how you architect your knowledge extraction pipelines.
+Knowledge extraction from unstructured data is a cornerstone of neuro-symbolic AI applications, enabling systems to transform raw text into structured, queryable information. Synalinks provides a sophisticated framework that supports constrained property graph extraction and querying, offering unprecedented flexibility in how you architect your knowledge extraction pipelines.
 
 Synalinks leverages constrained property graphs as its foundation, where the schema is rigorously enforced through constrained JSON decoding. This approach ensures data integrity while maintaining the flexibility to store extracted knowledge in dedicated graph databases for efficient querying and retrieval.
 The framework's modular design allows you to compose extraction pipelines from discrete, reusable components. Each component can be optimized independently, tested in isolation, and combined with others to create sophisticated data processing workflows.
 
+To illustrate our approach, we are going to use the same small language model with different architectures, so you can understand the pros and cons of each approach.
+
 ```python
 import synalinks
 import asyncio
@@ -77,6 +79,10 @@ async def one_stage_program(
 
 The one-stage approach minimizes latency and reduces the complexity of pipeline orchestration. However, it demands models with substantial reasoning capabilities and may not be effective for scenarios involving smaller, specialized models.
 
+#### Resulting Graph
+
+![one_stage_graph](../../assets/one_stage_graph.png)
+
 ### Two-Stage Extraction
 
 The two-stage approach represents a strategic decomposition of the extraction process, separating entity identification from relationship inference. This separation allows for specialized optimization at each stage and provides greater control.
@@ -101,7 +107,7 @@ async def two_stage_program(
     )(inputs)
 
     # inputs_with_entities = inputs AND entities (See Control Flow tutorial)
-    inputs_with_entities = inputs & entities 
+    inputs_with_entities = inputs & entities
     relations = await synalinks.Generator(
         data_model=MapRelations,
         language_model=language_model,
@@ -132,7 +138,6 @@ async def two_stage_program(
         to_folder="examples/knowledge_extraction",
         show_trainable=True,
     )
-
     return program
 
 ```
@@ -141,6 +146,10 @@ async def two_stage_program(
 
 This staged approach offers several advantages: entities can be extracted using lightweight models optimized for named entity recognition, while relationship inference can leverage more sophisticated reasoning models.
 
+#### Resulting Graph
+
+![two_stage_graph](../../assets/two_stage_graph.png)
+
 ### Multi-Stage Extraction
 
 If you have heterogeneous data models, or if you are using small language models (SLMs), you might want to consider using a separate generator for each entity or relation to extract. This approach enhances the predictions of LMs by making one call per entity or relation type, thereby reducing the scope of the task for each call and enhancing accuracy. You can then combine the results of your extraction using logical operators (`And` or `Or`), depending on whether you want your aggregation to be robust to failures from the LMs.
@@ -309,6 +318,10 @@ if __name__ == "__main__":
 
 ![multi_stage_extraction](../../assets/multi_stage_extraction.png)
 
+#### Resulting Graph
+
+![multi_stage_graph](../../assets/multi_stage_graph.png)
+
 ### Dealing with Orphan Nodes
 
 In some cases, especially if you want to use the `KnowledgeRetriever`, you will have to extract nodes that are connected to each other. If intelligence is connecting the dots between your data, then orphan nodes are problematic.
@@ -439,6 +452,10 @@ if __name__ == "__main__":
 
 ![relations_only_multi_stage_extraction](../../assets/relations_only_multi_stage_extraction.png)
 
+#### Resulting Graph
+
+![relations_only_multi_stage_graph](../../assets/relations_only_multi_stage_graph.png)
+
 ## Conclusion
 
 Synalinks represents a paradigm shift in knowledge extraction, moving beyond monolithic, inflexible approaches toward a modular, production-first framework that adapts to the complexities of real-world applications.
 
@@ -38,7 +38,7 @@ async def main():
         )
 
     language_model = synalinks.LanguageModel(
-        model="ollama_chat/deepseek-r1",
+        model="ollama/mistral",
     )
 
     x0 = synalinks.Input(data_model=Query)
@@ -136,7 +136,7 @@ async def main():
             )
             return cls(language_model=language_model, **config)
 
-    language_model = synalinks.LanguageModel(model="ollama_chat/deepseek-r1")
+    language_model = synalinks.LanguageModel(model="ollama/mistral")
 
     program = ChainOfThought(language_model=language_model)
 
@@ -203,7 +203,7 @@ async def main():
             )
 
     language_model = synalinks.LanguageModel(
-        model="ollama_chat/deepseek-r1",
+        model="ollama/mistral",
     )
 
     program = ChainOfThought(
@@ -241,7 +241,7 @@ async def main():
         )
 
     language_model = synalinks.LanguageModel(
-        model="ollama_chat/deepseek-r1",
+        model="ollama/mistral",
     )
 
     program = synalinks.Sequential(
 
@@ -263,7 +263,6 @@ async def triplet_search(
         triplet_search,
         k=10,
         threshold=0.7,
-        combined_threshold=None,
     ):
         if not is_triplet_search(triplet_search):
             raise ValueError(
@@ -287,13 +286,9 @@ async def triplet_search(
         )
         object_similarity_query = triplet_search.get("object_similarity_query")
 
-        if combined_threshold is None:
-            combined_threshold = threshold
-
         params = {
             "numberOfNearestNeighbours": k,
             "threshold": threshold,
-            "combinedThreshold": combined_threshold,
             "k": k,
         }
 
@@ -480,15 +475,15 @@ async def triplet_search(
                 ]
             )
 
-        # Add geometric mean calculation for triplet returns
+        # Add geometric mean score calculation for triplets
         query_lines.append(
             (
                 "WITH subj, subj_score, relation, obj, obj_score, "
                 "sqrt(subj_score * obj_score) "
-                "AS combined_score"
+                "AS score"
             )
         )
-        where_conditions.append("combined_score >= $combinedThreshold")
+        where_conditions.append("score >= $threshold")
 
         if where_conditions:
             query_lines.append(f"WHERE {' AND '.join(where_conditions)}")
@@ -499,8 +494,8 @@ async def triplet_search(
                 "RETURN {name: subj.name, label: subj.label} AS subj,",
                 "       type(relation) AS relation,",
                 "       {name: obj.name, label: obj.label} AS obj,",
-                "       combined_score",
-                "ORDER BY combined_score DESC",
+                "       score",
+                "ORDER BY score DESC",
             ]
         )
Original file line number	Diff line number	Diff line change
`@@ -38,7 +38,7 @@ async def main():`
`38`	`38`	`)`
`39`	`39`
`40`	`40`	`language_model = synalinks.LanguageModel(`
`41`		`- model="ollama_chat/deepseek-r1",`
	`41`	`+ model="ollama/mistral",`
`42`	`42`	`)`
`43`	`43`
`44`	`44`	`x0 = synalinks.Input(data_model=Query)`
`@@ -136,7 +136,7 @@ async def main():`
`136`	`136`	`)`
`137`	`137`	`return cls(language_model=language_model, **config)`
`138`	`138`
`139`		`- language_model = synalinks.LanguageModel(model="ollama_chat/deepseek-r1")`
	`139`	`+ language_model = synalinks.LanguageModel(model="ollama/mistral")`
`140`	`140`
`141`	`141`	`program = ChainOfThought(language_model=language_model)`
`142`	`142`
`@@ -203,7 +203,7 @@ async def main():`
`203`	`203`	`)`
`204`	`204`
`205`	`205`	`language_model = synalinks.LanguageModel(`
`206`		`- model="ollama_chat/deepseek-r1",`
	`206`	`+ model="ollama/mistral",`
`207`	`207`	`)`
`208`	`208`
`209`	`209`	`program = ChainOfThought(`
`@@ -241,7 +241,7 @@ async def main():`
`241`	`241`	`)`
`242`	`242`
`243`	`243`	`language_model = synalinks.LanguageModel(`
`244`		`- model="ollama_chat/deepseek-r1",`
	`244`	`+ model="ollama/mistral",`
`245`	`245`	`)`
`246`	`246`
`247`	`247`	`program = synalinks.Sequential(`
Original file line number	Diff line number	Diff line change
`@@ -263,7 +263,6 @@ async def triplet_search(`
`263`	`263`	`triplet_search,`
`264`	`264`	`k=10,`
`265`	`265`	`threshold=0.7,`
`266`		`- combined_threshold=None,`
`267`	`266`	`):`
`268`	`267`	`if not is_triplet_search(triplet_search):`
`269`	`268`	`raise ValueError(`
`@@ -287,13 +286,9 @@ async def triplet_search(`
`287`	`286`	`)`
`288`	`287`	`object_similarity_query = triplet_search.get("object_similarity_query")`
`289`	`288`
`290`		`- if combined_threshold is None:`
`291`		`- combined_threshold = threshold`
`292`		`-`
`293`	`289`	`params = {`
`294`	`290`	`"numberOfNearestNeighbours": k,`
`295`	`291`	`"threshold": threshold,`
`296`		`- "combinedThreshold": combined_threshold,`
`297`	`292`	`"k": k,`
`298`	`293`	`}`
`299`	`294`
`@@ -480,15 +475,15 @@ async def triplet_search(`
`480`	`475`	`]`
`481`	`476`	`)`
`482`	`477`
`483`		`- # Add geometric mean calculation for triplet returns`
	`478`	`+ # Add geometric mean score calculation for triplets`
`484`	`479`	`query_lines.append(`
`485`	`480`	`(`
`486`	`481`	`"WITH subj, subj_score, relation, obj, obj_score, "`
`487`	`482`	`"sqrt(subj_score * obj_score) "`
`488`		`- "AS combined_score"`
	`483`	`+ "AS score"`
`489`	`484`	`)`
`490`	`485`	`)`
`491`		`- where_conditions.append("combined_score >= $combinedThreshold")`
	`486`	`+ where_conditions.append("score >= $threshold")`
`492`	`487`
`493`	`488`	`if where_conditions:`
`494`	`489`	`query_lines.append(f"WHERE {' AND '.join(where_conditions)}")`
`@@ -499,8 +494,8 @@ async def triplet_search(`
`499`	`494`	`"RETURN {name: subj.name, label: subj.label} AS subj,",`
`500`	`495`	`" type(relation) AS relation,",`
`501`	`496`	`" {name: obj.name, label: obj.label} AS obj,",`
`502`		`- " combined_score",`
`503`		`- "ORDER BY combined_score DESC",`
	`497`	`+ " score",`
	`498`	`+ "ORDER BY score DESC",`
`504`	`499`	`]`
`505`	`500`	`)`
`506`	`501`