16 changes: 10 additions & 6 deletions docsite/docs/core-concepts/dataset.md
@@ -40,10 +40,12 @@ The library organizes metadata using Pydantic models, but you can access it thro
  - `[column_name].tags: Optional[List[str]]`
  - `[column_name].profiling_metrics: Optional[ColumnProfilingMetrics]`
- **`ColumnProfilingMetrics`**: Detailed statistics for a column.
  - `count: Optional[int]`
  - `null_count: Optional[int]`
  - `distinct_count: Optional[int]`
  - `sample_data: Optional[List[Any]]`
  - `.count: Optional[int]`
  - `.null_count: Optional[int]`
  - `.distinct_count: Optional[int]`
  - `.sample_data: Optional[List[Any]]`
  - `.uniqueness: Optional[float]` (Read-only property)
  - `.completeness: Optional[float]` (Read-only property)

#### Example of accessing metadata

@@ -64,6 +66,8 @@ print(f"Column Description: {email_column.description}")
metrics = email_column.profiling_metrics
if metrics:
print(f"Distinct Count: {metrics.distinct_count}")
print(f"Uniqueness: {metrics.uniqueness}")
print(f"Completeness: {metrics.completeness}")
```
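
Both derived properties are plain ratios over the raw counts (see the `ColumnProfilingMetrics` properties added in `src/intugle/models/resources/model.py` further down this diff). A minimal sketch of the arithmetic, using made-up counts:

```python
# Hypothetical counts for illustration only.
count, null_count, distinct_count = 1000, 50, 920

uniqueness = distinct_count / count          # 920 / 1000 = 0.92
completeness = (count - null_count) / count  # 950 / 1000 = 0.95
```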

### Automatic caching
@@ -75,7 +79,7 @@ The `DataSet` object avoids redundant work. When you initialize a `DataSet`, it
You can run the analysis pipeline step-by-step for more granular control. Each of these methods includes a `save=True` option to persist the results of that specific stage.

```python
from intugle import DataSet
from intugle.analysis.models import DataSet

# Initialize the dataset
data_source = {"path": "path/to/my_data.csv", "type": "csv"}
@@ -124,4 +128,4 @@ profiles = dataset.profiling_df

# Display the first 5 rows
print(profiles.head())
```
38 changes: 36 additions & 2 deletions docsite/docs/core-concepts/knowledge-builder.md
@@ -35,7 +35,8 @@ You can initialize the `KnowledgeBuilder` in two ways:
2. **With a List of `DataSet` Objects**: If you have already created `DataSet` objects, you can pass a list of them directly.

```python
from intugle import KnowledgeBuilder, DataSet
from intugle.analysis.models import DataSet
from intugle import KnowledgeBuilder

# Create DataSet objects from file-based sources
customers_data = {"path": "path/to/customers.csv", "type": "csv"}
@@ -129,7 +130,40 @@ link_predictor = kb.link_predictor
print(f"Primary Key for customers: {customers_dataset.source_table_model.description}")
print("Discovered Links:")
print(link_predictor.get_links_df())

```

Learn more about what you can do with these objects in the [DataSet](./dataset.md) and [Link Prediction](./link-prediction.md) documentation.

## Utility DataFrames

The `KnowledgeBuilder` provides three convenient properties that consolidate the results from all processed datasets into single pandas DataFrames.

### `profiling_df`

Returns a DataFrame containing the full profiling metrics for every column across all datasets.

```python
# Get a single DataFrame of all column profiles
all_profiles = kb.profiling_df
print(all_profiles.head())
```
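
Because every dataset contributes rows to the same frame, quick shape-level checks become one-liners. A minimal sketch that assumes nothing about the frame beyond it being a regular pandas DataFrame:

```python
# How many column profiles were collected, and from how many datasets?
print(f"{len(all_profiles)} profiled columns across {len(kb.datasets)} datasets")
```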

### `links_df`

A shortcut to the `get_links_df()` method on the `LinkPredictor`, this property returns a DataFrame of all discovered relationships.

```python
# Get a DataFrame of all predicted links
all_links = kb.links_df
print(all_links)
```
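
Before `build()` has run there is no `LinkPredictor` attached, and the property falls back to an empty frame (see the `links_df` implementation in `src/intugle/knowledge_builder.py` below), so an emptiness check is a safe guard:

```python
# links_df returns an empty DataFrame until build() has populated the predictor.
if all_links.empty:
    print("No links yet; run kb.build() first.")
```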

### `glossary_df`

Returns a DataFrame that serves as a consolidated business glossary, listing the table name, column name, description, and tags for every column across all datasets.

```python
# Get a single, unified business glossary
full_glossary = kb.glossary_df
print(full_glossary.head())
```
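
The glossary columns are fixed by the implementation (`table_name`, `column_name`, `column_description`, `column_tags`; see `glossary_df` in `src/intugle/knowledge_builder.py` below), so tag-based filtering is straightforward. A sketch that looks for a hypothetical "PII" tag:

```python
# "PII" is an illustrative tag name, not one the library guarantees to emit.
pii_columns = full_glossary[
    full_glossary["column_tags"].apply(lambda tags: bool(tags) and "PII" in tags)
]
print(pii_columns[["table_name", "column_name"]])
```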
4 changes: 3 additions & 1 deletion docsite/docs/core-concepts/link-prediction.md
@@ -28,7 +28,9 @@ links_list = predictor_instance.links
To use the `LinkPredictor` manually, you must give it a list of fully profiled `DataSet` objects.

```python
from intugle import DataSet, LinkPredictor
from intugle.analysis.models import DataSet
from intugle.link_predictor.predictor import LinkPredictor

# 1. Initialize and fully profile your DataSet objects first
customers_data = {"path": "path/to/customers.csv", "type": "csv"}
6 changes: 3 additions & 3 deletions notebooks/quickstart_healthcare.ipynb
@@ -2916,7 +2916,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": null,
"id": "0aa894eb",
"metadata": {},
"outputs": [
@@ -3610,8 +3610,8 @@
}
],
"source": [
"allergies_dataset = kb.datasets['patients']\n",
"allergies_dataset.profiling_df\n"
"patients_dataset = kb.datasets['patients']\n",
"patients_dataset.profiling_df"
]
},
{
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "intugle"
version = "0.1.6"
version = "0.1.7"
authors = [
{ name="Intugle", email="[email protected]" },
]
34 changes: 32 additions & 2 deletions src/intugle/knowledge_builder.py
@@ -4,6 +4,8 @@

from typing import TYPE_CHECKING, Any, Awaitable, Dict, List, TypeVar

import pandas as pd

from intugle.analysis.models import DataSet
from intugle.core.console import console, success_style
from intugle.link_predictor.predictor import LinkPredictor
@@ -125,6 +127,35 @@ def build(self, force_recreate: bool = False):

        return self

    @property
    def profiling_df(self) -> pd.DataFrame:
        """Returns a consolidated DataFrame of profiling metrics for all datasets."""
        all_profiles = [dataset.profiling_df for dataset in self.datasets.values()]
        if not all_profiles:
            # No datasets registered yet; pd.concat([]) would raise ValueError.
            return pd.DataFrame()
        return pd.concat(all_profiles, ignore_index=True)

    @property
    def links_df(self) -> pd.DataFrame:
        """Returns the predicted links as a pandas DataFrame."""
        if hasattr(self, "link_predictor"):
            return self.link_predictor.get_links_df()
        return pd.DataFrame()

    @property
    def glossary_df(self) -> pd.DataFrame:
        """Returns a consolidated DataFrame of glossary information for all datasets."""
        glossary_data = []
        for dataset in self.datasets.values():
            for column in dataset.source_table_model.columns:
                glossary_data.append(
                    {
                        "table_name": dataset.name,
                        "column_name": column.name,
                        "column_description": column.description,
                        "column_tags": column.tags,
                    }
                )
        return pd.DataFrame(glossary_data)

    def initialize_semantic_search(self):
        """Initialize the semantic search engine."""
        try:
@@ -150,5 +181,4 @@ def search(self, query: str):
            return _run_async_in_sync(search_client.search(query))
        except Exception as e:
            log.error(f"Could not perform semantic search: {e}")
            raise e
16 changes: 15 additions & 1 deletion src/intugle/models/resources/model.py
@@ -13,6 +13,20 @@ class ColumnProfilingMetrics(SchemaBase):
    sample_data: Optional[List[Any]] = Field(default_factory=list)
    dtype_sample: Optional[List[Any]] = Field(default_factory=list, exclude=True)

    @property
    def uniqueness(self) -> Optional[float]:
        """The ratio of distinct values to total count."""
        if self.count is not None and self.distinct_count is not None and self.count > 0:
            return self.distinct_count / self.count
        return None

    @property
    def completeness(self) -> Optional[float]:
        """The ratio of non-null values to total count."""
        if self.count is not None and self.null_count is not None and self.count > 0:
            return (self.count - self.null_count) / self.count
        return None


class Column(SchemaBase):
    name: str
@@ -37,4 +51,4 @@ class ModelProfilingMetrics(SchemaBase):
class Model(BaseResource):
    resource_type: NodeType = NodeType.MODEL
    columns: List[Column] = Field(default_factory=list)
    profiling_metrics: Optional[ModelProfilingMetrics] = None
2 changes: 1 addition & 1 deletion uv.lock
