From 340396886d547486840e29fb1c9669c7c0944e56 Mon Sep 17 00:00:00 2001
From: Tyler Riccio <tylerriccio8@gmail.com>
Date: Sat, 31 May 2025 11:49:18 -0400
Subject: [PATCH 1/3] build out compare testing

---
 tests/test_compare.py | 36 +++++++++++++++++++++++++++++-------
 1 file changed, 29 insertions(+), 7 deletions(-)

diff --git a/tests/test_compare.py b/tests/test_compare.py
index d8bf0ca07..8e082f392 100644
--- a/tests/test_compare.py
+++ b/tests/test_compare.py
@@ -1,15 +1,37 @@
 from __future__ import annotations
 import pytest
+import polars as pl
 
-from pointblank.compare import Compare
-import polars.testing.parametric as pt
-from hypothesis import given
+from pointblank.compare import Compare, MetaSummary
 
 
-@pytest.mark.xfail
-def test_compare_basic(dfa, dfb) -> None:
-    comp = Compare(dfa, dfb)
+def test_compare_basic() -> None:
+    df1 = {
+        "a": [1, 2, 3],
+        "b": [4, 5, 6],
+    }
+    df2 = {
+        "a": [1, 2, 3],
+        "b": ["4", "5", "7"],  # strings here, ints in df1 -> "b" should be a type conflict
+        "c": [8, 9, 10],  # column present only in df2
+    }
+    data1 = pl.DataFrame(df1)
+    data2 = pl.DataFrame(df2)
+    comp = Compare(data1, data2)
 
     comp.compare()
 
-    raise NotImplementedError
+    ## Pull out the summary data (read after `compare()` has run)
+    summary: MetaSummary = comp.meta_summary
+
+    assert summary.name == ["a", "b"]  # names expected when neither frame has a table name
+    assert summary.n_observations == (3, 3)  # both frames have three rows
+    assert summary.n_variables == (2, 3)  # df1 has a, b; df2 has a, b, c
+    assert summary.in_a_only == set()
+    assert summary.in_b_only == {"c"}
+    assert summary.in_both == {"a", "b"}
+    assert summary.conflicting_types == ["b"]
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])  # run this file's tests verbosely when executed as a script

From 4bf99cbacb8d6ecb5fab1a5a30983bbf8eacdc99 Mon Sep 17 00:00:00 2001
From: Tyler Riccio <tylerriccio8@gmail.com>
Date: Sat, 31 May 2025 11:49:38 -0400
Subject: [PATCH 2/3] add __getitem__ to data profile

---
 pointblank/scan_profile.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/pointblank/scan_profile.py b/pointblank/scan_profile.py
index efc4f1d3f..55ca0e8f7 100644
--- a/pointblank/scan_profile.py
+++ b/pointblank/scan_profile.py
@@ -257,6 +257,13 @@ def __init__(
         self.implementation = implementation
         self.column_profiles: list[ColumnProfile] = []
 
+    def __getitem__(self, colname: str) -> ColumnProfile:
+        """Return the column profile whose `colname` matches; raise `KeyError` if none does."""
+        for prof in self.column_profiles:  # linear scan; the first match wins
+            if prof.colname == colname:
+                return prof
+        raise KeyError(f"Column profile for '{colname}' not found.")
+
     def set_row_count(self, data: Frame) -> None:
         assert self.columns  # internal: cols should already be set
 

From 534e8c3105102c263e361f2323453188ca2c2cad Mon Sep 17 00:00:00 2001
From: Tyler Riccio <tylerriccio8@gmail.com>
Date: Sat, 31 May 2025 11:49:54 -0400
Subject: [PATCH 3/3] add meta summary calculation

---
 pointblank/compare.py | 71 ++++++++++++++++++++++++++++++++++++-------
 1 file changed, 60 insertions(+), 11 deletions(-)

diff --git a/pointblank/compare.py b/pointblank/compare.py
index 04dd6ca95..f61068741 100644
--- a/pointblank/compare.py
+++ b/pointblank/compare.py
@@ -1,27 +1,76 @@
 from __future__ import annotations
 
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, NamedTuple
+
+import narwhals as nw
 
 from pointblank import DataScan
 
 if TYPE_CHECKING:
-    from narwhals.typing import IntoFrame
+    from typing import Any
+
+    from narwhals.typing import IntoFrameT
 
 
 class Compare:
-    def __init__(self, a: IntoFrame, b: IntoFrame) -> None:
-        self.a: IntoFrame = a
-        self.b: IntoFrame = b
+    def __init__(self, a: IntoFrameT, b: IntoFrameT, backend: Any = None) -> None:
+        self.a: IntoFrameT = a  # first frame; NOTE(review): `backend` is accepted but unused here
+        self.b: IntoFrameT = b  # second frame
 
     def compare(self) -> None:
-        ## Scan both frames
         self._scana = DataScan(self.a)
+        self._asummary: nw.DataFrame = nw.from_native(self._scana.summary_data)
         self._scanb = DataScan(self.b)
+        self._bsummary: nw.DataFrame = nw.from_native(self._scanb.summary_data)
+
+    @property
+    def meta_summary(self) -> MetaSummary:
+        """Return a `MetaSummary` of row/column counts, column overlap and type conflicts."""
+        # TODO: elegant error if compare is not called first (the `_scan*` attributes are set there)
+
+        ## Number of rows in each frame:
+        arows: int = self._scana.profile.row_count
+        brows: int = self._scanb.profile.row_count
+
+        ## Number of variables (columns) in each frame:
+        avars: int = len(self._scana.profile.columns)
+        bvars: int = len(self._scanb.profile.columns)
+
+        ## Column-name sets: only in `a`, only in `b`, and in both:
+        acols: set[str] = set(self._scana.profile.columns)
+        bcols: set[str] = set(self._scanb.profile.columns)
+        aonly: set[str] = acols - bcols
+        bonly: set[str] = bcols - acols
+        bothcols: set[str] = acols & bcols
+
+        ## Shared columns whose `coltype` differs (sorted so the list order is deterministic):
+        conflicting: list[str] = []
+        for col in sorted(bothcols):
+            atype = self._scana.profile[col].coltype
+            btype = self._scanb.profile[col].coltype
+            if atype != btype:
+                conflicting.append(col)
+
+        ## Table names, falling back to "a"/"b" when a scan has none:
+        aname: str = self._scana.profile.table_name or "a"
+        bname: str = self._scanb.profile.table_name or "b"
 
-        ## Get summary outs
-        summarya = self._scana.summary_data
-        summaryb = self._scana.summary_data
+        return MetaSummary(
+            name=[aname, bname],
+            n_observations=(arows, brows),
+            n_variables=(avars, bvars),
+            in_a_only=aonly,
+            in_b_only=bonly,
+            in_both=bothcols,
+            conflicting_types=conflicting,
+        )
 
-        summarya.columns
 
-        self._scana.profile
+class MetaSummary(NamedTuple):  # record returned by `Compare.meta_summary`
+    name: list[str]  # [name of a, name of b]; "a"/"b" when a table name is missing
+    n_observations: tuple[int, int]  # (row count of a, row count of b)
+    n_variables: tuple[int, int]  # (column count of a, column count of b)
+    in_a_only: set[str]  # column names present in a but not in b
+    in_b_only: set[str]  # column names present in b but not in a
+    in_both: set[str]  # column names present in both frames
+    conflicting_types: list[str]  # shared columns whose `coltype` differs between a and b