Skip to content
This repository was archived by the owner on Feb 18, 2026. It is now read-only.

Commit 35684de

Browse files
committed
PLAT 1424: Guard against foreign key cycles
GitOrigin-RevId: e2950807af423ca51e71d261e4a63f9d64328ec9
1 parent 32ac0e7 commit 35684de

File tree

3 files changed

+54
-1
lines changed

3 files changed

+54
-1
lines changed

src/gretel_trainer/relational/core.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import pandas as pd
2828
import smart_open
2929

30+
from networkx.algorithms.cycles import simple_cycles
3031
from networkx.algorithms.dag import dag_longest_path_length, topological_sort
3132
from networkx.classes.function import number_of_edges
3233
from pandas.api.types import is_string_dtype
@@ -202,6 +203,13 @@ def is_empty(self) -> bool:
202203
"""
203204
return not self.graph.number_of_nodes() > 0
204205

206+
@property
def foreign_key_cycles(self) -> list[list[str]]:
    """
    Enumerate the foreign key cycles present in the relationship graph.

    Each entry is one cycle, given as the list of table names taking part
    in it. A table whose foreign key refers back to itself is reported as
    a single-element cycle. The result is an empty list when the graph
    contains no cycles.
    """
    # `simple_cycles` is lazy; materialize it so callers can take `len()`
    # of the result and compare it against plain lists.
    cycle_iter = simple_cycles(self.graph)
    return list(cycle_iter)
212+
205213
def restore(self, tableset: dict[str, pd.DataFrame]) -> dict[str, pd.DataFrame]:
206214
"""Restores a given tableset (presumably output from some MultiTable workflow,
207215
i.e. transforms or synthetics) to its original shape (specifically, "re-nests"
@@ -828,7 +836,10 @@ def any_table_relationships(self) -> bool:
828836
return number_of_edges(self.graph) > 0
829837

830838
def debug_summary(self) -> dict[str, Any]:
831-
max_depth = dag_longest_path_length(self.graph)
839+
if len(self.foreign_key_cycles) > 0:
840+
max_depth = "indeterminate (cycles in foreign keys)"
841+
else:
842+
max_depth = dag_longest_path_length(self.graph)
832843
public_table_count = len(self.list_all_tables(Scope.PUBLIC))
833844
invented_table_count = len(self.list_all_tables(Scope.INVENTED))
834845

src/gretel_trainer/relational/multi_table.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,13 @@ def __init__(
108108
"or set `project` to run in an existing project."
109109
)
110110

111+
if len(cycles := relational_data.foreign_key_cycles) > 0:
112+
logger.warning(
113+
f"Detected cyclic foreign key relationships in schema: {cycles}. "
114+
"Support for cyclic table dependencies is limited. "
115+
"You may need to remove some foreign keys to ensure no cycles exist."
116+
)
117+
111118
self._strategy = _validate_strategy(strategy)
112119
self._set_refresh_interval(refresh_interval)
113120
self.relational_data = relational_data
@@ -611,6 +618,11 @@ def run_transforms(
611618
an additional level of privacy at the cost of referential integrity between transformed and
612619
original data.
613620
"""
621+
if encode_keys and len(self.relational_data.foreign_key_cycles) > 0:
622+
raise MultiTableException(
623+
"Cannot encode keys when schema includes cyclic foreign key relationships."
624+
)
625+
614626
if data is not None:
615627
unrunnable_tables = [
616628
table
@@ -735,6 +747,11 @@ def train_synthetics(
735747
Train synthetic data models for the tables in the tableset,
736748
optionally scoped by either `only` or `ignore`.
737749
"""
750+
if len(self.relational_data.foreign_key_cycles) > 0:
751+
raise MultiTableException(
752+
"Cyclic foreign key relationships are not supported by relational synthetics."
753+
)
754+
738755
if config is None:
739756
config = self._strategy.default_config
740757

tests/relational/test_relational_data.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,31 @@ def in_order(col, t1, t2):
193193
assert in_order(tables, "users", "order_items")
194194

195195

196+
def test_detect_cycles(ecom):
    """Cycle detection reports self-references and multi-table loops."""
    # The fixture starts out without any cyclic foreign keys.
    assert ecom.foreign_key_cycles == []

    # A table referring to itself is the smallest possible cycle.
    self_reference = {
        "table": "users",
        "constrained_columns": ["first_name"],
        "referred_table": "users",
        "referred_columns": ["last_name"],
    }
    ecom.add_foreign_key_constraint(**self_reference)
    # Called only to confirm it does not raise once the schema is cyclic.
    ecom.debug_summary()

    assert ecom.foreign_key_cycles == [["users"]]
    summary = ecom.debug_summary()
    assert "indeterminate" in summary["max_depth"]

    # Point users at events. The expectation below shows this closes a loop
    # between the two tables (presumably events already refers to users in
    # the fixture -- confirm against the fixture definition).
    users_to_events = {
        "table": "users",
        "constrained_columns": ["first_name"],
        "referred_table": "events",
        "referred_columns": ["user_id"],
    }
    ecom.add_foreign_key_constraint(**users_to_events)

    # Neither the order of cycles nor the order of tables within a cycle is
    # pinned down here, so normalize both before comparing.
    sorted_cycles = sorted(map(sorted, ecom.foreign_key_cycles))
    assert sorted_cycles == [["events", "users"], ["users"]]
219+
220+
196221
def test_debug_summary(ecom, mutagenesis):
197222
assert ecom.debug_summary() == {
198223
"foreign_key_count": 6,

0 commit comments

Comments
 (0)