Skip to content

Commit c32d6d4

Browse files
script3r and claude committed
Add string-based ontology API to eliminate double-interning
New constructors for ontology types that accept strings:

- IdentityKey::from_names(vec!["name", "email"], "key_name")
- StrongIdentifier::from_name("ssn", "ssn_unique")
- Constraint::unique_from_name("email", "unique_email")

The engine automatically interns pending attribute names when the ontology is applied, eliminating the need to:

1. Create a temporary engine to get AttrIds
2. Configure the ontology with those IDs
3. Create a new engine with the configured ontology
4. Re-intern attributes

Before:

    let mut engine = Unirust::new(ontology.clone());
    let name = engine.intern_attr("name");
    ontology.add_identity_key(IdentityKey::new(vec![name], "key"));
    let mut engine = Unirust::new(ontology); // recreate!
    let name = engine.intern_attr("name"); // re-intern!

After:

    ontology.add_identity_key(IdentityKey::from_names(vec!["name"], "key"));
    let mut engine = Unirust::new(ontology); // auto-interns
    let name = engine.intern_attr("name");

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 3b39ae5 commit c32d6d4

File tree

5 files changed

+238
-66
lines changed

5 files changed

+238
-66
lines changed

examples/in_memory.rs

Lines changed: 12 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -40,37 +40,25 @@ fn main() -> anyhow::Result<()> {
4040
// and strong identifiers as "these records are DEFINITELY different if this differs"
4141

4242
let mut ontology = Ontology::new();
43-
let mut engine = Unirust::new(ontology.clone());
44-
45-
// Create attribute IDs (interned strings for efficiency)
46-
let name_attr = engine.intern_attr("name");
47-
let email_attr = engine.intern_attr("email");
48-
let _phone_attr = engine.intern_attr("phone");
49-
let ssn_attr = engine.intern_attr("ssn");
5043

5144
// Identity Key: Records with same (name + email) are candidates for merging
52-
ontology.add_identity_key(IdentityKey::new(
53-
vec![name_attr, email_attr],
54-
"name_email".to_string(),
45+
// Using from_names() - no need to pre-intern attributes!
46+
ontology.add_identity_key(IdentityKey::from_names(
47+
vec!["name", "email"],
48+
"name_email",
5549
));
5650

5751
// Strong Identifier: SSN uniquely identifies a person
5852
// If two records have different SSNs, they CANNOT be merged
59-
ontology.add_strong_identifier(StrongIdentifier::new(ssn_attr, "ssn_unique".to_string()));
53+
ontology.add_strong_identifier(StrongIdentifier::from_name("ssn", "ssn_unique"));
6054

6155
println!("Ontology configured:");
6256
println!(" - Identity Key: name + email (records match if both are equal)");
6357
println!(" - Strong ID: SSN (prevents merging if different)\n");
6458

65-
// Recreate engine with the configured ontology
59+
// Create the engine - ontology attributes are automatically interned
6660
let mut engine = Unirust::new(ontology);
6761

68-
// Re-intern attributes (required after creating new engine)
69-
let name_attr = engine.intern_attr("name");
70-
let email_attr = engine.intern_attr("email");
71-
let phone_attr = engine.intern_attr("phone"); // Used in records below
72-
let ssn_attr = engine.intern_attr("ssn");
73-
7462
// =========================================================================
7563
// Step 2: Create Records from Multiple Source Systems
7664
// =========================================================================
@@ -81,7 +69,12 @@ fn main() -> anyhow::Result<()> {
8169
// - RecordIdentity: Source system info (entity_type, perspective, uid)
8270
// - Descriptors: Attribute values with temporal validity intervals
8371

84-
// Intern the values we'll use
72+
// Intern attributes and values for use in records
73+
let name_attr = engine.intern_attr("name");
74+
let email_attr = engine.intern_attr("email");
75+
let phone_attr = engine.intern_attr("phone");
76+
let ssn_attr = engine.intern_attr("ssn");
77+
8578
let john_name = engine.intern_value("John Doe");
8679
let john_email = engine.intern_value("john@example.com");
8780
let ssn_123 = engine.intern_value("123-45-6789");

examples/persistent_shard.rs

Lines changed: 16 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -73,47 +73,38 @@ fn main() -> anyhow::Result<()> {
7373
println!("Using tuning profile: {:?}\n", profile);
7474

7575
// =========================================================================
76-
// Step 3: Create Engine with Store
76+
// Step 3: Configure Ontology
7777
// =========================================================================
7878
//
79-
// Start with empty ontology - we'll configure it after interning attrs
79+
// Using from_names() - no need to pre-intern attributes!
80+
// The engine will automatically intern them when created.
8081

81-
let ontology = Ontology::new();
82-
let mut engine = Unirust::with_store_and_tuning(ontology, store, tuning.clone());
82+
let mut ontology = Ontology::new();
83+
84+
ontology.add_identity_key(IdentityKey::from_names(
85+
vec!["name", "email"],
86+
"name_email",
87+
));
88+
89+
ontology.add_strong_identifier(StrongIdentifier::from_name("ssn", "ssn_unique"));
90+
91+
println!("Ontology configured with identity key (name+email) and strong ID (ssn)\n");
8392

8493
// =========================================================================
85-
// Step 4: Configure Ontology
94+
// Step 4: Create Engine with Store and Ontology
8695
// =========================================================================
8796
//
88-
// Intern attribute IDs through the engine (which owns the store)
89-
let name_attr = engine.intern_attr("name");
90-
let email_attr = engine.intern_attr("email");
91-
let _phone_attr = engine.intern_attr("phone");
92-
let ssn_attr = engine.intern_attr("ssn");
93-
let _dept_attr = engine.intern_attr("department");
94-
95-
// Create ontology with interned attributes
96-
let mut ontology = Ontology::new();
97-
ontology.add_identity_key(IdentityKey::new(
98-
vec![name_attr, email_attr],
99-
"name_email".to_string(),
100-
));
101-
ontology.add_strong_identifier(StrongIdentifier::new(ssn_attr, "ssn_unique".to_string()));
97+
// The engine automatically interns the ontology's attribute names.
10298

103-
// Recreate engine with configured ontology
104-
// Note: Attributes are persisted, so they'll get the same IDs
105-
let store = PersistentStore::open(&data_dir)?;
10699
let mut engine = Unirust::with_store_and_tuning(ontology, store, tuning);
107100

108-
// Re-intern attrs (will get same IDs from persisted interner)
101+
// Intern attributes for use in records
109102
let name_attr = engine.intern_attr("name");
110103
let email_attr = engine.intern_attr("email");
111104
let phone_attr = engine.intern_attr("phone");
112105
let ssn_attr = engine.intern_attr("ssn");
113106
let dept_attr = engine.intern_attr("department");
114107

115-
println!("Ontology configured with identity key (name+email) and strong ID (ssn)\n");
116-
117108
// =========================================================================
118109
// Step 5: Generate and Ingest Records
119110
// =========================================================================
@@ -240,12 +231,6 @@ fn main() -> anyhow::Result<()> {
240231
println!(" 3. Re-open with PersistentStore::open()");
241232
println!(" 4. All data would be recovered automatically");
242233

243-
// To actually demonstrate recovery:
244-
// drop(engine);
245-
// let store = PersistentStore::open(&data_dir)?;
246-
// let engine = Unirust::with_store_and_tuning(ontology, store, tuning);
247-
// println!("Recovered {} records", engine.stats().record_count);
248-
249234
println!("\n✓ Example completed successfully!");
250235
println!("\nNext steps:");
251236
println!(" - Try 'cargo run --example cluster' for distributed mode");

src/conflicts.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1119,7 +1119,7 @@ impl ConflictDetector {
11191119
// Check constraints across clusters (for unique constraints)
11201120
for constraint in &ontology.constraints {
11211121
match constraint {
1122-
Constraint::Unique { attribute, name } => {
1122+
Constraint::Unique { attribute, name, .. } => {
11231123
let cross_cluster_violations = Self::check_unique_constraint_across_clusters(
11241124
store, clusters, *attribute, name,
11251125
)?;
@@ -1262,12 +1262,12 @@ impl ConflictDetector {
12621262
let mut violations = Vec::new();
12631263

12641264
match constraint {
1265-
Constraint::Unique { attribute, name } => {
1265+
Constraint::Unique { attribute, name, .. } => {
12661266
let violations_for_attr =
12671267
Self::check_unique_constraint(store, cluster, *attribute, name)?;
12681268
violations.extend(violations_for_attr);
12691269
}
1270-
Constraint::UniqueWithinPerspective { attribute, name } => {
1270+
Constraint::UniqueWithinPerspective { attribute, name, .. } => {
12711271
let violations_for_attr = Self::check_unique_within_perspective_constraint(
12721272
store, cluster, *attribute, name,
12731273
)?;

src/lib.rs

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -212,10 +212,17 @@ impl Unirust {
212212
}
213213

214214
/// Create a new Unirust instance with a custom store implementation.
215-
pub fn with_store<S>(ontology: Ontology, store: S) -> Self
215+
///
216+
/// Any identity keys, strong identifiers, or constraints created with string-based
217+
/// APIs (e.g., `IdentityKey::from_names`) will have their attribute names automatically
218+
/// interned using the store's interner.
219+
pub fn with_store<S>(mut ontology: Ontology, mut store: S) -> Self
216220
where
217221
S: RecordStore + 'static,
218222
{
223+
// Intern any pending attribute names in the ontology
224+
ontology.intern_attributes(|name| store.intern_attr(name));
225+
219226
Self {
220227
store: Box::new(store),
221228
ontology,
@@ -228,10 +235,18 @@ impl Unirust {
228235
}
229236
}
230237

231-
pub fn with_store_and_tuning<S>(ontology: Ontology, store: S, tuning: StreamingTuning) -> Self
238+
/// Create a new Unirust instance with a custom store and tuning configuration.
239+
///
240+
/// Any identity keys, strong identifiers, or constraints created with string-based
241+
/// APIs (e.g., `IdentityKey::from_names`) will have their attribute names automatically
242+
/// interned using the store's interner.
243+
pub fn with_store_and_tuning<S>(mut ontology: Ontology, mut store: S, tuning: StreamingTuning) -> Self
232244
where
233245
S: RecordStore + 'static,
234246
{
247+
// Intern any pending attribute names in the ontology
248+
ontology.intern_attributes(|name| store.intern_attr(name));
249+
235250
Self {
236251
store: Box::new(store),
237252
ontology,

0 commit comments

Comments
 (0)