Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
a91262d
fix(rdf): converge Fuseki state on weekly rebuilds and isolate API la…
harshach May 14, 2026
9b6e112
fix(rdf): address PR review — preserve relationships, scope DELETEs, …
harshach May 14, 2026
26f743b
test(rdf): expect per-source clear on batches whose relationships are…
harshach May 15, 2026
10fe181
fix(rdf): address remaining PR review nits
harshach May 15, 2026
1676feb
fix(rdf): surface cleanup failures, sync fallback predicates, time-bo…
harshach May 15, 2026
4567706
fix(rdf): qualify EntityRelationship in test to fix compile
harshach May 15, 2026
2fc1147
Merge branch 'main' into harshach/rdf-fuseki-duplicate-relations
harshach May 15, 2026
ef9bb30
fix(rdf): drop QueryExecution.setTimeout — removed in Jena 5 used by …
harshach May 15, 2026
7a1fae7
fix(rdf): align ontology-loaded check, predicate URIs, and CURIE fall…
harshach May 15, 2026
e2575d5
fix(rdf): schema default + migration force entities=[all] for safe fu…
harshach May 15, 2026
22d5825
fix(rdf): scope storeEntity DELETE to translator-managed predicates
harshach May 15, 2026
857c097
fix(rdf): scope reconciliation DELETE to relationship-hook predicates…
harshach May 15, 2026
63d9864
fix(rdf): scope bulk reconciliation to batch entities, not all relati…
harshach May 15, 2026
66884e2
fix(rdf): time-bound HTTP request bodies via CompletableFuture wrapper
harshach May 15, 2026
0c4345b
docs(rdf): document RdfUpdater async-ordering trade-off in submitAsync
harshach May 15, 2026
4242c15
fix(rdf): atomic clear+insert, broader fallback predicate set, close …
harshach May 15, 2026
03c5d4f
fix(rdf): make buildPredicateInList public so JenaFusekiStorage can u…
harshach May 15, 2026
9eeca99
fix(rdf): normalise sourcesToReconcile to empty-set to prevent NPE in…
harshach May 15, 2026
28fb585
test(rdf): update RdfIndexAppTest verifications for the new bulkAddRe…
harshach May 15, 2026
53a83b7
fix(rdf): four follow-up findings from Copilot review 4299978111
harshach May 15, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1 +1,29 @@
-- Post data migration script for Task workflow cutover - OpenMetadata 2.0.1

-- RdfIndexApp: switch to weekly Saturday cron and full-rebuild every run.
-- Previous defaults (daily, incremental) were producing unbounded triple growth
-- because relationship-removal paths weren't fully reconciled. With per-run
-- CLEAR ALL the dataset always converges to MySQL state; weekly cadence keeps
-- per-run cost from saturating Fuseki.
--
-- Also rewrite `entities` to `["all"]`. Pre-upgrade, an operator could have
-- narrowed RDF indexing to a subset of entity types; the new recreateIndex=true
-- semantics issues a CLEAR ALL before indexing, which would otherwise wipe
-- triples for entity types still in MySQL but missing from the subset list.
-- Forcing the subset list back to `["all"]` ensures the post-CLEAR-ALL run
-- repopulates the graph fully; operators can re-narrow after the migration if
-- they need partial indexing.
-- A single JSON_SET applies its path/value pairs left to right, so this is
-- equivalent to the nested form: recreateIndex and the cron expression are
-- written first, then the entities array is forced back to ["all"].
UPDATE installed_apps
SET json = JSON_SET(
    json,
    '$.appConfiguration.recreateIndex', CAST('true' AS JSON),
    '$.appSchedule.cronExpression', '0 0 * * 6',
    '$.appConfiguration.entities', JSON_ARRAY('all')
)
WHERE name = 'RdfIndexApp';

-- Mirror the recreateIndex=true default in the marketplace listing so that
-- fresh installs of RdfIndexApp also start in full-rebuild mode. Only the
-- recreateIndex flag is rewritten here; schedule and entity-subset settings
-- are per-install state and are handled in the installed_apps update above.
-- NOTE(review): assumes apps_marketplace rows carry the same
-- appConfiguration shape as installed_apps — confirm against the app schema.
UPDATE apps_marketplace
SET json = JSON_SET(json, '$.appConfiguration.recreateIndex', CAST('true' AS JSON))
WHERE name = 'RdfIndexApp';
Original file line number Diff line number Diff line change
@@ -1 +1,30 @@
-- Post data migration script for Task workflow cutover - OpenMetadata 2.0.1

-- RdfIndexApp: switch to weekly Saturday cron and full-rebuild every run.
-- Previous defaults (daily, incremental) were producing unbounded triple growth
-- because relationship-removal paths weren't fully reconciled. With per-run
-- CLEAR ALL the dataset always converges to MySQL state; weekly cadence keeps
-- per-run cost from saturating Fuseki.
--
-- Also rewrite `entities` to `["all"]`. Pre-upgrade, an operator could have
-- narrowed RDF indexing to a subset of entity types; the new recreateIndex=true
-- semantics issues a CLEAR ALL before indexing, which would otherwise wipe
-- triples for entity types still in MySQL but missing from the subset list.
-- Forcing the subset list back to `["all"]` ensures the post-CLEAR-ALL run
-- repopulates the graph fully; operators can re-narrow after the migration if
-- they need partial indexing.
-- Nested jsonb_set calls apply innermost first: recreateIndex, then the
-- weekly Saturday cron expression, then the entities array forced back to
-- ["all"]. Replacement values carry explicit ::jsonb casts for clarity;
-- the literals themselves are unchanged.
UPDATE installed_apps
SET json = jsonb_set(
    jsonb_set(
        jsonb_set(json::jsonb, '{appConfiguration,recreateIndex}', 'true'::jsonb),
        '{appSchedule,cronExpression}',
        '"0 0 * * 6"'::jsonb
    ),
    '{appConfiguration,entities}',
    '["all"]'::jsonb
)
WHERE name = 'RdfIndexApp';

-- Mirror the recreateIndex=true default in the marketplace listing so that
-- fresh installs of RdfIndexApp also start in full-rebuild mode. Only the
-- recreateIndex flag is rewritten here; schedule and entity-subset settings
-- are per-install state and are handled in the installed_apps update above.
-- NOTE(review): assumes apps_marketplace rows carry the same
-- appConfiguration shape as installed_apps — confirm against the app schema.
UPDATE apps_marketplace
SET json = jsonb_set(json::jsonb, '{appConfiguration,recreateIndex}', 'true')
WHERE name = 'RdfIndexApp';
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
package org.openmetadata.service.apps.bundles.rdf;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.UUID;
Expand Down Expand Up @@ -208,18 +209,38 @@ public RelationshipProcessingResult processBatchRelationships(
}
}

if (!allRelationships.isEmpty()) {
try {
rdfRepository.bulkAddRelationships(allRelationships);
} catch (Exception e) {
LOG.error(
"Failed to bulk add {} relationships for entity type {}",
allRelationships.size(),
entityType,
e);
failures += allRelationships.size();
lastError = describeBulkError(entityType, "bulkRelationships", e);
}
// Reconcile EVERY entity in the batch — not just those with current
// outgoing relationships. An entity whose last outgoing relationship was
// removed in MySQL contributes zero RelationshipData entries to
// allRelationships; we pass it explicitly via batchSources so
// bulkAddRelationships' per-source DELETE still fires for it.
//
// The clear+insert run in a SINGLE SPARQL update inside
// JenaFusekiStorage.bulkStoreRelationships, so the operation is atomic
// at the Fuseki side — a transient error can't leave the graph wiped
// without the replacement edges in place. (Previously the clear ran in
// a separate call to clearOutgoingEntityRelationships; if the
// subsequent bulkAdd failed, batch sources lost their relationships
// until the next weekly recreate-index.)
Set<RdfRepository.EntitySourceRef> batchSources = new HashSet<>();
for (EntityInterface entity : entities) {
batchSources.add(new RdfRepository.EntitySourceRef(entityType, entity.getId()));
}
try {
// Pass batchSources so bulkStoreRelationships only reconciles edges
// for entities IN this batch. Incoming-lineage rows can carry source
// IDs that are outside the batch (the `from` of an UPSTREAM edge
// where this batch's entity is the `to`); reconciling those would
// wipe the outside-batch entity's unrelated outgoing edges.
rdfRepository.bulkAddRelationships(allRelationships, batchSources);
} catch (Exception e) {
Comment thread
harshach marked this conversation as resolved.
LOG.error(
"Failed to bulk add {} relationships for entity type {}",
allRelationships.size(),
entityType,
e);
failures += allRelationships.size();
lastError = describeBulkError(entityType, "bulkRelationships", e);
}
} catch (Exception e) {
LOG.error("Failed to process batch relationships for entity type {}", entityType, e);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,22 @@ private void initializeJob(JobExecutionContext jobExecutionContext) {
rdfIndexStats.set(initializeTotalRecords(jobData.getEntities()));
jobData.setStats(rdfIndexStats.get());

// bulkAddGlossaryTermRelations has no per-batch DELETE side, so stale
// glossary-term relations would accumulate forever across reindex runs.
// When recreateIndex=true clearAll() already wipes everything, so we
// only need this targeted cleanup on incremental runs.
//
// Let the failure propagate: clearAllGlossaryTermRelations rethrows on
// failure precisely so the indexer can fail loudly instead of silently
// marking a job successful while the graph still has stale predicates.
// The outer try/catch in execute() will set the run status to FAILED.
if (!Boolean.TRUE.equals(jobData.getRecreateIndex())
&& jobData.getEntities() != null
&& jobData.getEntities().contains(Entity.GLOSSARY_TERM)) {
LOG.info("Clearing existing glossary term relations before re-indexing");
rdfRepository.clearAllGlossaryTermRelations();
}

if (Boolean.TRUE.equals(jobData.getUseDistributedIndexing())) {
sendUpdates(jobExecutionContext, true);
return;
Expand Down Expand Up @@ -242,6 +258,10 @@ private void clearRdfData() {
try {
rdfRepository.clearAll();
LOG.info("Cleared all RDF data");
// CLEAR ALL wipes the ontology and shapes graphs as well; reload them
// before indexing starts so SPARQL queries that depend on the ontology
// (inference, federated, etc.) work after the wipe.
rdfRepository.reloadOntologies();
} catch (Exception e) {
LOG.error("Failed to clear RDF data", e);
throw new RuntimeException("Failed to clear RDF data", e);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,11 @@ public boolean areOntologiesLoaded() {
String checkQuery = "ASK { GRAPH <" + ONTOLOGY_GRAPH + "> { ?s ?p ?o } }";
String result =
rdfRepository.executeSparqlQuery(checkQuery, "application/sparql-results+json");
return result.contains("\"boolean\" : true");
// JenaFusekiStorage formats ASK results as `{"head": {}, "boolean": true}`
// (no space before the colon), so a literal-substring check that includes
// a space would never match. Normalise whitespace and match either form.
String normalised = result.replaceAll("\\s+", "");
return normalised.contains("\"boolean\":true");
} catch (Exception e) {
LOG.error("Failed to check if ontologies are loaded", e);
return false;
Expand Down
Loading
Loading