graphprotocol
diff --git a/‎core/src/subgraph/instance_manager.rs
Lines changed: 40 additions & 25 deletions b/‎core/src/subgraph/instance_manager.rs
Lines changed: 40 additions & 25 deletions
diff --git a/‎graph/src/components/store.rs
Lines changed: 14 additions & 7 deletions b/‎graph/src/components/store.rs
Lines changed: 14 additions & 7 deletions
@@ -458,7 +458,8 @@ where
     let store_for_err = ctx.inputs.store.cheap_clone();
     let logger = ctx.state.logger.cheap_clone();
     let id_for_err = ctx.inputs.deployment.hash.clone();
-    let mut first_run = true;
+    let mut should_try_unfail_deterministic = true;
+    let mut should_try_unfail_non_deterministic = true;
 
     loop {
         debug!(logger, "Starting or restarting subgraph");
@@ -592,30 +593,35 @@ where
             let start = Instant::now();
             let deployment_failed = ctx.block_stream_metrics.deployment_failed.clone();
 
-            // If the subgraph is failed, unfail it and revert the block on which
-            // it failed so that it is reprocessed. This gives the subgraph a chance
-            // to move past errors.
+            // If a subgraph failed for deterministic reasons, before processing a new block, we
+            // revert the deployment head. It should lead to the same result since the error was
+            // deterministic.
             //
             // As an optimization we check this only on the first run.
-            if first_run {
-                first_run = false;
-
-                let (current_ptr, parent_ptr) = match ctx.inputs.store.block_ptr()? {
-                    Some(current_ptr) => {
-                        let parent_ptr =
-                            ctx.inputs.triggers_adapter.parent_ptr(&current_ptr).await?;
-                        (Some(current_ptr), parent_ptr)
+            if should_try_unfail_deterministic {
+                should_try_unfail_deterministic = false;
+
+                if let Some(current_ptr) = ctx.inputs.store.block_ptr()? {
+                    if let Some(parent_ptr) =
+                        ctx.inputs.triggers_adapter.parent_ptr(&current_ptr).await?
+                    {
+                        // This reverts the deployment head to the parent_ptr if
+                        // deterministic errors happened.
+                        //
+                        // There's no point in calling it if we have no current or parent block
+                        // pointers, because there would be: no block to revert to or to search
+                        // errors from (first execution).
+                        ctx.inputs
+                            .store
+                            .unfail_deterministic_error(&current_ptr, &parent_ptr)?;
                     }
-                    None => (None, None),
-                };
-
-                ctx.inputs.store.unfail(current_ptr, parent_ptr)?;
+                }
             }
 
             let res = process_block(
                 &logger,
                 ctx.inputs.triggers_adapter.cheap_clone(),
-                ctx,
+                &mut ctx,
                 block_stream_cancel_handle.clone(),
                 block,
                 cursor.into(),
@@ -626,8 +632,17 @@ where
             subgraph_metrics.block_processing_duration.observe(elapsed);
 
             match res {
-                Ok((c, needs_restart)) => {
-                    ctx = c;
+                Ok(needs_restart) => {
+                    // Runs only once
+                    if should_try_unfail_non_deterministic {
+                        should_try_unfail_non_deterministic = false;
+
+                        // If the deployment head advanced, we can unfail
+                        // the non-deterministic error (if there's any).
+                        ctx.inputs
+                            .store
+                            .unfail_non_deterministic_error(&block_ptr)?;
+                    }
 
                     deployment_failed.set(0.0);
 
@@ -719,11 +734,11 @@ impl From<StoreError> for BlockProcessingError {
 async fn process_block<T: RuntimeHostBuilder<C>, C: Blockchain>(
     logger: &Logger,
     triggers_adapter: Arc<C::TriggersAdapter>,
-    mut ctx: IndexingContext<T, C>,
+    ctx: &mut IndexingContext<T, C>,
     block_stream_cancel_handle: CancelHandle,
     block: BlockWithTriggers<C>,
     firehose_cursor: Option<String>,
-) -> Result<(IndexingContext<T, C>, bool), BlockProcessingError> {
+) -> Result<bool, BlockProcessingError> {
     let triggers = block.trigger_data;
     let block = Arc::new(block.block);
     let block_ptr = block.ptr();
@@ -797,7 +812,7 @@ async fn process_block<T: RuntimeHostBuilder<C>, C: Blockchain>(
             // Losing the cache is a bit annoying but not an issue for correctness.
             //
             // See also b21fa73b-6453-4340-99fb-1a78ec62efb1.
-            return Ok((ctx, true));
+            return Ok(true);
         }
     };
 
@@ -818,7 +833,7 @@ async fn process_block<T: RuntimeHostBuilder<C>, C: Blockchain>(
         // Instantiate dynamic data sources, removing them from the block state.
         let (data_sources, runtime_hosts) = create_dynamic_data_sources(
             logger.clone(),
-            &mut ctx,
+            ctx,
             host_metrics.clone(),
             block_state.drain_created_data_sources(),
         )?;
@@ -849,7 +864,7 @@ async fn process_block<T: RuntimeHostBuilder<C>, C: Blockchain>(
         // and add runtimes for the data sources to the subgraph instance.
         persist_dynamic_data_sources(
             logger.clone(),
-            &mut ctx,
+            ctx,
             &mut block_state.entity_cache,
             data_sources,
         );
@@ -1004,7 +1019,7 @@ async fn process_block<T: RuntimeHostBuilder<C>, C: Blockchain>(
                 return Err(BlockProcessingError::Canceled);
             }
 
-            Ok((ctx, needs_restart))
+            Ok(needs_restart)
         }
 
         Err(e) => Err(anyhow!("Error while processing block stream for a subgraph: {}", e).into()),
 
@@ -1016,15 +1016,18 @@ pub trait WritableStore: Send + Sync + 'static {
     /// `block_ptr_to` must point to the parent block of the subgraph block pointer.
     fn revert_block_operations(&self, block_ptr_to: BlockPtr) -> Result<(), StoreError>;
 
-    /// This method:
-    /// - Sets the SubgraphDeployment status accordingly to it's SubgraphErrors
-    /// - Reverts block operations to the parent block if necessary
-    fn unfail(
+    /// If a deterministic error happened, this function reverts the block operations from the
+    /// current block to the previous block.
+    fn unfail_deterministic_error(
         &self,
-        current_ptr: Option<BlockPtr>,
-        parent_ptr: Option<BlockPtr>,
+        current_ptr: &BlockPtr,
+        parent_ptr: &BlockPtr,
     ) -> Result<(), StoreError>;
 
+    /// If a non-deterministic error happened and the current deployment head is past the error
+    /// block range, this function unfails the subgraph and deletes the error.
+    fn unfail_non_deterministic_error(&self, current_ptr: &BlockPtr) -> Result<(), StoreError>;
+
     /// Set subgraph status to failed with the given error as the cause.
     async fn fail_subgraph(&self, error: SubgraphError) -> Result<(), StoreError>;
 
@@ -1203,7 +1206,11 @@ impl WritableStore for MockStore {
         unimplemented!()
     }
 
-    fn unfail(&self, _: Option<BlockPtr>, _: Option<BlockPtr>) -> Result<(), StoreError> {
+    fn unfail_deterministic_error(&self, _: &BlockPtr, _: &BlockPtr) -> Result<(), StoreError> {
+        unimplemented!()
+    }
+
+    fn unfail_non_deterministic_error(&self, _: &BlockPtr) -> Result<(), StoreError> {
         unimplemented!()
     }