@@ -155,7 +155,7 @@ func NewSubmitProofReceiverLogic(cfg *config.ProverManager, chainCfg *params.Cha
155155// HandleZkProof handle a ZkProof submitted from a prover.
156156// For now only proving/verifying error will lead to setting status as skipped.
157157// db/unmarshal errors will not because they are errors on the business logic side.
158- func (m * ProofReceiverLogic ) HandleZkProof (ctx * gin.Context , proofParameter coordinatorType.SubmitProofParameter ) error {
158+ func (m * ProofReceiverLogic ) HandleZkProof (ctx * gin.Context , proofParameter coordinatorType.SubmitProofParameter ) ( rerr error ) {
159159 m .proofReceivedTotal .Inc ()
160160 pk := ctx .GetString (coordinatorType .PublicKey )
161161 if len (pk ) == 0 {
@@ -172,6 +172,18 @@ func (m *ProofReceiverLogic) HandleZkProof(ctx *gin.Context, proofParameter coor
172172 return ErrValidatorFailureProverTaskEmpty
173173 }
174174
175+ defer func () {
176+ if rerr != nil && types .ProverProveStatus (proverTask .ProvingStatus ) == types .ProverAssigned {
177+ // trigger a last-chance closing of current task if some routine had missed it
178+ log .Warn ("last chance proof recover triggerred" ,
179+ "proofID" , proofParameter .TaskID ,
180+ "err" , rerr ,
181+ )
182+ m .proofRecover (ctx .Copy (), proverTask , types .ProverTaskFailureTypeUndefined , proofParameter )
183+ }
184+
185+ }()
186+
175187 proofTime := time .Since (proverTask .CreatedAt )
176188 proofTimeSec := uint64 (proofTime .Seconds ())
177189
@@ -311,6 +323,20 @@ func (m *ProofReceiverLogic) validator(ctx context.Context, proverTask *orm.Prov
311323 }
312324 }()
313325
326+ // Internally we override the timeout failure:
327+ // if the prover task is marked invalid with FailureType ProverTaskFailureTypeTimeout, the submitted proof has timed out, but we still accept it
328+ if types .ProverProveStatus (proverTask .ProvingStatus ) == types .ProverProofInvalid &&
329+ types .ProverTaskFailureType (proverTask .FailureType ) == types .ProverTaskFailureTypeTimeout {
330+ m .validateFailureProverTaskTimeout .Inc ()
331+ proverTask .ProvingStatus = int16 (types .ProverAssigned )
332+
333+ proofTime := time .Since (proverTask .CreatedAt )
334+ proofTimeSec := uint64 (proofTime .Seconds ())
335+ log .Warn ("proof submit proof have timeout" , "hash" , proofParameter .TaskID , "taskType" , proverTask .TaskType ,
336+ "proverName" , proverTask .ProverName , "proverPublicKey" , pk , "proofTime" , proofTimeSec )
337+
338+ }
339+
314340 // Ensure this prover is eligible to participate in the prover task.
315341 if types .ProverProveStatus (proverTask .ProvingStatus ) == types .ProverProofValid ||
316342 types .ProverProveStatus (proverTask .ProvingStatus ) == types .ProverProofInvalid {
@@ -328,9 +354,6 @@ func (m *ProofReceiverLogic) validator(ctx context.Context, proverTask *orm.Prov
328354 return ErrValidatorFailureProverTaskCannotSubmitTwice
329355 }
330356
331- proofTime := time .Since (proverTask .CreatedAt )
332- proofTimeSec := uint64 (proofTime .Seconds ())
333-
334357 if proofParameter .Status != int (coordinatorType .StatusOk ) {
335358 // Temporarily replace "panic" with "pa-nic" to prevent triggering the alert based on logs.
336359 failureMsg := strings .Replace (proofParameter .FailureMsg , "panic" , "pa-nic" , - 1 )
@@ -346,14 +369,6 @@ func (m *ProofReceiverLogic) validator(ctx context.Context, proverTask *orm.Prov
346369 return ErrValidatorFailureProofMsgStatusNotOk
347370 }
348371
349- // if prover task FailureType is SessionInfoFailureTimeout, the submit proof is timeout, need skip it
350- if types .ProverTaskFailureType (proverTask .FailureType ) == types .ProverTaskFailureTypeTimeout {
351- m .validateFailureProverTaskTimeout .Inc ()
352- log .Info ("proof submit proof have timeout, skip this submit proof" , "hash" , proofParameter .TaskID , "taskType" , proverTask .TaskType ,
353- "proverName" , proverTask .ProverName , "proverPublicKey" , pk , "proofTime" , proofTimeSec )
354- return ErrValidatorFailureProofTimeout
355- }
356-
357372 // store the proof to prover task
358373 if updateTaskProofErr := m .updateProverTaskProof (ctx , proverTask , proofParameter ); updateTaskProofErr != nil {
359374 log .Warn ("update prover task proof failure" , "hash" , proofParameter .TaskID , "proverPublicKey" , pk ,
@@ -368,6 +383,7 @@ func (m *ProofReceiverLogic) validator(ctx context.Context, proverTask *orm.Prov
368383 "taskType" , proverTask .TaskType , "proverName" , proverTask .ProverName , "proverPublicKey" , pk )
369384 return ErrValidatorFailureTaskHaveVerifiedSuccess
370385 }
386+
371387 return nil
372388}
373389
@@ -384,7 +400,7 @@ func (m *ProofReceiverLogic) closeProofTask(ctx context.Context, proverTask *orm
384400 log .Info ("proof close task update proof status" , "hash" , proverTask .TaskID , "proverPublicKey" , proverTask .ProverPublicKey ,
385401 "taskType" , message .ProofType (proverTask .TaskType ).String (), "status" , types .ProvingTaskVerified .String ())
386402
387- if err := m .updateProofStatus (ctx , proverTask , proofParameter , types .ProverProofValid , types .ProverTaskFailureTypeUndefined , proofTimeSec ); err != nil {
403+ if err := m .updateProofStatus (ctx , proverTask , proofParameter , types .ProverProofValid , types .ProverTaskFailureType ( proverTask . FailureType ) , proofTimeSec ); err != nil {
388404 log .Error ("failed to updated proof status ProvingTaskVerified" , "hash" , proverTask .TaskID , "proverPublicKey" , proverTask .ProverPublicKey , "error" , err )
389405 return err
390406 }
@@ -445,6 +461,9 @@ func (m *ProofReceiverLogic) updateProofStatus(ctx context.Context, proverTask *
445461 if err != nil {
446462 return err
447463 }
464+ // sync status and failure type into proverTask
465+ proverTask .ProvingStatus = int16 (status )
466+ proverTask .FailureType = int16 (failureType )
448467
449468 if status == types .ProverProofValid && message .ProofType (proofParameter .TaskType ) == message .ProofTypeChunk {
450469 if checkReadyErr := m .checkAreAllChunkProofsReady (ctx , proverTask .TaskID ); checkReadyErr != nil {
0 commit comments