Skip to content

Commit e51dd08

Browse files
zhaoxigfphoenix78
authored and committed
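Fix: the reader's retry timeout when looking up the writer proc entry should be synchronized with the creation timeout retry logic of cdbgang_createGang_async. Gang creation can be slow due to platform, container, network and other reasons; without this synchronization the reader gives up too early and prematurely considers the slow creation an abnormal termination of the writer gang, as in the following log: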
2025-11-20 11:48:27.925475 CST,"gpadmin","regression",p14056,th-1958096896,"172.18.0.2","40060",2025-11-20 11:48:27 CST,0,con33,,seg0,,,,sx1,"WARNING","58M01","reader could not find writer proc entry","lock [0,1260] AccessShareLock 0. Probably because writer gang is gone somehow. Maybe try rerunning.",,,,,,0,,"lock.c",963,"Stack trace: 1 0xaaaab4db9f14 postgres errstart + 0x494 2 0xaaaab4b9b064 postgres LockAcquireExtended + 0x76c 3 0xaaaab4b97d98 postgres LockRelationOid + 0x3c 4 0xaaaab44a6e30 postgres relation_open + 0x60 5 0xaaaab45a04e8 postgres table_open + 0x1c 6 0xaaaab4d7f3e8 postgres <symbol not found> + 0xb4d7f3e8 7 0xaaaab4d7fdcc postgres <symbol not found> + 0xb4d7fdcc 8 0xaaaab4d7fc5c postgres SearchCatCache1 + 0x2c 9 0xaaaab4da0258 postgres SearchSysCache1 + 0xb4 10 0xaaaab4dd6f48 postgres InitializeSessionUserId + 0x98 11 0xaaaab4dda874 postgres InitPostgres + 0x504 12 0xaaaab4bc93bc postgres PostgresMain + 0x390 13 0xaaaab4ac90b8 postgres <symbol not found> + 0xb4ac90b8 14 0xaaaab4ac8918 postgres <symbol not found> + 0xb4ac8918 15 0xaaaab4ac3114 postgres <symbol not found> + 0xb4ac3114 16 0xaaaab4ac2804 postgres PostmasterMain + 0x1668 17 0xaaaab4936b50 postgres <symbol not found> + 0xb4936b50 18 0xffff8b4f1724 libc.so.6 __libc_start_main + 0xf0 19 0xaaaab448327c postgres <symbol not found> + 0xb448327c
1 parent 430c5b6 commit e51dd08

File tree

1 file changed

+19
-4
lines changed
  • src/backend/storage/lmgr

1 file changed

+19
-4
lines changed

src/backend/storage/lmgr/lock.c

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -939,11 +939,26 @@ LockAcquireExtended(const LOCKTAG *locktag,
939939
{
940940
/* Find the guy who should manage our locks */
941941
volatile PGPROC * proc = FindProcByGpSessionId(gp_session_id);
942-
int count = 0;
943-
while(proc==NULL && count < find_writer_proc_retry_time)
942+
TimestampTz current_time;
943+
TimestampTz start_time;
944+
long elapsed_secs;
945+
int elapsed_usecs;
946+
start_time = GetCurrentTimestamp();
947+
948+
while (proc == NULL)
944949
{
950+
/*
951+
* The creation timeout retry logic of cdbgang_createGang_async
952+
* should be synchronized with the reader to avoid slow creation
953+
* due to platform, container, network and other reasons,
954+
* which would cause the reader to prematurely consider it an abnormal termination.
955+
*/
956+
current_time = GetCurrentTimestamp();
957+
TimestampDifference(start_time, current_time, &elapsed_secs, &elapsed_usecs);
958+
if (elapsed_secs >= gp_segment_connect_timeout / 2)
959+
break;
960+
945961
pg_usleep( /* microseconds */ 2000);
946-
count++;
947962
CHECK_FOR_INTERRUPTS();
948963
/*
949964
* The reason for using pg_memory_barrier() is to ensure that
@@ -954,7 +969,7 @@ LockAcquireExtended(const LOCKTAG *locktag,
954969
}
955970
if (proc != NULL)
956971
{
957-
elog(DEBUG1,"Found writer proc entry. My Pid %d, his pid %d", MyProc-> pid, proc->pid);
972+
elog(DEBUG1, "Found writer proc entry. My Pid %d, his pid %d", MyProc-> pid, proc->pid);
958973
lockHolderProcPtr = (PGPROC*) proc;
959974
}
960975
else

0 commit comments

Comments
 (0)