@@ -924,9 +924,9 @@ public void resetExecuteContinuousFailureCount(String tableName) {
924924 *
925925 * @throws Exception if fail
926926 */
927- public void syncRefreshMetadata () throws Exception {
927+ public void syncRefreshMetadata (boolean forceRenew ) throws Exception {
928928
929- if (System .currentTimeMillis () - lastRefreshMetadataTimestamp < metadataRefreshInterval ) {
929+ if (! forceRenew && System .currentTimeMillis () - lastRefreshMetadataTimestamp < metadataRefreshInterval ) {
930930 logger
931931 .warn (
932932 "try to lock metadata refreshing, it has refresh at: {}, dataSourceName: {}, url: {}" ,
@@ -947,7 +947,7 @@ public void syncRefreshMetadata() throws Exception {
947947
948948 try {
949949
950- if (System .currentTimeMillis () - lastRefreshMetadataTimestamp < metadataRefreshInterval ) {
950+ if (! forceRenew && System .currentTimeMillis () - lastRefreshMetadataTimestamp < metadataRefreshInterval ) {
951951 logger .warn ("it has refresh metadata at: {}, dataSourceName: {}, url: {}" ,
952952 lastRefreshMetadataTimestamp , dataSourceName , paramURL );
953953 return ;
@@ -1295,7 +1295,7 @@ public TableEntry getOrRefreshTableEntry(final String tableName, final boolean r
12951295 if (logger .isInfoEnabled ()) {
12961296 logger .info ("server addr is expired and it will refresh metadata." );
12971297 }
1298- syncRefreshMetadata ();
1298+ syncRefreshMetadata (false );
12991299 tableEntryRefreshContinuousFailureCount .set (0 );
13001300 } catch (ObTableEntryRefreshException e ) {
13011301 RUNTIME .error ("getOrRefreshTableEntry meet exception" , e );
@@ -1307,11 +1307,11 @@ public TableEntry getOrRefreshTableEntry(final String tableName, final boolean r
13071307 if (tableEntryRefreshContinuousFailureCount .incrementAndGet () > tableEntryRefreshContinuousFailureCeiling ) {
13081308 logger .error (LCD .convert ("01-00019" ),
13091309 tableEntryRefreshContinuousFailureCeiling );
1310- syncRefreshMetadata ();
1310+ syncRefreshMetadata (false );
13111311 tableEntryRefreshContinuousFailureCount .set (0 );
13121312 } else if (e .isConnectInactive ()) {
13131313 // getMetaRefreshConnection failed, maybe the server is down, so we need to refresh metadata directly
1314- syncRefreshMetadata ();
1314+ syncRefreshMetadata (false );
13151315 tableEntryRefreshContinuousFailureCount .set (0 );
13161316 }
13171317 } catch (Throwable t ) {
@@ -1326,7 +1326,7 @@ public TableEntry getOrRefreshTableEntry(final String tableName, final boolean r
13261326 "refresh table entry has tried {}-times failure and will sync refresh metadata" ,
13271327 refreshTryTimes );
13281328 }
1329- syncRefreshMetadata ();
1329+ syncRefreshMetadata (false );
13301330 return refreshTableEntry (tableEntry , tableName );
13311331 }
13321332 return tableEntry ;
@@ -1405,7 +1405,7 @@ public TableEntry refreshTableLocationByTabletId(TableEntry tableEntry, String t
14051405 throw e ;
14061406 } catch (ObTableServerCacheExpiredException e ) {
14071407 RUNTIME .warn ("RefreshTableEntry encountered an exception" , e );
1408- syncRefreshMetadata ();
1408+ syncRefreshMetadata (false );
14091409 tableEntryRefreshContinuousFailureCount .set (0 );
14101410 } catch (ObTableEntryRefreshException e ) {
14111411 RUNTIME .error ("getOrRefreshTableEntry meet exception" , e );
@@ -1416,11 +1416,11 @@ public TableEntry refreshTableLocationByTabletId(TableEntry tableEntry, String t
14161416 if (tableEntryRefreshContinuousFailureCount .incrementAndGet () > tableEntryRefreshContinuousFailureCeiling ) {
14171417 logger .error (LCD .convert ("01-00019" ),
14181418 tableEntryRefreshContinuousFailureCeiling );
1419- syncRefreshMetadata ();
1419+ syncRefreshMetadata (false );
14201420 tableEntryRefreshContinuousFailureCount .set (0 );
14211421 } else if (e .isConnectInactive ()) {
14221422 // getMetaRefreshConnection failed, maybe the server is down, so we need to refresh metadata directly
1423- syncRefreshMetadata ();
1423+ syncRefreshMetadata (false );
14241424 tableEntryRefreshContinuousFailureCount .set (0 );
14251425 }
14261426 } catch (Throwable t ) {
@@ -2022,38 +2022,49 @@ public ObPair<Long, ObTableParam> getTableInternal(String tableName, TableEntry
20222022 RUNTIME .error ("Cannot get replica by partId: " + partId );
20232023 throw new ObTableGetException ("Cannot get replica by partId: " + partId );
20242024 }
2025+ int retryTimes = 0 ;
20252026 ObServerAddr addr = replica .getAddr ();
20262027 ObTable obTable = tableRoster .get (addr );
20272028 boolean addrExpired = addr .isExpired (serverAddressCachingTimeout );
2028- if (obTable == null || addrExpired ) {
2029- if (obTable == null ) {
2030- logger .warn ("Cannot get ObTable by addr {}, refreshing metadata." , addr );
2031- syncRefreshMetadata ();
2032- }
2033- if (addr .isExpired (serverAddressCachingTimeout )) {
2029+ while ((obTable == null || addrExpired ) && retryTimes < 2 ) {
2030+ ++retryTimes ;
2031+ if (addrExpired ) {
20342032 logger .info ("Server addr {} is expired, refreshing tableEntry." , addr );
20352033 if (ObGlobal .obVsnMajor () >= 4 ) {
20362034 refreshTableLocationByTabletId (tableEntry , tableName , tabletId );
20372035 } else {
20382036 tableEntry = getOrRefreshTableEntry (tableName , true , waitForRefresh , false );
20392037 }
2038+ addrExpired = addr .isExpired (serverAddressCachingTimeout );
20402039 }
2041-
2042- if (ObGlobal .obVsnMajor () >= 4 ) {
2043- obPartitionLocationInfo = getOrRefreshPartitionInfo (tableEntry , tableName , tabletId );
2044- replica = getPartitionLocation (obPartitionLocationInfo , route );
2045- } else {
2046- replica = getPartitionReplica (tableEntry , partitionId , route ).getRight ();
2047- }
2048-
2049- addr = replica .getAddr ();
2050- obTable = tableRoster .get (addr );
2051-
20522040 if (obTable == null ) {
2053- RUNTIME .error ("Cannot get table by addr: " + addr );
2054- throw new ObTableGetException ("Cannot get table by addr: " + addr );
2041+ // need to refresh table roster to ensure the current roster is the latest
2042+ syncRefreshMetadata (true );
2043+ // the addr is wrong, need to refresh location
2044+ if (logger .isInfoEnabled ()) {
2045+ logger .info ("Cannot get ObTable by addr {}, refreshing metadata." , addr );
2046+ }
2047+ // refresh tablet location based on the latest roster, in case some of the observers have been killed
2048+ // and the old location is still in use
2049+ tableEntry = refreshTableLocationByTabletId (tableEntry , tableName , tabletId );
2050+ if (ObGlobal .obVsnMajor () >= 4 ) {
2051+ obPartitionLocationInfo = getOrRefreshPartitionInfo (tableEntry , tableName , tabletId );
2052+ replica = getPartitionLocation (obPartitionLocationInfo , route );
2053+ } else {
2054+ replica = getPartitionReplica (tableEntry , partitionId , route ).getRight ();
2055+ }
2056+ if (replica == null ) {
2057+ RUNTIME .error ("Cannot get replica by partId: " + partId );
2058+ throw new ObTableGetException ("Cannot get replica by partId: " + partId );
2059+ }
2060+ addr = replica .getAddr ();
2061+ obTable = tableRoster .get (addr );
20552062 }
20562063 }
2064+ if (obTable == null ) {
2065+ RUNTIME .error ("cannot get table by addr: " + addr );
2066+ throw new ObTableGetException ("obTable is null, addr is: " + addr .getIp () + ":" + addr .getSvrPort ());
2067+ }
20572068 ObTableParam param = createTableParam (obTable , tableEntry , obPartitionLocationInfo , partId , tabletId );
20582069 if (ObGlobal .obVsnMajor () >= 4 ) {
20592070 } else {
@@ -2317,33 +2328,46 @@ private List<ObPair<Long, ObTableParam>> getTables(String tableName, ObTableQuer
23172328 ReplicaLocation replica = partIdWithReplica .getRight ();
23182329 ObServerAddr addr = replica .getAddr ();
23192330 ObTable obTable = tableRoster .get (addr );
2331+ int retryTimes = 0 ;
23202332 boolean addrExpired = addr .isExpired (serverAddressCachingTimeout );
2321- if (addrExpired || obTable == null ) {
2322- if (obTable == null ) {
2323- logger .warn ("Cannot get ObTable by addr {}, refreshing metadata." , addr );
2324- syncRefreshMetadata ();
2325- }
2333+ while ((obTable == null || addrExpired ) && retryTimes < 2 ) {
2334+ ++retryTimes ;
23262335 if (addrExpired ) {
23272336 logger .info ("Server addr {} is expired, refreshing tableEntry." , addr );
23282337 if (ObGlobal .obVsnMajor () >= 4 ) {
23292338 refreshTableLocationByTabletId (tableEntry , tableName , tabletId );
23302339 } else {
23312340 tableEntry = getOrRefreshTableEntry (tableName , true , waitForRefresh , false );
23322341 }
2342+ addrExpired = addr .isExpired (serverAddressCachingTimeout );
23332343 }
2334- if (ObGlobal .obVsnMajor () >= 4 ) {
2335- ObPartitionLocationInfo locationInfo = getOrRefreshPartitionInfo (tableEntry , tableName , tabletId );
2336- replica = getPartitionLocation (locationInfo , route );
2337- } else {
2338- replica = getPartitionLocation (tableEntry , partId , route );
2344+ if (obTable == null ) {
2345+ // need to refresh table roster to ensure the current roster is the latest
2346+ syncRefreshMetadata (true );
2347+ // the addr is wrong, need to refresh location
2348+ if (logger .isInfoEnabled ()) {
2349+ logger .info ("Cannot get ObTable by addr {}, refreshing metadata." , addr );
2350+ }
2351+ // refresh tablet location based on the latest roster, in case some of the observers have been killed
2352+ // and the old location is still in use
2353+ tableEntry = refreshTableLocationByTabletId (tableEntry , tableName , tabletId );
2354+ if (ObGlobal .obVsnMajor () >= 4 ) {
2355+ ObPartitionLocationInfo locationInfo = getOrRefreshPartitionInfo (tableEntry , tableName , tabletId );
2356+ replica = getPartitionLocation (locationInfo , route );
2357+ } else {
2358+ replica = getPartitionLocation (tableEntry , partId , route );
2359+ }
2360+ if (replica == null ) {
2361+ RUNTIME .error ("Cannot get replica by partId: " + partId );
2362+ throw new ObTableGetException ("Cannot get replica by partId: " + partId );
2363+ }
2364+ addr = replica .getAddr ();
2365+ obTable = tableRoster .get (addr );
23392366 }
2340- addr = replica .getAddr ();
2341- obTable = tableRoster .get (addr );
23422367 }
2343-
23442368 if (obTable == null ) {
23452369 RUNTIME .error ("cannot get table by addr: " + addr );
2346- throw new ObTableGetException ("cannot get table by addr: " + addr );
2370+ throw new ObTableGetException ("obTable is null, addr is : " + addr . getIp () + ":" + addr . getSvrPort () );
23472371 }
23482372
23492373 ObTableParam param = new ObTableParam (obTable );
@@ -2487,7 +2511,7 @@ public String tryGetTableNameFromTableGroupCache(final String tableGroupName,
24872511 if (logger .isInfoEnabled ()) {
24882512 logger .info ("server addr is expired and it will refresh metadata." );
24892513 }
2490- syncRefreshMetadata ();
2514+ syncRefreshMetadata (false );
24912515 } catch (Throwable t ) {
24922516 RUNTIME .error ("getOrRefreshTableName from TableGroup meet exception" , t );
24932517 throw t ;
0 commit comments