@@ -924,9 +924,9 @@ public void resetExecuteContinuousFailureCount(String tableName) {
924924 *
925925 * @throws Exception if fail
926926 */
927- public void syncRefreshMetadata () throws Exception {
927+ public void syncRefreshMetadata (boolean forceRenew ) throws Exception {
928928
929- if (System .currentTimeMillis () - lastRefreshMetadataTimestamp < metadataRefreshInterval ) {
929+ if (! forceRenew && System .currentTimeMillis () - lastRefreshMetadataTimestamp < metadataRefreshInterval ) {
930930 logger
931931 .warn (
932932 "try to lock metadata refreshing, it has refresh at: {}, dataSourceName: {}, url: {}" ,
@@ -947,7 +947,7 @@ public void syncRefreshMetadata() throws Exception {
947947
948948 try {
949949
950- if (System .currentTimeMillis () - lastRefreshMetadataTimestamp < metadataRefreshInterval ) {
950+ if (! forceRenew && System .currentTimeMillis () - lastRefreshMetadataTimestamp < metadataRefreshInterval ) {
951951 logger .warn ("it has refresh metadata at: {}, dataSourceName: {}, url: {}" ,
952952 lastRefreshMetadataTimestamp , dataSourceName , paramURL );
953953 return ;
@@ -1295,7 +1295,7 @@ public TableEntry getOrRefreshTableEntry(final String tableName, final boolean r
12951295 if (logger .isInfoEnabled ()) {
12961296 logger .info ("server addr is expired and it will refresh metadata." );
12971297 }
1298- syncRefreshMetadata ();
1298+ syncRefreshMetadata (false );
12991299 tableEntryRefreshContinuousFailureCount .set (0 );
13001300 } catch (ObTableEntryRefreshException e ) {
13011301 RUNTIME .error ("getOrRefreshTableEntry meet exception" , e );
@@ -1307,11 +1307,11 @@ public TableEntry getOrRefreshTableEntry(final String tableName, final boolean r
13071307 if (tableEntryRefreshContinuousFailureCount .incrementAndGet () > tableEntryRefreshContinuousFailureCeiling ) {
13081308 logger .error (LCD .convert ("01-00019" ),
13091309 tableEntryRefreshContinuousFailureCeiling );
1310- syncRefreshMetadata ();
1310+ syncRefreshMetadata (false );
13111311 tableEntryRefreshContinuousFailureCount .set (0 );
13121312 } else if (e .isConnectInactive ()) {
13131313 // getMetaRefreshConnection failed, maybe the server is down, so we need to refresh metadata directly
1314- syncRefreshMetadata ();
1314+ syncRefreshMetadata (false );
13151315 tableEntryRefreshContinuousFailureCount .set (0 );
13161316 }
13171317 } catch (Throwable t ) {
@@ -1326,7 +1326,7 @@ public TableEntry getOrRefreshTableEntry(final String tableName, final boolean r
13261326 "refresh table entry has tried {}-times failure and will sync refresh metadata" ,
13271327 refreshTryTimes );
13281328 }
1329- syncRefreshMetadata ();
1329+ syncRefreshMetadata (false );
13301330 return refreshTableEntry (tableEntry , tableName );
13311331 }
13321332 return tableEntry ;
@@ -1405,7 +1405,7 @@ public TableEntry refreshTableLocationByTabletId(TableEntry tableEntry, String t
14051405 throw e ;
14061406 } catch (ObTableServerCacheExpiredException e ) {
14071407 RUNTIME .warn ("RefreshTableEntry encountered an exception" , e );
1408- syncRefreshMetadata ();
1408+ syncRefreshMetadata (false );
14091409 tableEntryRefreshContinuousFailureCount .set (0 );
14101410 } catch (ObTableEntryRefreshException e ) {
14111411 RUNTIME .error ("getOrRefreshTableEntry meet exception" , e );
@@ -1416,11 +1416,11 @@ public TableEntry refreshTableLocationByTabletId(TableEntry tableEntry, String t
14161416 if (tableEntryRefreshContinuousFailureCount .incrementAndGet () > tableEntryRefreshContinuousFailureCeiling ) {
14171417 logger .error (LCD .convert ("01-00019" ),
14181418 tableEntryRefreshContinuousFailureCeiling );
1419- syncRefreshMetadata ();
1419+ syncRefreshMetadata (false );
14201420 tableEntryRefreshContinuousFailureCount .set (0 );
14211421 } else if (e .isConnectInactive ()) {
14221422 // getMetaRefreshConnection failed, maybe the server is down, so we need to refresh metadata directly
1423- syncRefreshMetadata ();
1423+ syncRefreshMetadata (false );
14241424 tableEntryRefreshContinuousFailureCount .set (0 );
14251425 }
14261426 } catch (Throwable t ) {
@@ -2022,14 +2022,12 @@ public ObPair<Long, ObTableParam> getTableInternal(String tableName, TableEntry
20222022 RUNTIME .error ("Cannot get replica by partId: " + partId );
20232023 throw new ObTableGetException ("Cannot get replica by partId: " + partId );
20242024 }
2025+ int retryTimes = 0 ;
20252026 ObServerAddr addr = replica .getAddr ();
20262027 ObTable obTable = tableRoster .get (addr );
20272028 boolean addrExpired = addr .isExpired (serverAddressCachingTimeout );
2028- if (obTable == null || addrExpired ) {
2029- if (obTable == null ) {
2030- logger .warn ("Cannot get ObTable by addr {}, refreshing metadata." , addr );
2031- syncRefreshMetadata ();
2032- }
2029+ while ((obTable == null || addrExpired ) && retryTimes < 2 ) {
2030+ ++retryTimes ;
20332031 if (addr .isExpired (serverAddressCachingTimeout )) {
20342032 logger .info ("Server addr {} is expired, refreshing tableEntry." , addr );
20352033 if (ObGlobal .obVsnMajor () >= 4 ) {
@@ -2038,40 +2036,33 @@ public ObPair<Long, ObTableParam> getTableInternal(String tableName, TableEntry
20382036 tableEntry = getOrRefreshTableEntry (tableName , true , waitForRefresh , false );
20392037 }
20402038 }
2041-
2042- if (ObGlobal .obVsnMajor () >= 4 ) {
2043- obPartitionLocationInfo = getOrRefreshPartitionInfo (tableEntry , tableName , tabletId );
2044- replica = getPartitionLocation (obPartitionLocationInfo , route );
2045- } else {
2046- replica = getPartitionReplica (tableEntry , partitionId , route ).getRight ();
2047- }
2048-
2049- addr = replica .getAddr ();
2050- obTable = tableRoster .get (addr );
2051-
20522039 if (obTable == null ) {
2040+ // need to refresh table roster to ensure the current roster is the latest
2041+ syncRefreshMetadata (true );
20532042 // the addr is wrong, need to refresh location
2054- if (RUNTIME .isInfoEnabled ()) {
2055- RUNTIME . info ("Cannot get table by addr: " + addr );
2043+ if (logger .isInfoEnabled ()) {
2044+ logger . warn ("Cannot get ObTable by addr {}, refreshing metadata." , addr );
20562045 }
2046+ // refresh tablet location based on the latest roster, in case some of the observers have been killed
2047+ // and the old location is still being used
20572048 tableEntry = refreshTableLocationByTabletId (tableEntry , tableName , tabletId );
2058- obPartitionLocationInfo = tableEntry .getPartitionEntry ().getPartitionInfo (tabletId );
2059- replica = getPartitionLocation (obPartitionLocationInfo , route );
2049+ if (ObGlobal .obVsnMajor () >= 4 ) {
2050+ obPartitionLocationInfo = getOrRefreshPartitionInfo (tableEntry , tableName , tabletId );
2051+ replica = getPartitionLocation (obPartitionLocationInfo , route );
2052+ } else {
2053+ replica = getPartitionReplica (tableEntry , partitionId , route ).getRight ();
2054+ }
20602055 if (replica == null ) {
20612056 RUNTIME .error ("Cannot get replica by partId: " + partId );
20622057 throw new ObTableGetException ("Cannot get replica by partId: " + partId );
20632058 }
20642059 addr = replica .getAddr ();
20652060 obTable = tableRoster .get (addr );
2066- if (obTable == null ) {
2067- syncRefreshMetadata ();
2068- obTable = tableRoster .get (addr );
2069- }
2070- if (obTable == null ) {
2071- throw new ObTableGetException ("obTable is null, addr is: " + addr .getIp () + ":" + addr .getSvrPort ());
2072- }
20732061 }
20742062 }
2063+ if (obTable == null ) {
2064+ throw new ObTableGetException ("obTable is null, addr is: " + addr .getIp () + ":" + addr .getSvrPort ());
2065+ }
20752066 ObTableParam param = createTableParam (obTable , tableEntry , obPartitionLocationInfo , partId , tabletId );
20762067 if (ObGlobal .obVsnMajor () >= 4 ) {
20772068 } else {
@@ -2335,33 +2326,45 @@ private List<ObPair<Long, ObTableParam>> getTables(String tableName, ObTableQuer
23352326 ReplicaLocation replica = partIdWithReplica .getRight ();
23362327 ObServerAddr addr = replica .getAddr ();
23372328 ObTable obTable = tableRoster .get (addr );
2329+ int retryTimes = 0 ;
23382330 boolean addrExpired = addr .isExpired (serverAddressCachingTimeout );
2339- if (addrExpired || obTable == null ) {
2340- if (obTable == null ) {
2341- logger .warn ("Cannot get ObTable by addr {}, refreshing metadata." , addr );
2342- syncRefreshMetadata ();
2343- }
2344- if (addrExpired ) {
2331+ while ((obTable == null || addrExpired ) && retryTimes < 2 ) {
2332+ ++retryTimes ;
2333+ if (addr .isExpired (serverAddressCachingTimeout )) {
23452334 logger .info ("Server addr {} is expired, refreshing tableEntry." , addr );
23462335 if (ObGlobal .obVsnMajor () >= 4 ) {
23472336 refreshTableLocationByTabletId (tableEntry , tableName , tabletId );
23482337 } else {
23492338 tableEntry = getOrRefreshTableEntry (tableName , true , waitForRefresh , false );
23502339 }
23512340 }
2352- if (ObGlobal .obVsnMajor () >= 4 ) {
2353- ObPartitionLocationInfo locationInfo = getOrRefreshPartitionInfo (tableEntry , tableName , tabletId );
2354- replica = getPartitionLocation (locationInfo , route );
2355- } else {
2356- replica = getPartitionLocation (tableEntry , partId , route );
2341+ if (obTable == null ) {
2342+ // need to refresh table roster to ensure the current roster is the latest
2343+ syncRefreshMetadata (true );
2344+ // the addr is wrong, need to refresh location
2345+ if (logger .isInfoEnabled ()) {
2346+ logger .warn ("Cannot get ObTable by addr {}, refreshing metadata." , addr );
2347+ }
2348+ // refresh tablet location based on the latest roster, in case some of the observers have been killed
2349+ // and the old location is still being used
2350+ tableEntry = refreshTableLocationByTabletId (tableEntry , tableName , tabletId );
2351+ if (ObGlobal .obVsnMajor () >= 4 ) {
2352+ ObPartitionLocationInfo locationInfo = getOrRefreshPartitionInfo (tableEntry , tableName , tabletId );
2353+ replica = getPartitionLocation (locationInfo , route );
2354+ } else {
2355+ replica = getPartitionLocation (tableEntry , partId , route );
2356+ }
2357+ if (replica == null ) {
2358+ RUNTIME .error ("Cannot get replica by partId: " + partId );
2359+ throw new ObTableGetException ("Cannot get replica by partId: " + partId );
2360+ }
2361+ addr = replica .getAddr ();
2362+ obTable = tableRoster .get (addr );
23572363 }
2358- addr = replica .getAddr ();
2359- obTable = tableRoster .get (addr );
23602364 }
2361-
23622365 if (obTable == null ) {
23632366 RUNTIME .error ("cannot get table by addr: " + addr );
2364- throw new ObTableGetException ("cannot get table by addr: " + addr );
2367+ throw new ObTableGetException ("obTable is null, addr is : " + addr . getIp () + ":" + addr . getSvrPort () );
23652368 }
23662369
23672370 ObTableParam param = new ObTableParam (obTable );
@@ -2505,7 +2508,7 @@ public String tryGetTableNameFromTableGroupCache(final String tableGroupName,
25052508 if (logger .isInfoEnabled ()) {
25062509 logger .info ("server addr is expired and it will refresh metadata." );
25072510 }
2508- syncRefreshMetadata ();
2511+ syncRefreshMetadata (false );
25092512 } catch (Throwable t ) {
25102513 RUNTIME .error ("getOrRefreshTableName from TableGroup meet exception" , t );
25112514 throw t ;
0 commit comments