|
33 | 33 | import org.apache.geode.distributed.internal.DistributionConfig; |
34 | 34 | import org.apache.geode.distributed.internal.InternalDistributedSystem; |
35 | 35 | import org.apache.geode.internal.AvailablePortHelper; |
| 36 | +import org.apache.geode.internal.UniquePortSupplier; |
36 | 37 | import org.apache.geode.internal.cache.GemFireCacheImpl; |
37 | 38 | import org.apache.geode.internal.cache.InternalCache; |
38 | 39 | import org.apache.geode.internal.cache.tier.sockets.CacheServerHelper; |
@@ -198,61 +199,116 @@ public void startServer() { |
198 | 199 | servers = new ArrayList<>(); |
199 | 200 | } |
200 | 201 |
|
201 | | - // Close all keepers immediately before starting server |
202 | | - // This minimizes the race window to < 1ms (time between keeper.close() and service bind) |
203 | | - if (memberPortKeeper != null) { |
204 | | - memberPortKeeper.close(); |
205 | | - memberPortKeeper = null; |
206 | | - } |
207 | | - if (jmxPortKeeper != null) { |
208 | | - jmxPortKeeper.close(); |
209 | | - jmxPortKeeper = null; |
210 | | - } |
211 | | - if (httpPortKeeper != null) { |
212 | | - httpPortKeeper.close(); |
213 | | - httpPortKeeper = null; |
214 | | - } |
| 202 | + int maxRetries = 3; |
| 203 | + RuntimeException lastException = null; |
215 | 204 |
|
216 | | - CacheFactory cf = new CacheFactory(properties); |
217 | | - if (pdxPersistentUserSet) { |
218 | | - cf.setPdxPersistent(pdxPersistent); |
219 | | - } |
220 | | - if (pdxReadSerializedUserSet) { |
221 | | - cf.setPdxReadSerialized(pdxReadSerialized); |
222 | | - } |
223 | | - if (pdxSerializer != null) { |
224 | | - cf.setPdxSerializer(pdxSerializer); |
225 | | - } |
226 | | - cache = (InternalCache) cf.create(); |
227 | | - DistributionConfig config = |
228 | | - ((InternalDistributedSystem) cache.getDistributedSystem()).getConfig(); |
229 | | - jmxPort = config.getJmxManagerPort(); |
230 | | - httpPort = config.getHttpServicePort(); |
231 | | - |
232 | | - for (int i = 0; i < serverCount; i++) { |
233 | | - CacheServer server = cache.addCacheServer(); |
234 | | - if (i == 0) { |
235 | | - CacheServerHelper.setIsDefaultServer(server); |
236 | | - } |
237 | | - // memberPort is by default zero, which translates to "randomly select an available port," |
238 | | - // which is why it is updated after this try block |
239 | | - if (serverCount == 1) { |
240 | | - server.setPort(memberPort); |
241 | | - } else { |
242 | | - server.setPort(0); |
243 | | - } |
244 | | - if (maxThreads >= 0) { |
245 | | - server.setMaxThreads(maxThreads); |
246 | | - } |
| 205 | + for (int attempt = 1; attempt <= maxRetries; attempt++) { |
247 | 206 | try { |
248 | | - server.start(); |
249 | | - } catch (IOException e) { |
250 | | - throw new RuntimeException("unable to start server", e); |
| 207 | + // Close all keepers immediately before starting server |
| 208 | + // This minimizes the race window to < 1ms (time between keeper.close() and service bind) |
| 209 | + if (memberPortKeeper != null) { |
| 210 | + memberPortKeeper.close(); |
| 211 | + memberPortKeeper = null; |
| 212 | + } |
| 213 | + if (jmxPortKeeper != null) { |
| 214 | + jmxPortKeeper.close(); |
| 215 | + jmxPortKeeper = null; |
| 216 | + } |
| 217 | + if (httpPortKeeper != null) { |
| 218 | + httpPortKeeper.close(); |
| 219 | + httpPortKeeper = null; |
| 220 | + } |
| 221 | + |
| 222 | + CacheFactory cf = new CacheFactory(properties); |
| 223 | + if (pdxPersistentUserSet) { |
| 224 | + cf.setPdxPersistent(pdxPersistent); |
| 225 | + } |
| 226 | + if (pdxReadSerializedUserSet) { |
| 227 | + cf.setPdxReadSerialized(pdxReadSerialized); |
| 228 | + } |
| 229 | + if (pdxSerializer != null) { |
| 230 | + cf.setPdxSerializer(pdxSerializer); |
| 231 | + } |
| 232 | + cache = (InternalCache) cf.create(); |
| 233 | + DistributionConfig config = |
| 234 | + ((InternalDistributedSystem) cache.getDistributedSystem()).getConfig(); |
| 235 | + jmxPort = config.getJmxManagerPort(); |
| 236 | + httpPort = config.getHttpServicePort(); |
| 237 | + |
| 238 | + for (int i = 0; i < serverCount; i++) { |
| 239 | + CacheServer server = cache.addCacheServer(); |
| 240 | + if (i == 0) { |
| 241 | + CacheServerHelper.setIsDefaultServer(server); |
| 242 | + } |
| 243 | + // memberPort is by default zero, which translates to "randomly select an available port," |
| 244 | + // which is why it is updated after this try block |
| 245 | + if (serverCount == 1) { |
| 246 | + server.setPort(memberPort); |
| 247 | + } else { |
| 248 | + server.setPort(0); |
| 249 | + } |
| 250 | + if (maxThreads >= 0) { |
| 251 | + server.setMaxThreads(maxThreads); |
| 252 | + } |
| 253 | + try { |
| 254 | + server.start(); |
| 255 | + } catch (IOException e) { |
| 256 | + // Check if this is a retryable port binding failure |
| 257 | + if (e.getMessage() != null && e.getMessage().contains("Address already in use") |
| 258 | + && attempt < maxRetries && serverCount == 1) { |
| 259 | + // Port was stolen - clean up and retry with new ports |
| 260 | + cache.close(); |
| 261 | + cache = null; |
| 262 | + servers.clear(); |
| 263 | + throw new RuntimeException("Port binding failed, will retry", e); |
| 264 | + } |
| 265 | + throw new RuntimeException("unable to start server", e); |
| 266 | + } |
| 267 | + // if this member has multiple cache servers, the memberPort will be the last server's |
| 268 | + // port started. |
| 269 | + memberPort = server.getPort(); |
| 270 | + servers.add(server); |
| 271 | + } |
| 272 | + return; // Success - exit retry loop |
| 273 | + |
| 274 | + } catch (RuntimeException e) { |
| 275 | + lastException = e; |
| 276 | + if (e.getMessage() != null && e.getMessage().contains("Port binding failed, will retry") |
| 277 | + && attempt < maxRetries) { |
| 278 | + // Allocate new ports and retry |
| 279 | + UniquePortSupplier portSupplier = new UniquePortSupplier(); |
| 280 | + memberPort = portSupplier.getAvailablePort(); |
| 281 | + try { |
| 282 | + memberPortKeeper = new PortKeeper(memberPort); |
| 283 | + // Don't reallocate JMX/HTTP ports if they're user-specified (>0) |
| 284 | + if (jmxPort <= 0) { |
| 285 | + int newJmxPort = portSupplier.getAvailablePort(); |
| 286 | + jmxPortKeeper = new PortKeeper(newJmxPort); |
| 287 | + if (jmxPort == 0) { |
| 288 | + properties.setProperty(DistributionConfig.JMX_MANAGER_PORT_NAME, |
| 289 | + String.valueOf(newJmxPort)); |
| 290 | + } |
| 291 | + } |
| 292 | + if (httpPort <= 0) { |
| 293 | + int newHttpPort = portSupplier.getAvailablePort(); |
| 294 | + httpPortKeeper = new PortKeeper(newHttpPort); |
| 295 | + if (httpPort == 0) { |
| 296 | + properties.setProperty(DistributionConfig.HTTP_SERVICE_PORT_NAME, |
| 297 | + String.valueOf(newHttpPort)); |
| 298 | + } |
| 299 | + } |
| 300 | + } catch (IOException retryException) { |
| 301 | + // Failed to allocate new ports - will retry with what we have |
| 302 | + } |
| 303 | + continue; // Retry with new ports |
| 304 | + } |
| 305 | + // Not a retryable error or max retries reached |
| 306 | + throw e; |
251 | 307 | } |
252 | | - // if this member has multiple cache servers, the memberPort will be the last server's port |
253 | | - // started. |
254 | | - memberPort = server.getPort(); |
255 | | - servers.add(server); |
| 308 | + } |
| 309 | + |
| 310 | + if (cache == null && lastException != null) { |
| 311 | + throw lastException; |
256 | 312 | } |
257 | 313 | } |
258 | 314 |
|
|
0 commit comments