Skip to content

Commit c41a65f

Browse files
committed
增加一个自动 dump 的功能试试
1 parent e936b91 commit c41a65f

File tree

5 files changed

+260
-12
lines changed

5 files changed

+260
-12
lines changed

server/server/meta/src/main/java/com/alipay/sofa/registry/server/meta/bootstrap/MetaServerBootstrap.java

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -41,20 +41,19 @@
4141
import com.github.rholder.retry.StopStrategies;
4242
import com.github.rholder.retry.WaitStrategies;
4343
import com.google.common.base.Predicate;
44+
import java.lang.annotation.Annotation;
45+
import java.util.*;
46+
import java.util.concurrent.TimeUnit;
47+
import java.util.concurrent.atomic.AtomicBoolean;
48+
import javax.annotation.Resource;
49+
import javax.ws.rs.Path;
50+
import javax.ws.rs.ext.Provider;
4451
import org.apache.commons.collections.CollectionUtils;
4552
import org.apache.commons.lang.StringUtils;
4653
import org.glassfish.jersey.server.ResourceConfig;
4754
import org.springframework.beans.factory.annotation.Autowired;
4855
import org.springframework.context.ApplicationContext;
4956

50-
import javax.annotation.Resource;
51-
import javax.ws.rs.Path;
52-
import javax.ws.rs.ext.Provider;
53-
import java.lang.annotation.Annotation;
54-
import java.util.*;
55-
import java.util.concurrent.TimeUnit;
56-
import java.util.concurrent.atomic.AtomicBoolean;
57-
5857
/**
5958
* @author shangyu.wh
6059
* @version $Id: MetaServerBootstrap.java, v 0.1 2018-01-16 11:28 shangyu.wh Exp $

server/server/meta/src/main/java/com/alipay/sofa/registry/server/meta/resource/DataCenterResource.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,5 +53,4 @@ public Result queryBlackList() {
5353
return Result.failed("Query meta local datacenter exception");
5454
}
5555
}
56-
5756
}

server/server/session/src/main/java/com/alipay/sofa/registry/server/session/push/PushEfficiencyConfigUpdater.java

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,14 @@
1818

1919
import com.alipay.sofa.registry.server.session.resource.ClientManagerResource;
2020
import com.google.common.annotations.VisibleForTesting;
21+
import java.util.concurrent.locks.Lock;
22+
import java.util.concurrent.locks.ReentrantLock;
2123
import org.slf4j.Logger;
2224
import org.slf4j.LoggerFactory;
2325
import org.springframework.beans.factory.annotation.Autowired;
2426
import org.springframework.context.SmartLifecycle;
2527
import org.springframework.stereotype.Component;
2628

27-
import java.util.concurrent.locks.Lock;
28-
import java.util.concurrent.locks.ReentrantLock;
29-
3029
/**
3130
* @author huicha
3231
* @date 2025/7/24
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package com.alipay.sofa.registry.server.session.push;
18+
19+
import java.util.concurrent.TimeUnit;
20+
import org.junit.Test;
21+
22+
/**
23+
* @author huicha
24+
* @date 2025/12/31
25+
*/
26+
public class UTHeapWatcherTest {
27+
28+
/** 这个方法用于监控堆内存使用情况 异常的话,尝试 Dump */
29+
@Test
30+
public void test() {
31+
UnitTestGCMonitor unitTestGCMonitor = new UnitTestGCMonitor("/tmp", 100, 10, TimeUnit.SECONDS);
32+
unitTestGCMonitor.startMonitoring();
33+
}
34+
}
Lines changed: 217 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,217 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package com.alipay.sofa.registry.server.session.push;
18+
19+
import com.sun.management.HotSpotDiagnosticMXBean;
20+
import java.lang.management.GarbageCollectorMXBean;
21+
import java.lang.management.ManagementFactory;
22+
import java.nio.file.Files;
23+
import java.nio.file.Path;
24+
import java.nio.file.Paths;
25+
import java.util.HashMap;
26+
import java.util.List;
27+
import java.util.Map;
28+
import java.util.concurrent.TimeUnit;
29+
import java.util.concurrent.locks.Lock;
30+
import java.util.concurrent.locks.LockSupport;
31+
import java.util.concurrent.locks.ReentrantLock;
32+
import org.slf4j.Logger;
33+
import org.slf4j.LoggerFactory;
34+
35+
public class UnitTestGCMonitor implements Runnable {
36+
37+
private static final Logger LOGGER = LoggerFactory.getLogger(UnitTestGCMonitor.class);
38+
39+
// Heap Dump 保存路径
40+
private final String dumpDir;
41+
42+
// GC 次数阈值
43+
private final long gcCountThreshold;
44+
45+
// 监控间隔(单位:毫秒)
46+
private final long monitorIntervalMs;
47+
48+
private Lock lock;
49+
50+
private boolean running;
51+
52+
private Thread monitoryThread;
53+
54+
private final Map<String, Long> countMap;
55+
56+
public UnitTestGCMonitor(
57+
String dumpDir, long gcCountThreshold, long monitorInterval, TimeUnit timeUnit) {
58+
this.dumpDir = dumpDir;
59+
this.gcCountThreshold = gcCountThreshold;
60+
this.monitorIntervalMs = timeUnit.toMillis(monitorInterval);
61+
this.lock = new ReentrantLock(false);
62+
this.running = false;
63+
this.countMap = new HashMap<>();
64+
}
65+
66+
public void startMonitoring() {
67+
this.lock.lock();
68+
try {
69+
if (this.running) {
70+
LOGGER.error("[UnitTestGCMonitor] GC 监控已经开启了");
71+
return;
72+
}
73+
74+
LOGGER.error("[UnitTestGCMonitor] 开始监控 GC 情况");
75+
76+
Thread monitoryThread = new Thread(this, "UnitTestGCMonitorThread");
77+
monitoryThread.start();
78+
79+
this.monitoryThread = monitoryThread;
80+
} finally {
81+
this.lock.unlock();
82+
}
83+
}
84+
85+
public void stopMonitoring() {
86+
this.lock.lock();
87+
try {
88+
if (!this.running) {
89+
LOGGER.error("[UnitTestGCMonitor] GC 监控已经停止");
90+
return;
91+
}
92+
93+
// 中断线程,然后退出
94+
this.monitoryThread.interrupt();
95+
this.monitoryThread = null;
96+
} finally {
97+
this.lock.unlock();
98+
}
99+
}
100+
101+
@Override
102+
public void run() {
103+
try {
104+
while (true) {
105+
List<GarbageCollectorMXBean> gcBeans = ManagementFactory.getGarbageCollectorMXBeans();
106+
for (GarbageCollectorMXBean gcBean : gcBeans) {
107+
String gcBeanName = gcBean.getName();
108+
long currentGCCount = gcBean.getCollectionCount();
109+
110+
LOGGER.error(
111+
"[UnitTestGCMonitor] 获取到 GC Bean: {}, 当前 GC 次数: {}", gcBeanName, currentGCCount);
112+
113+
if (!this.isFGC(gcBeanName)) {
114+
continue;
115+
}
116+
117+
LOGGER.error("[UnitTestGCMonitor] GC Bean: {} 是涉及 FGC 的 GC Bean!", gcBeanName);
118+
119+
// 获取上一次的 GC 次数
120+
long oldGCCount = this.countMap.getOrDefault(gcBeanName, 0L);
121+
122+
// 更新一下新的
123+
this.countMap.put(gcBeanName, currentGCCount);
124+
125+
LOGGER.error(
126+
"[UnitTestGCMonitor] 获取到 GC Bean: {}, 上一次 GC 次数: {}, 当前 GC 次数: {}",
127+
gcBeanName,
128+
oldGCCount,
129+
currentGCCount);
130+
131+
// 计算一下期间的 GC 次数
132+
long count = currentGCCount - oldGCCount;
133+
if (count < this.gcCountThreshold) {
134+
continue;
135+
}
136+
137+
String dumpPath = this.dumpDir + "/" + gcBeanName + ".hprof";
138+
139+
if (Files.exists(Paths.get(dumpPath))) {
140+
LOGGER.error(
141+
"[UnitTestGCMonitor] 获取到 GC Bean: {}, Heap Dump 文件已经存在: {},跳过 Heap Dump",
142+
gcBeanName,
143+
dumpPath);
144+
continue;
145+
}
146+
147+
LOGGER.error(
148+
"[UnitTestGCMonitor] 获取到 GC Bean: {}, 上一次 GC 次数: {}, 当前 GC 次数: {}, 触发 Heap Dump: {}",
149+
gcBeanName,
150+
oldGCCount,
151+
currentGCCount,
152+
dumpPath);
153+
if (this.generateHeapDump(dumpPath)) {
154+
LOGGER.error(
155+
"[UnitTestGCMonitor] 获取到 GC Bean: {}, Heap Dump 成功: {}", gcBeanName, dumpPath);
156+
return;
157+
} else {
158+
LOGGER.error(
159+
"[UnitTestGCMonitor] 获取到 GC Bean: {}, Heap Dump 失败: {}", gcBeanName, dumpPath);
160+
}
161+
}
162+
163+
// 全部检查完成,等待下一个监控周期
164+
LockSupport.parkNanos(TimeUnit.MILLISECONDS.toNanos(monitorIntervalMs));
165+
166+
if (Thread.interrupted()) {
167+
// 响应中断
168+
LOGGER.error("[UnitTestGCMonitor] 监控 GC 线程被中断,退出!");
169+
}
170+
}
171+
} catch (Throwable throwable) {
172+
LOGGER.error("[UnitTestGCMonitor] 监控 GC 线程执行异常,退出" + throwable);
173+
}
174+
}
175+
176+
private boolean isFGC(String gcBeanName) {
177+
// 涵盖 JDK 8 常见收集器的 Full GC/Major GC Bean 名称
178+
return gcBeanName.contains("MarkSweep")
179+
|| // Parallel GC (PS MarkSweep) 或 Serial GC
180+
gcBeanName.contains("Old")
181+
|| // G1 Old Generation
182+
gcBeanName.contains("ConcurrentMarkSweep"); // CMS
183+
}
184+
185+
private boolean generateHeapDump(String dumpPath) {
186+
try {
187+
// 尝试获取 HotSpotDiagnosticMXBean
188+
HotSpotDiagnosticMXBean hotSpotBean =
189+
ManagementFactory.newPlatformMXBeanProxy(
190+
ManagementFactory.getPlatformMBeanServer(),
191+
"com.sun.management:type=HotSpotDiagnostic",
192+
HotSpotDiagnosticMXBean.class);
193+
194+
if (null == hotSpotBean) {
195+
LOGGER.error("[UnitTestGCMonitor] 获取 HotSpotDiagnosticMXBean 失败");
196+
return false;
197+
}
198+
199+
Path filePath = Paths.get(dumpPath);
200+
Path dirPath = filePath.getParent();
201+
202+
if (!Files.exists(dirPath)) {
203+
LOGGER.error("[UnitTestGCMonitor] Heap Dump 路径不存在,创建一次: {}", dirPath.toAbsolutePath());
204+
Files.createDirectories(dirPath);
205+
}
206+
207+
// 执行 Dump
208+
hotSpotBean.dumpHeap(dumpPath, true);
209+
210+
LOGGER.error("[UnitTestGCMonitor] Heap Dump 成功: {}", dumpPath);
211+
return true;
212+
} catch (Throwable throwable) {
213+
LOGGER.error("[UnitTestGCMonitor] Heap Dump 异常: {}", dumpPath, throwable);
214+
return false;
215+
}
216+
}
217+
}

0 commit comments

Comments
 (0)