Skip to content

Commit 1bb9994

Browse files
authored
Merge pull request #33 from JaD1ng/fix/remove-disk
Fix/remove disk
2 parents 8ffa621 + b213234 commit 1bb9994

File tree

8 files changed

+300
-130
lines changed

8 files changed

+300
-130
lines changed

mock/s3/API.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1149,7 +1149,7 @@ curl -X POST http://localhost:8085/api/v1/metric-anomaly \
11491149
"metric_name": "system_machine_online_status",
11501150
"anomaly_type": "machine_down",
11511151
"enabled": true,
1152-
"target_value": 0.0,
1152+
"target_value": 80.0,
11531153
"duration": 60000000000,
11541154
"max_triggers": 1
11551155
}'

mock/s3/README.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@
3434
|---------|---------|----------|
3535
| **CPU峰值** | 真实CPU密集计算 | CPU使用率、负载、响应时间 |
3636
| **内存泄露** | 实际分配内存不释放 | 内存使用率、GC频率、OOM事件 |
37-
| **磁盘满载** | 创建大文件占用磁盘 | 磁盘使用率、I/O延迟、写入失败 |
3837
| **网络风暴** | 大量并发连接 | 网络带宽、连接数、超时率 |
3938
| **服务宕机** | 完整服务停止响应 | 服务健康状态、请求成功率 |
4039

mock/s3/deployments/observability/grafana/dashboards/mock-s3-services-metrics.json

Lines changed: 6 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -200,42 +200,6 @@
200200
"y": 9
201201
}
202202
},
203-
{
204-
"id": 5,
205-
"title": "Services Disk Usage",
206-
"type": "timeseries",
207-
"targets": [
208-
{
209-
"expr": "system_disk_usage_percent_percent{service=~\"$service\"}",
210-
"legendFormat": "{{service}} Disk",
211-
"refId": "A"
212-
}
213-
],
214-
"fieldConfig": {
215-
"defaults": {
216-
"color": {
217-
"mode": "palette-classic"
218-
},
219-
"custom": {
220-
"axisPlacement": "auto",
221-
"axisLabel": "Disk Usage (%)",
222-
"drawStyle": "line",
223-
"lineInterpolation": "smooth",
224-
"lineWidth": 2,
225-
"fillOpacity": 20
226-
},
227-
"unit": "percent",
228-
"min": 0,
229-
"max": 100
230-
}
231-
},
232-
"gridPos": {
233-
"h": 9,
234-
"w": 12,
235-
"x": 0,
236-
"y": 17
237-
}
238-
},
239203
{
240204
"id": 6,
241205
"title": "Services Network QPS",
@@ -266,8 +230,8 @@
266230
},
267231
"gridPos": {
268232
"h": 9,
269-
"w": 12,
270-
"x": 12,
233+
"w": 24,
234+
"x": 0,
271235
"y": 17
272236
}
273237
},
@@ -337,11 +301,6 @@
337301
"expr": "system_memory_usage_percent_percent{service=~\"$service\"}",
338302
"legendFormat": "{{service}} Memory",
339303
"refId": "B"
340-
},
341-
{
342-
"expr": "system_disk_usage_percent_percent{service=~\"$service\"}",
343-
"legendFormat": "{{service}} Disk",
344-
"refId": "C"
345304
}
346305
],
347306
"fieldConfig": {
@@ -405,24 +364,17 @@
405364
"instant": true,
406365
"format": "table"
407366
},
408-
{
409-
"expr": "system_disk_usage_percent_percent{service=~\"$service\"}",
410-
"legendFormat": "",
411-
"refId": "C",
412-
"instant": true,
413-
"format": "table"
414-
},
415367
{
416368
"expr": "system_network_qps_per_second{service=~\"$service\"}",
417369
"legendFormat": "",
418-
"refId": "D",
370+
"refId": "C",
419371
"instant": true,
420372
"format": "table"
421373
},
422374
{
423375
"expr": "system_machine_online_status{service=~\"$service\"}",
424376
"legendFormat": "",
425-
"refId": "E",
377+
"refId": "D",
426378
"instant": true,
427379
"format": "table"
428380
}
@@ -447,9 +399,8 @@
447399
"service": "Service",
448400
"Value #A": "CPU (%)",
449401
"Value #B": "Memory (%)",
450-
"Value #C": "Disk (%)",
451-
"Value #D": "Network QPS",
452-
"Value #E": "Status"
402+
"Value #C": "Network QPS",
403+
"Value #D": "Status"
453404
}
454405
}
455406
}

mock/s3/shared/middleware/error_injection/cpu_spike_injector.go

Lines changed: 108 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,26 @@ package error_injection
33
import (
44
"context"
55
"mocks3/shared/observability"
6+
"os"
67
"runtime"
8+
"strconv"
9+
"strings"
710
"sync"
811
"time"
912
)
1013

1114
// CPUSpikeInjector CPU峰值异常注入器
1215
type CPUSpikeInjector struct {
13-
logger *observability.Logger
14-
isActive bool
15-
mu sync.RWMutex
16-
stopChan chan struct{}
17-
goroutines []chan struct{}
16+
logger *observability.Logger
17+
isActive bool
18+
mu sync.RWMutex
19+
stopChan chan struct{}
20+
goroutines []chan struct{}
21+
targetPercent float64 // 目标CPU使用率
22+
// CPU统计
23+
lastTotal uint64
24+
lastIdle uint64
25+
lastUpdate time.Time
1826
}
1927

2028
// NewCPUSpikeInjector 创建CPU峰值异常注入器
@@ -36,6 +44,7 @@ func (c *CPUSpikeInjector) StartCPUSpike(ctx context.Context, targetCPUPercent f
3644
}
3745

3846
c.isActive = true
47+
c.targetPercent = targetCPUPercent
3948
c.logger.Info(ctx, "Starting CPU spike injection",
4049
observability.Float64("target_cpu_percent", targetCPUPercent),
4150
observability.String("duration", duration.String()))
@@ -76,6 +85,7 @@ func (c *CPUSpikeInjector) StopCPUSpike(ctx context.Context) {
7685

7786
c.logger.Info(ctx, "Stopping CPU spike injection")
7887
c.isActive = false
88+
c.targetPercent = 0
7989

8090
// 停止所有CPU密集型协程
8191
for _, stopChan := range c.goroutines {
@@ -91,23 +101,112 @@ func (c *CPUSpikeInjector) IsActive() bool {
91101
return c.isActive
92102
}
93103

104+
// GetCurrentCPUUsage 获取当前CPU使用率
105+
func (c *CPUSpikeInjector) GetCurrentCPUUsage() float64 {
106+
c.mu.RLock()
107+
defer c.mu.RUnlock()
108+
109+
if !c.isActive {
110+
return c.readSystemCPUUsage()
111+
}
112+
113+
// CPU注入活跃时,读取真实的CPU使用率
114+
return c.readSystemCPUUsage()
115+
}
116+
117+
// readSystemCPUUsage 读取系统真实CPU使用率
118+
func (c *CPUSpikeInjector) readSystemCPUUsage() float64 {
119+
// 读取/proc/stat获取CPU统计
120+
data, err := os.ReadFile("/proc/stat")
121+
if err != nil {
122+
return 0.0
123+
}
124+
125+
lines := strings.Split(string(data), "\n")
126+
if len(lines) == 0 {
127+
return 0.0
128+
}
129+
130+
// 解析第一行 CPU总计
131+
fields := strings.Fields(lines[0])
132+
if len(fields) < 8 || fields[0] != "cpu" {
133+
return 0.0
134+
}
135+
136+
// 解析CPU时间值
137+
var values [7]uint64
138+
for i := 0; i < 7; i++ {
139+
val, err := strconv.ParseUint(fields[i+1], 10, 64)
140+
if err != nil {
141+
return 0.0
142+
}
143+
values[i] = val
144+
}
145+
146+
// 计算总时间和空闲时间
147+
total := values[0] + values[1] + values[2] + values[3] + values[4] + values[5] + values[6]
148+
idle := values[3] + values[4] // idle + iowait
149+
150+
now := time.Now()
151+
152+
// 第一次读取,保存基准值
153+
if c.lastTotal == 0 {
154+
c.lastTotal = total
155+
c.lastIdle = idle
156+
c.lastUpdate = now
157+
return 0.0
158+
}
159+
160+
// 计算时间差值
161+
totalDiff := total - c.lastTotal
162+
idleDiff := idle - c.lastIdle
163+
164+
// 更新基准值
165+
c.lastTotal = total
166+
c.lastIdle = idle
167+
c.lastUpdate = now
168+
169+
// 计算CPU使用率
170+
if totalDiff > 0 {
171+
cpuUsage := float64(totalDiff-idleDiff) / float64(totalDiff) * 100.0
172+
if cpuUsage < 0 {
173+
cpuUsage = 0
174+
}
175+
if cpuUsage > 100 {
176+
cpuUsage = 100
177+
}
178+
return cpuUsage
179+
}
180+
181+
return 0.0
182+
}
183+
94184
// cpuIntensiveTask CPU密集型任务
95185
func (c *CPUSpikeInjector) cpuIntensiveTask(stopChan chan struct{}) {
96186
for {
97187
select {
98188
case <-stopChan:
99189
return
100190
default:
101-
// 执行CPU密集型计算
102-
for i := 0; i < 10000; i++ {
191+
// 执行CPU密集型计算,减少频繁让出CPU
192+
for i := 0; i < 1000000; i++ {
103193
_ = i * i * i
104194
}
105-
// 短暂让出CPU,避免完全阻塞
106-
runtime.Gosched()
195+
// 降低让出CPU的频率,每完成大量计算后才让出
196+
if c.shouldYield() {
197+
runtime.Gosched()
198+
}
107199
}
108200
}
109201
}
110202

203+
// shouldYield 控制让出CPU的频率,避免过于频繁的调度
204+
func (c *CPUSpikeInjector) shouldYield() bool {
205+
// 只有在非常必要时才让出CPU,大幅减少调度频率
206+
// 可以根据系统负载动态调整
207+
return false // 暂时完全禁用主动让出,让系统调度器控制
208+
}
209+
111210
// Cleanup 清理资源
112211
func (c *CPUSpikeInjector) Cleanup() {
113212
close(c.stopChan)

0 commit comments

Comments
 (0)