Skip to content

Commit 22df443

Browse files
committed
增加一些测试脚本
1 parent 41ba9ab commit 22df443

File tree

3 files changed

+309
-1
lines changed

3 files changed

+309
-1
lines changed

mock/s3/deployments/observability/prometheus.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ alerting:
1414
alertmanagers:
1515
- static_configs:
1616
- targets:
17-
- 'host.docker.internal:8081' # Prometheus Adapter 运行在宿主机的 8081 端口
17+
- '10.210.10.33:9999' # Prometheus Adapter 运行在 9999 端口
1818
api_version: v2 # 使用 Alertmanager API v2
1919

2020
scrape_configs:

simple_webhook_server.py

Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
#!/usr/bin/env python3
2+
"""
3+
简单的 Webhook 服务器 - 使用标准库接收 Alertmanager 告警
4+
"""
5+
6+
from http.server import HTTPServer, BaseHTTPRequestHandler
7+
import json
8+
from datetime import datetime
9+
import threading
10+
import time
11+
12+
# Alerts received so far, oldest first. Every access goes through alerts_lock.
alerts_received = []
alerts_lock = threading.Lock()


class WebhookHandler(BaseHTTPRequestHandler):
    """HTTP handler that records Alertmanager webhook payloads in memory.

    Routes:
        POST /v1/integrations/alertmanager/webhook  store and print a payload
        GET  /alerts                                stored records as JSON
        GET  /health                                plain-text "OK"
        GET  /                                      small HTML index page

    Any other path is answered with 404.
    """

    WEBHOOK_PATH = '/v1/integrations/alertmanager/webhook'
    # Only the most recent MAX_ALERTS records are kept in alerts_received.
    MAX_ALERTS = 100

    def _send(self, status, body, content_type=None):
        """Write a complete response: status line, headers and *body* bytes."""
        self.send_response(status)
        if content_type is not None:
            self.send_header('Content-Type', content_type)
        # An explicit length lets clients detect truncated responses.
        self.send_header('Content-Length', str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def _send_json(self, status, payload):
        """Serialize *payload* as JSON and send it with the given status."""
        self._send(status, json.dumps(payload).encode(), 'application/json')

    def _read_json_body(self):
        """Read the request body and decode it as UTF-8 JSON.

        Raises:
            ValueError: if Content-Length is missing, not a non-negative
                integer, or the body is not valid UTF-8 encoded JSON.
        """
        raw_length = self.headers.get('Content-Length')
        if raw_length is None:
            raise ValueError("missing Content-Length header")
        length = int(raw_length)
        if length < 0:
            raise ValueError("negative Content-Length header")
        return json.loads(self.rfile.read(length).decode('utf-8'))

    @staticmethod
    def _print_summary(data):
        """Print one line per alert in *data*; skip entries of unexpected shape."""
        alerts = data.get('alerts') if isinstance(data, dict) else None
        if not isinstance(alerts, list):
            return
        print("\n告警摘要:")
        for alert in alerts:
            if not isinstance(alert, dict):
                continue
            labels = alert.get('labels')
            if not isinstance(labels, dict):
                labels = {}
            alert_name = labels.get('alertname', 'Unknown')
            status = alert.get('status', 'Unknown')
            severity = labels.get('severity', 'Unknown')
            service = labels.get('service', 'N/A')
            print(f" - {alert_name}: {status} (severity: {severity}, service: {service})")

    def do_POST(self):
        """Handle POST: store the webhook payload, print it, and answer 200.

        A malformed request (bad Content-Length, invalid UTF-8 or JSON) is
        answered with 400 and nothing is stored.
        """
        if self.path != self.WEBHOOK_PATH:
            self._send(404, b"Not Found")
            return

        # Only request parsing can turn into a 400; once the payload has been
        # accepted, the console summary must not change the outcome.
        try:
            data = self._read_json_body()
        except (ValueError, TypeError) as e:
            print(f"Error processing alert: {e}")
            self._send_json(400, {"status": "error", "message": str(e)})
            return

        timestamp = datetime.now().isoformat()
        with alerts_lock:
            alerts_received.append({"timestamp": timestamp, "data": data})
            # Drop the oldest records beyond the cap (no-op while under it).
            del alerts_received[:-self.MAX_ALERTS]

        print(f"\n[{timestamp}] 收到告警:")
        print(json.dumps(data, indent=2, ensure_ascii=False))
        self._print_summary(data)
        print("-" * 50)

        self._send_json(200, {"status": "success", "message": "Alert received"})

    def do_GET(self):
        """Handle GET for /alerts, /health and the index page."""
        if self.path == '/alerts':
            # Serialize under the lock, write to the socket outside of it.
            with alerts_lock:
                body = json.dumps(alerts_received, indent=2).encode()
            self._send(200, body, 'application/json')

        elif self.path == '/health':
            self._send(200, b"OK")

        elif self.path == '/':
            with alerts_lock:
                count = len(alerts_received)
            html = """
<html>
<head><title>Webhook Server</title></head>
<body>
<h1>Mock Webhook Server</h1>
<p>Webhook endpoint: POST /v1/integrations/alertmanager/webhook</p>
<p>View alerts: <a href="/alerts">GET /alerts</a></p>
<p>Health check: <a href="/health">GET /health</a></p>
<hr>
<p>Alerts received: {}</p>
</body>
</html>
""".format(count)
            self._send(200, html.encode(), 'text/html')

        else:
            self._send(404, b"Not Found")

    def log_message(self, format, *args):
        """Suppress the default per-request log line to keep the console quiet."""
        return
119+
120+
def run_server(port=8080):
    """Serve WebhookHandler on all interfaces until interrupted with Ctrl-C.

    Args:
        port: TCP port to listen on.
    """
    # The context manager calls server_close() on every exit path, so the
    # listening socket is released even if serve_forever() raises.
    with HTTPServer(('', port), WebhookHandler) as httpd:
        print("=" * 60)
        print("Mock Webhook Server 已启动")
        print(f"监听地址: http://0.0.0.0:{port}")
        print("Webhook 端点: POST /v1/integrations/alertmanager/webhook")
        print("查看告警: GET /alerts")
        print("健康检查: GET /health")
        print("=" * 60)
        print("\n等待接收告警...\n")

        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            # serve_forever() has already returned here; shutdown() is only
            # meant to be called from another thread, so it is not needed.
            print("\n服务器已停止")
139+
140+
if __name__ == '__main__':
    import socket
    import sys

    # The optional first argument overrides the default port.
    port = 8080
    if len(sys.argv) > 1:
        try:
            port = int(sys.argv[1])
            # Out-of-range values would otherwise crash with OverflowError
            # in connect_ex()/bind() further down.
            if not 0 <= port <= 65535:
                raise ValueError(sys.argv[1])
        except ValueError:
            print(f"无效的端口: {sys.argv[1]}")
            sys.exit(1)

    # Probe the port: a successful local connect means something is already
    # listening on it.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        result = sock.connect_ex(('127.0.0.1', port))

    if result == 0:
        print(f"警告: 端口 {port} 已被占用")
        print("你可以:")
        print(f"1. 使用其他端口: python3 {sys.argv[0]} 8081")
        print(f"2. 或者停止占用端口 {port} 的服务")
        try:
            response = input(f"\n是否继续在端口 {port} 上启动? (y/N): ")
        except EOFError:
            # Non-interactive stdin: fall back to the default answer "N".
            response = ''
        if response.lower() != 'y':
            sys.exit(0)

    run_server(port)

test_complete_alert_flow.sh

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
#!/bin/bash
#
# End-to-end check of the alert pipeline: Prometheus -> Adapter -> Webhook.
#
# Steps:
#   1. POST a connectivity payload to the webhook endpoint.
#   2. Query the Adapter's /-/healthy and /-/ready endpoints.
#   3. Ask the operator to confirm the Adapter was restarted.
#   4. POST a test alert to the Adapter.
#   5. Verify that the webhook's /alerts listing changed after step 4.
#   6. List up to three firing alerts from Prometheus (requires jq).
#
# Exits 0 only when the Adapter accepted the alert and the webhook recorded
# something new; exits 1 otherwise.
#
# Endpoints may be overridden through the environment; the defaults are the
# addresses this script was written against.
PROMETHEUS_URL="${PROMETHEUS_URL:-http://10.210.10.33:9090}"
ADAPTER_URL="${ADAPTER_URL:-http://10.210.10.33:9999}"
WEBHOOK_URL="${WEBHOOK_URL:-http://10.99.181.164:8080}"
WEBHOOK_PATH="/v1/integrations/alertmanager/webhook"

# Bypass any configured proxy for the test hosts.
export no_proxy="10.210.10.33,10.99.181.164,localhost,127.0.0.1"
export NO_PROXY="10.210.10.33,10.99.181.164,localhost,127.0.0.1"

# Shared curl options; --max-time keeps an unreachable host from hanging the run.
CURL=(curl -s --noproxy "*" --max-time 10)

echo "=================================================="
echo " 完整告警流程测试"
echo "=================================================="
echo ""
echo "告警流程:"
echo "Prometheus (${PROMETHEUS_URL#*://})"
echo " ↓ [告警触发]"
echo "Adapter (${ADAPTER_URL#*://})"
echo " ↓ [转发]"
echo "Webhook (${WEBHOOK_URL#*://})"
echo ""
echo "=================================================="

# Step 1: check the webhook service
echo -e "\n[Step 1] 检查 Webhook 服务状态"
echo -n " 测试 webhook 端点: "
response=$("${CURL[@]}" -X POST "${WEBHOOK_URL}${WEBHOOK_PATH}" \
    -H "Content-Type: application/json" \
    -d '{"test": "connectivity_check"}' \
    -o /dev/null -w "%{http_code}")
if [ "$response" = "200" ]; then
    echo "✅ Webhook 服务正常 (HTTP 200)"
else
    echo "❌ Webhook 服务异常 (HTTP $response)"
    exit 1
fi

# Step 2: check the Adapter service
# -f makes curl fail on HTTP error statuses, so a 5xx is not reported as healthy.
echo -e "\n[Step 2] 检查 Adapter 服务状态"
adapter_ok=true
echo -n " 健康检查: "
if "${CURL[@]}" -f "${ADAPTER_URL}/-/healthy" &>/dev/null; then
    echo "✅ Healthy"
else
    echo "❌ Unhealthy"
    adapter_ok=false
fi
echo -n " 就绪检查: "
if "${CURL[@]}" -f "${ADAPTER_URL}/-/ready" &>/dev/null; then
    echo "✅ Ready"
else
    echo "❌ Not Ready"
    adapter_ok=false
fi

# Step 3: reminder
echo -e "\n[Step 3] 重要提醒"
echo " ⚠️ 确保 Adapter 服务已使用新配置重启"
echo " 配置文件: internal/prometheus_adapter/config/prometheus_adapter.yml"
echo " Webhook URL 应为: ${WEBHOOK_URL}${WEBHOOK_PATH}"
echo ""
read -p " Adapter 服务是否已重启?(y/n): " -n 1 -r
echo ""
if [[ ! $REPLY =~ ^[Yy]$ ]]; then
    echo " 请先重启 Adapter 服务后再运行测试"
    exit 1
fi

# Step 4: send a test alert to the Adapter
echo -e "\n[Step 4] 发送测试告警到 Adapter"

# Snapshot what the webhook already holds. Step 1 itself stored a record, so
# "non-empty" alone cannot prove that the Adapter forwarded anything.
alerts_before=$("${CURL[@]}" "${WEBHOOK_URL}/alerts")

timestamp=$(date -u +%Y-%m-%dT%H:%M:%SZ)
alert_data='[{
  "labels": {
    "alertname": "TestAlertFlow",
    "severity": "warning",
    "service": "test_service",
    "environment": "test",
    "source": "manual_test"
  },
  "annotations": {
    "summary": "测试告警流程",
    "description": "验证 Prometheus → Adapter → Webhook 完整链路"
  },
  "startsAt": "'"$timestamp"'",
  "generatorURL": "http://test.example.com/alerts"
}]'

echo " 发送告警数据..."
response=$("${CURL[@]}" -X POST "${ADAPTER_URL}/api/v2/alerts" \
    -H "Content-Type: application/json" \
    -d "$alert_data" \
    -w "\n%{http_code}")

# The last line of the output is the status code appended by -w; the rest is the body.
http_code=$(echo "$response" | tail -n1)
body=$(echo "$response" | sed '$d')

if [ "$http_code" = "200" ]; then
    echo " ✅ Adapter 成功接收告警 (HTTP 200)"
    if [ -n "$body" ]; then
        echo " 响应: $body"
    fi
else
    echo " ❌ Adapter 接收告警失败 (HTTP $http_code)"
    echo " 响应: $body"
    echo ""
    echo " 可能的原因:"
    echo " 1. Adapter 服务未运行"
    echo " 2. Adapter 配置未更新"
    echo " 3. 网络连接问题"
fi

# Step 5: check whether the webhook received the alert
echo -e "\n[Step 5] 验证 Webhook 是否收到告警"
echo " 等待 2 秒让告警传递..."
sleep 2

echo " 查询 Webhook 收到的告警:"
alerts=$("${CURL[@]}" "${WEBHOOK_URL}/alerts")

# Received means: the listing is non-empty AND differs from the snapshot
# taken just before the test alert was sent.
webhook_received=false
if [ -n "$alerts" ] && [ "$alerts" != "[]" ] && [ "$alerts" != "$alerts_before" ]; then
    webhook_received=true
fi

if [ "$webhook_received" != true ]; then
    echo " ⚠️ Webhook 未收到任何告警"
    echo ""
    echo " 请检查:"
    echo " 1. Adapter 配置中的 webhook URL 是否正确"
    echo " 2. Adapter 服务是否已重启"
    echo " 3. 查看 Adapter 日志了解详情"
else
    echo " ✅ Webhook 收到告警!"
    echo ""
    echo " 最新的告警记录:"
    # Fall back to the raw listing when jq is missing or the shape is unexpected.
    echo "$alerts" | jq -r '.[-1] | " 时间: \(.timestamp)\n 告警名: \(.data.alerts[0].labels.alertname // "N/A")\n 严重性: \(.data.alerts[0].labels.severity // "N/A")\n 状态: \(.data.alerts[0].status // "N/A")"' 2>/dev/null || echo "$alerts"
fi

# Step 6: list active alerts in Prometheus
echo -e "\n[Step 6] 检查 Prometheus 中的活跃告警"
echo " 查询 firing 状态的告警 (前3个):"
"${CURL[@]}" "${PROMETHEUS_URL}/api/v1/alerts" | \
    jq -r '.data.alerts[] | select(.state=="firing") | " - \(.labels.alertname) (\(.labels.service // "no-service"))"' 2>/dev/null | head -3

echo -e "\n=================================================="
echo "测试完成!"
echo ""
echo "完整流程验证:"
echo "1. Webhook 服务: ✅ 运行中 (${WEBHOOK_URL#*://})"
echo "2. Adapter 服务: $([ "$adapter_ok" = true ] && echo "✅ 运行中" || echo "❌ 异常") (${ADAPTER_URL#*://})"
echo "3. 告警传递测试: $([ "$http_code" = "200" ] && echo "✅ 成功" || echo "❌ 失败")"
echo "4. Webhook 接收: $([ "$webhook_received" = true ] && echo "✅ 已收到告警" || echo "⚠️ 未收到告警")"
echo ""

if [ "$http_code" = "200" ] && [ "$webhook_received" = true ]; then
    echo "🎉 恭喜!告警流程工作正常!"
    exit_status=0
else
    echo "⚠️ 告警流程存在问题,请检查上述步骤中的错误信息"
    exit_status=1
fi

echo "=================================================="
exit "$exit_status"

0 commit comments

Comments
 (0)