Skip to content

Commit e66d104

Browse files
authored
Merge pull request #215 from YAPP-Github/develop
[Feat] Datadog 테라폼 적용 및 월간 서버 리포트 추가 Prod 적용
2 parents 05516ed + 1997898 commit e66d104

File tree

12 files changed

+665
-0
lines changed

12 files changed

+665
-0
lines changed
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# Sends the monthly server report (Datadog SLOs, AWS WAF traffic, AWS cost) to Discord.
name: monthly-server-report.yml

on:
  schedule:
    # Cron schedules are evaluated in UTC: 00:30 UTC on the 1st of every month.
    - cron: '30 0 1 * *'
  # Allows the report to be triggered manually from the Actions tab.
  workflow_dispatch:

# The job only checks out the repository, so the token needs read access at most.
permissions:
  contents: read

jobs:
  send-report:
    runs-on: ubuntu-latest
    # The script talks to several external APIs; cap the run so a hung call cannot hold a runner.
    timeout-minutes: 10
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'

      - name: Install Dependencies
        run: |
          pip install boto3 datadog-api-client requests python-dateutil

      - name: Run Report Script
        env:
          DD_API_KEY: ${{ secrets.DD_API_KEY }}
          DD_APP_KEY: ${{ secrets.DD_APP_KEY }}
          DISCORD_WEBHOOK_URL: ${{ secrets.DISCORD_MONTHLY_SERVER_REPORT_WEBHOOK }}
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          AWS_REGION: ${{ secrets.AWS_REGION }}
        run: python scripts/monthly-server-report.py
33+

scripts/monthly-server-report.py

Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,192 @@
1+
import boto3
2+
import os
3+
import requests
4+
from datadog_api_client import ApiClient, Configuration
5+
from datadog_api_client.v1.api.events_api import EventsApi
6+
from datadog_api_client.v1.api.service_level_objectives_api import ServiceLevelObjectivesApi
7+
from datetime import datetime
8+
from dateutil.relativedelta import relativedelta
9+
10+
# Credentials and the Discord target are read from the environment; each is None when unset.
DD_API_KEY = os.getenv("DD_API_KEY")
DD_APP_KEY = os.getenv("DD_APP_KEY")
DISCORD_WEBHOOK_URL = os.getenv("DISCORD_WEBHOOK_URL")
# Use `or` instead of a getenv default: the variable can be present but empty
# (e.g. populated from an unset CI secret), and an empty region must still
# fall back to the default.
AWS_REGION = os.getenv("AWS_REGION") or "ap-northeast-2"
# Name of the WAF web ACL whose CloudWatch metrics are reported.
WAF_WEB_ACL_NAME = "eatda-web-acl"

# Datadog SLO identifiers queried for the availability and latency figures.
SLO_AVAILABILITY_ID = "c2ba09c7153a5bcd91e9ba4f92245579"
SLO_LATENCY_ID = "7beca231285d5639b23be8d182cd8d4a"
18+
19+
20+
def get_date_ranges(today=None):
    """Return the reporting window (previous calendar month) and the month before it.

    Args:
        today: Reference datetime used to locate "this month". Defaults to
            ``datetime.now()``, which is naive local time, so the month
            boundaries follow the timezone of the host running the script.
            Passing a value allows a specific month to be regenerated.

    Returns:
        A ``(report_period, prev_period)`` tuple of dicts.
        ``report_period`` covers the month preceding ``today`` and holds
        ``start_ts``/``end_ts`` (epoch seconds), ``start_dt``/``end_dt``
        (datetimes), ``start_iso``/``end_iso`` (``YYYY-MM-DD``) and
        ``month_str`` (display label). ``prev_period`` holds only
        ``start_iso``/``end_iso`` for the month before that.
        End values are the first instant of the following month.
    """
    def first_of_previous_month(month_start):
        # month_start is always day 1, so changing the month can never
        # produce an invalid date.
        if month_start.month == 1:
            return month_start.replace(year=month_start.year - 1, month=12)
        return month_start.replace(month=month_start.month - 1)

    if today is None:
        today = datetime.now()

    this_month_start = today.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
    last_month_start = first_of_previous_month(this_month_start)
    month_before_last_start = first_of_previous_month(last_month_start)

    report_period = {
        'start_ts': int(last_month_start.timestamp()),
        'end_ts': int(this_month_start.timestamp()),
        'start_dt': last_month_start,
        'end_dt': this_month_start,
        'start_iso': last_month_start.strftime('%Y-%m-%d'),
        'end_iso': this_month_start.strftime('%Y-%m-%d'),
        'month_str': last_month_start.strftime("%Y년 %m월"),
    }

    prev_period = {
        'start_iso': month_before_last_start.strftime('%Y-%m-%d'),
        'end_iso': last_month_start.strftime('%Y-%m-%d'),
    }

    return report_period, prev_period
42+
43+
44+
def normalize_slo_value(value):
    """Return an SLI reading expressed as a percentage.

    ``None`` becomes ``0.0``. A reading strictly greater than 0 and no larger
    than 1.0 is treated as a fraction and multiplied by 100. Any other
    reading is returned unchanged.
    """
    if value is None:
        return 0.0

    looks_like_fraction = value > 0 and value <= 1.0
    return value * 100 if looks_like_fraction else value
50+
51+
52+
def get_datadog_metrics(start_ts, end_ts):
    """Collect SLO readings and the monitor error-event count from Datadog.

    Args:
        start_ts: Window start passed to the Datadog APIs.
        end_ts: Window end passed to the Datadog APIs.

    Returns:
        Dict with ``slo_avail``, ``slo_latency`` and ``alert_count``. Each
        lookup is guarded separately, so a failed call is logged and leaves
        its entry at the zero default.
    """
    dd_config = Configuration()
    dd_config.host = "https://api.us5.datadoghq.com"
    dd_config.api_key["apiKeyAuth"] = DD_API_KEY
    dd_config.api_key["appKeyAuth"] = DD_APP_KEY

    metrics = {'slo_avail': 0.0, 'slo_latency': 0.0, 'alert_count': 0}

    # (result key, SLO id, label used in the error message), queried in this order.
    slo_queries = (
        ('slo_avail', SLO_AVAILABILITY_ID, "Availability"),
        ('slo_latency', SLO_LATENCY_ID, "Latency"),
    )

    try:
        with ApiClient(dd_config) as api_client:
            slo_api = ServiceLevelObjectivesApi(api_client)
            for result_key, slo_id, label in slo_queries:
                try:
                    history = slo_api.get_slo_history(slo_id, from_ts=start_ts, to_ts=end_ts)
                    metrics[result_key] = normalize_slo_value(history.data.overall.sli_value)
                except Exception as err:
                    print(f"⚠️ Error fetching {label} SLO: {err}")

            event_api = EventsApi(api_client)
            try:
                listing = event_api.list_events(
                    start=start_ts,
                    end=end_ts,
                    tags="status:error,source:monitor,service:eatda-api-prod",
                )
                metrics['alert_count'] = len(listing.events) if listing.events else 0
            except Exception as err:
                print(f"⚠️ Error fetching Events: {err}")
    except Exception as err:
        # Failure to open or close the API client itself.
        print(f"❌ Critical Datadog API Error: {err}")

    return metrics
90+
91+
92+
def get_aws_waf_stats(start_dt, end_dt):
    """Sum allowed and blocked WAF request counts from CloudWatch.

    Args:
        start_dt: Start of the window, passed as ``StartTime``.
        end_dt: End of the window, passed as ``EndTime``.

    Returns:
        Dict with integer ``allowed`` and ``blocked`` totals. A metric that
        cannot be fetched, or that has no datapoints, is reported as 0.
    """
    client = boto3.client('cloudwatch', region_name=AWS_REGION)

    def get_metric(metric_name):
        # The API call lives inside the try so a CloudWatch failure
        # (credentials, throttling, network) degrades to 0 with a log line,
        # matching the error message, instead of aborting the whole report.
        try:
            response = client.get_metric_statistics(
                Namespace='AWS/WAFV2',
                MetricName=metric_name,
                Dimensions=[
                    {'Name': 'WebACL', 'Value': WAF_WEB_ACL_NAME},
                    {'Name': 'Rule', 'Value': 'ALL'},
                    {'Name': 'Region', 'Value': AWS_REGION},
                ],
                StartTime=start_dt,
                EndTime=end_dt,
                Period=86400,  # one datapoint per day
                Statistics=['Sum'],
            )
            datapoints = response['Datapoints']
            if datapoints:
                return int(sum(dp['Sum'] for dp in datapoints))

            print(f"⚠️ No datapoints for WAF metric: {metric_name}")
            return 0
        except Exception as e:
            print(f"❌ Error fetching WAF metric {metric_name}: {e}")
            return 0

    return {
        'allowed': get_metric('AllowedRequests'),
        'blocked': get_metric('BlockedRequests'),
    }
123+
124+
125+
def get_total_cost(start_iso, end_iso):
    """Return the unblended AWS cost for a period as a float.

    Args:
        start_iso: Period start, passed as ``TimePeriod.Start``.
        end_iso: Period end, passed as ``TimePeriod.End``.

    Returns:
        The ``UnblendedCost`` amount of the first result bucket, or ``0.0``
        when no bucket is returned or the lookup fails (the error is printed).
    """
    cost_explorer = boto3.client('ce', region_name='us-east-1')
    try:
        buckets = cost_explorer.get_cost_and_usage(
            TimePeriod={'Start': start_iso, 'End': end_iso},
            Granularity='MONTHLY',
            Metrics=['UnblendedCost'],
        )['ResultsByTime']
        if not buckets:
            return 0.0
        return float(buckets[0]['Total']['UnblendedCost']['Amount'])
    except Exception as err:
        print(f"❌ Error fetching AWS Cost: {err}")
        return 0.0
139+
140+
141+
def send_discord_report():
    """Gather last month's Datadog, WAF and cost figures and post them to Discord.

    The report is sent as a single embed to ``DISCORD_WEBHOOK_URL``. The embed
    is green when the availability SLO is at least 99 and red otherwise.
    Delivery is best-effort: a failed or rejected request is printed, not raised.
    """
    print("🚀 Starting Monthly Report Generation...")
    current_period, prev_period = get_date_ranges()

    dd_data = get_datadog_metrics(current_period['start_ts'], current_period['end_ts'])
    waf_data = get_aws_waf_stats(current_period['start_dt'], current_period['end_dt'])

    curr_cost = get_total_cost(current_period['start_iso'], current_period['end_iso'])
    prev_cost = get_total_cost(prev_period['start_iso'], prev_period['end_iso'])

    cost_diff = curr_cost - prev_cost
    cost_diff_str = f"+${cost_diff:.2f}" if cost_diff >= 0 else f"-${abs(cost_diff):.2f}"
    if cost_diff > 0:
        cost_emoji, cost_trend = "📈", "증가"
    elif cost_diff < 0:
        cost_emoji, cost_trend = "📉", "감소"
    else:
        cost_emoji, cost_trend = "➡️", "변동없음"

    total_req = waf_data['allowed'] + waf_data['blocked']
    # Guard the division without touching total_req, so a month with no
    # traffic is reported as 0 requests rather than 1.
    allowed_pct = waf_data['allowed'] / total_req * 100 if total_req else 0.0

    message = "\n".join([
        f"📊 **[Eatda] {current_period['month_str']} 월간 통합 리포트**",
        "",
        "**1. Datadog (서비스 품질)**",
        f"- 🩺 **가용성 SLO:** `{dd_data['slo_avail']:.3f}%`",
        f"- 🐢 **응답속도 SLO:** `{dd_data['slo_latency']:.3f}%`",
        f"- 🚨 **발생한 장애:** `{dd_data['alert_count']}건`",
        "",
        "**2. AWS WAF (보안)**",
        f"- 🛡️ **총 요청:** `{total_req:,}건`",
        f"- ✅ **허용됨:** `{waf_data['allowed']:,}건` ({allowed_pct:.1f}%)",
        f"- 🚫 **차단됨:** `{waf_data['blocked']:,}건`",
        "",
        "**3. AWS Cost (비용)**",
        f"- 💰 **이번 달:** `${curr_cost:.2f}`",
        f"- {cost_emoji} **전월 대비:** `{cost_diff_str}` ({cost_trend})",
    ])

    payload = {
        "username": "Eatda Manager",
        "embeds": [{
            "title": f"📅 {current_period['month_str']} 운영 결산 보고",
            "description": message,
            # 5763719 = 0x57F287 (green), 15548997 = 0xED4245 (red).
            "color": 5763719 if dd_data['slo_avail'] >= 99 else 15548997,
        }],
    }

    try:
        # The timeout keeps a stalled connection from hanging the job;
        # raise_for_status turns a 4xx/5xx reply into a reported failure
        # instead of a false "sent successfully".
        response = requests.post(DISCORD_WEBHOOK_URL, json=payload, timeout=10)
        response.raise_for_status()
    except Exception as e:
        print(f"❌ Failed to send Discord webhook: {e}")
    else:
        print("✅ Report sent successfully to Discord!")
189+
190+
191+
# Script entry point: build and send the report only when run directly, not on import.
if __name__ == "__main__":
    send_discord_report()

terraform/datadog/.terraform.lock.hcl

Lines changed: 47 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

terraform/datadog/backend.tf

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# Remote state for the Datadog configuration, kept in S3.
terraform {
  backend "s3" {
    bucket  = "eatda-tf-state"
    key     = "datadog/terraform.tfstate"
    region  = "ap-northeast-2"
    encrypt = true

    # DynamoDB table used by the S3 backend for state locking.
    dynamodb_table = "eatda-tf-lock"
  }
}

terraform/datadog/integrations.tf

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
# Datadog webhook that forwards events to the Discord alert channel as a red embed.
resource "datadog_webhook" "discord_alert" {
  name           = "discord-alert-channel"
  url            = data.aws_ssm_parameter.discord_alert_webhook_url.value
  encode_as      = "json"
  custom_headers = null

  # Discord embed body; the $-prefixed values are filled in by Datadog when the webhook fires.
  payload = jsonencode({
    embeds = [{
      color       = 15548997 # 0xED4245
      description = "$EVENT_MSG"
      title       = "$EVENT_TITLE"
      url         = "$LINK"
    }]
  })
}
15+
16+
# Datadog webhook that forwards events to the Discord warning channel as a yellow embed.
resource "datadog_webhook" "discord_warn" {
  name           = "discord-warn-channel"
  url            = data.aws_ssm_parameter.discord_warn_webhook_url.value
  encode_as      = "json"
  custom_headers = null

  # Discord embed body; the $-prefixed values are filled in by Datadog when the webhook fires.
  payload = jsonencode({
    embeds = [{
      color       = 16776960 # 0xFFFF00
      description = "$EVENT_MSG"
      title       = "$EVENT_TITLE"
      url         = "$LINK"
    }]
  })
}
30+
31+
# Datadog webhook that forwards recovery events to Discord as a green embed.
# NOTE(review): this resource label uses a hyphen while the other webhook
# resources use underscores; it is kept as-is because the label is part of the
# resource's state address.
resource "datadog_webhook" "discord-recovery" {
  name           = "discord-warn-channel-recovery"
  url            = data.aws_ssm_parameter.discord_recovery_webhook_url.value
  encode_as      = "json"
  custom_headers = null

  # Discord embed body; the $-prefixed values are filled in by Datadog when the webhook fires.
  payload = jsonencode({
    embeds = [{
      color       = 5763719 # 0x57F287
      description = "$EVENT_MSG"
      title       = "$EVENT_TITLE"
      url         = "$LINK"
    }]
  })
}

terraform/datadog/locals.tf

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# Datadog credentials, read from SSM Parameter Store.
data "aws_ssm_parameter" "datadog_api" {
  name = "/prod/DD_API_KEY"
}

data "aws_ssm_parameter" "datadog_app" {
  name = "/prod/DD_APP_KEY"
}

# Discord webhook URLs, one parameter per notification channel.
data "aws_ssm_parameter" "discord_alert_webhook_url" {
  name = "/discord/discord_alert_webhook_url"
}

data "aws_ssm_parameter" "discord_warn_webhook_url" {
  name = "/discord/discord_warn_webhook_url"
}

data "aws_ssm_parameter" "discord_recovery_webhook_url" {
  name = "/discord/discord_recovery_webhook_url"
}
20+
21+
locals {
  # Message footer that mentions a different Discord webhook depending on
  # whether the notification is an alert, a warning, or a recovery.
  # The @webhook-<name> handles must match the `name` of the datadog_webhook resources.
  notification_footer = <<-EOT

    ---

    {{#is_alert}}
    🚨 **CRITICAL ALERT**
    @webhook-discord-alert-channel
    {{/is_alert}}

    {{#is_warning}}
    ⚠️ **WARNING ALERT**
    @webhook-discord-warn-channel
    {{/is_warning}}

    {{#is_recovery}}
    ✅ **RECOVERY**
    @webhook-discord-warn-channel-recovery
    {{/is_recovery}}
  EOT
}

0 commit comments

Comments
 (0)