Skip to content

Commit b9c7854

Browse files
committed
Restarting apps metrics
1 parent 4a997e2 commit b9c7854

File tree

15 files changed

+421
-34
lines changed

15 files changed

+421
-34
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,3 +35,5 @@ marathon-forwarder
3535

3636
# coverage
3737
/coverage
38+
39+
tags

README.md

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,31 @@ AppCop is periodically fetching applications and groups from Marathon.
2828
When application is suspended or group is empty for long (configurable) time then it is deleted.
2929

3030

31+
### Metrics
32+
33+
`AppCop` provides a set of standard system metrics as well as application-based metrics.
34+
35+
36+
#### Metric Types
37+
38+
`System Metrics` - `AppCop`-specific telemetry (e.g. queue size, event delays, etc.). They are published under `metrics-prefix` with `metrics-system-sub-prefix` appended.
39+
40+
`Applications Metrics` - application telemetry calculated from events provided by Marathon
41+
(e.g. `task_killed`, `task_finished` counters). They are published under `metrics-prefix` with `metrics-app-sub-prefix` appended.
42+
43+
Please note the `appid-prefix` config option: if set, the matching string is removed from
44+
the application id when metrics are published. For example, assuming
45+
46+
```
47+
appid-prefix = com.example.
48+
appID = com.example.exampleapp
49+
```
50+
your application's metrics will be placed under:
51+
```
52+
{prefix}.{metrics-app-sub-prefix}.exampleapp
53+
```
54+
55+
3156
## Installation
3257

3358
### Installing from source code
@@ -95,6 +120,7 @@ marathon-password | | Marathon password for basic au
95120
marathon-protocol | `http` | Marathon protocol (http or https)
96121
marathon-ssl-verify | `true` | Verify certificates when connecting via SSL
97122
marathon-timeout | `30s` | Time limit for requests made by the Marathon HTTP client. A timeout of zero means no timeout
123+
appid-prefix | | Prefix common to all fully qualified application IDs. This prefix is removed from application IDs when metrics are published (see [Metric Types](#metric-types))
98124
marathon-username | | Marathon username for basic auth
99125
scale-down-score | `30` | Score for application to scale it one instance down
100126
scale-limit | `2` | How many scale down actions to commit in one scaling down iteration
@@ -104,6 +130,8 @@ evaluate-interval | `30s` | How often collected scores are
104130
metrics-interval | `30s` | Metrics reporting interval
105131
metrics-location | | Graphite URL (used when metrics-target is set to graphite)
106132
metrics-prefix | `default` | Metrics prefix (default is resolved to <hostname>.<app_name>)
133+
metrics-system-sub-prefix | `appcop-internal` | Sub-prefix for AppCop system metrics. Appended to metrics-prefix
134+
metrics-app-sub-prefix | `applications` | Sub-prefix for application metrics. Appended to metrics-prefix
107135
metrics-target | `stdout` | Metrics destination stdout or graphite (empty string disables metrics)
108136
workers-pool-size | `10` | Number of concurrent workers processing events
109137
mgc-enabled | `true` | Enable garbage collecting of Marathon, old suspended applications will be deleted

config/config.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,8 @@ func (config *Config) parseFlags() {
8282
flag.DurationVar(&config.Marathon.Timeout,
8383
"marathon-timeout", 30*time.Second,
8484
"Time limit for requests made by the Marathon HTTP client. A Timeout of zero means no timeout")
85+
flag.StringVar(&config.Marathon.AppIDPrefix, "appid-prefix", "",
86+
"Prefix common to all fully qualified application ID's. Remove this preffix from applications id's (reffer to README to get an idea when this id is removed)")
8587

8688
// Score
8789
flag.BoolVar(&config.Score.DryRun,
@@ -122,10 +124,16 @@ func (config *Config) parseFlags() {
122124
"Metrics destination stdout or graphite (empty string disables metrics)")
123125
flag.StringVar(&config.Metrics.Prefix, "metrics-prefix", "default",
124126
"Metrics prefix (default is resolved to <hostname>.<app_name>")
127+
flag.StringVar(&config.Metrics.SystemSubPrefix, "metrics-system-sub-prefix", "appcop-internal",
128+
"System specific metrics. Append to metric-prefix")
129+
flag.StringVar(&config.Metrics.AppSubPrefix, "metrics-app-sub-prefix", "applications",
130+
"Applications specific metrics. Appended to metric-prefix")
125131
flag.DurationVar(&config.Metrics.Interval, "metrics-interval", 30*time.Second,
126132
"Metrics reporting interval")
127133
flag.StringVar(&config.Metrics.Addr, "metrics-location", "",
128134
"Graphite URL (used when metrics-target is set to graphite)")
135+
// metrics-instance must be stored in its own field: binding it to
// Metrics.Addr (the target of metrics-location) would silently overwrite
// the Graphite address and leave Metrics.Instance permanently empty.
flag.StringVar(&config.Metrics.Instance, "metrics-instance", "",
	"Part of Graphite metric, used to distinguish between AppCop instances internal metrics.")
129137

130138
// Log
131139
flag.StringVar(&config.Log.Level, "log-level", "info",

marathon/app.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ import (
44
"encoding/json"
55
"fmt"
66
"strings"
7+
8+
"github.com/allegro/marathon-appcop/metrics"
79
)
810

911
const ApplicationImmunityLabel = "APP_IMMUNITY"
@@ -96,6 +98,28 @@ type Task struct {
9698
HealthCheckResults []HealthCheckResult `json:"healthCheckResults"`
9799
}
98100

101+
// GetMetric returns a string indicating where this applications metric should be placed
102+
// in graphite in defined prefix. It is done by triming begining prefix (if defined)
103+
// from application id and replacing appID separators with
104+
// metrics separators appropriate for graphite.
105+
func (t Task) GetMetric(prefix string) string {
106+
taskAppID := string(t.AppID)
107+
108+
var appID string
109+
if prefix == "" {
110+
appID = taskAppID
111+
} else {
112+
appID = strings.Replace(taskAppID, prefix, "", 1)
113+
}
114+
noRootAppID := strings.TrimPrefix(appID, "/")
115+
metricPath := strings.Replace(noRootAppID, metrics.PathSeparator, metrics.MetricSeparator, -1)
116+
taskStatus := strings.ToLower(t.TaskStatus)
117+
118+
filteredPathParts := metrics.FilterOutEmptyStrings([]string{metricPath, taskStatus})
119+
return strings.Join(filteredPathParts, metrics.MetricSeparator)
120+
121+
}
122+
99123
// TaskID from marathon
100124
// Usually in the form of AppID.uuid with '/' replaced with '_'
101125
type TaskID string

marathon/app_test.go

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,117 @@ func TestParseTasksRecievesMalformedJSONBlob(t *testing.T) {
9393
require.Error(t, err)
9494
}
9595

96+
var getMetricTestCases = []struct {
97+
task *Task
98+
prefix string
99+
expectedTaskMetric string
100+
}{
101+
{
102+
task: &Task{
103+
TaskStatus: "task_running",
104+
AppID: "com.example.domain.context/app-name",
105+
},
106+
prefix: "com.example.",
107+
expectedTaskMetric: "domain.context.app-name.task_running",
108+
},
109+
{
110+
task: &Task{
111+
TaskStatus: "task_running",
112+
AppID: "/com.example.domain.context/app-name",
113+
},
114+
prefix: "com.example.",
115+
expectedTaskMetric: "domain.context.app-name.task_running",
116+
},
117+
{
118+
task: &Task{
119+
TaskStatus: "task_running",
120+
AppID: "com.example.domain.context/app-name",
121+
},
122+
prefix: "",
123+
expectedTaskMetric: "com.example.domain.context.app-name.task_running",
124+
},
125+
{
126+
task: &Task{
127+
TaskStatus: "task_running",
128+
AppID: "com.example.domain.context/group/app-name",
129+
},
130+
prefix: "com.example.",
131+
expectedTaskMetric: "domain.context.group.app-name.task_running",
132+
},
133+
{
134+
task: &Task{
135+
TaskStatus: "task_staging",
136+
AppID: "com.example.domain.context/group/app-name",
137+
},
138+
prefix: "com.example.",
139+
expectedTaskMetric: "domain.context.group.app-name.task_staging",
140+
},
141+
{
142+
task: &Task{
143+
TaskStatus: "task_staging",
144+
AppID: "com.example.domain.context/group/nested-group/app-name",
145+
},
146+
prefix: "com.example.",
147+
expectedTaskMetric: "domain.context.group.nested-group.app-name.task_staging",
148+
},
149+
{
150+
task: &Task{
151+
TaskStatus: "task_running",
152+
AppID: "app-name",
153+
},
154+
prefix: "com.example.",
155+
expectedTaskMetric: "app-name.task_running",
156+
},
157+
{
158+
task: &Task{
159+
TaskStatus: "task_running",
160+
AppID: "app-name",
161+
},
162+
prefix: "",
163+
expectedTaskMetric: "app-name.task_running",
164+
},
165+
{
166+
task: &Task{
167+
TaskStatus: "task_running",
168+
AppID: "com.example.domain.context/app-name",
169+
},
170+
prefix: "",
171+
expectedTaskMetric: "com.example.domain.context.app-name.task_running",
172+
},
173+
{
174+
task: &Task{
175+
TaskStatus: "task_running",
176+
AppID: "",
177+
},
178+
prefix: "com.example.",
179+
expectedTaskMetric: "task_running",
180+
},
181+
{
182+
task: &Task{
183+
TaskStatus: "",
184+
AppID: "com.example.domain.context/app-name",
185+
},
186+
prefix: "com.example.",
187+
expectedTaskMetric: "domain.context.app-name",
188+
},
189+
{
190+
task: &Task{
191+
TaskStatus: "",
192+
AppID: "",
193+
},
194+
prefix: "com.example.",
195+
expectedTaskMetric: "",
196+
},
197+
}
198+
199+
func TestTaskGetMetricTestCases(t *testing.T) {
200+
t.Parallel()
201+
for _, testCase := range getMetricTestCases {
202+
taskMetric := testCase.task.GetMetric(testCase.prefix)
203+
assert.Equal(t, testCase.expectedTaskMetric, taskMetric)
204+
}
205+
}
206+
96207
var penalizeTestCases = []struct {
97208
app *App
98209
expectedApp *App

marathon/config.go

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,15 @@ import "time"
44

55
// Config contains marathon module specific configuration
66
type Config struct {
7-
Location string
8-
Protocol string
9-
Username string
10-
Password string
11-
VerifySsl bool
12-
Timeout time.Duration
7+
Location string
8+
Protocol string
9+
Username string
10+
Password string
11+
// AppIDPrefix is a part of application id preferably present
12+
// in all applications in marathon, if found it is removed for the sake of
13+
// making applications paths shorter.
14+
// By default this string is empty and no prefix is considered.
15+
AppIDPrefix string
16+
VerifySsl bool
17+
Timeout time.Duration
1318
}

marathon/marathon.go

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -27,14 +27,16 @@ type Marathoner interface {
2727
AppDelete(AppID) error
2828
GroupDelete(GroupID) error
2929
GetEmptyLeafGroups() ([]*Group, error)
30+
GetAppIDPrefix() string
3031
}
3132

3233
// Marathon receiver
3334
type Marathon struct {
34-
Location string
35-
Protocol string
36-
Auth *url.Userinfo
37-
client *pester.Client
35+
Location string
36+
Protocol string
37+
appIDPrefix string
38+
Auth *url.Userinfo
39+
client *pester.Client
3840
}
3941

4042
// ScaleData marathon scale json representation
@@ -81,10 +83,11 @@ func New(config Config) (*Marathon, error) {
8183
pClient.Transport = transport
8284

8385
return &Marathon{
84-
Location: config.Location,
85-
Protocol: config.Protocol,
86-
Auth: auth,
87-
client: pClient,
86+
Location: config.Location,
87+
Protocol: config.Protocol,
88+
appIDPrefix: config.AppIDPrefix,
89+
Auth: auth,
90+
client: pClient,
8891
}, nil
8992
}
9093

@@ -427,3 +430,7 @@ func (m Marathon) LeaderGet() (string, error) {
427430

428431
return leaderResponse.Leader, err
429432
}
433+
434+
// GetAppIDPrefix returns the application ID prefix stored on this Marathon
// client (the appIDPrefix field, populated from Config.AppIDPrefix in New).
func (m Marathon) GetAppIDPrefix() string {
435+
return m.appIDPrefix
436+
}

marathon/marathon_stub.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,3 +112,7 @@ func (m MStub) GroupDelete(groupID GroupID) error {
112112
func (m MStub) GetEmptyLeafGroups() ([]*Group, error) {
113113
return []*Group{}, nil
114114
}
115+
116+
// GetAppIDPrefix always returns the empty string: the stub carries no
// application ID prefix.
func (m MStub) GetAppIDPrefix() string {
117+
return ""
118+
}

metrics/config.go

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,20 @@ import "time"
44

55
// Config specific to metrics package
66
type Config struct {
7-
Target string
7+
Target string
8+
// Prefix is the beginning of the metric name; it is prepended
9+
// in each and every published metric.
810
Prefix string
911
Interval time.Duration
1012
Addr string
13+
Instance string
14+
// SystemSubPrefix it is part of a metric that is appended to the
15+
// main Prefix, representing appcop internal metrics
16+
// essential to appcop admins, e.g runtime metrics, event processing time,
17+
// event queue size etc.
18+
SystemSubPrefix string
19+
// AppSubPrefix it is part of a metric that is appended to the
20+
// main Prefix, representing applications specific metric, e.g task_running,
21+
// task_staging, task_failed.
22+
AppSubPrefix string
1123
}

0 commit comments

Comments
 (0)