Skip to content

Commit 6a5fc82

Browse files
luckyj5rockb1017mwang2016
authored
Merge Release/v1.2.0 to master (#267)
* updates for v1.2.0 release (#261). * add hec-retry counter in the error message to know how many retries were needed to deliver a batch. * updated HEC headers to include telemetry data (appname and appversion). * update add-app-info configuration to be a list of app metadata fields. * remove cf_origin and cf_ignored_app fields from app metadata. * Added memory queue pressure monitoring capability, event received and event sent count. * add functional tests automation(#264). Co-authored-by: Rock Baek <[email protected]> Co-authored-by: Mei Wang <[email protected]>
1 parent 13602ea commit 6a5fc82

File tree

2,166 files changed

+19746
-1337811
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

2,166 files changed

+19746
-1337811
lines changed

.circleci/ci_nozzle_manifest.yml

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,20 @@
11
---
22
applications:
33
- name: splunk-firehose-nozzle
4-
memory: 256M
5-
instances: 1
4+
memory: 512M
5+
instances: 2
66
buildpack: go_buildpack
77
cmd: splunk-firehose-nozzle
88
env:
99
GOPACKAGENAME: main
10-
API_ENDPOINT:
11-
CLIENT_ID:
12-
CLIENT_SECRET:
13-
SPLUNK_HOST:
14-
SPLUNK_TOKEN:
15-
SPLUNK_INDEX: main
10+
API_ENDPOINT:
11+
API_USER:
12+
API_PASSWORD:
13+
CLIENT_ID:
14+
CLIENT_SECRET:
15+
SPLUNK_HOST:
16+
SPLUNK_TOKEN:
17+
SPLUNK_INDEX:
1618
SKIP_SSL_VALIDATION_CF: true
1719
SKIP_SSL_VALIDATION_SPLUNK: true
1820
JOB_NAME: splunk-nozzle
@@ -25,14 +27,13 @@ applications:
2527
APP_LIMITS: 1000
2628
BOLTDB_PATH: cache.db
2729
EVENTS: ValueMetric,CounterEvent,Error,LogMessage,HttpStartStop,ContainerMetric
28-
EXTRA_FIELDS: name:splunk-pcf-ci
30+
EXTRA_FIELDS: name:update-ci-test
2931
FIREHOSE_SUBSCRIPTION_ID: splunk-ci
3032
FLUSH_INTERVAL: 5s
3133
CONSUMER_QUEUE_SIZE: 10000
32-
HEC_BATCH_SIZE: 100
34+
HEC_BATCH_SIZE: 1000
3335
HEC_RETRIES: 5
3436
HEC_WORKERS: 8
3537
DEBUG: false
3638
ENABLE_EVENT_TRACING: true
37-
RLP_GATEWAY_RETRIES: 5
38-
STATUS_MONITOR_INTERVAL: 0s
39+
STATUS_MONITOR_INTERVAL: 0s

.circleci/config.yml

Lines changed: 11 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ jobs:
1515
name: Builder
1616
command: make build
1717
- run:
18-
name: Run unit tests
18+
name: Run tests
1919
command: |
2020
make testall
2121
cp splunk-firehose-nozzle /tmp
@@ -84,6 +84,11 @@ jobs:
8484
.circleci/pre-req.sh
8585
cf push -f .circleci/data_gen_manifest.yml -u process -p tools/data_gen --random-route
8686
sleep 10
87+
- run:
88+
name: Nozzle Log
89+
command: |
90+
cf logs splunk-firehose-nozzle
91+
background: true
8792
- run:
8893
name: Prepare test environment
8994
command: |
@@ -102,37 +107,6 @@ jobs:
102107
when:
103108
always
104109

105-
execute_perf_tests:
106-
docker:
107-
- image: circleci/golang:1.12
108-
working_directory: /go/src/github.com/cloudfoundry-community/splunk-firehose-nozzle
109-
steps:
110-
- attach_workspace:
111-
at: /tmp
112-
- checkout
113-
- run:
114-
name: Install dependencies
115-
command: |
116-
curl https://glide.sh/get | sh
117-
go get -t ./...
118-
cp -R /tmp/splunk-firehose-nozzle .
119-
- run:
120-
name: Prepare test environment
121-
command: |
122-
.circleci/pre-req.sh
123-
.circleci/pre-functional-test.sh
124-
- run:
125-
name: Executing perf tests
126-
command: |
127-
.circleci/performance-test.sh
128-
- run:
129-
name: Teardown
130-
command: |
131-
echo "Teardown deployment env"
132-
cf delete-org splunk-ci-org -f
133-
when:
134-
always
135-
136110
workflows:
137111
version: 2
138112
build-and-deploy-nozzle:
@@ -150,25 +124,8 @@ workflows:
150124
- execute_tests:
151125
requires:
152126
- deploy-nozzle
153-
154-
155-
# - execute_perf_tests:
156-
# requires:
157-
# - build
158-
# filters:
159-
# branches:
160-
# only:
161-
# - dev/pcf-performance-testing
162-
# nightly:
163-
# triggers:
164-
# - schedule:
165-
# cron: "0 7 * * *"
166-
# filters:
167-
# branches:
168-
# only:
169-
# - dev/pcf-performance-testing
170-
# jobs:
171-
# - build
172-
# - execute_perf_tests:
173-
# requires:
174-
# - build
127+
filters:
128+
branches:
129+
only:
130+
- master
131+
- release/v1.2.0

.circleci/performance-test.sh

Lines changed: 0 additions & 6 deletions
This file was deleted.

.gitignore

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,21 +2,17 @@
22

33
tools/dump_app_info/dump_app_info
44
tools/data_gen/data_gen
5-
65
cache.db
76
*.out
8-
97
main
108
splunk-firehose-nozzle
119
env.sh
1210
data_gen
13-
1411
manifest*.yml
1512
splunk-firehose-nozzle.iml
16-
vendor/github.com/cloudfoundry/sonde-go/definitions/
17-
testing/integration/venv/
18-
testing/integration/config/local.ini
13+
1914
*.pyc
20-
testing/integration/config/env.json
2115
testing/manual_perf/data_gen_manifest.yml
16+
testing/integration/config/env.json
17+
testing/integration/config/local.ini
2218

Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ test:
6767

6868
testv:
6969
@go test -v ${PKGS}
70+
7071
# Run "short" unit tests
7172
test-short:
7273
@go test -short ${PKGS}

README.md

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -81,26 +81,24 @@ This is recommended for dev environments only.
8181
* `FIREHOSE_KEEP_ALIVE`: Keep alive duration for the Firehose consumer.
8282
* `ADD_APP_INFO`: Enrich raw data with app info. A comma separated list of app metadata (AppName,OrgName,OrgGuid,SpaceName,SpaceGuid).
8383
* `IGNORE_MISSING_APP`: If the application is missing, then stop repeatedly querying application info from Cloud Foundry.
84-
* `MISSING_APP_CACHE_INVALIDATE_TTL`: How frequently the missing app info cache invalidates.
85-
* `APP_CACHE_INVALIDATE_TTL`: How frequently the app info local cache invalidates.
86-
* `ORG_SPACE_CACHE_INVALIDATE_TTL`: How frequently the org and space cache invalidates.
84+
* `MISSING_APP_CACHE_INVALIDATE_TTL`: How frequently the missing app info cache invalidates (in s/m/h. For example, 3600s or 60m or 1h).
85+
* `APP_CACHE_INVALIDATE_TTL`: How frequently the app info local cache invalidates (in s/m/h. For example, 3600s or 60m or 1h).
86+
* `ORG_SPACE_CACHE_INVALIDATE_TTL`: How frequently the org and space cache invalidates (in s/m/h. For example, 3600s or 60m or 1h).
8787
* `APP_LIMITS`: Restrict to APP_LIMITS the most updated apps per request when populating the app metadata cache.
8888
* `BOLTDB_PATH`: Bolt database path.
8989
* `EVENTS`: A comma separated list of events to include. Possible values: ValueMetric,CounterEvent,Error,LogMessage,HttpStartStop,ContainerMetric
9090
* `EXTRA_FIELDS`: Extra fields to annotate your events with (format is key:value,key:value).
91-
* `FLUSH_INTERVAL`: Time interval for flushing queue to Splunk regardless of CONSUMER_QUEUE_SIZE. Protects against stale events in low throughput systems.
91+
* `FLUSH_INTERVAL`: Time interval (in s/m/h. For example, 3600s or 60m or 1h) for flushing queue to Splunk regardless of CONSUMER_QUEUE_SIZE. Protects against stale events in low throughput systems.
9292
* `CONSUMER_QUEUE_SIZE`: Sets the internal consumer queue buffer size. Events will be pushed to Splunk after queue is full.
9393
* `HEC_BATCH_SIZE`: Set the batch size for the events to push to HEC (Splunk HTTP Event Collector).
9494
* `HEC_RETRIES`: Retry count for sending events to Splunk. After expiring, events will begin dropping causing data loss.
9595
* `HEC_WORKERS`: Set the amount of Splunk HEC workers to increase concurrency while ingesting in Splunk.
9696
* `ENABLE_EVENT_TRACING`: Enables event trace logging. Splunk events will now contain a UUID, Splunk Nozzle Event Counts, and a Subscription-ID for Splunk correlation searches.
9797
* `SPLUNK_VERSION`: The Splunk version that determines how HEC ingests metadata fields. Only required for Splunk version 6.3 or below.
98-
* `RLP_GATEWAY_RETRIES`: Number of retries to connect to RLP gateway.
99-
* `STATUS_MONITOR_INTERVAL`: Time interval for monitoring memory queue pressure to help with back-pressure insights.
100-
98+
* `STATUS_MONITOR_INTERVAL`: Time interval (in s/m/h. For example, 3600s or 60m or 1h) for monitoring memory queue pressure. Use to help with back-pressure insights. (Increases CPU load. Use for insights purposes only) Default is 0s (Disabled).
10199
### Please note
102100
> SPLUNK_VERSION configuration parameter is only required for Splunk version 6.3 and below.
103-
For Splunk version 6.3 or below, please deploy nozzle via CLI. Update nozzle_manifest.yml with splunk_version (eg:- SPLUNK_VERSION: 6.3) as an env variable and [deploy nozzle as an app via CLI](#push-as-an-app-to-cloud-foundry).
101+
For Splunk version 6.3 or below, please deploy nozzle via CLI. Update nozzle_manifest.yml with splunk_version (For example: SPLUNK_VERSION: 6.3) as an env variable and [deploy nozzle as an app via CLI](#push-as-an-app-to-cloud-foundry).
104102

105103
**[Tile](https://network.pivotal.io/products/splunk-nozzle/)** only supports deployment for Splunk version 6.4 or above
106104

@@ -137,11 +135,11 @@ on user authentication.
137135
```
138136

139137
#### Dump application info to boltdb ####
140-
If in production there are lots of Cloud Foundry applications(say tens of thousands) and if the user would like to enrich
141-
application logs by including application meta data,querying all application metadata information from Cloud Foundry may take some time.
138+
If in production there are lots of CF applications (say tens of thousands) and if the user would like to enrich
139+
application logs by including application metadata, querying all application metadata information from CF may take some time.
142140
For example if we include, add app name, space ID, space name, org ID and org name to the events.
143141
If there are multiple instances of Splunk nozzle deployed, the situation will be even worse, since each of the Splunk nozzle(s) will query all applications' metadata and
144-
cache the meta data information to the local boltdb file. These queries will introduce load to the Cloud Foundry system and could potentially take a long time to finish.
142+
cache the meta data information to the local boltdb file. These queries will introduce load to the CF system and could potentially take a long time to finish.
145143
Users can run this tool to generate a copy of all application meta data and copy this to each Splunk nozzle deployment. Each Splunk nozzle can pick up the cache copy and update the cache file incrementally afterwards.
146144

147145
Example of how to run the dump application info tool:
@@ -239,7 +237,7 @@ This topic describes how to troubleshoot Splunk Firehose Nozzle for Cloud Foundr
239237
240238
Are you searching for events and not finding them or looking at a dashboard and seeing "No result found"? Check Splunk Nozzle app logs.
241239
242-
To view the nozzle's logs running on Cloud Foundry do the following:
240+
To view the nozzle's logs running on CF do the following:
243241
244242
<ol>
245243
<li>Log in as an admin via the CLI.</li>
@@ -338,8 +336,8 @@ A correct setup logs a start message with configuration parameters of the Nozzle
338336
splunk-version: 6.6
339337
subscription-id: splunk-firehose
340338
trace-logging: true
341-
rlp-gateway-retries: 5
342339
status-monitor-interval: 0s
340+
version:
343341
wanted-events: ValueMetric,CounterEvent,Error,LogMessage,HttpStartStop,ContainerMetric
344342
}
345343
ip: 10.0.0.0
@@ -419,7 +417,7 @@ $ chmod +x tools/nozzle.sh
419417
Build project:
420418
421419
```
422-
$ make VERSION=2.0.0
420+
$ make VERSION=1.2.0
423421
```
424422
425423
Run tests with [Ginkgo](http://onsi.github.io/ginkgo/)

eventrouter/default.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,10 @@ func (r *router) Route(msg *events.Envelope) error {
5555
event = fevents.ErrorEvent(msg)
5656
case events.Envelope_ContainerMetric:
5757
event = fevents.ContainerMetric(msg)
58+
case events.Envelope_HttpStart:
59+
event = fevents.HttpStart(msg)
60+
case events.Envelope_HttpStop:
61+
event = fevents.HttpStop(msg)
5862

5963
default:
6064
return fmt.Errorf("Unsupported event type: %s", eventType.String())

eventrouter/eventrouter_test.go

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ var _ = Describe("eventrouter", func() {
3131
noCache = testing.NewMemoryCacheMock()
3232
memSink = &testing.MemorySinkMock{}
3333
config := &Config{
34-
SelectedEvents: "LogMessage,HttpStartStop,ValueMetric,CounterEvent,Error,ContainerMetric",
34+
SelectedEvents: "LogMessage,HttpStart,HttpStop,HttpStartStop,ValueMetric,CounterEvent,Error,ContainerMetric",
3535
}
3636
r, err = New(noCache, memSink, config)
3737
Ω(err).ShouldNot(HaveOccurred())
@@ -66,7 +66,8 @@ var _ = Describe("eventrouter", func() {
6666

6767
It("Route valid message", func() {
6868
eventTypes := []events.Envelope_EventType{
69-
events.Envelope_LogMessage, events.Envelope_HttpStartStop,
69+
events.Envelope_LogMessage, events.Envelope_HttpStart,
70+
events.Envelope_HttpStop, events.Envelope_HttpStartStop,
7071
events.Envelope_ValueMetric, events.Envelope_CounterEvent,
7172
events.Envelope_Error, events.Envelope_ContainerMetric,
7273
}
@@ -81,12 +82,12 @@ var _ = Describe("eventrouter", func() {
8182

8283
It("Route un-selected message", func() {
8384
config := &Config{
84-
SelectedEvents: "ValueMetric",
85+
SelectedEvents: "HttpStart",
8586
}
8687
r, err = New(noCache, memSink, config)
8788
Ω(err).ShouldNot(HaveOccurred())
8889

89-
eventType = events.Envelope_HttpStartStop
90+
eventType = events.Envelope_HttpStop
9091
err := r.Route(msg)
9192
Ω(err).ShouldNot(HaveOccurred())
9293
Expect(len(memSink.Events)).To(Equal(0))

events/events.go

Lines changed: 40 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,46 @@ var AppMetadata = []string{
3535
"SpaceGuid",
3636
}
3737

38+
func HttpStart(msg *events.Envelope) *Event {
39+
httpStart := msg.GetHttpStart()
40+
fields := logrus.Fields{
41+
"timestamp": httpStart.GetTimestamp(),
42+
"request_id": utils.FormatUUID(httpStart.GetRequestId()),
43+
"method": httpStart.GetMethod().String(),
44+
"uri": httpStart.GetUri(),
45+
"remote_addr": httpStart.GetRemoteAddress(),
46+
"user_agent": httpStart.GetUserAgent(),
47+
"parent_request_id": utils.FormatUUID(httpStart.GetParentRequestId()),
48+
"cf_app_id": utils.FormatUUID(httpStart.GetApplicationId()),
49+
"instance_index": httpStart.GetInstanceIndex(),
50+
"instance_id": httpStart.GetInstanceId(),
51+
}
52+
53+
return &Event{
54+
Fields: fields,
55+
Msg: "",
56+
}
57+
}
58+
59+
func HttpStop(msg *events.Envelope) *Event {
60+
httpStop := msg.GetHttpStop()
61+
62+
fields := logrus.Fields{
63+
"timestamp": httpStop.GetTimestamp(),
64+
"uri": httpStop.GetUri(),
65+
"request_id": utils.FormatUUID(httpStop.GetRequestId()),
66+
"peer_type": httpStop.GetPeerType().String(),
67+
"status_code": httpStop.GetStatusCode(),
68+
"content_length": httpStop.GetContentLength(),
69+
"cf_app_id": utils.FormatUUID(httpStop.GetApplicationId()),
70+
}
71+
72+
return &Event{
73+
Fields: fields,
74+
Msg: "",
75+
}
76+
}
77+
3878
func HttpStartStop(msg *events.Envelope) *Event {
3979
httpStartStop := msg.GetHttpStartStop()
4080

@@ -159,7 +199,6 @@ func (e *Event) AnnotateWithAppData(appCache cache.Cache, config *Config) {
159199
cf_space_name := appInfo.SpaceName
160200
cf_org_id := appInfo.OrgGuid
161201
cf_org_name := appInfo.OrgName
162-
//cf_ignored_app := appInfo.IgnoredApp
163202
app_env := appInfo.CfAppEnv
164203

165204
if cf_app_name != "" && config.AddAppName {
@@ -185,14 +224,10 @@ func (e *Event) AnnotateWithAppData(appCache cache.Cache, config *Config) {
185224
if app_env["SPLUNK_INDEX"] != nil {
186225
e.Fields["info_splunk_index"] = app_env["SPLUNK_INDEX"]
187226
}
188-
//removing cf_ignored_app as per INGEST-17639
189-
//e.Fields["cf_ignored_app"] = cf_ignored_app
190227
}
191228
}
192229

193230
func (e *Event) AnnotateWithCFMetaData() {
194-
//removing cf_origin as per INGEST-17639
195-
//e.Fields["cf_origin"] = "firehose"
196231
e.Fields["event_type"] = e.Type
197232
}
198233

0 commit comments

Comments
 (0)