Skip to content

Commit 9d584df

Browse files
authored
Merge pull request #387 from xueweiz/test-pr
Add a few behavioral e2e tests
2 parents 7dc84e8 + 7d28dde commit 9d584df

File tree

163 files changed

+182678
-63
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

163 files changed

+182678
-63
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
/bin/
22
/Dockerfile
3+
/test/bin/
34
/*.tar.gz
45
ci.env
56
pr.env

Makefile

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,9 @@ PKG:=k8s.io/node-problem-detector
4040
# PKG_SOURCES are all the go source code.
4141
PKG_SOURCES:=$(shell find pkg cmd -name '*.go')
4242

43+
# PARALLEL specifies the number of parallel test nodes to run for e2e tests.
44+
PARALLEL?=3
45+
4346
# TARBALL is the name of release tar. Include binary version by default.
4447
TARBALL?=node-problem-detector-$(VERSION).tar.gz
4548

@@ -103,6 +106,13 @@ endif
103106
-tags "$(BUILD_TAGS)" \
104107
./cmd/nodeproblemdetector
105108

109+
./test/bin/problem-maker: $(PKG_SOURCES)
110+
CGO_ENABLED=$(CGO_ENABLED) GOOS=linux GO111MODULE=on go build \
111+
-mod vendor \
112+
-o test/bin/problem-maker \
113+
-tags "$(BUILD_TAGS)" \
114+
./test/e2e/problemmaker/problem_maker.go
115+
106116
Dockerfile: Dockerfile.in
107117
sed -e 's|@BASEIMAGE@|$(BASEIMAGE)|g' $< >$@
108118
ifneq ($(ENABLE_JOURNALD), 1)
@@ -115,8 +125,8 @@ test: vet fmt
115125
GO111MODULE=on go test -mod vendor -timeout=1m -v -race -short -tags "$(BUILD_TAGS)" ./...
116126

117127
e2e-test: vet fmt build-tar
118-
GO111MODULE=on go test -mod vendor -timeout=10m -v -tags "$(BUILD_TAGS)" \
119-
./test/e2e/metriconly/... \
128+
GO111MODULE=on ginkgo -nodes=$(PARALLEL) -mod vendor -timeout=10m -v -tags "$(BUILD_TAGS)" \
129+
./test/e2e/metriconly/... -- \
120130
-project=$(PROJECT) -zone=$(ZONE) \
121131
-image=$(VM_IMAGE) -image-family=$(IMAGE_FAMILY) -image-project=$(IMAGE_PROJECT) \
122132
-ssh-user=$(SSH_USER) -ssh-key=$(SSH_KEY) \
@@ -129,8 +139,8 @@ build-binaries: ./bin/node-problem-detector ./bin/log-counter
129139
build-container: build-binaries Dockerfile
130140
docker build -t $(IMAGE) .
131141

132-
build-tar: ./bin/node-problem-detector ./bin/log-counter
133-
tar -zcvf $(TARBALL) bin/ config/ test/e2e-install.sh
142+
build-tar: ./bin/node-problem-detector ./bin/log-counter ./test/bin/problem-maker
143+
tar -zcvf $(TARBALL) bin/ config/ test/e2e-install.sh test/bin/problem-maker
134144
sha1sum $(TARBALL)
135145
md5sum $(TARBALL)
136146

@@ -156,4 +166,5 @@ push: push-container push-tar
156166
clean:
157167
rm -f bin/log-counter
158168
rm -f bin/node-problem-detector
169+
rm -f test/bin/problem-maker
159170
rm -f node-problem-detector-*.tar.gz

README.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,26 @@ Kubernetes cluster to a healthy state. The following remedy systems exist:
249249
[this issue](https://github.com/kubernetes/node-problem-detector/issues/199)
250250
for an example production use case for Draino.
251251

252+
# Testing
253+
254+
NPD is tested via unit tests, [NPD e2e tests](https://github.com/kubernetes/node-problem-detector/blob/master/test/e2e/README.md), Kubernetes e2e tests and Kubernetes nodes e2e tests. Prow handles the [pre-submit tests](https://github.com/kubernetes/test-infra/blob/master/config/jobs/kubernetes/node-problem-detector/node-problem-detector-presubmits.yaml) and [CI tests](https://github.com/kubernetes/test-infra/blob/master/config/jobs/kubernetes/node-problem-detector/node-problem-detector-ci.yaml).
255+
256+
CI test results can be found below:
257+
1. [Unit tests](https://k8s-testgrid.appspot.com/sig-node-node-problem-detector#ci-npd-test)
258+
2. [NPD e2e tests](https://k8s-testgrid.appspot.com/sig-node-node-problem-detector#ci-npd-e2e-test)
259+
3. [Kubernetes e2e tests](https://k8s-testgrid.appspot.com/sig-node-node-problem-detector#ci-npd-e2e-kubernetes-gce-gci)
260+
4. [Kubernetes nodes e2e tests](https://k8s-testgrid.appspot.com/sig-node-node-problem-detector#ci-npd-e2e-node)
261+
262+
## Running tests
263+
264+
Unit tests are run via `make test`.
265+
266+
See [NPD e2e test documentation](https://github.com/kubernetes/node-problem-detector/blob/master/test/e2e/README.md) for how to set up and run NPD e2e tests.
267+
268+
## Problem Maker
269+
270+
[Problem maker](https://github.com/kubernetes/node-problem-detector/blob/master/test/e2e/problemmaker/README.md) is a program used in NPD e2e tests to generate/simulate node problems. It is ONLY intended to be used by NPD e2e tests. Please do NOT run it on your workstation, as it could cause real node problems.
271+
252272
# Docs
253273

254274
* [Custom plugin monitor](docs/custom_plugin_monitor.md)

config/kernel-monitor.json

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,16 @@
4343
"reason": "KernelOops",
4444
"pattern": "divide error: 0000 \\[#\\d+\\] SMP"
4545
},
46+
{
47+
"type": "temporary",
48+
"reason": "Ext4Error",
49+
"pattern": "EXT4-fs error .*"
50+
},
51+
{
52+
"type": "temporary",
53+
"reason": "Ext4Warning",
54+
"pattern": "EXT4-fs warning .*"
55+
},
4656
{
4757
"type": "permanent",
4858
"condition": "KernelDeadlock",

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ require (
1616
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b
1717
github.com/google/cadvisor v0.33.0
1818
github.com/onsi/ginkgo v1.8.0
19-
github.com/onsi/gomega v1.5.0 // indirect
19+
github.com/onsi/gomega v1.7.0
2020
github.com/pborman/uuid v1.2.0
2121
github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90
2222
github.com/prometheus/common v0.4.1

go.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,8 @@ github.com/onsi/gomega v1.4.2/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1Cpa
252252
github.com/onsi/gomega v1.4.3/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY=
253253
github.com/onsi/gomega v1.5.0 h1:izbySO9zDPmjJ8rDjLvkA2zJHIo+HkYXHnf7eN7SSyo=
254254
github.com/onsi/gomega v1.5.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY=
255+
github.com/onsi/gomega v1.7.0 h1:XPnZz8VVBHjVsy1vzJmRwIcSwiUO+JFfrv/xGiigmME=
256+
github.com/onsi/gomega v1.7.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY=
255257
github.com/opencontainers/go-digest v1.0.0-rc1/go.mod h1:cMLVZDEM3+U2I4VmLI6N8jQYUd2OVphdqWwCJHrFt2s=
256258
github.com/opencontainers/image-spec v1.0.1/go.mod h1:BtxoFyWECRxE4U/7sNtV5W15zMzWCbyJoFRP3s7yZA0=
257259
github.com/openzipkin/zipkin-go v0.1.1/go.mod h1:NtoC/o8u3JlF1lSlyPNswIbeQH9bJTmOf0Erfk+hxe8=

test/e2e-install.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,9 @@ function install-npd() {
6767
echo "Installing NPD systemd service."
6868
cp "${workdir}"/config/systemd/node-problem-detector-metric-only.service /etc/systemd/system/node-problem-detector.service
6969

70+
echo "Installing problem maker binary, used only for e2e testing."
71+
cp "${workdir}"/test/bin/problem-maker "${BIN_DIR}"
72+
7073
rm -rf "${workdir}"
7174

7275
# Start systemd service.

test/e2e/README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ Currently the tests only support Google Compute Engine (GCE) environment. Suppor
66

77
## Prerequisites
88

9-
1. Setup [Google Application Default Credentials](https://developers.google.com/identity/protocols/application-default-credentials), which is [required for authentication](https://godoc.org/google.golang.org/api/compute/v1#hdr-Creating_a_client) by the Compute Engine API.
9+
1. Setup [Google Application Default Credentials (ADC)](https://developers.google.com/identity/protocols/application-default-credentials), which is [required for authentication](https://godoc.org/google.golang.org/api/compute/v1#hdr-Creating_a_client) by the Compute Engine API.
1010
2. Setup a [project-wide SSH key](https://cloud.google.com/compute/docs/instances/adding-removing-ssh-keys#project-wide) that can be used to SSH into the GCE VMs.
1111

1212
## Running tests
@@ -21,5 +21,6 @@ export VM_IMAGE=[TESTED_OS_IMAGE:cos-73-11647-217-0]
2121
export IMAGE_PROJECT=[TESTED_OS_IMAGE_PROJECT:cos-cloud]
2222
export SSH_USER=${USER}
2323
export SSH_KEY=~/.ssh/id_rsa
24+
export ARTIFACTS=/tmp/npd
2425
make e2e-test
2526
```

test/e2e/lib/gce/instance.go

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323

2424
"k8s.io/node-problem-detector/test/e2e/lib/ssh"
2525

26+
. "github.com/onsi/gomega"
2627
compute "google.golang.org/api/compute/v1"
2728
)
2829

@@ -145,14 +146,26 @@ func (ins *Instance) RunCommand(cmd string) ssh.Result {
145146
return ssh.Run(cmd, ins.ExternalIP, ins.SshUser, ins.SshKey)
146147
}
147148

149+
// RunCommandOrFail runs a command on the GCE instance and returns the command result. It fails the test if SSH-ing to the instance fails or the command exits with a non-zero code.
150+
func (ins *Instance) RunCommandOrFail(cmd string) ssh.Result {
151+
result := ins.RunCommand(cmd)
152+
Expect(result.SSHError).ToNot(HaveOccurred(), "SSH-ing to the instance failed: %v\n", result)
153+
Expect(result.Code).To(Equal(0), "Running command failed: %v\n", result)
154+
return result
155+
}
156+
148157
// PushFile pushes a local file to a GCE instance.
149158
func (ins *Instance) PushFile(srcPath, destPath string) error {
150159
if ins.ExternalIP == "" {
151160
ins.populateExternalIP()
152161
}
153-
return exec.Command("scp", "-o", "StrictHostKeyChecking no",
162+
output, err := exec.Command("scp", "-o", "StrictHostKeyChecking no",
154163
"-i", ins.SshKey,
155-
srcPath, fmt.Sprintf("%s@%s:%s", ins.SshUser, ins.ExternalIP, destPath)).Run()
164+
srcPath, fmt.Sprintf("%s@%s:%s", ins.SshUser, ins.ExternalIP, destPath)).CombinedOutput()
165+
if err != nil {
166+
return fmt.Errorf("Error running scp: %v.\nHere is the output for the command: %v", err, string(output))
167+
}
168+
return nil
156169
}
157170

158171
// DeleteInstance deletes a GCE instance.

test/e2e/lib/npd/npd.go

Lines changed: 37 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,6 @@ import (
2929
"github.com/avast/retry-go"
3030
)
3131

32-
const npdMetricsFilename = "node-problem-detector-metrics.txt"
33-
const npdLogsFilename = "node-problem-detector.log"
34-
3532
// SetupNPD installs NPD from the test tarball onto the provided GCE instance.
3633
//
3734
// Here is how it works:
@@ -91,6 +88,20 @@ func FetchNPDMetrics(ins gce.Instance) ([]metrics.Float64MetricRepresentation, e
9188
return npdMetrics, nil
9289
}
9390

91+
// FetchNPDMetric fetches and parses a specific metric reported by NPD on the provided GCE instance.
92+
func FetchNPDMetric(ins gce.Instance, metricName string, labels map[string]string) (float64, error) {
93+
gotMetrics, err := FetchNPDMetrics(ins)
94+
if err != nil {
95+
return 0.0, err
96+
}
97+
metric, err := metrics.GetFloat64Metric(gotMetrics, metricName, labels, true)
98+
if err != nil {
99+
return 0.0, fmt.Errorf("Failed to find %s metric with label %v: %v.\nHere is all NPD exported metrics: %v",
100+
metricName, labels, err, gotMetrics)
101+
}
102+
return metric.Value, nil
103+
}
104+
94105
// WaitForNPD waits for NPD to become ready by waiting for expected metrics.
95106
func WaitForNPD(ins gce.Instance, metricNames []string, timeoutSeconds uint) error {
96107
verifyMetricExist := func() error {
@@ -116,30 +127,33 @@ func WaitForNPD(ins gce.Instance, metricNames []string, timeoutSeconds uint) err
116127
}
117128

118129
// SaveTestArtifacts saves debugging data from NPD.
119-
func SaveTestArtifacts(ins gce.Instance, directory string) []error {
130+
func SaveTestArtifacts(ins gce.Instance, artifactDirectory string, testID int) []error {
120131
var errs []error
121132

122-
npdMetrics := ins.RunCommand("curl http://localhost:20257/metrics")
123-
if npdMetrics.SSHError != nil || npdMetrics.Code != 0 {
124-
errs = append(errs, fmt.Errorf("Error fetching NPD metrics: %v\n", npdMetrics))
125-
} else {
126-
npdMetricsPath := path.Join(directory, npdMetricsFilename)
127-
err := ioutil.WriteFile(npdMetricsPath, []byte(npdMetrics.Stdout), 0644)
128-
if err != nil {
129-
errs = append(errs, fmt.Errorf("Error writing to %s: %v", npdMetricsPath, err))
130-
}
133+
if err := saveCommandResultAsArtifact(ins, artifactDirectory, testID,
134+
"curl http://localhost:20257/metrics", "node-problem-detector-metrics"); err != nil {
135+
errs = append(errs, err)
131136
}
132-
133-
npdLog := ins.RunCommand("sudo journalctl -u node-problem-detector.service")
134-
if npdLog.SSHError != nil || npdLog.Code != 0 {
135-
errs = append(errs, fmt.Errorf("Error fetching NPD logs: %v\n", npdLog))
136-
} else {
137-
npdLogsPath := path.Join(directory, npdLogsFilename)
138-
err := ioutil.WriteFile(npdLogsPath, []byte(npdLog.Stdout), 0644)
139-
if err != nil {
140-
errs = append(errs, fmt.Errorf("Error writing to %s: %v", npdLogsPath, err))
141-
}
137+
if err := saveCommandResultAsArtifact(ins, artifactDirectory, testID,
138+
"sudo journalctl -u node-problem-detector.service", "node-problem-detector"); err != nil {
139+
errs = append(errs, err)
140+
}
141+
if err := saveCommandResultAsArtifact(ins, artifactDirectory, testID,
142+
"sudo journalctl -k", "kernel-logs"); err != nil {
143+
errs = append(errs, err)
142144
}
143145

144146
return errs
145147
}
148+
149+
func saveCommandResultAsArtifact(ins gce.Instance, artifactDirectory string, testID int, command string, artifactPrefix string) error {
150+
artifactPath := path.Join(artifactDirectory, fmt.Sprintf("%v-%02d.txt", artifactPrefix, testID))
151+
result := ins.RunCommand(command)
152+
if result.SSHError != nil || result.Code != 0 {
153+
return fmt.Errorf("Error running command: %v\n", result)
154+
}
155+
if err := ioutil.WriteFile(artifactPath, []byte(result.Stdout), 0644); err != nil {
156+
return fmt.Errorf("Error writing artifact to %v: %v\n", artifactPath, err)
157+
}
158+
return nil
159+
}

0 commit comments

Comments
 (0)