Skip to content

Commit c54247a

Browse files
authored
Rework log collector for GKE clusters (#222)
Rework log collector for services
1 parent 780c872 commit c54247a

File tree

9 files changed: +287 -50 lines changed

ib.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@
2020
{'name': 'gerrit-review', 'depends_on': ['images-base']},
2121
{'name': 'github-trigger', 'depends_on': ['images-base']},
2222
{'name': 'github-review', 'depends_on': ['images-base']},
23-
{'name': 'collector-api'},
23+
{'name': 'collector-api', 'depends_on': ['images-base']},
2424
{'name': 'job'},
2525
{'name': 'opa'},
2626
{'name': 'gc', 'depends_on': ['images-base']},

src/api/handlers/job_api.py

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -480,6 +480,16 @@ class Archive(Resource):
480480
def post(self):
481481
job_id = g.token['job']['id']
482482

483+
j = g.db.execute_one_dict('''
484+
SELECT id
485+
FROM job
486+
WHERE id = %s
487+
AND (state = 'running' OR end_date > NOW() - INTERVAL '5 minutes')
488+
''', [job_id])
489+
490+
if not j:
491+
abort(401, 'Unauthorized')
492+
483493
for f in request.files:
484494
stream = request.files[f].stream
485495
key = '%s/%s' % (job_id, f)

src/api/handlers/projects/jobs.py

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -307,15 +307,11 @@ def get(self, project_id, job_id):
307307
# First restart
308308
j['name'] = j['name'] + '.1'
309309

310-
logger.error(json.dumps(old_id_job, indent=4))
311-
312310
for j in jobs:
313311
for dep in j['dependencies']:
314312
if dep['job-id'] in old_id_job:
315313
dep['job'] = old_id_job[dep['job-id']]['name']
316314
dep['job-id'] = old_id_job[dep['job-id']]['id']
317-
else:
318-
logger.error('%s not found', dep['job'])
319315

320316
for j in jobs:
321317
g.db.execute('''

src/collector-api/server.py

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,9 @@ def get(self):
2626
return {'status': 200}
2727

2828
def handle_entry(entry):
29+
if 'kubernetes' not in entry:
30+
return
31+
2932
e = entry['kubernetes']
3033
pod_path = os.path.join(storage_path, e['pod_id'])
3134

@@ -35,11 +38,9 @@ def handle_entry(entry):
3538
metadata_path = os.path.join(pod_path, "metadata.json")
3639
log_path = os.path.join(pod_path, e['container_name'] +".log")
3740

38-
3941
if not os.path.exists(metadata_path):
4042
with open(metadata_path, 'w+') as metadata_file:
4143
md = {
42-
'namespace_id': e['namespace_id'],
4344
'namespace_name': e['namespace_name'],
4445
'pod_id': e['pod_id'],
4546
'pod_name': e['pod_name'],
@@ -58,7 +59,9 @@ def handle_entry(entry):
5859

5960
if 'log' in entry:
6061
with open(log_path, 'a+') as log_file:
61-
log_file.write(entry['log'])
62+
log = entry['log']
63+
log = log.replace('\x00', '\n')
64+
log_file.write(log)
6265

6366
@api.route('/api/log')
6467
class Console(Resource):
@@ -114,6 +117,9 @@ def get(self, pod_id, container_name):
114117
def main(): # pragma: no cover
115118
app.config['MAX_CONTENT_LENGTH'] = 1024 * 1024 * 1024 * 4
116119

120+
if not os.path.exists(storage_path):
121+
os.makedirs(storage_path)
122+
117123
port = int(os.environ.get('INFRABOX_PORT', 8080))
118124
logger.info('Starting Server on port %s', port)
119125
app.run(host='0.0.0.0', port=port)

src/dashboard-client/src/models/Job.js

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@ import Notification from '../models/Notification'
33
import NotificationService from '../services/NotificationService'
44
import NewAPIService from '../services/NewAPIService'
55
import store from '../store'
6+
import router from '../router'
67
const Convert = require('ansi-to-html')
78

89
class Section {

src/openpolicyagent/policies/job.rego

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -57,8 +57,15 @@ allow {
5757
allow {
5858
api.method = "POST"
5959
api.path = ["api", "job", suffix]
60-
job_suffix := {"cache", "archive", "output", "create_jobs", "consoleupdate", "stats", "markup", "badge", "testresult"}
60+
job_suffix := {"cache", "output", "create_jobs", "consoleupdate", "stats", "markup", "badge", "testresult"}
6161
suffix = job_suffix[_]
6262
api.token.type = "job"
6363
api.token.job.state = job_state[_]
64-
}
64+
}
65+
66+
# Allow POST access to /api/job/archive for valid job tokens (for service uploads)
67+
allow {
68+
api.method = "POST"
69+
api.path = ["api", "job", "archive"]
70+
api.token.type = "job"
71+
}

src/services/aks/pkg/controller/akscluster/akscluster_controller.go

Lines changed: 82 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -514,8 +514,6 @@ func retrieveLogs(cr *v1alpha1.AKSCluster, cluster *RemoteCluster, log *logrus.E
514514
return
515515
}
516516

517-
log.Info(string(*data))
518-
519517
err = json.Unmarshal(*data, &pods)
520518
if err != nil {
521519
log.Errorf("Failed to collected pod list: %v", err)
@@ -532,7 +530,7 @@ func retrieveLogs(cr *v1alpha1.AKSCluster, cluster *RemoteCluster, log *logrus.E
532530
continue
533531
}
534532

535-
filename := "pod_" + pod.Namespace + "_" + pod.Pod + "_" + pod.PodID + ".txt"
533+
filename := "pod_" + pod.Namespace + "_" + pod.Pod + "_" + container + ".txt"
536534
err = uploadToArchive(cr, log, data, filename)
537535
if err != nil {
538536
log.Warningf("Failed to upload log to archive: %v", err)
@@ -568,6 +566,12 @@ func injectCollector(cluster *RemoteCluster, log *logrus.Entry) error {
568566
return err
569567
}
570568

569+
err = kubectlApply(cluster, newFluentbitConfigMap(), log)
570+
if err != nil {
571+
log.Errorf("Failed to create fluent bit config map: %v", err)
572+
return err
573+
}
574+
571575
err = kubectlApply(cluster, newCollectorDaemonSet(), log)
572576
if err != nil {
573577
log.Errorf("Failed to create collector daemon set: %v", err)
@@ -691,30 +695,83 @@ func newCollectorDeployment() *appsv1.Deployment {
691695
}
692696
}
693697

698+
func newFluentbitConfigMap() *v1.ConfigMap{
699+
return &v1.ConfigMap{
700+
TypeMeta: metav1.TypeMeta{
701+
Kind: "ConfigMap",
702+
APIVersion: "v1",
703+
},
704+
ObjectMeta: metav1.ObjectMeta{
705+
Name: "infrabox-fluent-bit",
706+
Namespace: "infrabox-collector",
707+
},
708+
Data: map[string]string {
709+
"parsers.conf": `
710+
[PARSER]
711+
Name docker_utf8
712+
Format json
713+
Time_Key time
714+
Time_Format %Y-%m-%dT%H:%M:%S.%L
715+
Time_Keep On
716+
Decode_Field_as escaped_utf8 log do_next
717+
Decode_Field_as escaped log
718+
`,
719+
"fluent-bit.conf": `
720+
[SERVICE]
721+
Flush 2
722+
Daemon Off
723+
Log_Level info
724+
Parsers_File parsers.conf
725+
[INPUT]
726+
Name tail
727+
Path /var/log/containers/*.log
728+
Parser docker_utf8
729+
Tag kube.*
730+
Refresh_Interval 2
731+
Mem_Buf_Limit 50MB
732+
Skip_Long_Lines On
733+
[FILTER]
734+
Name kubernetes
735+
Match kube.*
736+
Kube_URL https://kubernetes.default.svc.cluster.local:443
737+
Kube_CA_File /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
738+
Kube_Token_File /var/run/secrets/kubernetes.io/serviceaccount/token
739+
[OUTPUT]
740+
Name http
741+
Match *
742+
Host infrabox-collector-api.infrabox-collector
743+
Port 80
744+
URI /api/log
745+
Format json
746+
`,
747+
},
748+
}
749+
}
750+
694751
func newCollectorDaemonSet() *appsv1.DaemonSet {
695752
return &appsv1.DaemonSet{
696753
TypeMeta: metav1.TypeMeta{
697754
Kind: "DaemonSet",
698755
APIVersion: "extensions/v1beta1",
699756
},
700757
ObjectMeta: metav1.ObjectMeta{
701-
Name: "infrabox-collector-fluentd",
758+
Name: "infrabox-collector-fluent-bit",
702759
Namespace: "infrabox-collector",
703760
},
704761
Spec: appsv1.DaemonSetSpec{
705762
Template: v1.PodTemplateSpec{
706763
ObjectMeta: metav1.ObjectMeta{
707764
Labels: map[string]string{
708-
"app": "fluentd.collector.infrabox.net",
765+
"app": "fluentbit.collector.infrabox.net",
709766
},
710767
},
711768
Spec: v1.PodSpec{
712769
Containers: []v1.Container{{
713-
Name: "fluentd",
714-
Image: "quay.io/infrabox/collector-fluentd",
770+
Name: "fluent-bit",
771+
Image: "fluent/fluent-bit:0.13",
715772
Resources: v1.ResourceRequirements{
716773
Limits: v1.ResourceList{
717-
"memory": resource.MustParse("200Mi"),
774+
"memory": resource.MustParse("100Mi"),
718775
},
719776
Requests: v1.ResourceList{
720777
"cpu": resource.MustParse("100m"),
@@ -728,10 +785,14 @@ func newCollectorDaemonSet() *appsv1.DaemonSet {
728785
Name: "varlibdockercontainers",
729786
MountPath: "/var/lib/docker/containers",
730787
ReadOnly: true,
731-
}},
732-
Env: []v1.EnvVar{{
733-
Name: "INFRABOX_COLLECTOR_ENDPOINT",
734-
Value: "http://infrabox-collector-api.infrabox-collector/api/log",
788+
}, {
789+
Name: "config",
790+
MountPath: "/fluent-bit/etc/parsers.conf",
791+
SubPath: "parsers.conf",
792+
}, {
793+
Name: "config",
794+
MountPath: "/fluent-bit/etc/fluent-bit.conf",
795+
SubPath: "fluent-bit.conf",
735796
}},
736797
}},
737798
Volumes: []v1.Volume{{
@@ -748,11 +809,18 @@ func newCollectorDaemonSet() *appsv1.DaemonSet {
748809
Path: "/var/log",
749810
},
750811
},
812+
}, {
813+
Name: "config",
814+
VolumeSource: v1.VolumeSource{
815+
ConfigMap: &v1.ConfigMapVolumeSource{
816+
LocalObjectReference: v1.LocalObjectReference{
817+
Name: "infrabox-fluent-bit",
818+
},
819+
},
820+
},
751821
}},
752822
},
753823
},
754824
},
755825
}
756826
}
757-
758-
// newPodForCR returns a busybox pod with the same name/namespace as the cr

0 commit comments

Comments
 (0)