diff --git a/.github/workflows/chatops.yaml b/.github/workflows/chatops.yaml index 1d38da5b..d9fce0ad 100644 --- a/.github/workflows/chatops.yaml +++ b/.github/workflows/chatops.yaml @@ -32,6 +32,7 @@ jobs: pip install ansible pip install ansible-lint cd chatops_deployment + ansible-galaxy install -r ansible/requirements.yml ansible-lint --project-dir ansible - name: Run ShellCheck diff --git a/chatops_deployment/ansible/configure.yml b/chatops_deployment/ansible/configure.yml index 12243af7..9c5b8a10 100644 --- a/chatops_deployment/ansible/configure.yml +++ b/chatops_deployment/ansible/configure.yml @@ -44,9 +44,8 @@ - name: Set up systemd exporters hosts: stack - gather_facts: false roles: - - role: systemd_exporter + - role: prometheus.prometheus.systemd_exporter tags: - systemd_exporter diff --git a/chatops_deployment/ansible/roles/prometheus/files/prometheus b/chatops_deployment/ansible/roles/prometheus/files/prometheus new file mode 100644 index 00000000..a7ff6076 --- /dev/null +++ b/chatops_deployment/ansible/roles/prometheus/files/prometheus @@ -0,0 +1 @@ +ARGS="--web.config.file=/etc/prometheus/prometheus-web.yml" \ No newline at end of file diff --git a/chatops_deployment/ansible/roles/prometheus/files/prometheus.filebeat.yml b/chatops_deployment/ansible/roles/prometheus/files/prometheus.filebeat.yml index 3450fbfd..18989691 100644 --- a/chatops_deployment/ansible/roles/prometheus/files/prometheus.filebeat.yml +++ b/chatops_deployment/ansible/roles/prometheus/files/prometheus.filebeat.yml @@ -1,9 +1,9 @@ --- -- type: filestream - id: prometheus - enabled: true - paths: - - /opt/prometheus/prometheus.log - fields: - service.name: prometheus - fields_under_root: true +filebeat.inputs: + - type: journald + id: prometheus + include_matches.match: + - _SYSTEMD_UNIT=prometheus.service + fields: + service.name: prometheus + fields_under_root: true diff --git a/chatops_deployment/ansible/roles/prometheus/files/prometheus.service 
b/chatops_deployment/ansible/roles/prometheus/files/prometheus.service deleted file mode 100644 index b4c7c41b..00000000 --- a/chatops_deployment/ansible/roles/prometheus/files/prometheus.service +++ /dev/null @@ -1,19 +0,0 @@ -[Unit] -Description=Prometheus Server -Documentation=https://prometheus.io/docs/introduction/overview/ -After=network-online.target - -[Service] -User=prometheus -Group=prometheus -Restart=on-failure -ExecStart=/opt/prometheus/prometheus \ - --config.file=/opt/prometheus/prometheus.yml \ - --storage.tsdb.path=/var/stack/prometheus/data \ - --storage.tsdb.retention.time=30d \ - --web.config.file=/opt/prometheus/web.yml -StandardOutput=append:/var/log/prometheus/prometheus.log -StandardError=append:/var/log/prometheus/prometheus.log - -[Install] -WantedBy=multi-user.target diff --git a/chatops_deployment/ansible/roles/prometheus/files/rules.yml b/chatops_deployment/ansible/roles/prometheus/files/rules.yml index 12955958..ec709d6c 100644 --- a/chatops_deployment/ansible/roles/prometheus/files/rules.yml +++ b/chatops_deployment/ansible/roles/prometheus/files/rules.yml @@ -12,10 +12,13 @@ groups: description: "Container has been not running for more than 30 seconds." - alert: SystemdServiceDown - expr: systemd_unit_state{name=~"grafana-server.service|haproxy.service",state=~"failed|inactive"} == 1 + expr: | + systemd_unit_state{ + name=~"grafana-server.service|haproxy.service|kibana.service|elasticsearch.service|logstash.service|filebeat.service",state=~"failed|inactive" + } == 1 for: 30s labels: severity: critical annotations: summary: "Systemd service {{ $labels.name }} on host {{ $labels.instance }} is in state {{ $labels.state }}." - description: "Systemd service has been in failed state for 30s." + description: "Systemd service has been in {{ $labels.state }} state for 30s." 
diff --git a/chatops_deployment/ansible/roles/prometheus/handlers/main.yml b/chatops_deployment/ansible/roles/prometheus/handlers/main.yml deleted file mode 100644 index c2e76d10..00000000 --- a/chatops_deployment/ansible/roles/prometheus/handlers/main.yml +++ /dev/null @@ -1,24 +0,0 @@ ---- -- name: Move Prometheus binaries - become: true - ansible.builtin.copy: - src: "/tmp/prometheus-{{ prometheus_version }}.linux-amd64/" - dest: "/opt/prometheus" - mode: preserve - owner: prometheus - group: prometheus - remote_src: true - -- name: Start Prometheus - become: true - ansible.builtin.systemd_service: - name: prometheus.service - state: started - daemon_reload: true - enabled: true - -- name: Restart Prometheus - become: true - ansible.builtin.systemd_service: - name: prometheus.service - state: restarted diff --git a/chatops_deployment/ansible/roles/prometheus/tasks/main.yml b/chatops_deployment/ansible/roles/prometheus/tasks/main.yml index 2a4d33bc..f1b3769c 100644 --- a/chatops_deployment/ansible/roles/prometheus/tasks/main.yml +++ b/chatops_deployment/ansible/roles/prometheus/tasks/main.yml @@ -1,41 +1,9 @@ --- -- name: Create prometheus group +- name: Install Prometheus become: true - ansible.builtin.group: + ansible.builtin.apt: name: prometheus - state: present - -- name: Add ubuntu to prometheus group - become: true - ansible.builtin.user: - name: ubuntu - group: prometheus - -- name: Reset connection for group changes - ansible.builtin.meta: reset_connection - -- name: Create a prometheus user - become: true - ansible.builtin.user: - name: prometheus - create_home: false - group: prometheus - system: true - -- name: Download and extract Prometheus - become: true - ansible.builtin.unarchive: - src: " https://github.com/prometheus/prometheus/releases/download/v{{ prometheus_version }}/prometheus-{{ prometheus_version }}.linux-amd64.tar.gz " - dest: /tmp - remote_src: true - creates: "/opt/prometheus" - mode: "0774" - notify: - - Move Prometheus binaries - - 
Start Prometheus - -- name: Flush handlers to move binaries - ansible.builtin.meta: flush_handlers + state: latest # noqa: package-latest - name: Set permissions on volume become: true @@ -47,55 +15,61 @@ mode: "0774" recurse: true -- name: Copy prometheus service file +- name: Copy prometheus rules file become: true ansible.builtin.copy: - src: prometheus.service - dest: /etc/systemd/system/prometheus.service + src: rules.yml + dest: /etc/prometheus/rules.yml owner: prometheus group: prometheus mode: "0774" - notify: - - Start Prometheus -- name: Copy prometheus rules file +- name: Copy systemd arguments become: true ansible.builtin.copy: - src: rules.yml - dest: /opt/prometheus/rules.yml + src: prometheus + dest: "/etc/default/prometheus" owner: prometheus group: prometheus - mode: "0774" - notify: - - Restart Prometheus + mode: "0644" - name: Template prometheus config become: true ansible.builtin.template: src: "{{ item }}" - dest: "/opt/prometheus/{{ item[:-3] }}" + dest: "/etc/prometheus/{{ item[:-3] }}" owner: prometheus group: prometheus mode: "0774" - notify: - - Restart Prometheus loop: - prometheus.yml.j2 - - web.yml.j2 + - prometheus-web.yml.j2 -- name: Create Prometheus log directory - ansible.builtin.file: - path: /var/log/prometheus - state: directory +- name: Copy certificate and key + become: true + ansible.builtin.copy: + src: "./{{ env }}_ssl/{{ item }}" + dest: "/etc/prometheus/{{ item }}" owner: prometheus group: prometheus - mode: "0770" + mode: "0440" + loop: + - prometheus.key + - prometheus.crt + - alertmanager.crt + +- name: Restart Prometheus + become: true + ansible.builtin.systemd_service: + name: prometheus.service + state: restarted + daemon_reload: true - name: Copy filebeat external config become: true ansible.builtin.copy: src: prometheus.filebeat.yml dest: /var/filebeat/prometheus.filebeat.yml - owner: root - group: root + owner: prometheus + group: prometheus mode: "0640" diff --git 
a/chatops_deployment/ansible/roles/prometheus/templates/prometheus-web.yml.j2 b/chatops_deployment/ansible/roles/prometheus/templates/prometheus-web.yml.j2 new file mode 100644 index 00000000..8cd65287 --- /dev/null +++ b/chatops_deployment/ansible/roles/prometheus/templates/prometheus-web.yml.j2 @@ -0,0 +1,6 @@ +basic_auth_users: + {{ prometheus_username }}: {{ prometheus_password | ansible.builtin.password_hash(hashtype="bcrypt") }} + +tls_server_config: + cert_file: /etc/prometheus/prometheus.crt + key_file: /etc/prometheus/prometheus.key diff --git a/chatops_deployment/ansible/roles/prometheus/templates/prometheus.yml.j2 b/chatops_deployment/ansible/roles/prometheus/templates/prometheus.yml.j2 index b037ae80..b6f30e85 100644 --- a/chatops_deployment/ansible/roles/prometheus/templates/prometheus.yml.j2 +++ b/chatops_deployment/ansible/roles/prometheus/templates/prometheus.yml.j2 @@ -1,5 +1,6 @@ global: scrape_interval: 15s + evaluation_interval: 15s external_labels: monitor: 'chatops-monitor' @@ -7,9 +8,12 @@ scrape_configs: - job_name: 'prometheus' static_configs: - targets: ['localhost:9090'] + scheme: https basic_auth: username: "{{ prometheus_username }}" password: "{{ prometheus_password }}" + tls_config: + ca_file: /etc/prometheus/prometheus.crt - job_name: 'alertmanager' static_configs: @@ -17,26 +21,28 @@ scrape_configs: basic_auth: username: "{{ alertmanager_username }}" password: "{{ alertmanager_password }}" + scheme: https + tls_config: + ca_file: /etc/prometheus/alertmanager.crt - job_name: 'load-balancer-metrics' static_configs: - - targets: ['{{ loadbalancer_private_ip }}:8405'] + - targets: ['localhost:8405'] - job_name: 'chatops_cadvisor' static_configs: - {% for host in groups['chatops'] %} - - targets: ['{{ host }}:8080'] - {% endfor %} + - targets: ['localhost:8080'] - job_name: 'systemd_exporter' static_configs: - {% for host in groups['private'] %} - - targets: ['{{ host }}:9558'] - {% endfor %} - - targets: ['{{ loadbalancer_private_ip 
}}:9558'] + - targets: ['localhost:9558'] + + - job_name: 'node_exporter' + static_configs: + - targets: ['localhost:9100'] rule_files: - - '/opt/prometheus/rules.yml' + - '/etc/prometheus/rules.yml' alerting: alertmanagers: @@ -47,3 +53,5 @@ alerting: basic_auth: username: "{{ alertmanager_username }}" password: "{{ alertmanager_password }}" + tls_config: + ca_file: /etc/prometheus/alertmanager.crt diff --git a/chatops_deployment/ansible/roles/prometheus/templates/web.yml.j2 b/chatops_deployment/ansible/roles/prometheus/templates/web.yml.j2 deleted file mode 100644 index d63f6432..00000000 --- a/chatops_deployment/ansible/roles/prometheus/templates/web.yml.j2 +++ /dev/null @@ -1,2 +0,0 @@ -basic_auth_users: - {{ prometheus_username }}: {{ prometheus_password | ansible.builtin.password_hash(hashtype="bcrypt") }} \ No newline at end of file diff --git a/chatops_deployment/ansible/roles/systemd_exporter/files/systemd-exporter.service b/chatops_deployment/ansible/roles/systemd_exporter/files/systemd-exporter.service deleted file mode 100644 index 7e90beae..00000000 --- a/chatops_deployment/ansible/roles/systemd_exporter/files/systemd-exporter.service +++ /dev/null @@ -1,15 +0,0 @@ -[Unit] -Description=Systemd Exporter Service -Documentation=https://github.com/prometheus-community/systemd_exporter -After=network-online.target - -[Service] -User=systemd-exporter -Group=systemd-exporter -Restart=on-failure -ExecStart=/opt/systemd-exporter/systemd_exporter --web.telemetry-path="/" -StandardOutput=append:/var/log/systemd-exporter/systemd-exporter.log -StandardError=append:/var/log/systemd-exporter/systemd-exporter.log - -[Install] -WantedBy=multi-user.target diff --git a/chatops_deployment/ansible/roles/systemd_exporter/tasks/main.yml b/chatops_deployment/ansible/roles/systemd_exporter/tasks/main.yml deleted file mode 100644 index a4edb1ee..00000000 --- a/chatops_deployment/ansible/roles/systemd_exporter/tasks/main.yml +++ /dev/null @@ -1,79 +0,0 @@ ---- -- name: Create 
systemd-exporter group - become: true - ansible.builtin.group: - name: systemd-exporter - state: present - -- name: Add ubuntu to systemd-exporter group - become: true - ansible.builtin.user: - name: ubuntu - groups: systemd-exporter - append: true - -- name: Reset connection for group changes - ansible.builtin.meta: reset_connection - -- name: Create a systemd-exporter user - become: true - ansible.builtin.user: - name: systemd-exporter - create_home: false - group: systemd-exporter - system: true - -- name: Download and extract systemd-exporter - become: true - ansible.builtin.unarchive: - src: "https://github.com/prometheus-community/systemd_exporter/releases/download/v{{ systemd_exporter_version }}/ - systemd_exporter-{{ systemd_exporter_version}}.linux-amd64.tar.gz" - dest: /tmp - remote_src: true - creates: "/tmp/systemd_exporter-{{ systemd_exporter_version }}.linux-amd64" - mode: "0774" - -- name: Move systemd-exporter binaries - become: true - ansible.builtin.copy: - src: "/tmp/systemd_exporter-{{ systemd_exporter_version }}.linux-amd64/" - dest: "/opt/systemd-exporter" - mode: preserve - owner: systemd-exporter - group: systemd-exporter - remote_src: true - -- name: Create systemd directory - become: true - ansible.builtin.file: - path: /usr/local/lib/systemd/system - state: directory - mode: "0755" - owner: root - group: root - -- name: Copy systemd-exporter service file - become: true - ansible.builtin.copy: - src: systemd-exporter.service - dest: /usr/local/lib/systemd/system/systemd-exporter.service - owner: systemd-exporter - group: systemd-exporter - mode: "0774" - -- name: Create systemd-exporter logging directory - become: true - ansible.builtin.file: - path: /var/log/systemd-exporter - state: directory - mode: "0774" - owner: systemd-exporter - group: systemd-exporter - -- name: Start systemd-exporter service - become: true - ansible.builtin.systemd_service: - name: systemd-exporter.service - state: restarted - daemon_reload: true - enabled: true