diff --git a/roles/edpm_telemetry_power_monitoring/defaults/main.yml b/roles/edpm_telemetry_power_monitoring/defaults/main.yml
index ad3a1a16e..eacc96dfc 100644
--- a/roles/edpm_telemetry_power_monitoring/defaults/main.yml
+++ b/roles/edpm_telemetry_power_monitoring/defaults/main.yml
@@ -38,7 +38,7 @@ edpm_telemetry_image_download_retries: "{{ edpm_download_retries | default(5) }}
 edpm_telemetry_old_tripleo_compute_sevices:
   - tripleo_ceilometer_agent_ipmi.service
 # Image to use for kepler
-edpm_telemetry_kepler_image: "quay.io/sustainable_computing_io/kepler:release-0.7.12"
+edpm_telemetry_kepler_image: "quay.io/sustainable_computing_io/kepler:v0.10.2"
 # Instruction for distribution of container health check scripts
 edpm_telemetry_power_monitoring_healthcheck_sources:
   ceilometer_agent_ipmi: ceilometer_agent
diff --git a/roles/edpm_telemetry_power_monitoring/files/healthchecks/exporter/healthcheck b/roles/edpm_telemetry_power_monitoring/files/healthchecks/exporter/healthcheck
index 53a669f60..bfd5031ab 100644
--- a/roles/edpm_telemetry_power_monitoring/files/healthchecks/exporter/healthcheck
+++ b/roles/edpm_telemetry_power_monitoring/files/healthchecks/exporter/healthcheck
@@ -15,11 +15,11 @@
 # License for the specific language governing permissions and limitations
 # under the License.
 
-URL="http://0.0.0.0:8888/healthz"
+URL="http://0.0.0.0:8888/metrics"
 TIMEOUT=5 # Timeout in seconds
 
 # Get the HTTP status code and response body using curl
-RESPONSE=$(curl -s -w "%{http_code}" $URL --max-time $TIMEOUT)
+RESPONSE=$(curl -I -s -w "%{http_code}" "$URL" --max-time "$TIMEOUT")
 BODY=${RESPONSE:0:-3} # Extract the body (all but the last 3 characters)
 HTTP_CODE=${RESPONSE: -3} # Extract the last 3 characters as the HTTP status code
 
@@ -29,11 +29,5 @@ if [ "$HTTP_CODE" -ne 200 ]; then
     exit 1
 fi
 
-# Check if the response body contains "ok"
-if [[ "$BODY" != *"ok"* ]]; then
-    echo "$1 Health check failed: Response body does not contain 'ok'"
-    exit 1
-fi
-
-echo "$1 Health check passed: HTTP status code $HTTP_CODE, Health response 'ok'"
+echo "$1 Health check passed: HTTP status code $HTTP_CODE"
 exit 0
diff --git a/roles/edpm_telemetry_power_monitoring/tasks/configure.yml b/roles/edpm_telemetry_power_monitoring/tasks/configure.yml
index 3bd68650c..79499246b 100644
--- a/roles/edpm_telemetry_power_monitoring/tasks/configure.yml
+++ b/roles/edpm_telemetry_power_monitoring/tasks/configure.yml
@@ -98,6 +98,13 @@
     path: "{{ edpm_telemetry_certs }}/tls.key"
   register: tls_key_stat
 
+- name: Gather virtualization fact
+  ansible.builtin.setup:
+    gather_subset:
+      - "!all"
+      - "!min"
+      - "virtual"
+
 - name: Render container config templates
   ansible.builtin.template:
     src: "{{ item }}"
@@ -108,6 +115,7 @@
   vars:
     ca_bundle_exists: "{{ ca_bundle_stat_res.stat.exists }}"
     tls_cert_exists: "{{ tls_crt_stat.stat.exists and tls_key_stat.stat.exists }}"
+    running_in_vm: "{{ (ansible_facts['virtualization_role'] | default('')) == 'guest' }}"
 
 - name: Configure tls if present
   when:
diff --git a/roles/edpm_telemetry_power_monitoring/templates/kepler-config.yaml.j2 b/roles/edpm_telemetry_power_monitoring/templates/kepler-config.yaml.j2
new file mode 100644
index 000000000..90ba29d69
--- /dev/null
+++ b/roles/edpm_telemetry_power_monitoring/templates/kepler-config.yaml.j2
@@ -0,0 +1,10 @@
+web:
+  listenAddresses:
+    - ":8888"
+{% if running_in_vm | default(false) | bool %}
+# WARNING: DO NOT ENABLE THIS IN PRODUCTION - the fake CPU meter is for CI testing only
+dev:
+  fake-cpu-meter:
+    enabled: true
+    zones: ["package", "core", "dram"]
+{% endif %}
diff --git a/roles/edpm_telemetry_power_monitoring/templates/kepler.json.j2 b/roles/edpm_telemetry_power_monitoring/templates/kepler.json.j2
index 603058d50..b782c4dc6 100644
--- a/roles/edpm_telemetry_power_monitoring/templates/kepler.json.j2
+++ b/roles/edpm_telemetry_power_monitoring/templates/kepler.json.j2
@@ -4,16 +4,8 @@
   "restart": "always",
   "ports": ["8888:8888"],
   "net": "host",
-  "command": "-v=2",
+  "command": "--config.file=/etc/kepler/kepler-config.yaml",
   "recreate": true,
-  "environment": {
-    "ENABLE_GPU": "true",
-    "EXPOSE_CONTAINER_METRICS": "true",
-    "ENABLE_PROCESS_METRICS": "true",
-    "EXPOSE_VM_METRICS": "true",
-    "EXPOSE_ESTIMATED_IDLE_POWER_METRICS": "false",
-    "LIBVIRT_METADATA_URI": "http://openstack.org/xmlns/libvirt/nova/1.1"
-  },
 {% if edpm_telemetry_power_monitoring_healthcheck %}
   "healthcheck": {
     "test": "/openstack/healthcheck kepler",
@@ -21,6 +13,7 @@
   },
 {% endif %}
   "volumes": [
+    "{{ edpm_telemetry_config_dest }}/kepler-config.yaml:/etc/kepler/kepler-config.yaml:ro",
     "/lib/modules:/lib/modules:ro",
     "/run/libvirt:/run/libvirt:shared,ro",
     "/sys:/sys",