Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ A resource-friendly, highly efficient, and minimal Prometheus exporter to track
See a sample of the metrics page [here](./extra/metrics.txt).

## 🎥 DEMO
<img src="./capture/CXP-DEMO.gif" width="100%" height="50%" />
<img src="https://shayan-ghani.github.io/Container-Exporter/CXP-DEMO.gif" width="100%" height="50%" />


## 📋 Step-by-Step Guide
Expand Down
73 changes: 31 additions & 42 deletions container_exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,72 +10,61 @@
app = Flask(__name__)


# TODO: handle init containers with better storage methods
# TODO: modularization

# Prometheus metrics exported by this service.  Every metric carries a single
# ``container_name`` label so series can be selected per container.

# Gauges: container state and point-in-time resource usage.
container_status = Gauge(
    'cxp_container_status',
    'Docker container status (1 = running, 0 = not running)',
    labelnames=['container_name'],
)
container_cpu_percentage = Gauge(
    'cxp_cpu_percentage',
    'Docker container cpu usage',
    labelnames=['container_name'],
)
container_memory_percentage = Gauge(
    'cxp_memory_percentage',
    'Docker container memory usage in percent',
    labelnames=['container_name'],
)
container_memory_bytes_total = Gauge(
    'cxp_memory_bytes_total',
    'Docker container memory usage in bytes',
    labelnames=['container_name'],
)

# Counters: disk I/O.
disk_io_read_counter = Counter(
    "cxp_disk_io_read_bytes_total",
    "Total number of bytes read from disk",
    labelnames=['container_name'],
)
disk_io_write_counter = Counter(
    "cxp_disk_io_write_bytes_total",
    "Total number of bytes written to disk",
    labelnames=['container_name'],
)

# Counters: network I/O.
network_rx_counter = Counter(
    "cxp_network_rx_bytes_total",
    "Total number of bytes received over the network",
    labelnames=['container_name'],
)
network_tx_counter = Counter(
    "cxp_network_tx_bytes_total",
    "Total number of bytes transmitted over the network",
    labelnames=['container_name'],
)


def get_containers(all=False):
    """Return the Docker containers reported by the daemon.

    Args:
        all: When False (the default) only running containers are listed;
            when True, containers in every state (killed, exited, stopped
            and running) are included.  The parameter name shadows the
            builtin but is kept because callers pass it by keyword.

    Returns:
        The result of ``containers.list`` on the client built by
        ``docker_env()``.
    """
    client = docker_env()
    # Pass by keyword so the call does not depend on parameter order.
    return client.containers.list(all=all)


def get_init_container():
    """Return the containers that are running right now (legacy name)."""
    return get_containers()


def get_all_container():
    """Return containers in every state (legacy name)."""
    return get_containers(all=True)


# Names of the containers that were running when the exporter started.
# update_container_status() appends to this list when it sees a new running
# container, so it must stay a mutable list.
init_containers_names = [c.name for c in get_containers()]

def update_container_status(containers=None):
    """Refresh the ``cxp_container_status`` gauge for all tracked containers.

    A container is "tracked" once its name is in ``init_containers_names``.
    Tracked containers get 1 when their status is ``"running"`` and 0
    otherwise; untracked containers start being tracked the first time they
    are seen running.  Tracked names that are absent from ``containers``
    (i.e. the container was removed) are set to 0.

    Args:
        containers: Iterable of container objects exposing ``name`` and
            ``status``.  When omitted, every container is fetched with
            ``get_containers(all=True)``.
    """
    if containers is None:
        containers = get_containers(all=True)
    # Materialize once: the collection is walked twice below.
    containers = list(containers)

    for container in containers:
        is_running = container.status == "running"
        if container.name in init_containers_names:
            container_status.labels(container_name=container.name).set(1 if is_running else 0)
        elif is_running:
            container_status.labels(container_name=container.name).set(1)
            init_containers_names.append(container.name)

    # Build the lookup set once instead of rebuilding a list per tracked name.
    present_names = {c.name for c in containers}
    for container_name in init_containers_names:
        if container_name not in present_names:
            container_status.labels(container_name=container_name).set(0)


async def container_stats():
    """Fetch stats for the listed containers concurrently and update metrics.

    Opens a ``Docker`` client, gathers ``stat.get_container_stats`` for every
    container returned by ``docker.containers.list()``, then updates the CPU
    and memory gauges and increments the disk/network counters.  The client
    is always closed, even when listing or gathering fails.
    """
    docker = Docker()
    try:
        containers = await docker.containers.list()
        tasks = [stat.get_container_stats(container) for container in containers]
        all_stats = await gather(*tasks)
        for stats in all_stats:
            sample = stats[0]
            # Drop the first character of the reported name
            # (presumably the leading "/" Docker prepends — TODO confirm).
            name = sample['name'][1:]

            container_cpu_percentage.labels(container_name=name).set(stat.calculate_cpu_percentage(sample))
            container_memory_percentage.labels(container_name=name).set(stat.calculate_memory_percentage(sample))
            container_memory_bytes_total.labels(container_name=name).set(stat.calculate_memory_bytes(sample))

            # Compute each I/O pair once; index 0 feeds the read/rx counter
            # and index 1 the write/tx counter.
            # NOTE(review): if these helpers return cumulative totals rather
            # than deltas, inc() over-counts across scrapes — confirm.
            disk_io = stat.calculate_disk_io(sample)
            disk_io_read_counter.labels(container_name=name).inc(disk_io[0])
            disk_io_write_counter.labels(container_name=name).inc(disk_io[1])

            network_io = stat.calculate_network_io(sample)
            network_rx_counter.labels(container_name=name).inc(network_io[0])
            network_tx_counter.labels(container_name=name).inc(network_io[1])
    finally:
        await docker.close()

# Metrics whose label sets flush_metric_labels() drops.  container_status is
# deliberately not listed, so status series survive a flush.
metrics_names = [
    container_cpu_percentage,
    container_memory_percentage,
    container_memory_bytes_total,
    disk_io_read_counter,
    disk_io_write_counter,
    network_rx_counter,
    network_tx_counter,
]


def flush_metric_labels(c=None):
    """Clear every metric in ``metrics_names`` if any container is not running.

    Args:
        c: Iterable of container objects exposing ``status``.  When omitted,
            every container is fetched with ``get_containers(all=True)``.

    NOTE(review): ``clear()`` is called on the whole metric, so series of
    containers that are still running are dropped too (including the
    counters) — confirm this is intended.
    """
    if c is None:
        c = get_containers(all=True)

    # One non-running container is enough; clearing once has the same effect
    # as clearing once per non-running container.
    if any(container.status != "running" for container in c):
        for m in metrics_names:
            m.clear()
Expand All @@ -87,15 +76,15 @@ def index():
@app.route('/metrics')
def metrics():
    """Serve the Prometheus scrape endpoint.

    Refreshes container status, flushes stale label sets, runs
    ``container_stats()`` on a fresh event loop, then returns the latest
    exposition text.

    Returns:
        A ``text/plain`` ``Response`` with the generated metrics, or the
        error message with HTTP status 500 when the refresh raised.
    """
    loop = None
    try:
        # List the containers once and share the result with both helpers.
        all_containers = get_containers(all=True)
        update_container_status(all_containers)
        flush_metric_labels(all_containers)

        loop = new_event_loop()
        t = [loop.create_task(container_stats())]
        # NOTE(review): assuming ``wait`` is asyncio.wait, exceptions raised
        # inside container_stats() are not re-raised here — confirm that this
        # best-effort behaviour is intended.
        loop.run_until_complete(wait(t))
    except Exception as e:
        # Report the failure as a server error so the scrape is not mistaken
        # for a successful one.
        return f"Error running script: {str(e)}", 500
    finally:
        # A loop is created per request; close it so its resources are freed.
        if loop is not None:
            loop.close()

    # generate the latest value of metrics
    return Response(generate_latest(), mimetype='text/plain')

def create_app():
Expand Down
Loading