@@ -16,43 +16,52 @@ cat <<"EOF"
1616EOF
1717
1818add () {
19+ echo -e " \n IMPORTANT: Tag-Based Monitoring Enabled"
20+ echo " Only VMs and containers with the tag 'mon-restart' will be automatically restarted by this service."
21+ echo
22+ echo " 🔧 How to add the tag:"
23+ echo " → Proxmox Web UI: Go to VM/CT → Options → Tags → Add 'mon-restart'"
24+ echo " → CLI: qm set <vmid> -tags mon-restart"
25+ echo " pct set <ctid> -tags mon-restart"
26+ echo
27+
1928 while true ; do
20- read -p " This script will add Monitor All to Proxmox VE. Proceed(y/n)?" yn
29+ read -p " This script will add Monitor All to Proxmox VE. Proceed (y/n)? " yn
2130 case $yn in
22- [Yy]* ) break ;;
23- [Nn]* ) exit ;;
24- * ) echo " Please answer yes or no." ;;
31+ [Yy]* ) break ;;
32+ [Nn]* ) exit ;;
33+ * ) echo " Please answer yes or no." ;;
2534 esac
2635 done
2736
28- echo ' #!/usr/bin/env bash
37+ cat << 'EOF ' >/usr/local/bin/ping-instances.sh
38+ #!/usr/bin/env bash
39+
2940# Read excluded instances from command line arguments
3041excluded_instances=("$@")
3142echo "Excluded instances: ${excluded_instances[@]}"
3243
3344while true; do
3445
35- for instance in $(pct list | awk ' \' ' {if( NR>1) print $1}' \' ' ; qm list | awk ' \' ' {if( NR>1) print $1}' \' ' ); do
46+ for instance in $(pct list | awk 'NR>1 { print $1}'; qm list | awk 'NR>1 { print $1}'); do
3647 # Skip excluded instances
3748 if [[ " ${excluded_instances[@]} " =~ " ${instance} " ]]; then
3849 echo "Skipping $instance because it is excluded"
3950 continue
4051 fi
4152
42- # Determine the type of the instance (container or virtual machine)
53+ # Determine type and set config command
4354 if pct status $instance >/dev/null 2>&1; then
44- # It is a container
55+ type="ct"
4556 config_cmd="pct config"
46- IP=$(pct exec $instance ip a s dev eth0 | awk ' \' ' /inet / {print $2}' \' ' | cut -d/ -f1)
4757 else
48- # It is a virtual machine
58+ type="vm"
4959 config_cmd="qm config"
50- IP=$(qm guest cmd $instance network-get-interfaces | egrep -o "([0-9]{1,3}\.){3}[0-9]{1,3}" | grep -E "192\.|10\." | head -n 1)
5160 fi
5261
53- # Skip instances based on onboot and templates
62+ # Skip templates and onboot-disabled
5463 onboot=$($config_cmd $instance | grep -q "onboot: 0" || ( ! $config_cmd $instance | grep -q "onboot" ) && echo "true" || echo "false")
55- template=$($config_cmd $instance | grep template | grep -q "template:" && echo "true" || echo "false")
64+ template=$($config_cmd $instance | grep -q "^ template:" && echo "true" || echo "false")
5665
5766 if [ "$onboot" == "true" ]; then
5867 echo "Skipping $instance because it is set not to boot"
@@ -62,36 +71,49 @@ while true; do
6271 continue
6372 fi
6473
65- # Ping the instance
66- if ! ping -c 1 $IP >/dev/null 2>&1; then
67- # If the instance can not be pinged, stop and start it
68- if pct status $instance >/dev/null 2>&1; then
69- # It is a container
70- echo "$(date): CT $instance is not responding, restarting..."
71- pct stop $instance >/dev/null 2>&1
72- sleep 5
73- pct start $instance >/dev/null 2>&1
74+ # Check for mon-restart tag
75+ has_tag=$($config_cmd $instance | grep -q "tags:.*mon-restart" && echo "true" || echo "false")
76+ if [ "$has_tag" != "true" ]; then
77+ echo "Skipping $instance because it does not have 'mon-restart' tag"
78+ continue
79+ fi
80+
81+ # Responsiveness check and restart if needed
82+ if [ "$type" == "vm" ]; then
83+ # Check if guest agent responds
84+ if qm guest cmd $instance ping >/dev/null 2>&1; then
85+ echo "VM $instance is responsive via guest agent"
7486 else
75- # It is a virtual machine
87+ echo "$(date): VM $instance is not responding to agent ping, restarting..."
7688 if qm status $instance | grep -q "status: running"; then
77- echo "$(date): VM $instance is not responding, restarting..."
7889 qm stop $instance >/dev/null 2>&1
7990 sleep 5
80- else
81- echo "$(date): VM $instance is not running, starting..."
8291 fi
8392 qm start $instance >/dev/null 2>&1
8493 fi
94+ else
95+ # Container: get IP and ping
96+ IP=$(pct exec $instance ip a s dev eth0 | awk '/inet / {print $2}' | cut -d/ -f1)
97+ if ! ping -c 1 $IP >/dev/null 2>&1; then
98+ echo "$(date): CT $instance is not responding, restarting..."
99+ pct stop $instance >/dev/null 2>&1
100+ sleep 5
101+ pct start $instance >/dev/null 2>&1
102+ else
103+ echo "CT $instance is responsive"
104+ fi
85105 fi
86106 done
87107
88- # Wait for 5 minutes. (Edit to your needs)
89108 echo "$(date): Pausing for 5 minutes..."
90109 sleep 300
91- done >/var/log/ping-instances.log 2>&1' > /usr/local/bin/ping-instances.sh
110+
111+ done >/var/log/ping-instances.log 2>&1
112+ EOF
113+
92114 touch /var/log/ping-instances.log
93- # Change file permissions to executable
94115 chmod +x /usr/local/bin/ping-instances.sh
116+
95117 cat << EOF >/etc/systemd/system/ping-instances.timer
96118[Unit]
97119Description=Delay ping-instances.service by 5 minutes
@@ -104,17 +126,17 @@ OnUnitActiveSec=300
104126WantedBy=timers.target
105127EOF
106128
107- # Create ping-instances.service
108129 cat << EOF >/etc/systemd/system/ping-instances.service
109130[Unit]
110- Description=Ping instances every 5 minutes and restarts if necessary
131+ Description=Ping instances every 5 minutes and restart if necessary
111132After=ping-instances.timer
112133Requires=ping-instances.timer
134+
113135[Service]
114136Type=simple
115- # To specify which CT/VM should be excluded, add the CT/VM ID at the end of the line where ExecStart=/usr/local/bin/ping-instances.sh is specified.
116- # For example: ExecStart=/usr/local/bin/ping-instances.sh 100 102
117- # Virtual machines without the QEMU guest agent installed must be excluded.
137+ # To exclude specific instances, pass IDs to ExecStart, e.g.:
138+ # ExecStart=/usr/local/bin/ping-instances.sh 100 200
139+ # Instances must also have the 'mon-restart' tag to be monitored
118140
119141ExecStart=/usr/local/bin/ping-instances.sh
120142Restart=always
@@ -125,39 +147,33 @@ StandardError=file:/var/log/ping-instances.log
125147WantedBy=multi-user.target
126148EOF
127149
128- # Reload daemon, enable and start ping-instances.service
129150 systemctl daemon-reload
130151 systemctl enable -q --now ping-instances.timer
131152 systemctl enable -q --now ping-instances.service
132153 clear
133- echo -e " \n To view Monitor All logs: cat /var/log/ping-instances.log"
154+ echo -e " \n Monitor All installed."
155+ echo " 📄 To view logs: cat /var/log/ping-instances.log"
156+ echo " ⚙️ Make sure your VMs or containers have the 'mon-restart' tag to be monitored."
134157}
135158
#######################################
# Uninstall Monitor-All from the host.
# Stops and disables the ping-instances timer and service, deletes the two
# unit files, the monitor script and its log, then reloads systemd so the
# deleted units are dropped from its loaded configuration.
# Globals:   none
# Arguments: none
# Outputs:   confirmation message on stdout
#######################################
remove () {
  systemctl disable -q --now ping-instances.timer
  systemctl disable -q --now ping-instances.service

  # -f keeps the uninstall quiet when a file is already gone
  # (e.g. a partial install or a repeated removal).
  rm -f /etc/systemd/system/ping-instances.service
  rm -f /etc/systemd/system/ping-instances.timer
  rm -f /usr/local/bin/ping-instances.sh
  rm -f /var/log/ping-instances.log

  # Without a reload systemd keeps the removed unit definitions loaded.
  systemctl daemon-reload

  echo " Monitor All removed from Proxmox VE"
}
142168
143- # Define options for the whiptail menu
# Menu entries as whiptail <tag> <description> pairs; the tag is the value
# reported back for the selected row.
OPTIONS=(Add " Add Monitor-All to Proxmox VE"
  Remove " Remove Monitor-All from Proxmox VE")

# whiptail prints the selected tag on stderr, so stdout and stderr are swapped
# (3>&1 1>&2 2>&3) to capture the tag in CHOICE while the UI still reaches the
# terminal. The array is expanded without surrounding padding so every tag
# arrives exactly as defined above.
CHOICE=$(whiptail --backtitle " Proxmox VE Helper Scripts" --title " Monitor-All for Proxmox VE" --menu " Select an option:" 10 58 2 \
  "${OPTIONS[@]}" 3>&1 1>&2 2>&3)

# Dispatch on the bare tag. Any other value, including an empty CHOICE,
# falls through to the default arm and exits cleanly.
case "$CHOICE" in
  "Add") add ;;
  "Remove") remove ;;
  *) echo " Exiting..."; exit 0 ;;
esac
0 commit comments