This is a sh-compliant check plugin for Nagios that lets you monitor whether a specific process is running and see how much CPU/memory it’s wasting. Checking that a process is running can also be accomplished with the check_procs plugin, which is included in the official Nagios-Plugins package, but that plugin doesn’t generate any performance output. At least that’s what I’ve experienced lately (I’d have written the plugin anyway for training purposes).
The script
No special needs, just the basic stuff. The script parses the output of ps aux in different ways. You need to provide a specific process via -p/--process; if warning/critical thresholds are wanted, you may also choose between cpu and memory thresholds via -t/--target (default is memory).
As always, you may grab the plugin at MonitoringExchange or directly from here via SVN and the copy’n'paste way below.
user@host: ~ $ svn co svn://svn.matejunkie.com/nagios-plugins/stable/check_ps/ check_ps/
#!/bin/sh
# check_ps.sh - Nagios plugin that checks a single process via `ps aux`.
#
# Reports whether a process matching the given string is running, together
# with its CPU usage, memory usage, start time and accumulated CPU time, and
# emits Nagios performance data for the three numeric values.
#
# Usage: check_ps.sh -p <string> [-w <warn>] [-c <crit>] [-t cpu|mem]
#
# All messages go to stdout on purpose: Nagios displays the plugin's stdout.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA

PROGNAME=$(basename "$0")
VERSION="Version 1.0,"
AUTHOR="2009, Mike Adolphs (http://www.matejunkie.com/)"

# Nagios plugin exit codes.
ST_OK=0
ST_WR=1
ST_CR=2
ST_UK=3

# Defaults. warning/critical are initialised explicitly so that variables of
# the same name inherited from the environment cannot switch thresholds on.
process="cron"
target="mem"
warning=""
critical=""

# Print version and author on one line.
print_version() {
    echo "$VERSION $AUTHOR"
}

# Print the usage text and exit with UNKNOWN.
print_help() {
    print_version
    echo ""
    echo "$PROGNAME is a Nagios plugin to check a specific process via ps."
    echo "You may provide any string as an argument to match a specific"
    echo "process. Please note that the output could be distorted if the"
    echo "argument matches various processes, so please make sure to use"
    echo "unique strings to match a process."
    echo ""
    echo "$PROGNAME -p firefox [-w 10] [-c 20] [-t cpu]"
    echo ""
    echo "Options:"
    echo " -p/--process)"
    echo " You need to provide a string for which the ps output is then"
    echo " then \"greped\"."
    echo " -w/--warning)"
    echo " Defines a warning level for a target which is explained"
    echo " below. Default is: off"
    echo " -c/--critical)"
    echo " Defines a critical level for a target which is explained"
    echo " below. Default is: off"
    echo " -t/--target)"
    echo " A target can be defined via -t. Choose between cpu and mem."
    echo " Default is: mem"
    exit $ST_UK
}

# Exit with UNKNOWN unless the option named in $1 is followed by a value.
# $1 - option as typed by the user, $2 - number of arguments still unparsed.
need_value() {
    if [ "$2" -lt 2 ]; then
        echo "Missing value for argument: $1"
        exit $ST_UK
    fi
}

# Return 0 if $1 is a non-empty string of decimal digits, 1 otherwise.
is_uint() {
    case "$1" in
        ''|*[!0-9]*) return 1 ;;
        *) return 0 ;;
    esac
}

# Command line parsing. Looping on $# (instead of testing "$1" for emptiness)
# keeps an empty argument from silently ending the parse.
while [ $# -gt 0 ]; do
    case "$1" in
        --help|-help|-h)
            # "-help" is kept for backward compatibility.
            print_help
            ;;
        --version|-v)
            print_version
            exit $ST_UK
            ;;
        --process|-p)
            need_value "$1" $#
            process=$2
            shift
            ;;
        --target|-t)
            need_value "$1" $#
            target=$2
            shift
            ;;
        --warning|-w)
            need_value "$1" $#
            warning=$2
            shift
            ;;
        --critical|-c)
            need_value "$1" $#
            critical=$2
            shift
            ;;
        *)
            echo "Unknown argument: $1"
            print_help
            ;;
    esac
    shift
done

# Classify the warning/critical combination into $wcdiff:
#   (empty) thresholds are usable, or none were given
#   1       warning is greater than critical
#   2       only warning was given
#   3       only critical was given
#   4       both were given but at least one is not a non-negative integer
get_wcdiff() {
    wcdiff=""
    if [ -n "$warning" ] && [ -n "$critical" ]; then
        wclvls=1
        if ! is_uint "$warning" || ! is_uint "$critical"; then
            wcdiff=4
        elif [ "$warning" -gt "$critical" ]; then
            wcdiff=1
        fi
    elif [ -n "$warning" ]; then
        wcdiff=2
    elif [ -n "$critical" ]; then
        wcdiff=3
    fi
}

# Exit with UNKNOWN and an explanation if get_wcdiff flagged a problem.
val_wcdiff() {
    case "$wcdiff" in
        1)
            echo "Please adjust your warning/critical thresholds. The warning must be lower than the critical level!"
            exit $ST_UK
            ;;
        2)
            echo "Please also set a critical value when you want to use warning/critical thresholds!"
            exit $ST_UK
            ;;
        3)
            echo "Please also set a warning value when you want to use warning/critical thresholds!"
            exit $ST_UK
            ;;
        4)
            echo "Please use non-negative integers for the warning/critical thresholds!"
            exit $ST_UK
            ;;
    esac
}

# Find the process in `ps aux` and fill ps_user, ps_pid, ps_cpu, ps_mem,
# ps_start and ps_cputime from the first matching line.
# Exits with UNKNOWN on an empty process string and with CRITICAL when
# nothing matches.
get_vals() {
    if [ -z "$process" ]; then
        # An empty pattern would match every line of the ps output.
        echo "Please provide a process string via -p/--process!"
        exit $ST_UK
    fi

    # Bracket the first character ("cron" -> "[c]ron"): the pattern still
    # matches "cron", but no longer matches the grep command line itself.
    # The search pattern lives in its own variable so $process stays exactly
    # what the user typed.
    ps_pattern=$(printf '%s\n' "$process" | sed 's/^./[&]/')

    # The second grep drops this script's own ps entry, whose command line
    # contains the process string as an argument. -F takes the script path
    # literally instead of as a regular expression.
    tmp_output=$(ps aux | grep -- "$ps_pattern" | grep -v -F -- "$0")

    if [ -z "$tmp_output" ]; then
        echo "CRITICAL - Process is not running!"
        exit $ST_CR
    fi

    # Only the first match is evaluated. Reading it from a here-document
    # avoids the pathname expansion an unquoted expansion would apply to
    # command lines containing glob characters.
    first_match=$(printf '%s\n' "$tmp_output" | head -n 1)
    read -r ps_user ps_pid ps_cpu ps_mem _vsz _rss _tty _stat ps_start \
        tmp_ps_cputime _command <<EOF
$first_match
EOF

    # The TIME column of BSD-style `ps aux` is minutes:seconds, so the value
    # reported as "min" is minutes plus the fraction contributed by seconds.
    ps_cputime=$(printf '%s\n' "$tmp_ps_cputime" |
        awk -F: '{ printf "%.2f", $1 + $2 / 60 }')
}

# Round the value selected by $target (ps_cpu or ps_mem) half-up to an
# integer in $wc_target so it can be compared with the integer thresholds.
# Does nothing unless both thresholds are set; exits with UNKNOWN when the
# thresholds are set but the target is neither cpu nor mem.
do_wccalc() {
    if [ -n "$warning" ] && [ -n "$critical" ]; then
        case "$target" in
            cpu) wc_raw=$ps_cpu ;;
            mem) wc_raw=$ps_mem ;;
            *)
                echo "Unknown target: $target. Please choose between cpu and mem!"
                exit $ST_UK
                ;;
        esac
        wc_target=$(printf '%s\n' "$wc_raw" |
            awk '{ printf "%d", int($1 + 0.5) }')
    fi
}

# Build the human-readable part of the plugin output in $output.
do_output() {
    output="Process: ${process}, User: ${ps_user}, CPU: ${ps_cpu}%, RAM: ${ps_mem}%, Start: ${ps_start}, CPU Time: ${ps_cputime} min"
}

# Build the Nagios performance data in $perfdata.
do_perfdata() {
    perfdata="'cpu'=${ps_cpu} 'memory'=${ps_mem} 'cputime'=${ps_cputime}"
}

# Here we go!
get_wcdiff
val_wcdiff
get_vals
do_wccalc
do_output
do_perfdata

if [ -n "$warning" ] && [ -n "$critical" ]; then
    # val_wcdiff guarantees warning <= critical, so testing critical first
    # yields the same result as a range test on warning.
    if [ "$wc_target" -ge "$critical" ]; then
        echo "CRITICAL - ${output} | ${perfdata}"
        exit $ST_CR
    elif [ "$wc_target" -ge "$warning" ]; then
        echo "WARNING - ${output} | ${perfdata}"
        exit $ST_WR
    else
        echo "OK - ${output} | ${perfdata}"
        exit $ST_OK
    fi
else
    echo "OK - ${output} | ${perfdata}"
    exit $ST_OK
fi
PNP Template
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#
# PNP Template for check_ps.sh
# Author: Mike Adolphs (http://www.matejunkie.com/)
#
# Draws two graphs from the plugin's performance data, which check_ps.sh
# emits in the order cpu, memory, cputime (data sources 1, 2 and 3):
#   graph 1 - CPU and memory usage in percent
#   graph 2 - accumulated CPU time in minutes
#
# NOTE(review): this listing does not show an opening PHP tag; confirm that
# the deployed template file starts with one.

# Graph 1: percentages, so the y-axis is pinned to 0..100.
$opt[1] = "--vertical-label \"percent\" -u 100 -l 0 -r --title \"CPU/Memory Usage for $hostname / $servicedesc\" ";

# Graph 2: CPU time has no upper bound, so only the lower limit is fixed and
# the upper end is left to autoscale instead of being clipped at 100.
$opt[2] = "--vertical-label \"minutes\" -l 0 --title \"cputime for $hostname / $servicedesc\" ";

# Each $def[n] is started with "=" and extended with ".=" afterwards, so no
# element is appended to before it exists.
$def[1]  = "DEF:cpu=$rrdfile:$DS[1]:AVERAGE " ;
$def[1] .= "DEF:memory=$rrdfile:$DS[2]:AVERAGE " ;
$def[1] .= "COMMENT:\"\\t\\t\\tLAST\\t\\t\\tAVERAGE\\t\\t\\tMAX\\n\" " ;
$def[1] .= "LINE2:cpu#E80C3E:\"CPU\\t\\t\" " ;
$def[1] .= "GPRINT:cpu:LAST:\"%6.2lf %%\\t\\t\" " ;
$def[1] .= "GPRINT:cpu:AVERAGE:\"%6.2lf \\t\\t\" " ;
$def[1] .= "GPRINT:cpu:MAX:\"%6.2lf \\n\" " ;
$def[1] .= "LINE2:memory#008000:\"Memory\\t\" " ;
$def[1] .= "GPRINT:memory:LAST:\"%6.2lf %%\\t\\t\" " ;
$def[1] .= "GPRINT:memory:AVERAGE:\"%6.2lf \\t\\t\" " ;
$def[1] .= "GPRINT:memory:MAX:\"%6.2lf \\n\" " ;

$def[2]  = "DEF:cputime=$rrdfile:$DS[3]:AVERAGE " ;
$def[2] .= "COMMENT:\"\\t\\t\\tLAST\\t\\t\\tAVERAGE\\t\\t\\tMAX\\n\" " ;
$def[2] .= "AREA:cputime#E80C3E:\"CPUTime\\t\" " ;
$def[2] .= "GPRINT:cputime:LAST:\"%6.2lf min\\t\\t\" " ;
$def[2] .= "GPRINT:cputime:AVERAGE:\"%6.2lf min\\t\\t\" " ;
$def[2] .= "GPRINT:cputime:MAX:\"%6.2lf min\\n\" " ;
The License
As always, this little script is meant to be sh-compliant and is released under the terms of the GPL Version 2 only. Feel free to subscribe via RSS to get updates on this one. More options will be added in the future.
Hi,
nice script, going to use it. Thanks a lot!
But why do you do this “&”-thing when you read the process name?
Thanks for the reply. Always good to hear that someone has a use for it.
This “&”-thing is probably the following sed command: sed 's/^.\|[a-z][A-Z] /\[&]/g'. It adds square brackets around the first letter so that the grep process itself is filtered out when grepping for a specific process.
Hello Mike,
Hey Im not getting valid perfdata … any ideas ?
Thanks
2009-07-28 15:55:00 [11104] [2] Template is check_ps.php
2009-07-28 15:55:00 [11104] [2] No pattern match in function _parse(] )
2009-07-28 15:55:00 [11104] [1] Invalid Perfdata detected
Line 212 of check_ps has an extra ] that makes the script create invalid perfdata!
Thank you Joao for mentioning the bug and sorry for my late answer. I’m just a little busy at the moment since everyone else is on holiday! ;-)
I fixed it in the article, will fix it on the exchanges as well.
I just want to thank you for the good scripts!
I am new in the monitoring business and succeed in installing nagios/cacti/pnp but do not know yet to use them. Indeed, I want to monitor (CPU,Memory and VMemory into graph)three processes of one server. I was sure that it was possible and began to give up hope before I google your blog. Can you help me please ?
I want to know :
1) how to integrate for instance your plugin check_ps to nagios
2) how to collect the datas in pnp or cacti ?
3) Is it working to monitor processes on windows machine ?
Thanks for your help
I used your plugin to monitor remote host with nrpe and it does not work. Do you have an idea where it may come from ?
I finally succeed in using your plugin with nrpe. However in my nagios dashboard, it seems that the results are the same for all services I monitored even if I used different process name. Do you have an idea of where it may come from ?
Hi Mike , Nice script works perfect. I have only one issue
how do I use it on a remote host not -H like the scripts I know off , remote execute works perfect but nagios doesn’t understand ? thanks a lot for your help
Cheers Rob de Jongh
Thanks for this script, great work!
Here’s a patch to support aggregating CPU/MEM usage of multiple processes with the same name (ex.: httpd, spamd, etc.), and optimized/simplified by removing the User/Start/CPU-Time outputs (and ps_pid which was unused anyway):
--- check_ps.sh.orig 2010-07-03 01:52:51.000000000 -0500
+++ check_ps.sh 2010-07-03 02:19:10.000000000 -0500
@@ -131,7 +131,7 @@
get_vals() {
process=`echo ${process} | sed 's/^.\|[a-z][A-Z] /\[&]/g'`
- tmp_output=`ps aux | grep "$process" | grep -v $0`
+ tmp_output="`ps aux | grep \"$process\" | grep -v $0`"
if [ -z "$tmp_output" ]
then
@@ -139,17 +139,8 @@
exit $ST_CR
fi
- ps_user=`echo ${tmp_output} | awk '{print $1}'`
- ps_pid=`echo ${tmp_output} | awk '{print $2}' `
- ps_cpu=`echo ${tmp_output} | awk '{print $3}'`
- ps_mem=`echo ${tmp_output} | awk '{print $4}' `
- ps_start=`echo ${tmp_output} | awk '{print $9}' `
-
- tmp_ps_cputime=`echo ${tmp_output} | awk '{print $10}'`
- tmp_ps_cpuhours=`echo ${tmp_ps_cputime} | awk -F \: '{print $1}'`
- tmp_ps_cpumin=`echo ${tmp_ps_cputime} | awk -F \: '{print $2}'`
- ps_cputime=`echo "scale=0; (${tmp_ps_cpuhours} * 60) + \
-${tmp_ps_cpumin}" | bc -l`
+ ps_cpu=`echo "${tmp_output}" | awk '{sum += $3; print sum}' | tail -1`
+ ps_mem=`echo "${tmp_output}" | awk '{sum += $4; print sum}' | tail -1`
}
do_wccalc() {
@@ -165,8 +156,8 @@
else
wc_target=`echo ${ps_cpu} | awk -F \. '{print $1}'`
fi
- elif [ "$target" = "mem" ]
- then
+ elif [ "$target" = "mem" ]
+ then
tmp_wc_target=`echo ${ps_mem} | awk -F \. '{print $2}'`
if [ "$tmp_wc_target" -ge 5 ]
then
@@ -182,12 +173,11 @@
do_output() {
process=`echo ${process} | sed 's/\[//g' | sed 's/\]//g'`
- output="Process: ${process}, User: ${ps_user}, CPU: ${ps_cpu}%, \
-RAM: ${ps_mem}%, Start: ${ps_start}, CPU Time: ${ps_cputime} min"
+ output="Process: ${process}, CPU: ${ps_cpu}%, RAM: ${ps_mem}%"
}
do_perfdata() {
- perfdata="'cpu'=${ps_cpu} 'memory'=${ps_mem} 'cputime'=${ps_cputime}"
+ perfdata="'cpu'=${ps_cpu} 'memory'=${ps_mem}"
}
# Here we go!
in case patch formatting got messed up by WP in my previous comment, here’s a link:
http://pastebin.com/88TJybdv