系统监视

一. 监视并重启httpd服务

#!/bin/bash
# Apache process monitor.
# Restarts the Apache web server when no httpd process is running.
# Intended to be run periodically (for example from cron).

# Restart command for RHEL / CentOS / Fedora Linux.
RESTART="service httpd restart"

# Uncomment if you are using Debian / Ubuntu Linux.
#RESTART="/etc/init.d/apache2 restart"

# Path to the pgrep command.
PGREP="/usr/bin/pgrep"

# pgrep exits with a non-zero status when no process matches, which is the
# "server is down" case. Its PID listing is not needed, so it is discarded.
if ! "$PGREP" httpd >/dev/null; then
  # Deliberately unquoted: RESTART holds a command together with its arguments.
  $RESTART
fi


check process

#!/bin/bash
# Start httpd when its executable path does not show up in the process list.
# Exit status: 0 when the process is present, otherwise the status of the
# start command.

process="/usr/sbin/httpd"
start="service httpd restart"

# Take the fifth column of `ps ax` and look for a line that is exactly
# $process.
if ps ax | awk '{print $5}' | grep -q "^$process$"; then
  exit 0
fi

# Apparently not running: launch it and hand back its exit status.
eval "$start"
exit $?


check apache

#!/bin/bash
# Fetch a page from the local web server and restart Apache when the page
# cannot be retrieved within the timeout or does not contain the expected
# marker text.
# Exit status: 0 when the page looks fine, otherwise the status of the
# restart command.

URL="http://localhost/netsaint/index.html"
TIMEOUT=10 # seconds
MATCH="<HTML>"
restart="service httpd restart"

# The short option takes its value as a separate word ("-T 10"); writing
# "-T=10" passes the string "=10" to wget, which is not a valid time, so the
# download would fail and trigger a restart on every run.
if wget -q -O - -T "$TIMEOUT" --tries=1 "$URL" | grep -q "$MATCH"; then
  exit 0
fi

# Something is wrong, so restart and report the restart command's status.
eval "$restart"
exit $?


check port

#!/bin/bash
# Restart httpd when nothing is listening on the configured TCP port.
# Exit status: 0 when the port is open, otherwise the status of the restart
# command.

port="80"
restart="service httpd restart"

# Column 4 of the tcp lines printed by `netstat -ln` is the local address;
# the port is open when one of them ends in ":<port>".
if netstat -ln | awk '/^tcp/ {print $4}' | grep -q ":$port$"; then
  exit 0
fi

# Apparently not listening, so run the restart command.
eval "$restart"
exit $?


log watch all

#!/bin/bash
# Run logwatch once for every configured host and mail each report to the
# address listed next to that host.

# One "hostname|mail-address" record per line.
HOSTS='
host1|admin1@someplace.com
host2|admin2@someplace.com
host3|admin3@someplace.com
'

# $HOSTS is expanded unquoted on purpose so that each record becomes one word.
for entry in $HOSTS; do
  # Text before the first "|" is the host name ...
  host=${entry%%|*}
  # ... and text after the last "|" is the recipient.
  email=${entry##*|}
  logwatch --mailto "$email" --hostname "$host"
done


web

#!/bin/bash
# Web server watchdog. Runs forever: every SLEEP seconds it fetches URL and,
# when the page cannot be retrieved within TIMEOUT seconds or does not
# contain MATCH, reports an error and restarts httpd.

SLEEP=30   # seconds between two checks
TIMEOUT=10 # seconds
URL='http://localhost/index.html'
MATCH='<HTML>'

while true; do
  # "-T" takes its value as a separate word; "-T=10" would pass "=10" to
  # wget, which is not a valid time and makes every check fail.
  if ! wget -q -O - -T "$TIMEOUT" --tries=1 "$URL" | grep -q "$MATCH"; then
    /usr/local/sbin/report ERROR "Web server not responding"
    # Only announce the restart when the init script reports success.
    if /etc/rc.d/init.d/httpd restart; then
      /usr/local/sbin/report INFO "Web server restarted"
    fi
  fi
  sleep "$SLEEP"
done


log watch example

#!/bin/bash

# Installation note: this file belongs in /etc/log.d/scripts/services and
# must be named after your configuration file (a config file called
# 'example.conf' requires this file to be called 'example').

# A small demonstration filter that shows which lines you would be
# processing and reporting on: it prints the standard environment
# variables first and then copies STDIN to STDOUT unchanged.

# The standard environment variables, one per output line.
printf '%s\n' \
  "Date Range: $LOGWATCH_DATE_RANGE" \
  "Detail Level: $LOGWATCH_DETAIL_LEVEL" \
  "Temp Dir: $LOGWATCH_TEMP_DIR" \
  "Debug Level: $LOGWATCH_DEBUG"

# Pass everything arriving on STDIN straight through to STDOUT.
cat


按如下方式设置调度作业(cronjob):
*/5 * * * * /path/to/script.sh >/dev/null 2>&1

二. 下面的脚本用来监视系统进程,根据指定的服务端口是否打开,输出相应的信息.

执行该脚本,看能否得到正确的结果,如果不能,修改脚本使之能正常运行.

#!/bin/bash
# Shell script to monitor running services such as web/http, ssh, mail etc.
# For every configured service it checks whether a TCP socket is listening
# on the service's port, prints "<name>: Ok" or "<name>: No", and sends an
# e-mail to ADMINEMAIL for each service that is not listening.

# Service ports.
ports="22 80 25"

# Service names, in the same order as the ports above.
service="SSH WEB MAIL"

# E-mail address that receives the alerts.
ADMINEMAIL="admin@myispname.com"

# netstat command; use sudo if you want, i.e. NETSTAT="sudo /bin/netstat".
NETSTAT="/bin/netstat"

# is_listening PORT LIST
# Succeeds when PORT is equal to one complete line of the newline-separated
# LIST of port numbers.
is_listening() {
  grep -qxF -- "$1" <<<"$2"
}

# listening_ports
# Prints the port number of every listening TCP socket, one per line.
# Returns non-zero when netstat itself fails.
listening_ports() {
  local raw
  # Unquoted on purpose so that NETSTAT may contain "sudo /bin/netstat".
  raw=$($NETSTAT -tln) || return 1
  # Column 4 is the local address ("0.0.0.0:22", ":::22", ...). The port is
  # the text after the LAST colon, which also works for IPv6 addresses.
  awk '/^tcp/ { n = split($4, part, ":"); print part[n] }' <<<"$raw"
}

# main
# Walks over the port/name pairs and reports the state of each one.
main() {
  local open i sname sstatus
  local -a port_list name_list

  # Without netstat output every service would look dead; bail out instead
  # of mailing false alarms.
  if ! open=$(listening_ports); then
    echo "Cannot read the listening sockets using: $NETSTAT" >&2
    return 1
  fi

  read -r -a port_list <<<"$ports"
  read -r -a name_list <<<"$service"

  echo "Running services status:"
  for i in "${!port_list[@]}"; do
    sname=${name_list[i]}
    if is_listening "${port_list[i]}" "$open"; then
      # Display service status as Ok.
      echo "$sname: Ok"
    else
      # Display service status as No and alert the administrator.
      sstatus="$sname: No"
      echo "$sstatus"
      echo "$sstatus" | mail -s "service down $sstatus" "$ADMINEMAIL"
    fi
  done
}

main

三. 下面的脚本用ping来检查远程主机是否有响应,输出相应的信息.

执行该脚本,看能否得到正确的结果,如果不能,修改脚本使之能正常运行.

#!/bin/bash
# Simple shell script for Linux and UNIX system monitoring with the
# ping command. Every host in HOSTS is pinged COUNT times; when a host does
# not answer, a mail with subject SUBJECT is sent to EMAILID.

# Add IPs / hostnames separated by white space.
HOSTS="cyberciti.biz theos.in router"

# Number of ping requests sent to each host.
COUNT=1

# E-mail report settings.
SUBJECT="Ping failed"
EMAILID="me@mydomain.com"

# HOSTS is expanded unquoted on purpose: one word per host.
for myHost in $HOSTS; do
  # Decide on ping's exit status instead of parsing its summary line: when
  # a host name cannot be resolved ping prints no "received" line at all,
  # which left the parsed counter empty and broke the numeric test, so no
  # alert was ever sent for such hosts.
  if ! ping -c "$COUNT" "$myHost" >/dev/null 2>&1; then
    # 100% failed
    echo "Host : $myHost is down (ping failed) at $(date)" | mail -s "$SUBJECT" "$EMAILID"
  fi
done


check drives

#!/bin/bash
# Report every mounted drive of the selected filesystem types whose usage
# is above the cutoff percentage. The resulting lines are piped into the
# report tool as one WARNING.

types="ext2|ext3|ufs|vfat"
cutoff="90"

# full_drives CUTOFF
# Reads `df -P` output on stdin and prints one line for every filesystem
# (rows starting with "/") whose capacity column is greater than CUTOFF.
full_drives() {
  awk -v "cutoff=$1" '/^\// {
    gsub(/%$/, "", $5)
    # gsub leaves $5 as a string; without "+ 0" the test can become a string
    # comparison in which "100" sorts before "90" and a full drive is missed.
    if ($5 + 0 > cutoff + 0)
      print "Drive " $1 " (" $6 ") is " $5 "% Full"
  }'
}

# The first column of each matching `mount` line is the device.
mount | awk -v "re=type ($types)" '$0 ~ re {print $1}' | while read -r drive; do
  # -P keeps each filesystem on a single line even for long device names,
  # so the column numbers used by full_drives stay valid.
  df -P "$drive" | full_drives "$cutoff"
done | /usr/local/sbin/report --stdin WARNING 'Drives almost full'


check for failure

#!/bin/bash
# Search /var/log/messages for disk and RAID failure messages and pipe any
# matching lines into the report tool with CRITICAL severity.
# `grep -E` replaces the obsolescent `egrep` command; the patterns are
# unchanged.

# Check for any drive failures.
grep -E 'I/O error: dev .+, sector' /var/log/messages \
  | /usr/local/sbin/report --stdin CRITICAL 'Drives failure'

# Check for any RAID failures.
grep -E 'raid.*Disk failure on' /var/log/messages \
  | /usr/local/sbin/report --stdin CRITICAL 'RAID drive failure'


services

#!/bin/bash
# Service watchdog. Runs forever: every SLEEP seconds it reads
# /usr/local/etc/services.conf, where each line has the form
# "process-name|restart-command", and for every process that is not found
# running with parent PID 1 it reports an error and runs the restart
# command.

SLEEP=30 # seconds

while true; do
  # The config is read on file descriptor 3 so that a restart command which
  # reads its standard input cannot swallow the remaining config lines.
  # -r keeps backslashes in the config intact.
  while read -r line <&3; do
    # Skip blank lines and "#" comment lines; otherwise each would be
    # reported as a missing process on every cycle.
    # NOTE(review): "#" as a comment marker is an assumption about the
    # config format - confirm.
    case "$line" in
      '' | '#'*) continue ;;
    esac

    # Text before the first "|" is the process name, text after the last
    # "|" is the restart command.
    match=${line%%|*}
    cmd=${line##*|}

    # awk exits 1 as soon as it sees the process with parent PID 1, so a
    # zero exit status means the process was NOT found.
    if ps ax -o '%c %P' | awk -v "process=$match" \
      '{if (($1 == process) && ($2 == 1)) exit 1}'; then
      # Process not found!
      /usr/local/sbin/report ERROR "Process $match not running"
      # $cmd is unquoted on purpose: it holds a command with its arguments.
      if [ -n "$cmd" ] && $cmd; then
        /usr/local/sbin/report INFO \
          "Process $match was restarted ($cmd)"
      fi
    fi
  done 3< /usr/local/etc/services.conf
  sleep "$SLEEP"
done

练习

1. 监控磁盘空闲空间,如果小于30%,则发送警告(提示信息或邮件).

2. 编写脚本,找出系统没有设置密码的用户,并将它们锁定(passwd -l username).