#!/bin/bash
# version: 2008-08-05.01

# wassup

# wassup is a simple host monitor that checks if a host is online:
#  - Pings the target host
#  - Sends an email if the host is unresponsive
#  - Creates a lockfile to prevent repeated notifications
#  - Sends an email when host is responsive again

# It is designed to be run from any location by an ordinary user, after editing
# the necessary variables below ($SAFETY, $BASEDIR, & $MAILTO).

# After testing from the command line, run from cron at desired interval:
# */5 * * * * /home/bob/wassup/wassup www.example.edu >/dev/null

# by Jorey Bump
# http://www.joreybump.com/code/wassup/

# Exit status:
#   0  normal completion (target up, or target down and handled)
#   1  usage error (wrong number of arguments or unusable target name)
#   2  the safety host did not respond (local network problem)

# Set local variables here:

# The safety host is a reliable external host to ping in order to prove the
# network connection is working:
SAFETY="www.example.com"

# The base directory contains lockfiles, messages and logs. It must be writable
# by the user running wassup:
BASEDIR=/home/bob/wassup

# Send email notifications to this address (if list, use commas, no spaces)
# Obviously, these accounts should be accessible if target is down
# MAILTO=bob,admin@example.com,5551234444@mobile.example.net
MAILTO=bob@example.com

#########
# It should not be necessary to edit beyond this point.
#########

# Usage line; ${0##*/} strips the directory part of the script path without
# spawning basename and without word-splitting a path that contains spaces:
USAGE="usage = ${0##*/} "

# Print the usage text. $USAGE is passed as an argument rather than embedded
# in the format string, so a '%' or '\' in the script name is printed as-is.
print_usage() {
  printf '\n%s\nwassup needs an IP address or host:\n' "$USAGE"
  printf 'wassup 192.1.1.1 or wassup www.foo.com\n\n'
}

# Check for expected number of arguments:
if [[ $# -ne 1 ]]; then
  print_usage
  exit 1
fi

# Reject targets that cannot be a host name or IP address. An empty target or
# one starting with '-' would be misread by ping, and a '/' would place the
# lock and log files outside of $BASEDIR:
case "$1" in
  '' | -* | */*)
    print_usage
    exit 1
    ;;
esac

# Get target host (IP address or host name argument from command line):
TARGET=$1

# Create path to lock file for target host, used to prevent repeated
# mailings:
LOCKFILE=$BASEDIR/${TARGET}.offline

# Email Subject lines:
SUBJECT_DOWN="Server is down! Cannot ping ${TARGET}."
SUBJECT_UP="Server is up again! ${TARGET} is back online."

# Paths to message files containing the bodies of the email notifications:
MESSAGE_DOWN=$BASEDIR/message_down
MESSAGE_UP=$BASEDIR/message_up

# Create path to log file for target host:
LOGFILE=$BASEDIR/${TARGET}.log

# Full date for log entries: Wed Aug 29 19:03:38 EDT 2001
LOGDATE=$(date)

# Append one entry to the target's log file, prefixed with the run's date.
# Arguments: $1 - the text of the log entry
log_event() {
  printf '%s %s\n' "$LOGDATE" "$1" >> "$LOGFILE"
}

# Begin:

# Ping the target:
if ping -c 1 "$TARGET"; then
  # The target host responded. Check for a lockfile:
  if [[ -e "$LOCKFILE" ]]; then
    # A lockfile exists for this host, indicating it was previously down.
    # It is now back online, so log & send notification:
    log_event "host=$TARGET status=online"
    if mail -s "$SUBJECT_UP" "$MAILTO" < "$MESSAGE_UP"; then
      # Log success of email notification:
      log_event "host=$TARGET message=online status=sent"
    else
      # Log failure of email notification:
      log_event "host=$TARGET message=online status=error"
    fi
    # Remove the lockfile to reset host status (done even if the mail failed):
    rm -- "$LOCKFILE"
  fi
  # Exit nicely:
  exit 0
fi

# No response from target, so check the safety host to make sure we have a
# network connection:
if ! ping -c 1 "$SAFETY"; then
  # Safety did not respond, so there is a network problem on our end.
  # Log & abort:
  log_event "safety=$SAFETY status=unreachable"
  exit 2
fi

# The safety host responded, so ping the target again:
if ! ping -c 2 "$TARGET"; then
  # Still no response from target, so assume it's down.
  # Only send an alert if there is no lockfile:
  if [[ ! -e "$LOCKFILE" ]]; then
    # There is no lockfile for this host, so sound the alert:
    log_event "host=$TARGET status=unreachable"
    if mail -s "$SUBJECT_DOWN" "$MAILTO" < "$MESSAGE_DOWN"; then
      # Log success of email notification:
      log_event "host=$TARGET message=alert status=sent"
    else
      # Log failure of email notification:
      log_event "host=$TARGET message=alert status=error"
    fi
    # Create lockfile to prevent repeat notifications for this host. It is
    # created even if the mail failed, so the alert is not retried:
    touch -- "$LOCKFILE"
  fi
fi

# Exit nicely:
exit 0