1 #!/bin/bash
    2 
    3 #
    4 # The goal of this script is to run nmon daily, and compress the
    5 # previous log files
    6 #
    7 
    8 # init some stuff
    9 date=$(date +%Y%m%d)
   10 ## init ## where to output the data
   11 logdir='/var/log/nmon'
   12 if [[ ! -d ${logdir} ]]
   13  then
   14    mkdir -p ${logdir}
   15  fi
   16 ## init ## who to notify if nmon or logger cannot be found
   17 email='traxler@lsu.edu'
   18 ## init ## how long to wait before killing current instances 10 minutes is 20 30 second intervals
   19 ((waitTime=20))
   20 ## init ## number of days of nmon data to keep
   21 ((days=30))
   22 ## init ##figure out fqdn if we can
   23 hostname=$(uname -n)
   24   ## make sure hostname is fqdn (why does this have to be this hard)
   25   hnlen=${#hostname}
   26   tmp=${hostname:nhlen-3:3}
   27   if [[ 'Xedu' != "X${tmp}" && 'Xorg' != "X${tmp}" ]]
   28    then
   29      hostname="${hostname}.hpc.lsu.edu"
   30    fi
   31 ## init ## try to find nmon command
   32 CMD=$(which nmon)
   33 if [[ 'X' == "X${CMD}" ]]
   34  then
   35    if [[ -x /usr/local/bin/nmon ]]
   36     then
   37       CMD='/usr/local/bin/nmon'
   38     else
   39       echo "nmon not in path on host: ${hostname} " | mail -s "missing nmon" ${email}
   40       exit
   41     fi
   42  fi
   43 ## init ## try to find logger
   44 LOGGER=$(which logger)
   45 if [[ 'X' == "X${LOGGER}" ]]
   46  then
   47    echo "logger not in path on host: ${hostname} " | mail -s "missing logger" ${email}
   48    LOGGER='echo '
   49  fi
   50 ## init ## make sure logging directory exists
   51 if [[ ! ${logdir} ]]
   52  then
   53    mkdir -p ${logdir}
   54  fi
   55 ## init ## set name of output file
   56 outfile="${logdir}/$hostname.$date.nmon"
   57 
   58 ## init ## set interval to 15 minutes
   59 ((minuteInterval=15))
   60 ((interval=minuteInterval*60))
   61 
   62 ## init ## it is possible that nmon is already/still running
   63 others=$(ps -ef | grep '[n]mon -F /var/log/nmon' | wc -l)
   64    ## wait 10 minutes, if still running , kill it and proceed
   65    ((cnt = 0))
   66    while ((0 < others))
   67     do
   68       sleep 30
   69       ((count = count + 1))
   70       others=$(ps -ef | grep '[n]mon -F /var/log/nmon' | wc -l)
   71       if ((waitTime < count))
   72        then
   73            if ((0 < others))
   74          then
   75            ## kill all instances that are running
   76            ps -ef | grep '[n]mon -F /var/log/nmon' | awk '{print $2}' | while read pid
   77             do
   78               kill -9 ${pid}
   79             done
   80            ${LOGGER} nmon "existing nmon process ${pid} killed"
   81          fi
   82        fi
   83     done
   84 ## init ## make sure this is Linux
   85 os=$(uname -a | awk '{print $1}')
   86 
   87 # Main section now -- clean up and start nmon
   88 if [[ "${os}" = "Linux" ]]
   89  then
   90     ## figure out how many iterations left in today
   91     hour=$(date +"%-H")
   92     min=$(date +"%-M")
   93     ((count=((((24 - hour) * 60) - min) / minuteInterval) - 1))
   94 
   95     ## gzip all files int he directory before the new file is created.
   96     ### ls -1 ${logdir}/*.nmon | grep -v "${outfile}" | while read line
   97     ls -1 ${logdir}/*.nmon | while read line
   98      do
   99        rsync -4 ${line} rsync://nmon.hpc.lsu.edu/nmon-${hostname}/
  100        gzip -N -S .gz -9 -f -r ${line}    > /dev/null 2>/dev/null
  101      done
  102 
  103     # actually start nmon
  104     ${CMD} -F ${outfile} -s${interval} -c${count}
  105 
  106     # purge old files
  107     ls -1 ${logdir}/*.nmon ${logdir}/*.nmon.gz | while read line
  108      do
  109        fileDate=$(stat --printf="%y" ${line} | cut -b1-10)
  110        cutoff=$(date -d "${days} days ago" "+%Y-%m-%d")
  111        if [[ "${cutoff}" > "${fileDate}" ]]
  112         then
  113           rm -f ${line}
  114         fi
  115      done
  116     ## log to syslog that nmon was started
  117     ${LOGGER} nmon "nmon data collection started at: ${date} interval: ${interval} for count: ${count} times"
  118  fi
  119