#319: NRPE Log Monitoring Implementation - Jenrite/OE2-project-group-B GitHub Wiki

Critical log patterns are any alerts containing one of these keywords: ERROR|CRITICAL. Normal/abnormal log patterns: we occasionally get warning alerts due to logged-in users or the number of processes. A single alert every now and then does not indicate any malicious activity or infrastructure problem, so we can think of this as a normal log pattern. An abnormal log pattern would be several (>3) in an hour being a warning, with >7 being extremely bad.


#!/usr/bin/env bash
# The checks below rely on bash-only syntax such as (( ... )), so the
# interpreter is named explicitly instead of being left to the caller.

# Parameters
# Log file that is scanned for matching entries.
readonly LOG_FILE="/var/log/syslog"
# Extended regular expression (grep -E); a line matching any alternative counts.
readonly KEYWORDS="ERROR|CRITICAL|WARNING|FAILED"
# Number of matches at which the check reports WARNING / CRITICAL.
readonly THRESHOLD_WARNING=3
readonly THRESHOLD_CRITICAL=7
# Width of the look-back window.
readonly TIME_FRAME=5 # minutes

# Refuse to run under any account other than 'nagios'; report the
# situation as UNKNOWN (exit status 3).
case "$(whoami)" in
  nagios)
    ;;
  *)
    echo "UNKNOWN - Script must be run as nagios user"
    exit 3
    ;;
esac

# Count keyword matches whose timestamp lies within the last TIME_FRAME minutes.
#
# Assumes classic syslog timestamps at the start of each line
# ("Mon DD HH:MM:SS", local time, no year); lines in any other format are
# skipped. The window bounds are computed once and compared as MMDDhhmmss
# numbers inside awk, so no external command is spawned per log line.

# An unreadable log must not be mistaken for "no matches", which would
# make the check report OK.
if [[ ! -r "${LOG_FILE}" ]]; then
  echo "UNKNOWN - Cannot read log file ${LOG_FILE}"
  exit 3
fi

# Window bounds as MMDDhhmmss; the relative date string requires GNU date.
if ! window_end=$(date +%m%d%H%M%S) ||
  ! window_start=$(date -d "${TIME_FRAME} minutes ago" +%m%d%H%M%S); then
  echo "UNKNOWN - Cannot compute a ${TIME_FRAME} minute time window"
  exit 3
fi

COUNT=$(grep -E -- "${KEYWORDS}" "${LOG_FILE}" \
  | awk -v lo="${window_start}" -v hi="${window_end}" '
  BEGIN {
    n = split("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec", abbr, " ")
    for (i = 1; i <= n; i++) {
      month[abbr[i]] = i
    }
    # Force numeric comparison (the bounds may carry a leading zero).
    lo += 0
    hi += 0
    # lo > hi only when the window straddles the turn of the year.
    wraps = (lo > hi)
  }
  ($1 in month) && split($3, hms, ":") == 3 {
    stamp = (((month[$1] * 100 + $2) * 100 + hms[1]) * 100 + hms[2]) * 100 + hms[3]
    # The upper bound keeps year-less entries from later months of the
    # previous year from being counted as recent.
    if (wraps ? (stamp >= lo || stamp <= hi) : (stamp >= lo && stamp <= hi)) {
      matched++
    }
  }
  # "+ 0" prints 0 instead of an empty string when nothing matched.
  END { print matched + 0 }
')

# Decision logic: map the match count onto the Nagios plugin states
# (0 = OK, 1 = WARNING, 2 = CRITICAL, 3 = UNKNOWN).

# An empty or non-numeric COUNT means the counting step failed. Inside
# (( ... )) it would silently evaluate as 0 and be reported as OK, so it is
# reported as UNKNOWN instead.
if [[ ! "${COUNT:-}" =~ ^[0-9]+$ ]]; then
  echo "UNKNOWN - Could not count matching log entries in ${LOG_FILE}"
  exit 3
fi

if (( COUNT >= THRESHOLD_CRITICAL )); then
  echo "CRITICAL - $COUNT log entries matched in last ${TIME_FRAME}m"
  exit 2
elif (( COUNT >= THRESHOLD_WARNING )); then
  echo "WARNING - $COUNT log entries matched in last ${TIME_FRAME}m"
  exit 1
else
  echo "OK - $COUNT log entries matched in last ${TIME_FRAME}m"
  exit 0
fi

image

Once the script was written I needed to give it the correct permissions to run properly

image

Once the script was sorted I made it a service

image

(the service running)

ISSUES: I ran into some trouble when starting the service because I only had the script on mgmt and not on db, so when the service tried to run it I got this error:

image

It was fixed by simply copying the script over to db