Data Guard Monitoring Scripts ---------------------------------------------------------------------------------------------------------------------------------- Installed on primary: #!/bin/bash #ck_stby.sh: Check redo log gap and log apply lag of standby databases, run on primary. This #script must have permission 700 because sys passwords are stored in here. New entry must #be added to tnsnames.ora in the current directory and where checklag function is called. #It's obvious down below. #Make sure tabs are tabs; there're two (find text "trim tab" below) export ORACLE_SID=orcl #can be any DB, not part of the data guard export ORACLE_HOME=/u01/app/oracle/product/11.2.0/db export PATH=$ORACLE_HOME/bin:/usr/bin:/bin cd /u01/app/oracle/scripts/ck_stby #where this script is #Use tnsnames.ora in this directory export TNS_ADMIN=. LAG_THRESHOLD=1 #if oldest log apply was this many days ago, alert #Check archive gap. Alert if v$archive_gap is not empty. #No longer used. See comment below function checkgap { conn=$1 #Standby conn string P=$2 #password for sys RECIPIENT=$3 RESULT=-1 #reset result RESULT=$(sqlplus -s -L sys/$P@$conn as sysdba <> ck_stby.err mail -s "Standby log gap check failed for $conn: $RESULT" $RECIPIENT < /dev/null fi } #Yong commented out this "gap check" because v$archive_gap is unreliable. Check archive lag with #function checklag only. If one day v$archive_gap is bug-free, can uncomment the checkgap lines. #checkgap db1sb db1sbsyspw "dba_joe@example.com,dba_john@example.com" #checkgap db2sb db2sbsyspw "dba_jane@example.com" #Check standby lag. Alert if oldest log applied was $LAG_THRESHOLD days ago. function checklag { conn=$1 #Standby conn string P=$2 #password for sys RECIPIENT=$3 RESULT1=100 #reset apply lag result RESULT1=$(sqlplus -s -L sys/$P@$conn as sysdba <> ck_stby.err echo "Last (oldest last if RAC) log apply occurred $RESULT1 days ago. Find out why logs are not applied!" | mail -s "Standby lag alert for $conn" $RECIPIENT fi #20130310 Add logic to check for presence of MRP0 process RESULT2=1 #reset MRP0 process result RESULT2=$(sqlplus -s -L sys/$P@$conn as sysdba <> ck_stby.err echo -n "$(date): MRP0 process is down for $conn; no logs will be applied!" | mail -s "Standby apply process down for $conn" $RECIPIENT else echo "MRP0 process is up for $conn. Good! Instance,PID,status,thread#,sequence#: $RESULT2." fi } #Real work. Must add entry to tnsnames.ora in this directory before adding a line here. Multiple emails must be in quotes. checkgap db1sb db1sbsyspw "dba_joe@example.com,dba_john@example.com" checkgap db2sb db2sbsyspw dba_jane@example.com Crontab example (never throw away stdout or stderr in crontab): $ crontab -l 1 7,10,13,16,22 * * * /u01/app/oracle/scripts/ck_stby/ck_stby.sh > /tmp/ck_stby.out 2>&1 ---------------------------------------------------------------------------------------------------------------------------------- Installed on standby #!/bin/bash #ck_stby_reco.sh: Check standby recovery based on the time before the last '^Media Recovery Log', run on standby export ORACLE_SID=db1sb export ORACLE_HOME=/u01/app/oracle/product/11.2.0/db export PATH=$ORACLE_HOME/bin:/usr/bin:/bin ALRT_FILE=/u01/app/oracle/diag/rdbms/db1sb/db1sb1/trace/alert_db1sb1.log LAG=86400 #If last media recovery happened $LAG seconds or earlier ago, alert us. This is NOT related to the delay of log application! RECIPIENT=dba_joe@example.com,dba_john@example.com cd /u01/app/oracle/scripts/ck_stby_reco #last 100 lines of alert.log, adjust as needed tail -100 $ALRT_FILE > lst100_alrt #last line with the pattern PTLINE=$(grep -n '^Media Recovery Log' lst100_alrt | tail -1 | awk -F: '{print $1}') PTLINE=${PTLINE:=0} #reset to 0 in case the pattern is not found at all #last line of timestamp before the pattern line ($TM will be like "Tue May 18 10:03:21 2010") TM=$(head -$PTLINE lst100_alrt | egrep -n '^Mon|^Tue|^Wed|^Thu|^Fri|^Sat|^Sun' lst100_alrt | tail -1 | cut -d: -f2-) TM=${TM:=0} #reset to 0 if the above command set $TM to null #see if the current time is $LAG newer than the above grep'ed time if (( $(date "+%s") > $(echo $(date --date="$TM" "+%s")+$LAG|bc) )); then echo "Last media recovery older than $LAG seconds ago!" mail -s "Standby of $ORACLE_SID: Last media recovery older than $LAG seconds ago!" $RECIPIENT < lst100_alrt else echo "Last media recovery not older than 1 day ago. Good!" fi Crontab example (never throw away stdout or stderr in crontab): $ crontab -l 0 9,15,21 * * * /u01/app/oracle/scripts/ck_stby_reco/ck_stby_reco.sh > /tmp/ck_stby_reco.out 2>&1