#!/bin/sh
#
# plugin for munin to monitor usage of NSD.
#
# (C) 2008 W.C.A. Wijngaards. BSD Licensed.
#
# To install; compile with --enable-bind8-stats (enabled by default)
# and enable nsd-control in nsd.conf with the line
# remote-control: control-enable: yes
# Run the command nsd-control-setup as root to generate the key files.
#
# Environment variables for this script
# statefile - where to put temporary statefile.
# nsd_conf - where the nsd.conf file is located.
# nsd_control - where to find nsd-control executable.
# nsd_checkconf - where to find nsd-checkconf executable.
#
# You can set them in your munin/plugin-conf.d/plugins.conf file
# with:
# [nsd_munin*]
# user root
# env.statefile /usr/local/var/munin/plugin-state/nsd-state
# env.nsd_conf /usr/local/etc/nsd.conf
# env.nsd_control /usr/local/sbin/nsd-control
# env.nsd_checkconf /usr/local/sbin/nsd-checkconf
#
# This plugin can create different graphs depending on what name
# you link it as (with ln -s) into the plugins directory
# You can link it multiple times.
# If you are only a casual user, the _hits and _by_type are most interesting,
# possibly followed by _by_rcode.
#
# nsd_munin_hits - base volume, transport type, failures
# nsd_munin_memory - memory usage
# nsd_munin_by_type - incoming queries by type
# nsd_munin_by_class - incoming queries by class
# nsd_munin_by_opcode - incoming queries by opcode
# nsd_munin_by_rcode - answers by rcode
# nsd_munin_zones - number of zones
#
# Magic markers - optional - used by installation scripts and
# munin-config:
#
#%# family=contrib
#%# capabilities=autoconf suggest
# POD documentation
: <<=cut
=head1 NAME
nsd_munin_ - Munin plugin to monitor the NSD server.
=head1 APPLICABLE SYSTEMS
System with NSD daemon.
=head1 CONFIGURATION
[nsd_munin*]
user root
env.statefile /usr/local/var/munin/plugin-state/nsd-state
env.nsd_conf /usr/local/etc/nsd.conf
env.nsd_control /usr/local/sbin/nsd-control
env.nsd_checkconf /usr/local/sbin/nsd-checkconf
Use the .env settings to override the defaults.
=head1 USAGE
Can be used to present different graphs. Use ln -s for that name in
the plugins directory to enable the graph.
nsd_munin_hits - base volume, transport type, failures
nsd_munin_memory - memory usage
nsd_munin_by_type - incoming queries by type
nsd_munin_by_class - incoming queries by class
nsd_munin_by_opcode - incoming queries by opcode
nsd_munin_by_rcode - answers by rcode
nsd_munin_zones - number of zones
=head1 AUTHOR
Copyright 2008 W.C.A. Wijngaards
=head1 LICENSE
BSD
=cut
# Paths used by the plugin.  Each one falls back to the default install
# location unless the environment variable named inside the braces is set.
state="${statefile:-/usr/local/var/munin/plugin-state/nsd-state}"
conf="${nsd_conf:-/usr/local/etc/nsd.conf}"
ctrl="${nsd_control:-/usr/local/sbin/nsd-control}"
chkconf="${nsd_checkconf:-/usr/local/sbin/nsd-checkconf}"
# the lock file lives next to the statefile
lock="${state}.lock"

# number of seconds between polling attempts.
# makes the statefile hang around for at least this many seconds,
# so that multiple links of this script can share the results.
lee=55

# sed arguments that shorten NSD statistic names,
# to keep things within 19 characters.
# Expanded unquoted by the users, so it must stay a plain word list.
ABBREV="-e s/num/n/"
ABBREV="$ABBREV -e s/type/t/"
ABBREV="$ABBREV -e s/opcode/o/"
ABBREV="$ABBREV -e s/rcode/r/"
ABBREV="$ABBREV -e s/class/c/"
# get value from $1 into return variable $value
#
# Looks up the line "$1=<value>" in the statefile $state and stores the
# text after the last '=' of that line in the global $value.
#   $1 - exact statistic name, e.g. time.elapsed; dots are matched literally.
# Only the first matching line is used.  $value becomes "0" when the key is
# not present (or the statefile cannot be read) or when its value is empty.
get_value ( ) {
  # index() does a literal prefix comparison, so the dots in NSD statistic
  # names are not regex wildcards; quoting $state keeps a path containing
  # whitespace in one piece.
  value=$(awk -v key="$1=" \
    'index($0, key) == 1 { sub(/^.*=/, ""); print; exit }' "$state")
  if test -z "$value"; then
    value="0"
  fi
}
# download the state from NSD.
#
# Refreshes the statefile $state with the output of "$ctrl -c $conf stats"
# plus a trailing "timestamp=<epoch seconds>" line, unless the existing
# statefile is less than $lee seconds old.
# Globals read: lock, state, ctrl, conf, lee.  Calls get_value (sets $value).
# Exits the script with status 1, after printing a message on stdout, when
# the lock cannot be obtained or when the stats command fails.
get_state ( ) {
  # obtain lock for fetching the state
  # because there is a race condition in fetching and writing to file

  # see if the lock is stale, if so, take it
  if test -f "$lock" ; then
    pid=$(cat "$lock" 2>&1)
    # the lock is stale when its owner is not a live process
    if ! kill -0 "$pid" >/dev/null 2>&1 && test "$pid" != "$$" ; then
      echo $$ >"$lock"
    fi
  fi

  i=0
  # loop until the lock file exists and holds our own pid
  while test ! -f "$lock" || test "$(cat "$lock" 2>&1)" != "$$"; do
    while test -f "$lock"; do
      # wait: the first 1000 rounds poll without sleeping, after that
      # one second is slept per round, and round 1501 gives up.
      i=$(expr $i + 1)
      if test $i -gt 1000; then
        sleep 1
      fi
      if test $i -gt 1500; then
        echo "error locking $lock" "=" $(cat "$lock")
        rm -f "$lock"
        exit 1
      fi
    done
    # try to get it
    echo $$ >"$lock"
  done

  # do not refetch if the file exists and only LEE seconds old
  if test -f "$state"; then
    now=$(date +%s)
    get_value "timestamp"
    if test "$now" -lt "$(expr "$value" + "$lee")"; then
      rm -f "$lock"
      return
    fi
  fi

  # Collect the new statistics in a temporary file next to the statefile
  # and rename it into place only once it is complete.  Readers that do not
  # take the lock therefore never see a truncated or half-written file, and
  # a failed fetch leaves the previous statefile untouched.
  # $ctrl is left unquoted on purpose, exactly as before, so a value that
  # carries extra words keeps working.
  newstate="$state.$$"
  if ! $ctrl -c "$conf" stats > "$newstate"; then
    echo "error retrieving data from the server"
    rm -f "$newstate" "$lock"
    exit 1
  fi
  echo "timestamp=$(date +%s)" >> "$newstate"
  mv -f "$newstate" "$state"
  rm -f "$lock"
}
# Answer munin's "autoconf" probe on stdout.
# Prints "yes" when the NSD configuration file $conf exists and the
# directory that holds the statefile $state exists or could be created,
# otherwise "no (<reason>)".
# Always returns 0: munin expects exit status 0 for both answers and
# treats a non-zero status as an error in the plugin itself.
autoconf_check ( ) {
  if test ! -f "$conf"; then
    echo "no ($conf does not exist)"
    return 0
  fi
  statedir=$(dirname "$state")
  if test ! -d "$statedir"; then
    mkdir -p "$statedir"
    # check again, mkdir may have failed
    if test ! -d "$statedir"; then
      echo "no ($state directory does not exist)"
      return 0
    fi
  fi
  echo yes
  return 0
}
if test "$1" = "autoconf" ; then
  autoconf_check
  exit 0
fi
# Answer munin's "suggest" probe: print every graph name this plugin can
# be linked as, one per line, then stop.
if test "$1" = "suggest" ; then
  for graph in hits memory by_type by_class by_opcode by_rcode zones; do
    echo "$graph"
  done
  exit 0
fi
# determine my type, by name: whatever follows the last "nsd_munin_"
# in the name this script was started as.
id=$(echo $0 | sed -e 's/^.*nsd_munin_//')
# some default to keep people sane (used when nothing follows the prefix).
: "${id:=hits}"
# if $1 exists in statefile, config is echoed with label $2
#   $1 - exact NSD statistic name, e.g. server0.queries
#   $2 - label for the munin field
# The field name is $1 shortened with $ABBREV and with dots turned into
# underscores; the field is declared with min 0 and type ABSOLUTE.
# Prints nothing when the statefile $state has no "$1=" line or cannot
# be read.
exist_config ( ) {
  mn=$(echo "$1" | sed $ABBREV | tr . _)
  # literal prefix match: the dots in the name are not regex wildcards, and
  # the quoted $state survives a path that contains whitespace.
  if awk -v key="$1=" \
    'index($0, key) == 1 { found = 1; exit } END { exit !found }' \
    "$state" >/dev/null 2>&1; then
    echo "$mn.label $2"
    echo "$mn.min 0"
    echo "$mn.type ABSOLUTE"
  fi
}
# Declare one munin field for an NSD statistic, unconditionally.
#   $1 - statistic name in nsd format, e.g. num.queries
#   $2 - label of the field
#   $3 - munin data type of the field
# Prints the .label, .min (always 0) and .type lines; the field name is
# $1 shortened with $ABBREV and with dots replaced by underscores.
p_config ( ) {
  field=$(echo $1 | sed $ABBREV | tr . _)
  echo $field.label "$2"
  echo $field.min 0
  echo $field.type $3
}
# Emit the graph header shared by the counter graphs.
#   $1 - graph title
#   $2 - what is counted per ${graph_period}, shown on the vertical axis
counter_graph_header ( ) {
  echo "graph_title $1"
  echo "graph_args --base 1000 -l 0"
  echo "graph_vlabel $2 / \${graph_period}"
  echo "graph_scale no"
  echo "graph_category DNS"
}
# Declare a field through p_config for every "name|label" argument.
#   $1 - munin data type used for all of them
#   remaining arguments - "statistic name|label" pairs
field_table ( ) {
  fieldtype=$1
  shift
  for entry in "$@"; do
    p_config "${entry%%|*}" "${entry#*|}" "$fieldtype"
  done
}
# Declare an ABSOLUTE field for every word of the statefile lines that
# start with "num.$1"; the label is the name with that prefix cut off.
#   $1 - breakdown kind: type, class, opcode or rcode
breakdown_config ( ) {
  for stat in $(grep "^num.$1" $state); do
    nm=$(echo $stat | sed -e 's/=.*$//')
    tp=$(echo $nm | sed -e s/num.$1.//)
    p_config "$nm" "$tp" "ABSOLUTE"
  done
}
# munin "config" request: describe the graph selected by $id and stop.
if test "$1" = "config" ; then
  # several graphs list their fields from the statefile, so make sure
  # there is one; an existing file is used as it is.
  test -f $state || get_state
  case $id in
  hits)
    counter_graph_header "NSD traffic" "queries"
    # per-server counters are only declared when the statefile has them
    for n in 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15; do
      exist_config server$n.queries "queries handled by server$n"
    done
    field_table "ABSOLUTE" \
      "num.queries|total queries" \
      "num.udp|UDP ip4 queries" \
      "num.udp6|UDP ip6 queries" \
      "num.tcp|TCP ip4 queries" \
      "num.tcp6|TCP ip6 queries" \
      "num.edns|queries with EDNS OPT" \
      "num.ednserr|queries failed EDNS parse" \
      "num.answer_wo_aa|nonauthor. queries (referrals)" \
      "num.rxerr|receive failed" \
      "num.txerr|transmit failed" \
      "num.truncated|truncated replies with TC" \
      "num.raxfr|AXFR from allowed client" \
      "num.rixfr|IXFR from allowed client" \
      "num.dropped|dropped due to sanity check"
    echo "graph_info DNS queries."
    ;;
  memory)
    echo "graph_title NSD memory usage"
    echo "graph_args --base 1024 -l 0"
    echo "graph_vlabel memory used in bytes"
    echo "graph_category DNS"
    field_table "GAUGE" \
      "size.vsz|Total virtual memory (VSZ)" \
      "size.rss|Total resident memory (RSS)" \
      "size.db.mem|data in memory" \
      "size.xfrd.mem|xfr and notify memory" \
      "size.config.mem|config memory" \
      "size.db.disk|mmap of nsd.db file" \
      "size.config.disk|config zonelist on disk"
    echo "graph_info The memory used by NSD, xfrd and config. Disk size of nsd.db and zonelist."
    ;;
  by_type)
    counter_graph_header "NSD queries by type" "queries"
    breakdown_config "type"
    echo "graph_info queries by DNS RR type queried for"
    ;;
  by_class)
    counter_graph_header "NSD queries by class" "queries"
    breakdown_config "class"
    echo "graph_info queries by DNS RR class queried for."
    ;;
  by_opcode)
    counter_graph_header "NSD queries by opcode" "queries"
    breakdown_config "opcode"
    echo "graph_info queries by opcode in the query packet."
    ;;
  by_rcode)
    counter_graph_header "NSD answers by return code" "answer packets"
    breakdown_config "rcode"
    echo "graph_info answers split out by return value."
    ;;
  zones)
    echo "graph_title NSD number of zones"
    echo "graph_args --base 1000 -l 0"
    echo "graph_vlabel zone count"
    echo "graph_category DNS"
    field_table "GAUGE" \
      "zone.total|total zones" \
      "zone.master|master zones" \
      "zone.slave|slave zones"
    echo "graph_info number of zones served by NSD."
    ;;
  esac
  exit 0
fi
# do the stats itself: from here on the plugin answers a value request,
# so bring the statefile up to date first.
get_state

# get the time elapsed.  get_value reports "0" for a missing entry, so a
# zero here means either no time has passed or the data is unusable.
get_value "time.elapsed"
case $value in
0|0.000000)
  echo "error: time elapsed 0 or could not retrieve data"
  exit 1
  ;;
esac
elapsed="$value"
# Print the munin value line for NSD statistic $1.
# The value is looked up with get_value; the field name is $1 shortened
# with $ABBREV and with dots replaced by underscores.
print_value ( ) {
  get_value $1
  field=$(echo $1 | sed $ABBREV | tr . _)
  echo "$field.value" $value
}
# Print the munin value line for statistic $1 when its statefile line is
# already at hand.
#   $1 - statistic name in nsd format
#   $2 - the line "name=value"; everything after the last '=' is the value
# Leaves the value in the global $value as well.
print_value_line ( ) {
  value=${2##*=}
  field=$(echo $1 | sed $ABBREV | tr . _)
  echo "$field.value" $value
}
# Print a value line for every word of the statefile lines that start
# with "num.$1".
#   $1 - breakdown kind: type, class, opcode or rcode
print_breakdown ( ) {
  for stat in $(grep "^num.$1" $state); do
    nm=$(echo $stat | sed -e 's/=.*$//')
    print_value_line $nm $stat
  done
}
# value request: print the values of the graph selected by $id
case $id in
hits)
  # the per-server counters first, then the totals; a counter is only
  # printed when the statefile has a line for it.
  wanted=""
  for n in 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15; do
    wanted="$wanted server$n.queries"
  done
  wanted="$wanted num.queries num.udp num.udp6 num.tcp num.tcp6"
  wanted="$wanted num.edns num.ednserr num.answer_wo_aa num.rxerr num.txerr"
  wanted="$wanted num.truncated num.raxfr num.rixfr num.dropped"
  for x in $wanted; do
    if grep "^"$x"=" $state >/dev/null 2>&1; then
      print_value $x
    fi
  done
  ;;
memory)
  # get the total memory for NSD
  serverpid=$($ctrl -c $conf serverpid 2>&1)
  # small race condition, if reload happens between previous and next
  # lines, if so, detect by checking if we have a number as output.
  rssval=$(ps -p $serverpid -o rss= 2>&1)
  vszval=$(ps -p $serverpid -o vsz= 2>&1)
  # each figure is multiplied by 1024 when it is a number, and reported
  # as 0 when it is anything else (such as an error message).
  if test "`expr $rssval + 1 - 1 2>&1`" -eq "$rssval" >/dev/null 2>&1; then
    rssval=`expr $rssval \* 1024`
  else
    rssval=0
  fi
  if test "`expr $vszval + 1 - 1 2>&1`" -eq "$vszval" >/dev/null 2>&1; then
    vszval=`expr $vszval \* 1024`
  else
    vszval=0
  fi
  echo "size_vsz.value" $vszval
  echo "size_rss.value" $rssval
  for x in size.db.mem size.xfrd.mem size.config.mem \
    size.db.disk size.config.disk; do
    print_value $x
  done
  ;;
by_type)
  print_breakdown "type"
  ;;
by_class)
  print_breakdown "class"
  ;;
by_opcode)
  print_breakdown "opcode"
  ;;
by_rcode)
  print_breakdown "rcode"
  ;;
zones)
  # the total is the sum of the master and slave counts
  get_value "zone.master"
  nummas="$value"
  get_value "zone.slave"
  numsla="$value"
  echo "zone_total.value" `expr $nummas + $numsla`
  echo "zone_master.value" "$nummas"
  echo "zone_slave.value" "$numsla"
  ;;
esac