Nagios监控远程Linux主机配置

Nagios是一款开源的免费网络监视工具,能有效监控Windows、Linux和Unix主机的状态,以及交换机、路由器等网络设备和打印机等。在系统或服务状态异常时,它会发出邮件或短信报警,第一时间通知网站运维人员;在状态恢复后,也会发出恢复正常的邮件或短信通知。下面介绍如何配置Nagios监控远程Linux主机。

Linux被监控主机配置

安装nagios插件
# yum -y install nagios-plugins nagios-plugins-all
安装nrpe插件
# yum -y install nrpe
修改nrpe配置,在allowed_hosts中加入Nagios监控主机的IP(下面的1.2.3.4仅为示例,请替换为监控主机的实际IP)
# vim /etc/nagios/nrpe.cfg
allowed_hosts=127.0.0.1,1.2.3.4
启动nrpe服务
# service nrpe start
设置nrpe开机启动
# chkconfig nrpe on
检查nrpe是否正常启动
# netstat -an | grep 5666

如果出现如下:

tcp 0 0 0.0.0.0:5666 0.0.0.0:* LISTEN

说明nrpe启动成功

防火墙中打开5666端口
# vim /etc/sysconfig/iptables
-A INPUT -m state --state NEW -m tcp -p tcp -s 1.2.3.4 --dport 5666 -j ACCEPT
重启防火墙
# service iptables restart

Nagios 监控主机配置

安装check_nrpe插件
# yum -y install nagios-plugins-nrpe

安装好的check_nrpe插件位置为:/usr/lib64/nagios/plugins/check_nrpe

定义check_nrpe监控命令
# vim /etc/nagios/objects/commands.cfg

在最后面添加如下内容:

# 'check_nrpe' command definition
define command{
        command_name check_nrpe
        command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
}
添加远程主机配置文件目录
# cd /etc/nagios

新建远程主机监控配置文件目录

# mkdir servers

修改nagios.cfg配置文件,将servers目录添加到配置文件中

# vim nagios.cfg

添加如下一行:

cfg_dir=/etc/nagios/servers
配置远程主机监控对象
# cd servers

假如被监控主机ip为1.2.3.4

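新建1.2.3.4.cfg,在里面添加如下监控内容(注意:check_nrpe!后面的命令名,如check_disk、check_swap、check_diskstat等,必须已在被监控主机的/etc/nagios/nrpe.cfg中通过command[命令名]=...定义,否则检查会返回命令未定义的错误)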

###############################################################################
# LOCALHOST.CFG - SAMPLE OBJECT CONFIG FILE FOR MONITORING THIS MACHINE
#
# Last Modified: 05-31-2007
#
# NOTE: This config file is intended to serve as an *extremely* simple
#       example of how you can create configuration entries to monitor
#       the local (Linux) machine.
#
###############################################################################
###############################################################################
###############################################################################
#
# HOST DEFINITION
#
###############################################################################
###############################################################################

# Define a host for the local machine

define host{
use                     linux-server,host-pnp   ; Name of host template to use
; This host definition will inherit all variables that are defined
; in (or inherited by) the linux-server host template definition.
host_name               lvs01
alias                   lvs01
address                 1.2.3.4
}

###############################################################################
###############################################################################
#
# HOST GROUP DEFINITION
#
###############################################################################
###############################################################################

# Define an optional hostgroup for Linux machines

#define hostgroup{
#        hostgroup_name  linux-servers ; The name of the hostgroup
#        alias           Linux Servers ; Long name of the group
#        members         lvs01     ; Comma separated list of hosts that belong to this group
#        }

###############################################################################
###############################################################################
#
# SERVICE DEFINITIONS
#
###############################################################################
###############################################################################
# Define a service to "ping" the local machine

define service{
use                             local-service,srv-pnp; Name of service template to use
host_name                       lvs01
service_description             PING
check_command   check_ping!100.0,20%!500.0,60%
}
# Define a service to check the disk space of the root partition
# on the local machine.  Warning if < 20% free, critical if
# < 10% free space on partition.

define service{
use                             local-service,srv-pnp; Name of service template to use
host_name                       lvs01
service_description             Root Partition
check_command                   check_nrpe!check_disk
}

# Define a service to check the number of currently logged in
# users on the local machine.  Warning if > 20 users, critical
# if > 50 users.

define service{
use                             local-service,srv-pnp; Name of service template to use
host_name                       lvs01
service_description             Current Users
check_command                   check_nrpe!check_users
}
# Define a service to check the number of currently running procs
# on the local machine.  Warning if > 250 processes, critical if
# > 400 processes.

define service{
use                             local-service         ; Name of service template to use
host_name                       lvs01
service_description             Total Processes
check_command   check_nrpe!check_total_procs
}
define service{
use                             local-service         ; Name of service template to use
host_name                       lvs01
service_description             Zombie Processes
check_command                   check_nrpe!check_zombie_procs
}

# Define a service to check the load on the local machine.

define service{
use                             local-service,srv-pnp; Name of service template to use
host_name                       lvs01
service_description             Current Load
check_command   check_nrpe!check_load
}

# Define a service to check the swap usage the local machine.
# Critical if less than 10% of swap is free, warning if less than 20% is free

define service{
use                             local-service,srv-pnp; Name of service template to use
host_name                       lvs01
service_description             Swap Usage
check_command   check_nrpe!check_swap
}

# Define a service to check SSH on the local machine.
# Disable notifications for this service by default, as not all users may have SSH enabled.

#define service{
#        use                             local-service         ; Name of service template to use
#        host_name                       lvs01
#        service_description             SSH
#  check_command    check_ssh
#  notifications_enabled   0
#        }

# Define a service to check HTTP on the local machine.
# Disable notifications for this service by default, as not all users may have HTTP enabled.

#define service{
#        use                             local-service         ; Name of service template to use
#        host_name                       lvs01
#        service_description             HTTP
# check_command   check_tcp!80
#        }

define service{
use                             local-service,srv-pnp; Name of service template to use
host_name                       lvs01
service_description             Disk IO
check_command                   check_nrpe!check_diskstat
}


验证nagios配置文件
# nagios -v /etc/nagios/nagios.cfg

或者

# service nagios checkconfig

测试check_nrpe插件与被监控机上的NRPE守护进程(nrpe daemon)之间能否正常通信

# /usr/lib64/nagios/plugins/check_nrpe -H 1.2.3.4

如果返回NRPE的版本号信息(例如 NRPE v2.13),说明通信正常

或者也可以用如下命令测试,看下是否有数据返回

# /usr/lib64/nagios/plugins/check_nrpe -H 1.2.3.4 -c check_load
重启nagios
# service nagios restart

这样子就完成了对远程主机的监控,登录web界面即可看到相应信息。