Commit 9d461ecf authored by Paschalis Korosoglou

Merge pull request #66 from dpavlos/nagios_config

ARGO-389 Ansible configs for central nagios
parents b1fe6e53 3f8f883d
......@@ -5,5 +5,6 @@
# Roles applied by this play, each carrying its own tag.
roles:
- { role: firewall, tags: firewall }
- { role: repos, tags: repos }
# The ca_bundle role is conditional on the ca_bundle_install variable.
- { role: ca_bundle, when: ca_bundle_install, tags: ca_bundle }
- { role: has_certificate, tags: certificate }
- { role: monitoring_engine, tags: monitoring_engine }
......@@ -2,27 +2,43 @@
# Packages looped over by the yum install task; each entry gives the package
# name and the repository to enable while installing it.
nagios_components:
  - name: argo-ncg
    repo: argo-prod
  - name: argo-msg-nagios
    repo: argo-prod
## NCG vars ##
# Every key is defined exactly once. Keys that used to appear twice
# (probes_type, gocdb_root_url, cert_status, nagios_role, ocsp_command,
# backup_instance) keep the value of their last occurrence, which is the
# value YAML loaders resolve a duplicate key to.
nagios_server: localhost
nagios_admin_email: contact@nagiosadmin.localhost
vo: ops
enable_unicore_probes: "0"
metric_config_file: /etc/ncg-metric-config.d/local.conf
probes_type: local
gocdb_root_url: https://localhost/gocdbpi
cert_status: Certified
contact_type: roc
nagios_role: project
include_empty_hosts: "0"
enable_notifications: "0"
local_metric_store: "0"
check_hosts: "0"
tenant_name: TENANT_A
poem_root_url: http://localhost/poem
include_proxy_checks: "0"
include_msg_checks_recv: "0"
## CGI vars ##
# Certificate DNs joined into comma-separated lists by the cgi.cfg template.
authorized_for_all_service_commands:
  - /C=--/ST=SomeState/L=SomeCity/O=SomeOrganization/OU=SomeOrganizationalUnit/CN=SomeCN_1
  - /C=--/ST=SomeState/L=SomeCity/O=SomeOrganization/OU=SomeOrganizationalUnit/CN=SomeCN_2
authorized_for_all_host_commands:
  - /C=--/ST=SomeState/L=SomeCity/O=SomeOrganization/OU=SomeOrganizationalUnit/CN=SomeCN_1
  - /C=--/ST=SomeState/L=SomeCity/O=SomeOrganization/OU=SomeOrganizationalUnit/CN=SomeCN_2
## Nagios.cfg vars ##
# Numeric-looking switches stay quoted so they are kept as strings.
host_perfdata_file_processing_command: ncg-process-host-perfdata-file
service_perfdata_file_processing_command: ncg-process-service-perfdata-file
obsess_over_services: "1"
ocsp_command: handle_service_check
use_large_installation_tweaks: "1"
enable_environment_macros: "1"
# NOTE(review): backup_instance previously also appeared under "NCG vars"
# with the same value — confirm which section it belongs to.
backup_instance: "false"
allow_empty_hostgroup_assignment: "1"
cfg_dir: /etc/nagios/argo-ncg.d
use_syslog: "0"
log_initial_states: "1"
......@@ -30,9 +46,8 @@ log_external_commands: "0"
# Remaining nagios.cfg settings.
# host_perfdata_file_processing_command,
# service_perfdata_file_processing_command, use_large_installation_tweaks and
# allow_empty_hostgroup_assignment are not repeated here: they are already
# defined with the same values under "Nagios.cfg vars" earlier in this file,
# and a second definition would be a duplicate mapping key.
log_passive_checks: "0"
service_check_timeout: 910
process_performance_data: "1"
# Double quotes are required so that \t is parsed as a real tab character.
host_perfdata_file_template: "DATATYPE::HOSTPERFDATA\tTIMET::$TIMET$\tHOSTNAME::$HOSTNAME$\tHOSTPERFDATA::$HOSTPERFDATA$\tHOSTCHECKCOMMAND::$HOSTCHECKCOMMAND$\tHOSTSTATE::$HOSTSTATE$\tHOSTSTATETYPE::$HOSTSTATETYPE$"
service_perfdata_file_template: "DATATYPE::SERVICEPERFDATA\tTIMET::$TIMET$\tHOSTNAME::$HOSTNAME$\tSERVICEDESC::$SERVICEDESC$\tSERVICEPERFDATA::$SERVICEPERFDATA$\tSERVICECHECKCOMMAND::$SERVICECHECKCOMMAND$\tHOSTSTATE::$HOSTSTATE$\tHOSTSTATETYPE::$HOSTSTATETYPE$\tSERVICESTATE::$SERVICESTATE$\tSERVICESTATETYPE::$SERVICESTATETYPE$"
\ No newline at end of file
## Other vars
ca_bundle_install: false
---
# Handlers triggered through `notify:`; each one reloads a single service.
- name: reload nagios
  service:
    name: nagios
    state: reloaded

- name: reload httpd
  service:
    name: httpd
    state: reloaded
......@@ -3,9 +3,19 @@
# Nagios itself is installed with the "nagios" repository enabled.
- name: Install Nagios
  yum:
    name: nagios
    state: latest
    enablerepo: nagios

# Web server for the Nagios web interface.
- name: Install Apache
  yum:
    name: httpd
    state: latest

- name: Install pnp4nagios
  yum:
    name: pnp4nagios
    state: latest
# Installs (or upgrades to the latest version of) each listed package in turn.
- name: Install Apache and mod_ssl
  yum:
    name: "{{ item }}"
    state: latest
  with_items:
    - httpd
    - mod_ssl
# fetch-crl is installed only when ca_bundle_install is truthy.
- name: Install fetch-crl
  yum:
    name: fetch-crl
    state: latest
  when: ca_bundle_install
# Installs every package listed in nagios_components, enabling the repository
# named by that entry for the duration of the install.
# The loop source is written as a templated expression rather than a bare
# variable name, which newer Ansible releases no longer expand implicitly.
# The task name typo ("conponents") is corrected; anything that selects this
# task by name (e.g. --start-at-task) must use the corrected spelling.
- name: Install NCG and MSG components
  yum:
    name: "{{ item.name }}"
    state: latest
    enablerepo: "{{ item.repo }}"
  with_items: "{{ nagios_components }}"
......@@ -15,7 +25,7 @@
with_items: nagios_plugins
# Installs the optional UMD plugin packages.
# The task used to carry two `yum` keys; only the last one (name={{ item }})
# is seen by a YAML loader, so that is the one kept here.
# NOTE(review): entries of nagios_plugins_umd are assumed to be plain package
# names rather than mappings with a `name` field — confirm against inventory.
# The loop falls back to an empty list so it still resolves when the variable
# is undefined and the `when` guard skips the task.
- name: Install Nagios UMD plugins
  yum:
    name: "{{ item }}"
    state: latest
  with_items: "{{ nagios_plugins_umd | default([]) }}"
  when: nagios_plugins_umd is defined
......@@ -23,16 +33,91 @@
template: src=nagios.cfg.j2
dest=/etc/nagios/nagios.cfg
owner=nagios group=nagios mode=0644
notify: reload nagios
# Renders the CGI configuration and reloads Nagios when the file changes.
- name: Copy cgi.cfg file
  template:
    src: cgi.cfg.j2
    dest: /etc/nagios/cgi.cfg
    owner: nagios
    group: nagios
    mode: "0644"  # quoted so the octal mode is not read as an integer
  notify: reload nagios
#FIXME: Make this tenant unaware
# Renders the EGI flavour of ncg.conf; runs only when tenant_name is "EGI".
# A leftover name-only list entry ("Copy ncg.conf file") that had no module
# attached has been dropped: a task without an action is not valid.
- name: Copy ncg.conf file for EGI
  template:
    src: ncg.conf.j2
    dest: /etc/argo-ncg/ncg.conf
    owner: root
    group: root
  when: tenant_name == "EGI"
  notify: reload nagios
# Renders the EUDAT flavour of ncg.conf; runs only when tenant_name is "EUDAT".
# NOTE(review): this writes to /etc/ncg/ncg.conf while the EGI task writes to
# /etc/argo-ncg/ncg.conf — confirm the different directory is intended.
- name: Copy ncg.conf file for EUDAT
  template:
    src: ncg.conf.eudat.j2
    dest: /etc/ncg/ncg.conf
    owner: root
    group: root
  when: tenant_name == "EUDAT"
  notify: reload nagios
# Rendered only when voms_htpasswd is defined; the template iterates over it.
- name: Copy argo-voms-htpasswd.conf file
  template:
    src: argo-voms-htpasswd.conf.j2
    dest: /etc/argo-voms-htpasswd/argo-voms-htpasswd.conf
    owner: root
    group: root
  when: voms_htpasswd is defined
#FIXME: Temp workaround for tenants without BDII infrastructure.
# Writes the broker list directly into the broker cache location when a
# broker_host has been configured.
- name: Copy broker-list file
  template:
    src: broker-list.j2
    dest: /var/cache/msg/broker-cache-file/broker-list
    owner: root
    group: root
    mode: "0644"
  when: broker_host is defined
##
# The template reads both lcg_gfal_infosys and broker_network, so the file is
# rendered only when both are defined.
- name: Copy argo-msg-cache file
  template:
    src: argo-msg-cache.conf.j2
    dest: /etc/argo-msg-cache.conf
    owner: root
    group: root
  when: lcg_gfal_infosys is defined and broker_network is defined
# Creates /etc/vomses when any vomses entries are configured.
# The module line used to start with its own "-", which split this into a
# name-only entry plus an unnamed task; name and module now form one task.
- name: Create vomses dir
  file:
    path: /etc/vomses
    state: directory
    owner: root
    group: root
    mode: "0755"
  when: vomses is defined
# Renders one vomses file per entry, named after the entry's `name` field.
# The loop source is an explicit template with an empty-list fallback, so it
# still resolves when vomses is undefined and the `when` guard skips the task.
- name: Copy vomses files
  template:
    src: vomses.j2
    dest: "/etc/vomses/{{ item.name }}"
    owner: root
    group: root
    mode: "0644"
  when: vomses is defined
  with_items: "{{ vomses | default([]) }}"
# Creates /etc/grid-security/vomsdir/<vo> for every vomses entry.
# The stray "-" before the module line is removed so that name, module,
# condition and loop belong to a single task; the loop source falls back to
# an empty list when vomses is undefined.
- name: Create voms lsc dir
  file:
    path: "/etc/grid-security/vomsdir/{{ item.vo }}"
    state: directory
    owner: root
    group: root
    mode: "0755"
  when: vomses is defined
  with_items: "{{ vomses | default([]) }}"
# Renders <server>.lsc inside the VO's vomsdir for every vomses entry.
# The stray "-" before the module line is removed so this is one task; the
# loop source falls back to an empty list when vomses is undefined.
- name: Create voms lsc files
  template:
    src: voms_lsc.j2
    dest: "/etc/grid-security/vomsdir/{{ item.vo }}/{{ item.server }}.lsc"
    owner: root
    group: root
    mode: "0644"
  when: vomses is defined
  with_items: "{{ vomses | default([]) }}"
# Computes the SHA-1 hex digest of nagios_server (no trailing newline) and
# registers it as `sha`; the dashboard handler template reads sha.stdout.
# The value is passed through the `quote` filter so shell metacharacters in
# nagios_server cannot alter the command, and changed_when is false because
# the command only computes a value and modifies nothing on the host.
- name: Create sha checksum for dashboard config
  shell: echo -n {{ nagios_server | quote }} | sha1sum | cut -f1 -d' '
  register: sha
  changed_when: false
# Handler configs for msg-to-handler. Each is rendered only when its switch
# variable is set to the string "1"; an undefined switch counts as off.
- name: Create dashboard config for msg-to-handler
  template:
    src: dashboard.conf.j2
    dest: /etc/msg-to-handler.d/DASHBOARD.conf
    owner: root
    group: root
    mode: "0644"
  when: send_to_dashboard | default("0") == "1"

- name: Create apel config for msg-to-handler
  template:
    src: apel.conf.j2
    dest: /etc/msg-to-handler.d/APEL.conf
    owner: root
    group: root
    mode: "0644"
  when: send_to_apel | default("0") == "1"
- name: Enable nagios and httpd on boot and start service
service: name={{ item }} state=started enabled=yes
......@@ -40,3 +125,19 @@
- nagios
- httpd
# Both fetch-crl services are handled only when ca_bundle_install is truthy.
- name: Start fetch-crl cron
  service:
    name: fetch-crl-cron
    state: started
    enabled: true
  when: ca_bundle_install

# Started once but left disabled at boot; a failure here does not stop the play.
- name: Start fetch-crl boot
  service:
    name: fetch-crl-boot
    state: started
    enabled: false
  when: ca_bundle_install
  ignore_errors: true
# Starts argo-voms-htpasswd and enables it at boot when voms_htpasswd is set.
- name: Enable argo-voms-htpasswd on boot and start service
  service:
    name: argo-voms-htpasswd
    state: started
    enabled: true
  when: voms_htpasswd is defined

# NOTE(review): the task that renders /etc/argo-msg-cache.conf also requires
# broker_network to be defined, while this one checks only lcg_gfal_infosys —
# confirm the difference is intended.
- name: Enable argo-msg-cache on boot and start service
  service:
    name: argo-msg-cache
    state: started
    enabled: true
  when: lcg_gfal_infosys is defined
\ No newline at end of file
<handler APEL>
class = GridMon::MsgHandler::MetricOutput
<instance>
SOURCE = local
CACHE_DIR = /var/spool/argo-msg-nagios/incoming
</instance>
<subscribe>
destination = "/queue/Consumer.{{ nagios_role }}_{{ nagios_server|replace(".","_") }}.grid.accounting.test.apel.*"
</subscribe>
</handler>
# Configuration file for argo-msg-cache.
# Maintaining the Broker Cache File requires the end point of the information
# system.
# We support a ',' delimited list as lcg-utils does.
# LCG_GFAL_INFOSYS=bdii.example.org:2170
LCG_GFAL_INFOSYS={{ lcg_gfal_infosys | join(',') }}
# BROKER_NETWORK=PROD
BROKER_NETWORK={{ broker_network }}
\ No newline at end of file
{% for url in voms_htpasswd %}
{{ url }}
{% endfor %}
\ No newline at end of file
{% if broker_host is defined %}
stomp://{{ broker_host }}:6163
{% endif %}
\ No newline at end of file
stomp://{{ broker_host }}:6163
\ No newline at end of file
#################################################################
#
# CGI.CFG - Sample CGI Configuration File for Nagios 4.0.8
#
#
#################################################################
# MAIN CONFIGURATION FILE
# This tells the CGIs where to find your main configuration file.
# The CGIs will read the main and host config files for any other
# data they might need.
main_config_file=/etc/nagios/nagios.cfg
# PHYSICAL HTML PATH
# This is the path where the HTML files for Nagios reside. This
# value is used to locate the logo images needed by the statusmap
# and statuswrl CGIs.
physical_html_path=/usr/share/nagios
# URL HTML PATH
# This is the path portion of the URL that corresponds to the
# physical location of the Nagios HTML files (as defined above).
# This value is used by the CGIs to locate the online documentation
# and graphics. If you access the Nagios pages with an URL like
# http://www.myhost.com/nagios, this value should be '/nagios'
# (without the quotes).
url_html_path=/nagios
# CONTEXT-SENSITIVE HELP
# This option determines whether or not a context-sensitive
# help icon will be displayed for most of the CGIs.
# Values: 0 = disables context-sensitive help
# 1 = enables context-sensitive help
show_context_help=0
# PENDING STATES OPTION
# This option determines what states should be displayed in the web
# interface for hosts/services that have not yet been checked.
# Values: 0 = leave hosts/services that have not been checked yet in their original state
# 1 = mark hosts/services that have not been checked yet as PENDING
use_pending_states=1
# AUTHENTICATION USAGE
# This option controls whether or not the CGIs will use any
# authentication when displaying host and service information, as
# well as committing commands to Nagios for processing.
#
# Read the HTML documentation to learn how the authorization works!
#
# NOTE: It is a really *bad* idea to disable authorization, unless
# you plan on removing the command CGI (cmd.cgi)! Failure to do
# so will leave you wide open to kiddies messing with Nagios and
# possibly hitting you with a denial of service attack by filling up
# your drive by continuously writing to your command file!
#
# Setting this value to 0 will cause the CGIs to *not* use
# authentication (bad idea), while any other value will make them
# use the authentication functions (the default).
use_authentication=1
# x509 CERT AUTHENTICATION
# When enabled, this option allows you to use x509 cert (SSL)
# authentication in the CGIs. This is an advanced option and should
# not be enabled unless you know what you're doing.
use_ssl_authentication=0
# DEFAULT USER
# Setting this variable will define a default user name that can
# access pages without authentication. This allows people within a
# secure domain (i.e., behind a firewall) to see the current status
# without authenticating. You may want to use this to avoid basic
# authentication if you are not using a secure server since basic
# authentication transmits passwords in the clear.
#
# Important: Do not define a default username unless you are
# running a secure web server and are sure that everyone who has
# access to the CGIs has been authenticated in some manner! If you
# define this variable, anyone who has not authenticated to the web
# server will inherit all rights you assign to this user!
#default_user_name=guest
# SYSTEM/PROCESS INFORMATION ACCESS
# This option is a comma-delimited list of all usernames that
# have access to viewing the Nagios process information as
# provided by the Extended Information CGI (extinfo.cgi). By
# default, *no one* has access to this unless you choose to
# not use authorization. You may use an asterisk (*) to
# authorize any user who has authenticated to the web server.
authorized_for_system_information=nagiosadmin
# CONFIGURATION INFORMATION ACCESS
# This option is a comma-delimited list of all usernames that
# can view ALL configuration information (hosts, commands, etc).
# By default, users can only view configuration information
# for the hosts and services they are contacts for. You may use
# an asterisk (*) to authorize any user who has authenticated
# to the web server.
authorized_for_configuration_information=nagiosadmin
# SYSTEM/PROCESS COMMAND ACCESS
# This option is a comma-delimited list of all usernames that
# can issue shutdown and restart commands to Nagios via the
# command CGI (cmd.cgi). Users in this list can also change
# the program mode to active or standby. By default, *no one*
# has access to this unless you choose to not use authorization.
# You may use an asterisk (*) to authorize any user who has
# authenticated to the web server.
authorized_for_system_commands=nagiosadmin
# GLOBAL HOST/SERVICE VIEW ACCESS
# These two options are comma-delimited lists of all usernames that
# can view information for all hosts and services that are being
# monitored. By default, users can only view information
# for hosts or services that they are contacts for (unless you
# choose to not use authorization). You may use an asterisk (*)
# to authorize any user who has authenticated to the web server.
authorized_for_all_services=*
authorized_for_all_hosts=*
# GLOBAL HOST/SERVICE COMMAND ACCESS
# These two options are comma-delimited lists of all usernames that
# can issue host or service related commands via the command
# CGI (cmd.cgi) for all hosts and services that are being monitored.
# By default, users can only issue commands for hosts or services
# that they are contacts for (unless you choose to not use
# authorization). You may use an asterisk (*) to authorize any
# user who has authenticated to the web server.
authorized_for_all_service_commands={{ authorized_for_all_service_commands | join(',') }}
authorized_for_all_host_commands={{ authorized_for_all_host_commands | join(',') }}
# READ-ONLY USERS
# A comma-delimited list of usernames that have read-only rights in
# the CGIs. This will block any service or host commands normally shown
# on the extinfo CGI pages. It will also block comments from being shown
# to read-only users.
#authorized_for_read_only=user1,user2
# STATUSMAP BACKGROUND IMAGE
# This option allows you to specify an image to be used as a
# background in the statusmap CGI. It is assumed that the image
# resides in the HTML images path (i.e. /usr/local/nagios/share/images).
# This path is automatically determined by appending "/images"
# to the path specified by the 'physical_html_path' directive.
# Note: The image file may be in GIF, PNG, JPEG, or GD2 format.
# However, I recommend that you convert your image to GD2 format
# (uncompressed), as this will cause less CPU load when the CGI
# generates the image.
#statusmap_background_image=smbackground.gd2
# STATUSMAP TRANSPARENCY INDEX COLOR
# These options set the r,g,b values of the background color used by the statusmap CGI,
# so normal browsers that can't show real png transparency set the desired color as
# a background color instead (to make it look pretty).
# Defaults to white: (R,G,B) = (255,255,255).
#color_transparency_index_r=255
#color_transparency_index_g=255
#color_transparency_index_b=255
# DEFAULT STATUSMAP LAYOUT METHOD
# This option allows you to specify the default layout method
# the statusmap CGI should use for drawing hosts. If you do
# not use this option, the default is to use user-defined
# coordinates. Valid options are as follows:
# 0 = User-defined coordinates
# 1 = Depth layers
# 2 = Collapsed tree
# 3 = Balanced tree
# 4 = Circular
# 5 = Circular (Marked Up)
default_statusmap_layout=5
# DEFAULT STATUSWRL LAYOUT METHOD
# This option allows you to specify the default layout method
# the statuswrl (VRML) CGI should use for drawing hosts. If you
# do not use this option, the default is to use user-defined
# coordinates. Valid options are as follows:
# 0 = User-defined coordinates
# 2 = Collapsed tree
# 3 = Balanced tree
# 4 = Circular
default_statuswrl_layout=4
# STATUSWRL INCLUDE
# This option allows you to include your own objects in the
# generated VRML world. It is assumed that the file
# resides in the HTML path (i.e. /usr/local/nagios/share).
#statuswrl_include=myworld.wrl
# PING SYNTAX
# This option determines what syntax should be used when
# attempting to ping a host from the WAP interface (using
# the statuswml CGI). You must include the full path to
# the ping binary, along with all required options. The
# $HOSTADDRESS$ macro is substituted with the address of
# the host before the command is executed.
# Please note that the syntax for the ping binary is
# notorious for being different on virtually every *NIX
# OS and distribution, so you may have to tweak this to
# work on your system.
ping_syntax=/bin/ping -n -U -c 5 $HOSTADDRESS$
# REFRESH RATE
# This option allows you to specify the refresh rate in seconds
# of various CGIs (status, statusmap, extinfo, and outages).
refresh_rate=90
# DEFAULT PAGE LIMIT
# This option allows you to specify the default number of results
# displayed on the status.cgi. This number can be adjusted from
# within the UI after the initial page load. Setting this to 0
# will show all results.
result_limit=100
# ESCAPE HTML TAGS
# This option determines whether HTML tags in host and service
# status output is escaped in the web interface. If enabled,
# your plugin output will not be able to contain clickable links.
escape_html_tags=1
# SOUND OPTIONS
# These options allow you to specify an optional audio file
# that should be played in your browser window when there are
# problems on the network. The audio files are used only in
# the status CGI. Only the sound for the most critical problem
# will be played. Order of importance (higher to lower) is as
# follows: unreachable hosts, down hosts, critical services,
# warning services, and unknown services. If there are no
# visible problems, the sound file optionally specified by
# 'normal_sound' variable will be played.
#
#
# <varname>=<sound_file>
#
# Note: All audio files must be placed in the /media subdirectory
# under the HTML path (i.e. /usr/local/nagios/share/media/).
#host_unreachable_sound=hostdown.wav
#host_down_sound=hostdown.wav
#service_critical_sound=critical.wav
#service_warning_sound=warning.wav
#service_unknown_sound=warning.wav
#normal_sound=noproblem.wav
# URL TARGET FRAMES
# These options determine the target frames in which notes and
# action URLs will open.
action_url_target=_blank
notes_url_target=_blank
# LOCK AUTHOR NAMES OPTION
# This option determines whether users can change the author name
# when submitting comments, scheduling downtime. If disabled, the
# author names will be locked into their contact name, as defined in Nagios.
# Values: 0 = allow editing author names
# 1 = lock author names (disallow editing)
lock_author_names=1
# SPLUNK INTEGRATION OPTIONS
# These options allow you to enable integration with Splunk
# in the web interface. If enabled, you'll be presented with
# "Splunk It" links in various places in the CGIs (log file,
# alert history, host/service detail, etc). Useful if you're
# trying to research why a particular problem occurred.
# For more information on Splunk, visit http://www.splunk.com/
# This option determines whether the Splunk integration is enabled
# Values: 0 = disable Splunk integration
# 1 = enable Splunk integration
#enable_splunk_integration=1
# This option should be the URL used to access your instance of Splunk
#splunk_url=http://127.0.0.1:8000/
# NAVIGATION BAR SEARCH OPTIONS
# The following options allow to configure the navbar search. Default
# is to search for hostnames. With enabled navbar_search_for_addresses,
# the navbar search queries IP addresses as well. It's also possible
# to enable search for aliases by setting navbar_search_for_aliases=1.
navbar_search_for_addresses=1
navbar_search_for_aliases=1
<handler Dashboard>
class = GridMon::MsgHandler::DashboardInput
<subscribe>
destination = /topic/nagios.probe.notification.ack.{{ sha.stdout }}
</subscribe>
</handler>
......@@ -846,8 +846,10 @@ process_performance_data={{ process_performance_data }}
#host_perfdata_file_template=[HOSTPERFDATA]\t$TIMET$\t$HOSTNAME$\t$HOSTEXECUTIONTIME$\t$HOSTOUTPUT$\t$HOSTPERFDATA$
#service_perfdata_file_template=[SERVICEPERFDATA]\t$TIMET$\t$HOSTNAME$\t$SERVICEDESC$\t$SERVICEEXECUTIONTIME$\t$SERVICELATENCY$\t$SERVICEOUTPUT$\t$SERVICEPERFDATA$
host_perfdata_file_template={{ host_perfdata_file_template }}
service_perfdata_file_template={{ service_perfdata_file_template }}
host_perfdata_file_template=DATATYPE::HOSTPERFDATA\tTIMET::$TIMET$\tHOSTNAME::$HOSTNAME$\tHOSTPERFDATA::$HOSTPERFDATA$\tHOSTCHECKCOMMAND::$HOSTCHECKCOMMAND$\tHOSTSTATE::$HOSTSTATE$\tHOSTSTATETYPE::$HOSTSTATETYPE$
service_perfdata_file_template=DATATYPE::SERVICEPERFDATA\tTIMET::$TIMET$\tHOSTNAME::$HOSTNAME$\tSERVICEDESC::$SERVICEDESC$\tSERVICEPERFDATA::$SERVICEPERFDATA$\tSERVICECHECKCOMMAND::$SERVICECHECKCOMMAND$\tHOSTSTATE::$HOSTSTATE$\tHOSTSTATETYPE::$HOSTSTATETYPE$\tSERVICESTATE::$SERVICESTATE$\tSERVICESTATETYPE::$SERVICESTATETYPE$