# node exporter 安裝

```
mkdir -p /mnt/storage/node_exporter
vi /mnt/storage/node_exporter/node_exporter_ctl
vi /mnt/storage/node_exporter/utils.sh
chmod 755 /mnt/storage/node_exporter/node_exporter_ctl
chmod 755 /mnt/storage/node_exporter/utils.sh
tar -zxvf node_exporter-0.17.0.linux-amd64.tar.gz
cp node_exporter-0.17.0.linux-amd64/node_exporter /mnt/storage/node_exporter/
vi /etc/systemd/system/nodeexporter.service
# Make systemd re-read its unit files (repeat after every edit of the unit).
systemctl daemon-reload
service nodeexporter status
service nodeexporter start
systemctl enable nodeexporter
```

node_exporter_ctl

```
#!/usr/bin/env bash
#
# Control script for node_exporter.
#
# Usage: node_exporter_ctl {start|stop}
#
#   start  Refuse to start if the pidfile points at a live process, record
#          this shell's PID, then exec node_exporter in its place.
#   stop   Terminate the process recorded in the pidfile (see kill_and_wait
#          in utils.sh).
#
# The script's own stdout/stderr and node_exporter's stdout/stderr are
# appended to separate files under LOG_DIR.

set -eu

RUN_DIR=/var/run/node_exporter
LOG_DIR=/var/log/node_exporter
TMP_DIR=/var/tmp/node_exporter
STORE_DIR=/mnt/storage/node_exporter
mkdir -p "${RUN_DIR}" "${LOG_DIR}" "${TMP_DIR}" "${STORE_DIR}"

PIDFILE=${RUN_DIR}/node_exporter.pid

source /mnt/storage/node_exporter/utils.sh

# From here on everything this script prints goes to its own log files.
exec 1>> "${LOG_DIR}/$(basename "$0").stdout.log"
exec 2>> "${LOG_DIR}/$(basename "$0").stderr.log"

export PATH=/mnt/storage/node_exporter:${PATH}

# "${1:-}" keeps `set -u` from aborting before the usage message is printed
# when the script is called without an argument.
case "${1:-}" in
  start)
    pid_guard "${PIDFILE}" "node_exporter"

    # exec replaces this shell, so node_exporter keeps the PID written here.
    echo $$ > "${PIDFILE}"

    # The supervisord URL only matters if the supervisord collector is
    # enabled; it is disabled further down.
    exec node_exporter \
      --collector.supervisord.url="http://localhost:9100/RPC2" \
      --collector.arp \
      --collector.bcache \
      --collector.bonding \
      --no-collector.buddyinfo \
      --collector.conntrack \
      --collector.cpu \
      --collector.diskstats \
      --no-collector.drbd \
      --collector.edac \
      --collector.entropy \
      --collector.filefd \
      --collector.filesystem \
      --collector.hwmon \
      --collector.infiniband \
      --no-collector.interrupts \
      --collector.ipvs \
      --no-collector.ksmd \
      --collector.loadavg \
      --no-collector.logind \
      --collector.mdadm \
      --collector.meminfo \
      --no-collector.meminfo_numa \
      --no-collector.mountstats \
      --collector.netdev \
      --collector.netclass \
      --collector.netstat \
      --collector.nfs \
      --collector.nfsd \
      --no-collector.ntp \
      --no-collector.processes \
      --no-collector.qdisc \
      --no-collector.runit \
      --collector.sockstat \
      --collector.stat \
      --no-collector.supervisord \
      --no-collector.systemd \
      --no-collector.tcpstat \
      --collector.textfile \
      --collector.time \
      --collector.timex \
      --collector.uname \
      --collector.vmstat \
      --collector.wifi \
      --collector.xfs \
      --collector.zfs \
      --web.listen-address=":9100" \
      >> "${LOG_DIR}/node_exporter.stdout.log" \
      2>> "${LOG_DIR}/node_exporter.stderr.log"
    ;;

  stop)
    kill_and_wait "${PIDFILE}"
    ;;

  *)
    echo "Usage: $0 {start|stop}"
    exit 1
    ;;
esac

exit 0
```

utils.sh

```
#!/usr/bin/env bash
#
# Helper functions sourced by node_exporter_ctl.

# pid_is_running
#
# @param pid
#
# Return 0 if `ps -p` finds a process with :pid:, non-zero otherwise.
#
function pid_is_running() {
  declare pid="$1"
  ps -p "${pid}" >/dev/null 2>&1
}

# pid_guard
#
# @param pidfile
# @param name [String] an arbitrary name that might show up in STDOUT on errors
#
# Run this before attempting to start new processes that may use the same :pidfile:.
# If an old process is running on the pid found in the :pidfile:, exit 1. Otherwise,
# remove the stale :pidfile: if it exists.
#
function pid_guard() {
  declare pidfile="$1" name="$2"

  # Write the banner to stdout and stderr separately. `tee /dev/stderr` would
  # reopen the stderr log without append mode and truncate it on every start.
  local banner
  banner="------------ STARTING $(basename "$0") at $(date) --------------"
  echo "${banner}"
  echo "${banner}" >&2

  if [ ! -f "${pidfile}" ]; then
    return 0
  fi

  local pid
  pid=$(head -1 "${pidfile}")

  if pid_is_running "${pid}"; then
    echo "${name} is already running, please stop it first"
    exit 1
  fi

  echo "Removing stale pidfile"
  rm "${pidfile}"
}

# wait_pid_death
#
# @param pid
# @param timeout
#
# Watch a :pid: for :timeout: seconds, polling every 0.1s, waiting for it to die.
# If it dies before :timeout:, return 0. If not, return 1.
#
function wait_pid_death() {
  declare pid="$1" timeout="$2"

  # One countdown tick per 0.1s poll.
  local countdown
  countdown=$(( timeout * 10 ))

  while true; do
    if ! pid_is_running "${pid}"; then
      return 0
    fi

    if [ "${countdown}" -le 0 ]; then
      return 1
    fi

    countdown=$(( countdown - 1 ))
    sleep 0.1
  done
}

# kill_and_wait
#
# @param pidfile
# @param timeout [default 25s]
# @param sigkill_on_timeout [default 1] set to anything other than 1 to skip the `kill -9`
#
# For a pid found in :pidfile:, send a `kill -15` TERM, then wait for :timeout: seconds to
# see if it dies on its own. If not, send it a `kill -9`. If the process does die,
# exit 0 and remove the :pidfile:. If after all of this, the process does not actually
# die, exit 1.
#
# Note:
# Monit default timeout for start/stop is 30s
# Append 'with timeout {n} seconds' to monit start/stop program configs
#
function kill_and_wait() {
  declare pidfile="$1" timeout="${2:-25}" sigkill_on_timeout="${3:-1}"

  if [ ! -f "${pidfile}" ]; then
    echo "Pidfile ${pidfile} doesn't exist"
    exit 0
  fi

  local pid
  pid=$(head -1 "${pidfile}")

  if [ -z "${pid}" ]; then
    echo "Unable to get pid from ${pidfile}"
    exit 1
  fi

  if ! pid_is_running "${pid}"; then
    echo "Process ${pid} is not running"
    rm -f "${pidfile}"
    exit 0
  fi

  echo "Killing ${pidfile}: ${pid} "
  # The process may exit between the check above and this signal. Ignore the
  # failure so a caller running under `set -e` still reaches the cleanup below.
  kill "${pid}" || true

  if ! wait_pid_death "${pid}" "${timeout}"; then
    if [ "${sigkill_on_timeout}" = "1" ]; then
      echo "Kill timed out, using kill -9 on ${pid}"
      kill -9 "${pid}" || true
      sleep 0.5
    fi
  fi

  if pid_is_running "${pid}"; then
    echo "Timed Out"
    exit 1
  else
    echo "Stopped"
    rm -f "${pidfile}"
  fi
}

# running_in_container
#
# Return 0 if /proc/self/cgroup contains at least one line that does not end
# in ':/' (i.e. a non-root cgroup), non-zero otherwise.
#
running_in_container() {
  # look for a non-root cgroup
  grep --quiet --invert-match ':/$' /proc/self/cgroup
}
```

nodeexporter.service

```
[Unit]
Description=node exporter
After=network.target
# Documentation= only accepts URIs (http://, https://, file:, info:, man:).
Documentation=https://github.com/prometheus/node_exporter

[Service]
Type=simple
ExecStart=/mnt/storage/node_exporter/node_exporter_ctl start
ExecStop=/mnt/storage/node_exporter/node_exporter_ctl stop
PIDFile=/var/run/node_exporter/node_exporter.pid
# 0 disables systemd's own stop timeout; node_exporter_ctl stop applies its
# own (25s by default, then SIGKILL).
TimeoutStopSec=0

[Install]
WantedBy=multi-user.target
Alias=node_exporter.service
```