Deploying Prometheus + Grafana on a single machine with docker-compose
Deploying Prometheus + Grafana on a single machine with docker-compose
Collect node status through node-exporter and process-exporter into the Prometheus TSDB, and report it in a Grafana dashboard.
- Set up the host and install the related components
# Update the system and install the Docker components.
yum update -y
yum install -y yum-utils
yum-config-manager --add-repo https://download.docker.com/linux/centos/docker-ce.repo
yum install -y docker-ce docker-ce-cli containerd.io

# Start Docker now and on every boot.
systemctl enable docker
systemctl start docker

# Create the host directories that the compose file bind-mounts into the containers.
mkdir -p /data/monitor/{alertmanager,prometheus/{config,data},consul/data,grafana/data}

# Hand the tree to UID/GID 1000 (the original text said `chmown`, which is not a command).
# NOTE(review): the container images may run under other UIDs — confirm each
# service can write to its data directory.
chown -R 1000:1000 /data/monitor

# World-writable so the Grafana container can write regardless of its UID.
chmod -R 777 /data/monitor/grafana
- Download the monitoring-related Docker images
# Pre-fetch the `latest` tag of every image used by the monitoring stack.
# NOTE(review): confirm that `consul:latest` is still published on Docker Hub.
for image in prom/prometheus grafana/grafana prom/alertmanager prom/pushgateway consul; do
  docker pull "${image}:latest"
done
- Prepare the docker-compose.yml service configuration file
# docker-compose.yml — single-host monitoring stack:
# Consul (service discovery), Prometheus, Alertmanager, Grafana and Pushgateway.
# Every service joins the `monitor-net` bridge network.
version: '2.1'

networks:
  monitor-net:
    driver: bridge

services:
  # Consul agent; Prometheus queries it at consul:8500 for scrape targets.
  # NOTE(review): `-dev` is Consul's development mode — confirm whether the
  # data volume below is actually used in that mode.
  consul:
    image: consul:latest
    container_name: consul
    command: agent -dev -bind=0.0.0.0 -client=0.0.0.0
    restart: unless-stopped
    volumes:
      - /data/monitor/consul/data:/consul/data
    ports:
      - '8500:8500'
    networks:
      - monitor-net
    labels:
      org.label-schema.group: "monitoring"

  # Prometheus server; config and TSDB data live on the host under /data/monitor/prometheus.
  prometheus:
    image: prom/prometheus:latest
    container_name: prometheus
    volumes:
      - /data/monitor/prometheus/config:/etc/prometheus
      - /data/monitor/prometheus/data:/prometheus:rw
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/etc/prometheus/console_libraries'
      - '--web.console.templates=/etc/prometheus/consoles'
      - '--storage.tsdb.retention.time=200h'
      - '--web.enable-lifecycle'
    restart: unless-stopped
    links:
      - 'consul:consul'
    ports:
      - '9090:9090'
    networks:
      - monitor-net
    labels:
      org.label-schema.group: "monitoring"

  # Alertmanager; reads /etc/alertmanager/config.yml from the host directory.
  alertmanager:
    image: prom/alertmanager:latest
    container_name: alertmanager
    volumes:
      - /data/monitor/alertmanager:/etc/alertmanager
    command:
      - '--config.file=/etc/alertmanager/config.yml'
      - '--storage.path=/alertmanager'
    restart: unless-stopped
    ports:
      - '9093:9093'
    networks:
      - monitor-net
    labels:
      org.label-schema.group: "monitoring"

  # Grafana; admin credentials come from the ADMIN_USER / ADMIN_PASSWORD
  # variables in the environment of the `docker-compose` command.
  grafana:
    image: grafana/grafana:latest
    container_name: grafana
    volumes:
      - /data/monitor/grafana/data:/var/lib/grafana:rw
      - /data/monitor/grafana/provisioning:/etc/grafana/provisioning
    environment:
      - GF_SECURITY_ADMIN_USER=${ADMIN_USER}
      - GF_SECURITY_ADMIN_PASSWORD=${ADMIN_PASSWORD}
      - GF_USERS_ALLOW_SIGN_UP=false
    restart: unless-stopped
    ports:
      - '3000:3000'
    networks:
      - monitor-net
    labels:
      org.label-schema.group: "monitoring"

  # Pushgateway; receives pushed metrics.
  pushgateway:
    image: prom/pushgateway:latest
    container_name: pushgateway
    restart: unless-stopped
    ports:
      # host:container — was '9091:0991', a container port nothing listens on;
      # Pushgateway's default listen port is 9091.
      - '9091:9091'
    networks:
      - monitor-net
    labels:
      org.label-schema.group: "monitoring"
- Configure Prometheus to scrape the metrics
The configuration directory is /data/monitor/prometheus/config.
It contains the prometheus.yml configuration file
and the node_down.yml
alert rules file.
# prometheus.yml — global timing, Alertmanager endpoint, rule files and scrape jobs.
global:
  # Scrape targets every 15 seconds (the default is every 1 minute).
  scrape_interval: 15s
  # Evaluate rules every 15 seconds (the default is every 1 minute).
  evaluation_interval: 15s
  # scrape_timeout is left at the global default (10s).

# Where fired alerts are sent.
alerting:
  alertmanagers:
    - scheme: http
      static_configs:
        - targets:
            - "alertmanager:9093"

# Rule files are loaded once and evaluated every `evaluation_interval`.
rule_files:
  - 'node_down.yml'
  # - "first_rules.yml"
  # - "second_rules.yml"

scrape_configs:
  # Prometheus scraping itself. The job name is attached to every scraped
  # series as the label `job=<job_name>`.
  - job_name: "prometheus"
    scrape_interval: 10s
    static_configs:
      - targets:
          - "localhost:9090"

  # Statically configured node exporter.
  # NOTE(review): the compose file in this guide defines no `node_exporter`
  # service — confirm this hostname resolves from the Prometheus container.
  - job_name: "node"
    scrape_interval: 5s
    static_configs:
      - targets:
          - "node_exporter:9100"

  # Discover targets through Consul and keep only the services whose
  # Consul tags contain "exporter"; everything else is dropped.
  - job_name: "consul-prometheus"
    consul_sd_configs:
      - server: "consul:8500"
        services: []
    relabel_configs:
      - source_labels:
          - __meta_consul_tags
        regex: ".*exporter.*"
        action: keep
The config/node_down.yml alert rules file:
# Alert rules: raise InstanceDown once a target's `up` series has been 0 for one minute.
groups:
  - name: node_down
    rules:
      - alert: InstanceDown
        expr: up == 0
        for: 1m
        annotations:
          summary: 'Instance {{ $labels.instance }} down'
          description: '{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minutes.'
        labels:
          user: test
- The /data/monitor/alertmanager/config.yml file looks like the following:
# Alertmanager configuration: SMTP settings, one catch-all route, one e-mail receiver.
global:
  smtp_from: "[email protected]"
  smtp_smarthost: "smtp.xxxx.com:25"
  smtp_auth_username: "[email protected]"
  smtp_auth_password: "TPP***"
  # NOTE(review): TLS is not required here while SMTP credentials are
  # configured — confirm this is acceptable for the mail relay in use.
  smtp_require_tls: false

# Every alert goes to the single receiver, grouped by alert name.
route:
  receiver: "live-monitoring"
  group_by:
    - "alertname"
  group_wait: 10s
  group_interval: 10s
  repeat_interval: 10m

receivers:
  - name: "live-monitoring"
    email_configs:
      - to: "[email protected]"
- Define the launch script, which supplies the default Grafana credentials
#!/usr/bin/env bash

# Bring the stack up in the background. The Grafana admin credentials are
# passed through docker-compose's environment, where the compose file reads
# them as ${ADMIN_USER} and ${ADMIN_PASSWORD}.
ADMIN_USER=test ADMIN_PASSWORD='xxxxxxx' docker-compose up -d
- Configure the exporters to expose their metrics and register them with Consul
# The service config file `node_exporter.service`:
# runs the node_exporter binary as a systemd service.
[Unit]
Description=node_exporter
Documentation=https://prometheus.io/
After=network.target

[Service]
Type=simple
ExecStart=/usr/local/node_exporter-1.2.2.linux-amd64/node_exporter
Restart=on-failure

[Install]
# Was `mulser.target`, which is not a systemd target; multi-user.target is
# what `systemctl enable` needs to start the service at boot.
WantedBy=multi-user.target
# Register the exporter endpoints with Consul. Both services carry the
# "exporter" tag and an HTTP health check against /metrics every 5 seconds.
curl -X PUT -d '{"id": "node-exporter","name": "node-exporter-test","address": "test.node-exporter.com","port": 9100,"tags": ["exporter","node"],"checks": [{"http": "http://test.node-exporter.com:9100/metrics", "interval": "5s"}]}' http://test.consul.com:8500/v1/agent/service/register
# The URL below originally read `http:///test.consul.com` (three slashes).
curl -X PUT -d '{"id": "process-exporter","name": "process-exporter-test","address": "test.process-exporter.com","port": 9256,"tags": ["exporter","process"],"checks": [{"http": "http://test.process-exporter.com:9256/metrics", "interval": "5s"}]}' http://test.consul.com:8500/v1/agent/service/register