docker-compose-monitor.yml

# Monitoring stack: InfluxDB + Telegraf, Prometheus + Alertmanager,
# Grafana, node-exporter and cAdvisor.
version: '2'

networks:
  monitor:
    driver: bridge

services:
  # NOTE(review): influxdb and telegraf declare no `networks`, so they are
  # not attached to `monitor` like the other services - confirm intended.
  influxdb:
    image: influxdb:latest
    container_name: tig-influxdb
    ports:
      - '18083:8083'
      - '18086:8086'
      - '18090:8090'
    env_file:
      - 'env.influxdb'
    volumes:
      # Data persistence; create the host directory first:
      #   sudo mkdir -p ./influxdb/data
      - ./influxdb/data:/var/lib/influxdb
      # Mount timezone files so the container clock uses the East-8 (UTC+8)
      # time zone.
      - ./timezone:/etc/timezone:ro
      - ./localtime:/etc/localtime:ro
    # Restart automatically unless the container was explicitly stopped.
    restart: unless-stopped

  telegraf:
    image: telegraf:latest
    container_name: tig-telegraf
    links:
      - influxdb
    volumes:
      - ./telegraf.conf:/etc/telegraf/telegraf.conf:ro
      - ./timezone:/etc/timezone:ro
      - ./localtime:/etc/localtime:ro
    restart: unless-stopped

  prometheus:
    image: prom/prometheus
    container_name: prometheus
    hostname: prometheus
    restart: always
    volumes:
      # Main configuration and the alert rule file it references.
      - /home/qa/docker/grafana/prometheus.yml:/etc/prometheus/prometheus.yml
      - /home/qa/docker/grafana/node_down.yml:/etc/prometheus/node_down.yml
    ports:
      - '9090:9090'
    networks:
      - monitor

  alertmanager:
    image: prom/alertmanager
    container_name: alertmanager
    hostname: alertmanager
    restart: always
    volumes:
      - /home/qa/docker/grafana/alertmanager.yml:/etc/alertmanager/alertmanager.yml
    ports:
      - '9093:9093'
    networks:
      - monitor

  grafana:
    image: grafana/grafana:6.7.4
    container_name: grafana
    hostname: grafana
    restart: always
    ports:
      - '13000:3000'
    networks:
      - monitor

  node-exporter:
    image: quay.io/prometheus/node-exporter
    container_name: node-exporter
    hostname: node-exporter
    restart: always
    ports:
      - '9100:9100'
    networks:
      - monitor

  cadvisor:
    image: google/cadvisor:latest
    container_name: cadvisor
    hostname: cadvisor
    restart: always
    volumes:
      - /:/rootfs:ro
      - /var/run:/var/run:rw
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
    ports:
      - '18080:8080'
    networks:
      - monitor

alertmanager.yml

# Alertmanager: send every alert by e-mail, grouped by alert name.
global:
  resolve_timeout: 5m
  # SMTP settings - replace the placeholder values with real credentials.
  smtp_from: 'email'
  smtp_smarthost: 'smtp.exmail.qq.com:25'
  smtp_auth_username: 'email'
  smtp_auth_password: 'password'
  smtp_require_tls: false
  smtp_hello: 'qq.com'

route:
  group_by: ['alertname']
  group_wait: 5s
  group_interval: 5s
  repeat_interval: 5m
  receiver: 'email'

receivers:
  - name: 'email'
    email_configs:
      - to: 'Email address'
        send_resolved: true

# Suppress a `warning` alert while a `critical` alert with the same
# alertname/dev/instance labels is firing.
inhibit_rules:
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'dev', 'instance']

prometheus.yml

global:
  # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  scrape_interval: 15s
  # Evaluate rules every 15 seconds. The default is every 1 minute.
  evaluation_interval: 15s
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets: ['192.168.32.117:9093']
          # - alertmanager:9093

# Load rules once and periodically evaluate them according to the global
# 'evaluation_interval'.
rule_files:
  - 'node_down.yml'
  # - "node-exporter-alert-rules.yml"
  # - "first_rules.yml"
  # - "second_rules.yml"

# Scrape configurations: one job for the node-exporter hosts and one for
# the cAdvisor instances. The job name is added as a label
# `job=<job_name>` to any timeseries scraped from that job.
scrape_configs:
  # IO storage node group
  - job_name: 'io'
    scrape_interval: 8s
    static_configs:
      # The port is the port where node-exporter is started
      - targets: ['192.168.32.117:9100']
      - targets: ['192.168.32.196:9100']
      - targets: ['192.168.32.136:9100']
      - targets: ['192.168.32.193:9100']
      - targets: ['192.168.32.153:9100']
      - targets: ['192.168.32.185:9100']
      - targets: ['192.168.32.190:19100']
      - targets: ['192.168.32.192:9100']

  - job_name: 'cadvisor'
    static_configs:
      # The port is the port started by cadvisor
      - targets: ['192.168.32.117:18080']
      - targets: ['192.168.32.193:8080']
      - targets: ['192.168.32.153:8080']
      - targets: ['192.168.32.185:8080']
      - targets: ['192.168.32.190:18080']
      - targets: ['192.168.32.192:18080']

node_down.yml

groups:
  - name: node_down
    rules:
      # A scrape target has been unreachable for one minute.
      - alert: InstanceDown
        expr: up == 0
        for: 1m
        labels:
          user: test
        annotations:
          summary: 'Instance {{ $labels.instance }} down'
          description: '{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minutes.'

      # Remaining memory is less than 10%
      - alert: 'Remaining memory is less than 10%'
        expr: >-
          node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes
          * 100 < 10
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: 'Host out of memory (instance {{ $labels.instance }})'
          description: "Node memory is filling up (< 10% left)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # The remaining disk is less than 10% (read-only filesystems excluded)
      - alert: 'The remaining disk space is less than 10%.'
        expr: >-
          (node_filesystem_avail_bytes * 100) / node_filesystem_size_bytes < 10
          and ON (instance, device, mountpoint) node_filesystem_readonly == 0
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: 'Host out of disk space (instance {{ $labels.instance }})'
          description: "Disk is almost full (< 10% left)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # cpu load > 80%
      - alert: 'CPU load > 80%'
        expr: '100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[2m])) * 100) > 80'
        for: 0m
        labels:
          severity: warning
        annotations:
          summary: 'Host high CPU load (instance {{ $labels.instance }})'
          description: "CPU load is > 80%\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

Alerts: https://awesome-prometheus-alerts.grep.to/rules#prometheus-self-monitoring
Official dashboard: https://grafana.com/grafana/dashboards/

This is the end of this article about docker deployment of grafana+prometheus configuration.
For more on deploying Grafana and Prometheus with Docker, please search the previous articles on 123WORDPRESS.COM or browse the related articles below. We hope you will continue to support 123WORDPRESS.COM! You may also be interested in:
|
<<: Use html-webpack-plugin' to generate HTML page plugin in memory
>>: HTML5+CSS3 header creation example and update
Table of contents File() grammar parameter Exampl...
Download link: Operating Environment CentOS 7.6 i...
This article tests the environment: CentOS 7 64-b...
The following code introduces MySQL to update som...
What is an HTML file? HTML stands for Hyper Text M...
Shtml and asp are similar. In files named shtml, s...
Seeing the recent popular WeChat tap function, I ...
Simulation tables and data scripts Copy the follo...
The previous article explained how to reset the M...
Introduction to CentOS CentOS is an enterprise-cl...
1. What is scaffolding? 1. Vue CLI Vue CLI is a c...
Docker is an open source container engine that he...
Table of contents Preface Generate SVG Introducti...
Table of contents Hidden, descending, and functio...
Table of contents 1. Particle Effects 2. Load the...