add all services that have no secret stuff
This commit is contained in:
13
roles/prometheus/files/alert.rules
Normal file
13
roles/prometheus/files/alert.rules
Normal file
@ -0,0 +1,13 @@
|
||||
---
# Prometheus alerting rules.
groups:
  - name: example
    rules:
      # Page when an instance has been unreachable (`up == 0`) for more than
      # two minutes.
      - alert: service_down
        expr: up == 0
        for: 2m
        labels:
          severity: page
        annotations:
          summary: 'Instance {{ $labels.instance }} down'
          description: '{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 2 minutes.'
|
13
roles/prometheus/files/alertmanager.yml
Normal file
13
roles/prometheus/files/alertmanager.yml
Normal file
@ -0,0 +1,13 @@
|
||||
---
# Alertmanager configuration: every alert is routed to one e-mail receiver.
route:
  receiver: 'email'

receivers:
  - name: 'email'
    email_configs:
      - to: 'tormakristof@tormakristof.eu'
        from: 'monitoring@tormakris.dev'
        smarthost: 'smtp.intra.tormakris.dev:25'
        tls_config:
          # NOTE(review): certificate verification towards the smarthost is
          # disabled; presumably the relay uses an internal/self-signed
          # certificate. Confirm, and prefer trusting its CA instead.
          insecure_skip_verify: true
|
80
roles/prometheus/files/docker-compose.yaml
Normal file
80
roles/prometheus/files/docker-compose.yaml
Normal file
@ -0,0 +1,80 @@
|
||||
---
# Monitoring stack: Prometheus, node-exporter, Alertmanager, cAdvisor, Grafana.
version: '3.7'

# All services share this one network.
networks:
  monitoring:

services:
  prometheus:
    image: prom/prometheus:latest
    restart: always
    networks:
      - monitoring
    depends_on:
      - node-exporter
      - cadvisor
    # ports:
    #   - "127.0.0.1:8080:9090"
    volumes:
      # Configuration directory and the TSDB data directory.
      - './prometheus/:/etc/prometheus/'
      - './prometheus_data:/prometheus'
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/usr/share/prometheus/console_libraries'
      - '--web.console.templates=/usr/share/prometheus/consoles'

  node-exporter:
    image: prom/node-exporter
    restart: always
    networks:
      - monitoring
    volumes:
      # Read-only views of the host mounted into the container.
      - '/proc:/host/proc:ro'
      - '/sys:/host/sys:ro'
      - '/:/rootfs:ro'
    command:
      - '--path.procfs=/host/proc'
      - '--path.sysfs=/host/sys'
      # The flag and its regex value are passed as two separate arguments.
      # `$$` is Compose's escape for a literal `$`.
      - '--collector.filesystem.ignored-mount-points'
      - '^/(sys|proc|dev|host|etc|rootfs/var/lib/docker/containers|rootfs/var/lib/docker/overlay2|rootfs/run/docker/netns|rootfs/var/lib/docker/aufs)($$|/)'

  alertmanager:
    image: prom/alertmanager
    restart: always
    networks:
      - monitoring
    # ports:
    #   - "127.0.0.1:8282:9093"
    volumes:
      - './alertmanager/:/etc/alertmanager/'
    command:
      - '--config.file=/etc/alertmanager/config.yml'
      - '--storage.path=/alertmanager'

  cadvisor:
    image: gcr.io/cadvisor/cadvisor
    restart: always
    networks:
      - monitoring
    volumes:
      - '/:/rootfs:ro'
      - '/var/run:/var/run:rw'
      - '/sys:/sys:ro'
      - '/var/lib/docker/:/var/lib/docker:ro'

  grafana:
    image: grafana/grafana
    restart: always
    user: "472"
    networks:
      - monitoring
    depends_on:
      - prometheus
    # The only published port in this file, bound to loopback on the host.
    ports:
      - '127.0.0.1:8181:3000'
    env_file:
      - ./grafana/config.monitoring
    volumes:
      - './grafana_data:/var/lib/grafana'
|
158
roles/prometheus/files/prometheus.yml
Normal file
158
roles/prometheus/files/prometheus.yml
Normal file
@ -0,0 +1,158 @@
|
||||
---
# Prometheus server configuration for the monitoring stack.
global:
  scrape_interval: 15s      # Scrape targets every 15 seconds unless a job overrides it.
  evaluation_interval: 15s  # Evaluate rules every 15 seconds.
  # scrape_timeout is left unset, so the global default (10s) applies.

  # Attach these labels to any time series or alerts when communicating with
  # external systems (federation, remote storage, Alertmanager).
  external_labels:
    monitor: 'stargate-cluster'

# Rule files, evaluated every `evaluation_interval`.
rule_files:
  - 'alert.rules'
  # - "first.rules"
  # - "second.rules"

# Alertmanager instances that receive fired alerts.
alerting:
  alertmanagers:
    - scheme: http
      static_configs:
        - targets:
            - 'alertmanager:9093'

# Scrape jobs. The job name is added as a label `job=<job_name>` to every time
# series scraped from that job. Every job below overrides the global scrape
# interval with 5 seconds.
scrape_configs:
  # Prometheus scraping its own metrics endpoint.
  - job_name: 'prometheus'
    scrape_interval: 5s
    static_configs:
      - targets:
          - 'localhost:9090'

  - job_name: 'node-exporter'
    scrape_interval: 5s
    static_configs:
      - targets:
          - 'node-exporter:9100'
          - 'zelenka.intra.tormakris.dev:9100'
          - 'drone.intra.tormakris.dev:9100'
          - 'matrix.intra.tormakris.dev:9100'
          - 'swagger.intra.tormakris.dev:9100'
          - 'drone-runner.intra.tormakris.dev:9100'
          - 'smtp.intra.tormakris.dev:9100'
          - 'webgateway.intra.tormakris.dev:9100'
          - 'openvpn.intra.tormakris.dev:9100'
          - 'nexus.intra.tormakris.dev:9100'
          - 'git.intra.tormakris.dev:9100'
          # NOTE(review): this job uses `postgres.` while the postgres-exporter
          # job uses `postgresql.` - confirm both hostnames resolve.
          - 'postgres.intra.tormakris.dev:9100'
          - 'guacamole.intra.tormakris.dev:9100'
          - 'bitwarden.intra.tormakris.dev:9100'
          - 'nextcloud.intra.tormakris.dev:9100'
          - 'backup.intra.tormakris.dev:9100'
          - 'ssh.intra.tormakris.dev:9100'
          - 'minecraft.intra.tormakris.dev:9100'

  - job_name: 'postfix-exporter'
    scrape_interval: 5s
    static_configs:
      - targets:
          - 'zelenka.intra.tormakris.dev:9154'
          - 'drone.intra.tormakris.dev:9154'
          - 'matrix.intra.tormakris.dev:9154'
          - 'swagger.intra.tormakris.dev:9154'
          - 'drone-runner.intra.tormakris.dev:9154'
          - 'smtp.intra.tormakris.dev:9154'
          - 'webgateway.intra.tormakris.dev:9154'
          - 'openvpn.intra.tormakris.dev:9154'
          - 'nexus.intra.tormakris.dev:9154'
          - 'git.intra.tormakris.dev:9154'
          - 'guacamole.intra.tormakris.dev:9154'
          - 'bitwarden.intra.tormakris.dev:9154'
          - 'nextcloud.intra.tormakris.dev:9154'
          - 'backup.intra.tormakris.dev:9154'

  - job_name: 'nginx-exporter'
    scrape_interval: 5s
    static_configs:
      - targets:
          - 'drone.intra.tormakris.dev:9113'
          - 'matrix.intra.tormakris.dev:9113'
          - 'swagger.intra.tormakris.dev:9113'
          - 'webgateway.intra.tormakris.dev:9113'
          - 'nexus.intra.tormakris.dev:9113'
          - 'git.intra.tormakris.dev:9113'
          - 'guacamole.intra.tormakris.dev:9113'
          - 'bitwarden.intra.tormakris.dev:9113'
          - 'nextcloud.intra.tormakris.dev:9113'

  - job_name: 'cadvisor'
    scrape_interval: 5s
    static_configs:
      - targets:
          - 'cadvisor:8080'
          - 'zelenka.intra.tormakris.dev:4194'
          - 'drone.intra.tormakris.dev:4194'
          - 'matrix.intra.tormakris.dev:4194'
          - 'swagger.intra.tormakris.dev:4194'
          - 'drone-runner.intra.tormakris.dev:4194'
          - 'nexus.intra.tormakris.dev:4194'
          - 'git.intra.tormakris.dev:4194'
          - 'guacamole.intra.tormakris.dev:4194'
          - 'nextcloud.intra.tormakris.dev:4194'

  - job_name: 'drone-server'
    scrape_interval: 5s
    scheme: https
    tls_config:
      # NOTE(review): certificate verification is disabled for this target.
      insecure_skip_verify: true
    # NOTE(review): this is a credential committed in plain text. Rotate it and
    # load it from a file outside version control (e.g. `bearer_token_file`).
    # Kept inline here so the scrape keeps working until that file exists.
    bearer_token: 'a96fdbbbfb1072836bf81b2eab456773'
    static_configs:
      - targets:
          - 'drone.intra.tormakris.dev:443'

  - job_name: 'postgres-exporter'
    scrape_interval: 5s
    static_configs:
      - targets:
          - 'postgresql.intra.tormakris.dev:9187'

  - job_name: 'gitea-server'
    scrape_interval: 5s
    scheme: https
    tls_config:
      # NOTE(review): certificate verification is disabled for this target.
      insecure_skip_verify: true
    static_configs:
      - targets:
          - 'git.intra.tormakris.dev:443'

  - job_name: 'windows_exporter'
    scrape_interval: 5s
    static_configs:
      - targets:
          - 'woolsey-host.intra.tormakris.dev:9182'
          - 'mckay-host.intra.tormakris.dev:9182'
|
33
roles/prometheus/tasks/main.yaml
Normal file
33
roles/prometheus/tasks/main.yaml
Normal file
@ -0,0 +1,33 @@
|
||||
---
# Copies the monitoring stack's configuration files into service-user's home.
# NOTE(review): nothing here creates the `prometheus/` and `alertmanager/`
# destination directories; presumably another role or task does - confirm.

- name: Copy prometheus configuration
  ansible.builtin.copy:
    dest: /home/service-user/prometheus/prometheus.yml
    src: prometheus.yml
    mode: "0644"
    owner: root
    group: root

# The source file alertmanager.yml is installed under the name config.yml.
- name: Copy alertmanager configuration
  ansible.builtin.copy:
    dest: /home/service-user/alertmanager/config.yml
    src: alertmanager.yml
    mode: "0644"
    owner: root
    group: root

- name: Copy alert rules
  ansible.builtin.copy:
    dest: /home/service-user/prometheus/alert.rules
    src: alert.rules
    mode: "0644"
    owner: root
    group: root

# Unlike the files above, the compose file is owned by service-user, not root.
- name: Copy docker-compose configuration
  ansible.builtin.copy:
    dest: /home/service-user/docker-compose.yaml
    src: docker-compose.yaml
    mode: "0644"
    owner: service-user
    group: service-user
|
Reference in New Issue
Block a user