Add Pi-hole with AdGuard DOH/DOT integration, reorganize swarm stacks, add DNS/n8n docs
This commit is contained in:
14
services/swarm/stacks/monitoring/alertmanager.yml
Normal file
14
services/swarm/stacks/monitoring/alertmanager.yml
Normal file
@@ -0,0 +1,14 @@
|
||||
---
# Alertmanager configuration: all alerts go through one route to one webhook receiver.
global:
  resolve_timeout: 5m

# Single top-level route; there are no child routes, so every alert ends up at `web.hook`.
route:
  receiver: web.hook
  # Alerts sharing the same `alertname` label are batched into one notification group.
  group_by:
    - alertname
  group_wait: 10s
  group_interval: 10s
  repeat_interval: 1h

receivers:
  - name: web.hook
    webhook_configs:
      # NOTE(review): 127.0.0.1 is the loopback of whatever host or container runs
      # Alertmanager; confirm a webhook receiver really listens on port 5001 there.
      - url: 'http://127.0.0.1:5001/'
|
||||
248
services/swarm/stacks/monitoring/monitoring-stack.yml
Normal file
248
services/swarm/stacks/monitoring/monitoring-stack.yml
Normal file
@@ -0,0 +1,248 @@
|
||||
---
# Monitoring stack: stack-wide resources (networks, volumes, secrets, configs)
# referenced by the service definitions that follow.
version: "3.8"

networks:
  # Not created by this stack; it must exist before deployment. The services'
  # `traefik.swarm.network` labels name it as the network Traefik routes over.
  traefik-public:
    external: true
  # Overlay network declared by this stack and joined by every service in it.
  # NOTE(review): no explicit `name:` is set, so the deployed network name is presumably
  # prefixed with the stack name, while node-exporter-stack.yml expects an external network
  # called exactly `monitoring` - confirm the two refer to the same network.
  monitoring:
    driver: overlay

# Named volumes with default settings.
volumes:
  prometheus_data: {}
  grafana_data: {}
  alertmanager_data: {}

# The Grafana admin password is supplied as a pre-created swarm secret.
secrets:
  grafana_admin_password:
    external: true

# Pre-created swarm configs, looked up by the literal names given in `name:`.
configs:
  prometheus_config:
    external: true
    name: prometheus.yml
  alertmanager_config:
    external: true
    name: alertmanager.yml
|
||||
|
||||
services:
|
||||
# Prometheus server. Mounts the prometheus_data volume at /prometheus, reads its
# configuration from the external `prometheus_config` swarm config, and is published
# through Traefik at prometheus.sterl.xyz (container port 9090).
# NOTE(review): the image tag is floating (`latest`); consider pinning a release so
# redeploys are reproducible.
# NOTE(review): no auth middleware is attached to the router labels below - confirm
# access to this hostname is restricted elsewhere.
prometheus:
  image: prom/prometheus:latest
  volumes:
    - prometheus_data:/prometheus
  configs:
    - source: prometheus_config
      target: /etc/prometheus/prometheus.yml
  networks:
    - monitoring
    - traefik-public
  healthcheck:
    test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:9090/-/healthy"]
    interval: 30s
    timeout: 5s
    retries: 3
    start_period: 30s
  deploy:
    placement:
      constraints:
        - node.role == manager
    resources:
      limits:
        memory: 2G
        cpus: '1.0'
      reservations:
        memory: 512M
        cpus: '0.25'
    restart_policy:
      condition: on-failure
      delay: 5s
      max_attempts: 3
      # Without a window, swarm counts restarts over the task's entire history, so the
      # third failure ever would leave the service down permanently. Limit the count to
      # a rolling two-minute window instead.
      window: 120s
    update_config:
      parallelism: 1
      delay: 10s
      failure_action: rollback
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.prometheus.rule=Host(`prometheus.sterl.xyz`)"
      - "traefik.http.routers.prometheus.entrypoints=websecure"
      - "traefik.http.routers.prometheus.tls.certresolver=cfresolver"
      - "traefik.http.services.prometheus.loadbalancer.server.port=9090"
      - "traefik.swarm.network=traefik-public"
      - "tsdproxy.enable=true"
      - "tsdproxy.name=prometheus"
      - "tsdproxy.container_port=9090"
  # Cap container logs at 3 files of 10 MB each.
  logging:
    driver: "json-file"
    options:
      max-size: "10m"
      max-file: "3"
|
||||
|
||||
# Grafana. Mounts the grafana_data volume at /var/lib/grafana and is published through
# Traefik at grafana.sterl.xyz (container port 3000).
# NOTE(review): the image tag is floating (`latest`); consider pinning a release so
# redeploys are reproducible.
grafana:
  image: grafana/grafana:latest
  volumes:
    - grafana_data:/var/lib/grafana
  environment:
    - GF_SERVER_ROOT_URL=https://grafana.sterl.xyz
    # Points at the file under /run/secrets where the `grafana_admin_password`
    # secret listed below is mounted, so the password never appears in this file.
    - GF_SECURITY_ADMIN_PASSWORD__FILE=/run/secrets/grafana_admin_password
  secrets:
    - grafana_admin_password
  networks:
    - monitoring
    - traefik-public
  healthcheck:
    test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:3000/api/health"]
    interval: 30s
    timeout: 5s
    retries: 3
    start_period: 30s
  deploy:
    placement:
      constraints:
        - node.role == manager
    resources:
      limits:
        memory: 1G
        cpus: '1.0'
      reservations:
        memory: 256M
        cpus: '0.25'
    restart_policy:
      condition: on-failure
      delay: 5s
      max_attempts: 3
      # Without a window, swarm counts restarts over the task's entire history, so the
      # third failure ever would leave the service down permanently. Limit the count to
      # a rolling two-minute window instead.
      window: 120s
    update_config:
      parallelism: 1
      delay: 10s
      failure_action: rollback
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.grafana.rule=Host(`grafana.sterl.xyz`)"
      - "traefik.http.routers.grafana.entrypoints=websecure"
      - "traefik.http.routers.grafana.tls.certresolver=cfresolver"
      - "traefik.http.services.grafana.loadbalancer.server.port=3000"
      - "traefik.swarm.network=traefik-public"
      - "tsdproxy.enable=true"
      - "tsdproxy.name=grafana"
      - "tsdproxy.container_port=3000"
  # Cap container logs at 3 files of 10 MB each.
  logging:
    driver: "json-file"
    options:
      max-size: "10m"
      max-file: "3"
|
||||
|
||||
# Alertmanager. Mounts the alertmanager_data volume at /alertmanager, reads its
# configuration from the external `alertmanager_config` swarm config, and is published
# through Traefik at alertmanager.sterl.xyz (container port 9093).
# NOTE(review): the image tag is floating (`latest`); consider pinning a release so
# redeploys are reproducible.
# NOTE(review): no auth middleware is attached to the router labels below - confirm
# access to this hostname is restricted elsewhere.
alertmanager:
  image: prom/alertmanager:latest
  volumes:
    - alertmanager_data:/alertmanager
  configs:
    - source: alertmanager_config
      target: /etc/alertmanager/config.yml
  # The config path and storage path must match the config target and volume mount above.
  command:
    - '--config.file=/etc/alertmanager/config.yml'
    - '--storage.path=/alertmanager'
  networks:
    - monitoring
    - traefik-public
  healthcheck:
    test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:9093/-/healthy"]
    interval: 30s
    timeout: 5s
    retries: 3
    start_period: 15s
  deploy:
    placement:
      constraints:
        - node.role == manager
    resources:
      limits:
        memory: 256M
        cpus: '0.25'
      reservations:
        memory: 64M
        cpus: '0.05'
    restart_policy:
      condition: on-failure
      delay: 5s
      max_attempts: 3
      # Without a window, swarm counts restarts over the task's entire history, so the
      # third failure ever would leave the service down permanently. Limit the count to
      # a rolling two-minute window instead.
      window: 120s
    # Same rolling-update policy the prometheus and grafana services in this file use,
    # so a failed update rolls back instead of leaving a broken task in place.
    update_config:
      parallelism: 1
      delay: 10s
      failure_action: rollback
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.alertmanager.rule=Host(`alertmanager.sterl.xyz`)"
      - "traefik.http.routers.alertmanager.entrypoints=websecure"
      - "traefik.http.routers.alertmanager.tls.certresolver=cfresolver"
      - "traefik.http.services.alertmanager.loadbalancer.server.port=9093"
      - "traefik.swarm.network=traefik-public"
      - "tsdproxy.enable=true"
      - "tsdproxy.name=alertmanager"
      - "tsdproxy.container_port=9093"
  # Cap container logs at 3 files of 10 MB each.
  logging:
    driver: "json-file"
    options:
      max-size: "10m"
      max-file: "3"
|
||||
|
||||
# node-exporter: one task per swarm node (`mode: global`). The host's /proc, /sys and
# root filesystem are bind-mounted read-only and passed to the exporter via --path.* flags.
# NOTE(review): node-exporter-stack.yml defines a global service with the same name;
# deploying both stacks runs two exporters per node - confirm which one is intended.
node-exporter:
  # Pinned to the same release node-exporter-stack.yml uses, instead of a floating `latest`.
  image: prom/node-exporter:v1.8.2
  volumes:
    - /proc:/host/proc:ro
    - /sys:/host/sys:ro
    # `rslave` (as in node-exporter-stack.yml) lets filesystems mounted on the host after
    # the container starts show up under /rootfs.
    - /:/rootfs:ro,rslave
  command:
    - '--path.procfs=/host/proc'
    - '--path.rootfs=/rootfs'
    - '--path.sysfs=/host/sys'
    # `$$` is the compose escape for a literal `$` in the regular expression.
    - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'
  networks:
    - monitoring
  deploy:
    mode: global
    resources:
      limits:
        memory: 128M
        cpus: '0.2'
      reservations:
        memory: 32M
        cpus: '0.05'
    restart_policy:
      condition: on-failure
      delay: 5s
      max_attempts: 3
      # Without a window, swarm counts restarts over the task's entire history, so the
      # third failure ever on a node would leave that node unmonitored permanently. Limit
      # the count to a rolling two-minute window instead.
      window: 120s
  # Cap container logs at 2 files of 5 MB each.
  logging:
    driver: "json-file"
    options:
      max-size: "5m"
      max-file: "2"
|
||||
|
||||
# cAdvisor: one task per swarm node (`mode: global`), with read-only bind mounts of the
# host paths it inspects. It has no Traefik labels and joins only the monitoring network.
cadvisor:
  # Pinned to a release instead of the floating `latest` tag.
  # NOTE(review): the registry's `latest` tag is reportedly not kept up to date with
  # releases - confirm v0.49.1 is the version you want to run.
  image: gcr.io/cadvisor/cadvisor:v0.49.1
  volumes:
    - /:/rootfs:ro
    - /var/run:/var/run:ro
    - /sys:/sys:ro
    - /var/lib/docker/:/var/lib/docker:ro
    - /dev/disk/:/dev/disk:ro
  command:
    - '--docker_only=true'
    - '--housekeeping_interval=30s'
  networks:
    - monitoring
  healthcheck:
    test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:8080/healthz"]
    interval: 30s
    timeout: 5s
    retries: 3
  deploy:
    mode: global
    resources:
      limits:
        memory: 256M
        cpus: '0.3'
      reservations:
        memory: 64M
        cpus: '0.1'
    restart_policy:
      condition: on-failure
      delay: 5s
      max_attempts: 3
      # Without a window, swarm counts restarts over the task's entire history, so the
      # third failure ever on a node would leave that node unmonitored permanently. Limit
      # the count to a rolling two-minute window instead.
      window: 120s
  # Cap container logs at 2 files of 5 MB each.
  logging:
    driver: "json-file"
    options:
      max-size: "5m"
      max-file: "2"
|
||||
38
services/swarm/stacks/monitoring/node-exporter-stack.yml
Normal file
38
services/swarm/stacks/monitoring/node-exporter-stack.yml
Normal file
@@ -0,0 +1,38 @@
|
||||
---
# Standalone node-exporter stack: runs one exporter task on every swarm node.
# NOTE(review): monitoring-stack.yml also defines a global `node-exporter` service;
# deploying both stacks runs two exporters per node - confirm which one is intended.
version: '3.8'

networks:
  # External: a network named exactly `monitoring` must already exist before deployment.
  # NOTE(review): monitoring-stack.yml declares its `monitoring` network without an
  # explicit `name:` - confirm the deployed network really carries this bare name.
  monitoring:
    external: true

services:
  node-exporter:
    image: prom/node-exporter:v1.8.2
    command:
      - '--path.procfs=/host/proc'
      - '--path.rootfs=/rootfs'
      - '--path.sysfs=/host/sys'
      # `$$` is the compose escape for a literal `$` in the regular expression.
      - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'
    # Read-only views of the host; `rslave` lets filesystems mounted on the host after
    # the container starts show up under /rootfs.
    volumes:
      - '/proc:/host/proc:ro'
      - '/sys:/host/sys:ro'
      - '/:/rootfs:ro,rslave'
    networks:
      - monitoring
    deploy:
      mode: global
      resources:
        limits:
          memory: 128M
          cpus: '0.2'
        reservations:
          memory: 32M
          cpus: '0.05'
      restart_policy:
        condition: on-failure
        delay: 5s
        max_attempts: 3
        # Without a window, swarm counts restarts over the task's entire history, so the
        # third failure ever on a node would leave that node unmonitored permanently.
        # Limit the count to a rolling two-minute window instead.
        window: 120s
    # Cap container logs at 2 files of 5 MB each.
    logging:
      driver: "json-file"
      options:
        max-size: "5m"
        max-file: "2"
|
||||
Reference in New Issue
Block a user