---
# Docker Swarm monitoring stack: Prometheus, Grafana, Alertmanager,
# node-exporter and cAdvisor. The three web UIs are published through
# Traefik via the labels under each service's `deploy` section.
version: '3.8'

networks:
  # Created outside this stack; the Traefik labels below route over it.
  traefik-public:
    external: true
  # Stack-internal overlay network joined by every service in this file.
  monitoring:
    driver: overlay

volumes:
  prometheus_data: {}
  grafana_data: {}
  alertmanager_data: {}

secrets:
  # Must exist in the Swarm before this stack is deployed.
  grafana_admin_password:
    external: true

configs:
  # Must exist in the Swarm (under the name `prometheus.yml`) before deploy.
  prometheus_config:
    external: true
    name: prometheus.yml

services:
  prometheus:
    image: prom/prometheus:v3.0.1
    volumes:
      - prometheus_data:/prometheus
    configs:
      - source: prometheus_config
        target: /etc/prometheus/prometheus.yml
    networks:
      - monitoring
      - traefik-public
    healthcheck:
      test:
        - "CMD"
        - "wget"
        - "--no-verbose"
        - "--tries=1"
        - "--spider"
        - "http://localhost:9090/-/healthy"
      interval: 30s
      timeout: 5s
      retries: 3
      start_period: 30s
    deploy:
      placement:
        constraints:
          - node.role == manager
      resources:
        limits:
          memory: 2G
          cpus: '1.0'
        reservations:
          memory: 512M
          cpus: '0.25'
      restart_policy:
        condition: on-failure
        delay: 5s
        max_attempts: 3
      update_config:
        parallelism: 1
        delay: 10s
        failure_action: rollback
      # NOTE(review): no auth middleware is attached to this router in this
      # file - confirm access to the Prometheus UI is restricted elsewhere.
      labels:
        - "traefik.enable=true"
        - "traefik.http.routers.prometheus.rule=Host(`prometheus.sj98.duckdns.org`)"
        - "traefik.http.routers.prometheus.entrypoints=websecure"
        - "traefik.http.routers.prometheus.tls.certresolver=leresolver"
        - "traefik.http.services.prometheus.loadbalancer.server.port=9090"
        - "traefik.docker.network=traefik-public"
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"

  grafana:
    image: grafana/grafana:11.3.1
    volumes:
      - grafana_data:/var/lib/grafana
    environment:
      - GF_SERVER_ROOT_URL=https://grafana.sj98.duckdns.org
      # The `__FILE` suffix makes Grafana read the value from the secret file.
      - GF_SECURITY_ADMIN_PASSWORD__FILE=/run/secrets/grafana_admin_password
    secrets:
      - grafana_admin_password
    networks:
      - monitoring
      - traefik-public
    healthcheck:
      test:
        - "CMD"
        - "wget"
        - "--no-verbose"
        - "--tries=1"
        - "--spider"
        - "http://localhost:3000/api/health"
      interval: 30s
      timeout: 5s
      retries: 3
      start_period: 30s
    deploy:
      placement:
        constraints:
          - node.role == manager
      resources:
        limits:
          memory: 1G
          cpus: '1.0'
        reservations:
          memory: 256M
          cpus: '0.25'
      restart_policy:
        condition: on-failure
        delay: 5s
        max_attempts: 3
      update_config:
        parallelism: 1
        delay: 10s
        failure_action: rollback
      labels:
        - "traefik.enable=true"
        - "traefik.http.routers.grafana.rule=Host(`grafana.sj98.duckdns.org`)"
        - "traefik.http.routers.grafana.entrypoints=websecure"
        - "traefik.http.routers.grafana.tls.certresolver=leresolver"
        - "traefik.http.services.grafana.loadbalancer.server.port=3000"
        - "traefik.docker.network=traefik-public"
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"

  alertmanager:
    image: prom/alertmanager:v0.27.0
    volumes:
      - alertmanager_data:/alertmanager
    command:
      # Nothing in this stack mounts a file under /etc/alertmanager, so the
      # config path must be the one bundled in the prom/alertmanager image
      # (alertmanager.yml); the previous config.yml path is not provided by
      # anything here. To use custom routing/receivers, mount a Swarm config
      # at this path, the same way the prometheus service does.
      - '--config.file=/etc/alertmanager/alertmanager.yml'
      - '--storage.path=/alertmanager'
    networks:
      - monitoring
      - traefik-public
    healthcheck:
      test:
        - "CMD"
        - "wget"
        - "--no-verbose"
        - "--tries=1"
        - "--spider"
        - "http://localhost:9093/-/healthy"
      interval: 30s
      timeout: 5s
      retries: 3
      start_period: 15s
    deploy:
      placement:
        constraints:
          - node.role == manager
      resources:
        limits:
          memory: 256M
          cpus: '0.25'
        reservations:
          memory: 64M
          cpus: '0.05'
      restart_policy:
        condition: on-failure
        delay: 5s
        max_attempts: 3
      # NOTE(review): no auth middleware is attached to this router in this
      # file - confirm access to the Alertmanager UI is restricted elsewhere.
      labels:
        - "traefik.enable=true"
        - "traefik.http.routers.alertmanager.rule=Host(`alertmanager.sj98.duckdns.org`)"
        - "traefik.http.routers.alertmanager.entrypoints=websecure"
        - "traefik.http.routers.alertmanager.tls.certresolver=leresolver"
        - "traefik.http.services.alertmanager.loadbalancer.server.port=9093"
        - "traefik.docker.network=traefik-public"
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"

  node-exporter:
    image: prom/node-exporter:v1.8.2
    # Host filesystems are mounted read-only and passed to the exporter
    # through the --path.* flags below.
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
    command:
      - '--path.procfs=/host/proc'
      - '--path.rootfs=/rootfs'
      - '--path.sysfs=/host/sys'
      # `$$` is the Compose escape for a literal `$` in the regex.
      - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'
    networks:
      - monitoring
    deploy:
      # One task on every node in the Swarm.
      mode: global
      resources:
        limits:
          memory: 128M
          cpus: '0.2'
        reservations:
          memory: 32M
          cpus: '0.05'
      restart_policy:
        condition: on-failure
        delay: 5s
        max_attempts: 3
    logging:
      driver: "json-file"
      options:
        max-size: "5m"
        max-file: "2"

  cadvisor:
    image: gcr.io/cadvisor/cadvisor:v0.50.0
    volumes:
      - /:/rootfs:ro
      - /var/run:/var/run:ro
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
      - /dev/disk/:/dev/disk:ro
    command:
      - '--docker_only=true'
      - '--housekeeping_interval=30s'
    networks:
      - monitoring
    healthcheck:
      test:
        - "CMD"
        - "wget"
        - "--no-verbose"
        - "--tries=1"
        - "--spider"
        - "http://localhost:8080/healthz"
      interval: 30s
      timeout: 5s
      retries: 3
    deploy:
      # One task on every node in the Swarm.
      mode: global
      resources:
        limits:
          memory: 256M
          cpus: '0.3'
        reservations:
          memory: 64M
          cpus: '0.1'
      restart_policy:
        condition: on-failure
        delay: 5s
        max_attempts: 3
    logging:
      driver: "json-file"
      options:
        max-size: "5m"
        max-file: "2"