Files
Homelab/scripts/validate_deployment.sh

196 lines
5.4 KiB
Bash
Executable File

#!/bin/bash
# validate_deployment.sh - Validation script to verify all homelab components
# Run this after deployment to ensure everything is working correctly
# Strict mode: stop on a failing command, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# ANSI colour sequences. They are stored with a literal backslash and are
# expanded only when printed by an escape-aware command.
NC='\033[0m'
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'

# Result tallies, one per outcome category.
PASSED=0 FAILED=0 WARNINGS=0
#######################################
# Report a successful check and count it.
# Globals:   GREEN, NC (read); PASSED (incremented)
# Arguments: $1 - message to display
# Outputs:   the message, wrapped in the green colour codes, on stdout
# Returns:   0
#######################################
check_pass() {
  # %b expands the colour escapes; %s prints the message verbatim.
  printf '%b%s%b\n' "$GREEN" "$1" "$NC"
  # Plain assignment instead of ((PASSED++)): the post-increment expression
  # evaluates to the old value, so it returns status 1 while PASSED is 0 and
  # terminates the script under `set -e`.
  PASSED=$((PASSED + 1))
}
#######################################
# Report a failed check and count it.
# Globals:   RED, NC (read); FAILED (incremented)
# Arguments: $1 - message to display
# Outputs:   the message, wrapped in the red colour codes, on stdout
# Returns:   0
#######################################
check_fail() {
  # %b expands the colour escapes; %s prints the message verbatim.
  printf '%b%s%b\n' "$RED" "$1" "$NC"
  # Plain assignment instead of ((FAILED++)): the post-increment expression
  # evaluates to the old value, so it returns status 1 while FAILED is 0 and
  # terminates the script under `set -e`.
  FAILED=$((FAILED + 1))
}
#######################################
# Report a non-fatal warning and count it.
# Globals:   YELLOW, NC (read); WARNINGS (incremented)
# Arguments: $1 - message to display
# Outputs:   the message, wrapped in the yellow colour codes, on stdout
# Returns:   0
#######################################
check_warn() {
  # %b expands the colour escapes; %s prints the message verbatim.
  printf '%b%s%b\n' "$YELLOW" "$1" "$NC"
  # Plain assignment instead of ((WARNINGS++)): the post-increment expression
  # evaluates to the old value, so it returns status 1 while WARNINGS is 0 and
  # terminates the script under `set -e`.
  WARNINGS=$((WARNINGS + 1))
}
# Opening banner, including the time the run started.
cat <<EOF
=========================================
Home Lab Deployment Validation
Started at $(date)
=========================================
EOF
# Network Validation
# Checks that a VLAN link exists and that the primary NIC reports a speed of
# at least 2500Mb/s. The NIC defaults to eth0; set NET_IFACE to override it.
echo -e "\n${YELLOW}[1/6] Network Configuration${NC}"

# Capture the link list before matching: under pipefail, `ip ... | grep -q`
# can report failure when grep exits on its first hit and ip dies of SIGPIPE.
LINK_INFO=$(ip -d link show || true)
if grep -q "vlan" <<<"$LINK_INFO"; then
  check_pass "VLANs configured"
else
  check_warn "VLANs not detected (may not be configured yet)"
fi

if command -v ethtool >/dev/null 2>&1; then
  NET_IFACE=${NET_IFACE:-eth0}
  # `|| true`: a missing interface must produce a warning below instead of
  # aborting the whole script through set -e/pipefail.
  SPEED=$(ethtool "$NET_IFACE" 2>/dev/null | awk '/Speed/ {print $2}' || true)
  # Keep only the leading digits: "2500Mb/s" -> "2500", "Unknown!" -> "".
  SPEED_MBPS=${SPEED%%[!0-9]*}
  # Compare numerically so that 10000Mb/s and faster links pass as well;
  # 10# forces base 10 regardless of leading zeros.
  if [[ -n "$SPEED_MBPS" ]] && (( 10#$SPEED_MBPS >= 2500 )); then
    check_pass "High-speed network detected: $SPEED"
  else
    check_warn "Network speed: ${SPEED:-unknown} (expected 2.5Gb or higher)"
  fi
else
  check_warn "ethtool not installed, cannot verify network speed"
fi
# Storage Validation
# Checks the health of ZFS pool 'tank', the NAS mount under /mnt/nas, and the
# presence of the model-pruning cron job in the current user's crontab.
echo -e "\n${YELLOW}[2/6] Storage Configuration${NC}"
if command -v zpool >/dev/null 2>&1; then
  # A single query both proves the pool exists and reads its health, so a
  # failing call selects the warning branch instead of aborting under set -e.
  if HEALTH=$(zpool list -H -o health tank 2>/dev/null); then
    if [[ "$HEALTH" == "ONLINE" ]]; then
      check_pass "ZFS pool 'tank' is ONLINE"
    else
      check_fail "ZFS pool 'tank' health: $HEALTH"
    fi
  else
    check_warn "ZFS pool 'tank' not found (may not be on this node)"
  fi
else
  check_warn "ZFS not installed on this node"
fi

# Capture first, then match: piping straight into `grep -q` can fail under
# pipefail when grep exits early and the producer receives SIGPIPE.
MOUNTS=$(mount || true)
# Match /mnt/nas itself or anything mounted beneath it, but not look-alike
# paths such as /mnt/nas-old that a bare substring match would accept.
if grep -Eq ' on /mnt/nas(/[^ ]*)? ' <<<"$MOUNTS"; then
  check_pass "NAS is mounted"
else
  check_warn "NAS not mounted at /mnt/nas"
fi

# An absent crontab is treated as an empty one.
CRON_JOBS=$(crontab -l 2>/dev/null || true)
# -F: match the script name literally (the dot is not a wildcard).
if grep -qF "prune_ai_models.sh" <<<"$CRON_JOBS"; then
  check_pass "AI model pruning cron job configured"
else
  check_warn "AI model pruning cron job not found"
fi
# Service Validation
# Checks the Swarm services (Traefik, node-exporter) and container health.
echo -e "\n${YELLOW}[3/6] Docker Services${NC}"
if command -v docker >/dev/null 2>&1; then
  # One snapshot of the service list feeds every Swarm check below. When the
  # listing fails, the warning branch is taken.
  if SERVICES=$(docker service ls 2>/dev/null); then
    if grep -q traefik <<<"$SERVICES"; then
      # Fourth column of `docker service ls` (the replica count in the original check).
      REPLICAS=$(awk '/traefik/ {print $4}' <<<"$SERVICES")
      check_pass "Traefik service running ($REPLICAS)"
    else
      check_warn "Traefik service not found in Swarm"
    fi
    if grep -q node-exporter <<<"$SERVICES"; then
      check_pass "node-exporter service running"
    else
      check_warn "node-exporter service not found"
    fi
  else
    check_warn "Not a Swarm manager node"
  fi

  # Query once and test the command's own status. Piping `docker ps` into
  # `wc -l` aborted the script under set -e/pipefail when the daemon could
  # not be reached, and a second query could disagree with the first.
  if UNHEALTHY_NAMES=$(docker ps --filter "health=unhealthy" --format "{{.Names}}"); then
    if [[ -z "$UNHEALTHY_NAMES" ]]; then
      check_pass "No unhealthy containers"
    else
      UNHEALTHY=$(grep -c . <<<"$UNHEALTHY_NAMES")
      check_fail "$UNHEALTHY unhealthy containers detected"
      sed 's/^/ - /' <<<"$UNHEALTHY_NAMES"
    fi
  else
    check_warn "Cannot query Docker for container health (daemon unreachable or permission denied)"
  fi
else
  check_fail "Docker not installed"
fi
# Security Validation
# Checks that fail2ban is active and guarding SSH, and that the firewall has
# at least one explicit rule.
echo -e "\n${YELLOW}[4/6] Security Configuration${NC}"
if systemctl is-active --quiet fail2ban 2>/dev/null; then
  check_pass "fail2ban service is active"
  # `|| true`: a sudo failure or a missing sshd jail must not abort the
  # script through set -e/pipefail; it is reported as a warning below.
  BANNED=$(sudo fail2ban-client status sshd 2>/dev/null \
    | awk '/Currently banned/ {print $4}' || true)
  if [[ -n "$BANNED" ]]; then
    check_pass "fail2ban protecting SSH ($BANNED IPs banned)"
  else
    check_warn "fail2ban sshd jail status unavailable"
  fi
else
  check_warn "fail2ban not installed or not running"
fi

# `iptables -S` prints one `-A ...` line per rule. Counting ACCEPT/DROP in
# `iptables -L` output also matched the "(policy ACCEPT)" chain headers, so an
# empty ruleset was still reported as configured.
if RULESET=$(sudo iptables -S 2>/dev/null); then
  # grep -c prints 0 but exits 1 when nothing matches; `|| true` keeps the 0.
  RULES=$(grep -c '^-A' <<<"$RULESET" || true)
  if [[ $RULES -gt 0 ]]; then
    check_pass "Firewall rules configured ($RULES rules)"
  else
    check_warn "No firewall rules detected"
  fi
else
  check_warn "Cannot check iptables (permission denied)"
fi
# Monitoring Validation
# Probes the local node-exporter endpoint and the Grafana UI over HTTP.
# The Grafana address defaults to http://192.168.1.196:3000; set GRAFANA_URL
# to override it.
echo -e "\n${YELLOW}[5/6] Monitoring & Metrics${NC}"
GRAFANA_URL=${GRAFANA_URL:-http://192.168.1.196:3000}
# --fail: treat HTTP 4xx/5xx responses as failures rather than successes.
# --max-time: bound each probe so an unreachable host cannot stall the run.
CURL_OPTS=(--silent --fail --max-time 5)

if curl "${CURL_OPTS[@]}" http://localhost:9100/metrics >/dev/null 2>&1; then
  check_pass "node-exporter metrics accessible"
else
  check_warn "node-exporter not accessible on this node"
fi

if curl "${CURL_OPTS[@]}" "$GRAFANA_URL" >/dev/null 2>&1; then
  check_pass "Grafana UI accessible"
else
  check_warn "Grafana not accessible (may not be on this node)"
fi
# Backup Validation
# Checks that the restic-backup systemd timer exists and is active, and that
# the restic binary is installed.
echo -e "\n${YELLOW}[6/6] Backup Configuration${NC}"

# Capture the timer list once: piping into `grep -q` can fail under pipefail
# when grep exits early and systemctl receives SIGPIPE.
TIMERS=$(systemctl list-timers --all || true)
# -F: match the unit name literally (the dot is not a wildcard).
if grep -qF "restic-backup.timer" <<<"$TIMERS"; then
  if systemctl is-active --quiet restic-backup.timer; then
    check_pass "Restic backup timer is active"
    # Parse the captured list instead of a fresh pipeline, whose failure
    # would abort the script under set -e/pipefail. The first two columns
    # of the matching row are printed as in the original output.
    NEXT_RUN=$(awk '/restic-backup/ {print $1, $2}' <<<"$TIMERS")
    echo " Next backup: $NEXT_RUN"
  else
    check_fail "Restic backup timer is not active"
  fi
else
  check_warn "Restic backup timer not found"
fi

if command -v restic >/dev/null 2>&1; then
  check_pass "Restic is installed"
else
  check_warn "Restic not installed"
fi
# Summary
# Prints the three tallies, then exits 0 when nothing failed and 1 otherwise.
printf '\n%s\n%s\n%s\n' \
  "=========================================" \
  "Validation Summary" \
  "========================================="

# printf reuses the format for each argument, giving one coloured line each.
printf '%b\n' \
  "${GREEN}Passed: $PASSED${NC}" \
  "${YELLOW}Warnings: $WARNINGS${NC}" \
  "${RED}Failed: $FAILED${NC}"

# Guard clause: deal with the failure outcome first.
if (( FAILED != 0 )); then
  echo -e "\n${RED}✗ Some checks failed. Review above for details.${NC}"
  exit 1
fi

echo -e "\n${GREEN}✓ Deployment validation successful!${NC}"
exit 0