#!/bin/bash
# validate_deployment.sh - Validation script to verify all homelab components
# Run this after deployment to ensure everything is working correctly
#
# The shebang must be the very first line of the file; anything placed above
# it would be executed as a command instead of selecting the interpreter.

# Strict mode: abort on unhandled errors (-e), on unset variables (-u), and
# treat a pipeline as failed when any of its stages fails (pipefail).
set -euo pipefail

# Colors
# ANSI escape sequences, stored with literal backslashes; they are rendered
# when printed with escape interpretation enabled (e.g. `echo -e`).
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly NC='\033[0m' # resets colour attributes

# Running tallies of check outcomes; reported in the final summary.
PASSED=0
FAILED=0
WARNINGS=0

#######################################
# Report a successful check and count it.
# Globals:   GREEN, NC (read); PASSED (incremented)
# Arguments: $1 - message to display
# Outputs:   "✓ <message>" in green on stdout
# Returns:   0 on success
#######################################
check_pass() {
  # %b renders the colour escapes; %s prints the message verbatim.
  printf '%b✓ %s%b\n' "${GREEN}" "$1" "${NC}"
  # Deliberately not ((PASSED++)): that expression evaluates to the old
  # value, so it returns status 1 while PASSED is 0 and `set -e` would abort
  # the script on the first passing check.
  PASSED=$((PASSED + 1))
}

#######################################
# Report a failed check and count it.
# Globals:   RED, NC (read); FAILED (incremented)
# Arguments: $1 - message to display
# Outputs:   "✗ <message>" in red on stdout
# Returns:   0 on success
#######################################
check_fail() {
  # %b renders the colour escapes; %s prints the message verbatim.
  printf '%b✗ %s%b\n' "${RED}" "$1" "${NC}"
  # Deliberately not ((FAILED++)): that expression evaluates to the old
  # value, so it returns status 1 while FAILED is 0 and `set -e` would abort
  # the script instead of letting the remaining checks run.
  FAILED=$((FAILED + 1))
}

#######################################
# Report a non-fatal warning and count it.
# Globals:   YELLOW, NC (read); WARNINGS (incremented)
# Arguments: $1 - message to display
# Outputs:   "⚠ <message>" in yellow on stdout
# Returns:   0 on success
#######################################
check_warn() {
  # %b renders the colour escapes; %s prints the message verbatim.
  printf '%b⚠ %s%b\n' "${YELLOW}" "$1" "${NC}"
  # Deliberately not ((WARNINGS++)): that expression evaluates to the old
  # value, so it returns status 1 while WARNINGS is 0 and `set -e` would
  # abort the script on the first warning.
  WARNINGS=$((WARNINGS + 1))
}

# Opening banner: title plus the time the run started, framed by two rules.
printf '%s\n' \
  "=========================================" \
  "Home Lab Deployment Validation" \
  "Started at $(date)" \
  "========================================="

# Network Validation
echo -e "\n${YELLOW}[1/6] Network Configuration${NC}"

# Capture the link listing before searching it: with `pipefail`, piping
# straight into `grep -q` can report failure when grep exits on the first
# match and the producer is killed by SIGPIPE.
LINK_INFO=$(ip -d link show || true)
if grep -q "vlan" <<<"$LINK_INFO"; then
  check_pass "VLANs configured"
else
  check_warn "VLANs not detected (may not be configured yet)"
fi

if command -v ethtool >/dev/null 2>&1; then
  # Interface to inspect; override with NET_IFACE, defaults to eth0.
  NET_IFACE="${NET_IFACE:-eth0}"

  # `|| true`: a missing interface must produce a warning below, not abort
  # the whole script through set -e / pipefail.
  SPEED=$(ethtool "$NET_IFACE" 2>/dev/null | awk '/Speed/ {print $2}' || true)

  # Keep only the leading digits: "2500Mb/s" -> "2500", "Unknown!" -> "".
  SPEED_MBPS="${SPEED%%[!0-9]*}"

  # Numeric comparison so that 10Gb and faster links also pass.
  if [[ -n "$SPEED_MBPS" ]] && ((SPEED_MBPS >= 2500)); then
    check_pass "High-speed network detected: $SPEED"
  else
    check_warn "Network speed: ${SPEED:-unknown} (expected 2.5Gb or higher)"
  fi
else
  check_warn "ethtool not installed, cannot verify network speed"
fi

# Storage Validation
echo -e "\n${YELLOW}[2/6] Storage Configuration${NC}"

if command -v zpool >/dev/null 2>&1; then
  if zpool list tank >/dev/null 2>&1; then
    # `|| true`: if the query fails, report it as a failed check below
    # rather than aborting the script through set -e.
    HEALTH=$(zpool list -H -o health tank 2>/dev/null || true)
    if [[ "$HEALTH" == "ONLINE" ]]; then
      check_pass "ZFS pool 'tank' is ONLINE"
    else
      check_fail "ZFS pool 'tank' health: ${HEALTH:-unknown}"
    fi
  else
    check_warn "ZFS pool 'tank' not found (may not be on this node)"
  fi
else
  check_warn "ZFS not installed on this node"
fi

# Capture command output before searching it: with `pipefail`, piping into
# `grep -q` can report failure when grep exits on the first match and the
# producer is killed by SIGPIPE, which would show up as a false warning.
MOUNT_TABLE=$(mount || true)
if grep -q "/mnt/nas" <<<"$MOUNT_TABLE"; then
  check_pass "NAS is mounted"
else
  check_warn "NAS not mounted at /mnt/nas"
fi

# `crontab -l` exits non-zero when the user has no crontab; treat as empty.
CRON_ENTRIES=$(crontab -l 2>/dev/null || true)
if grep -q "prune_ai_models.sh" <<<"$CRON_ENTRIES"; then
  check_pass "AI model pruning cron job configured"
else
  check_warn "AI model pruning cron job not found"
fi

# Service Validation
echo -e "\n${YELLOW}[3/6] Docker Services${NC}"

if command -v docker >/dev/null 2>&1; then
  # Fetch the Swarm service table once and reuse it for every lookup; the
  # command fails when this node is not a Swarm manager.
  if SERVICES=$(docker service ls 2>/dev/null); then
    # grep -c prints 0 but exits 1 when nothing matches, hence `|| true`.
    TRAEFIK_COUNT=$(grep -c traefik <<<"$SERVICES" || true)
    if [[ $TRAEFIK_COUNT -ge 1 ]]; then
      # Fourth column of `docker service ls` is REPLICAS.
      REPLICAS=$(awk '/traefik/ {print $4}' <<<"$SERVICES")
      check_pass "Traefik service running ($REPLICAS)"
    else
      check_warn "Traefik service not found in Swarm"
    fi

    if grep -q node-exporter <<<"$SERVICES"; then
      check_pass "node-exporter service running"
    else
      check_warn "node-exporter service not found"
    fi
  else
    check_warn "Not a Swarm manager node"
  fi

  # Guard the query itself: an unreachable daemon or missing permission must
  # be reported as a warning, not abort the script through set -e/pipefail.
  if UNHEALTHY_NAMES=$(docker ps --filter "health=unhealthy" --format "{{.Names}}" 2>/dev/null); then
    if [[ -z "$UNHEALTHY_NAMES" ]]; then
      check_pass "No unhealthy containers"
    else
      # One container name per line; count the non-empty lines.
      UNHEALTHY=$(grep -c . <<<"$UNHEALTHY_NAMES")
      check_fail "$UNHEALTHY unhealthy containers detected"
      while IFS= read -r CONTAINER_NAME; do
        printf ' - %s\n' "$CONTAINER_NAME"
      done <<<"$UNHEALTHY_NAMES"
    fi
  else
    check_warn "Cannot query containers (Docker daemon unreachable or permission denied)"
  fi
else
  check_fail "Docker not installed"
fi

# Security Validation
echo -e "\n${YELLOW}[4/6] Security Configuration${NC}"

if systemctl is-active --quiet fail2ban 2>/dev/null; then
  check_pass "fail2ban service is active"

  # `|| true`: a sudo failure or a missing sshd jail simply leaves BANNED
  # empty instead of aborting the script through set -e/pipefail.
  BANNED=$(sudo fail2ban-client status sshd 2>/dev/null \
    | awk '/Currently banned/ {print $4}' || true)
  if [[ -n "$BANNED" ]]; then
    check_pass "fail2ban protecting SSH ($BANNED IPs banned)"
  fi
else
  check_warn "fail2ban not installed or not running"
fi

# Read the rule set once. `iptables -S` prints chain policies as "-P ..." and
# actual rules as "-A ...", so counting "-A" lines ignores the default
# policies that would otherwise make an empty firewall look configured.
if IPTABLES_RULES=$(sudo iptables -S 2>/dev/null); then
  # grep -c prints 0 but exits 1 when nothing matches, hence `|| true`.
  RULES=$(grep -c '^-A' <<<"$IPTABLES_RULES" || true)
  if [[ $RULES -gt 0 ]]; then
    check_pass "Firewall rules configured ($RULES rules)"
  else
    check_warn "No firewall rules detected"
  fi
else
  check_warn "Cannot check iptables (permission denied)"
fi

# Monitoring Validation
echo -e "\n${YELLOW}[5/6] Monitoring & Metrics${NC}"

# Bound every probe: without timeouts an unreachable host can stall the
# whole validation run for minutes.
CURL_OPTS=(-s --connect-timeout 3 --max-time 5)

# Grafana endpoint; override with GRAFANA_URL, defaults to the original
# hard-coded address.
GRAFANA_URL="${GRAFANA_URL:-http://192.168.1.196:3000}"

if curl "${CURL_OPTS[@]}" http://localhost:9100/metrics >/dev/null 2>&1; then
  check_pass "node-exporter metrics accessible"
else
  check_warn "node-exporter not accessible on this node"
fi

if curl "${CURL_OPTS[@]}" "$GRAFANA_URL" >/dev/null 2>&1; then
  check_pass "Grafana UI accessible"
else
  check_warn "Grafana not accessible (may not be on this node)"
fi

# Backup Validation
echo -e "\n${YELLOW}[6/6] Backup Configuration${NC}"

# Capture the timer table before searching it: with `pipefail`, piping into
# `grep -q` can report failure when grep exits on the first match and the
# producer is killed by SIGPIPE. -F matches the unit name literally.
TIMER_TABLE=$(systemctl list-timers --all || true)
if grep -qF "restic-backup.timer" <<<"$TIMER_TABLE"; then
  if systemctl is-active --quiet restic-backup.timer; then
    check_pass "Restic backup timer is active"
    # Ask systemd for the next elapse time directly; slicing the first two
    # columns of `list-timers` drops the time of day. `|| true` keeps a
    # failed query from aborting the script.
    NEXT_RUN=$(systemctl show -p NextElapseUSecRealtime --value \
      restic-backup.timer 2>/dev/null || true)
    echo " Next backup: ${NEXT_RUN:-unknown}"
  else
    check_fail "Restic backup timer is not active"
  fi
else
  check_warn "Restic backup timer not found"
fi

if command -v restic >/dev/null 2>&1; then
  check_pass "Restic is installed"
else
  check_warn "Restic not installed"
fi

# Summary
# Print the three tallies, then exit 1 if any check failed and 0 otherwise.
printf '%b\n' \
  "\n=========================================" \
  "Validation Summary" \
  "=========================================" \
  "${GREEN}Passed: $PASSED${NC}" \
  "${YELLOW}Warnings: $WARNINGS${NC}" \
  "${RED}Failed: $FAILED${NC}"

# Guard clause for the failure case; success falls through.
if [[ $FAILED -ne 0 ]]; then
  printf '%b\n' "\n${RED}✗ Some checks failed. Review above for details.${NC}"
  exit 1
fi

printf '%b\n' "\n${GREEN}✓ Deployment validation successful!${NC}"
exit 0