Skip to content

Daily Tasks

Common commands and procedures for day-to-day cluster operations.

Health Checks

Cluster Status

# Check cluster quorum
pvecm status

# Check Ceph health
ceph -s

# Check HA status
ha-manager status

# List all VMs/CTs with status
infra list

Service Health

# Check hub2 services (all core services now run on hub2)
ssh hub2 docker ps

# Quick service health checks
curl -s https://domains.charliehub.net/health      # Domain Manager
curl -s https://prometheus.charliehub.net/-/healthy  # Prometheus
curl -s https://grafana.charliehub.net/api/health    # Grafana

Storage Status

# Check Ceph status, OSD topology, and pool usage
ceph -s
ceph osd tree
ceph df

# Check storage pools
pvesm status

# Check disk space
df -h

Common Operations

Start/Stop VMs

# Start a VM
qm start <vmid>

# Stop a VM (graceful)
qm shutdown <vmid>

# Force stop
qm stop <vmid>

# Check VM status
qm status <vmid>

Start/Stop Containers

# Start a container
pct start <vmid>

# Stop a container (graceful)
pct shutdown <vmid>

# Execute command in container
pct exec <vmid> -- <command>

Check Logs

# Proxmox system logs
journalctl -f

# Container logs
pct exec <vmid> -- journalctl -f

# Docker logs (VM1111)
ssh root@REDACTED_IP docker logs <container_name>

# Ceph logs
journalctl -u 'ceph-osd@*' -f

Infrastructure CLI

# List all infrastructure
infra list

# Get details for a specific VMID
infra get 1112

# Check for conflicts
infra conflicts

# Get next available VMID
infra next-vmid px1 prod

# Get next available IP
infra next-ip uk

Backup Verification

Ceph Snapshots

# List snapshots for critical VMs
rbd snap ls ceph-pool/vm-1111-disk-0
rbd snap ls ceph-pool/vm-1112-disk-0
rbd snap ls ceph-pool/vm-1113-disk-0

# Check snapshot logs
tail -20 /var/log/ceph-snapshots.log

Vzdump Backups

# List the 10 most recent backups (newest first)
ls -lt /mnt/backup-storage/dump/*.vma.zst | head -10

# Check backup logs
cat /var/log/vzdump/*.log | tail -30

DR Exports

# Check DR export logs
tail -20 /var/log/ceph-dr-export.log

# Check exports on px5
ssh root@REDACTED_IP "ls -lh /mnt/nvme-vmdata/dr-images/"

Network Troubleshooting

# Check if service is reachable
curl -s http://REDACTED_IP:8000/health

# Check Traefik routing
ssh root@REDACTED_IP docker logs traefik_prod --tail 20

# Check DNS resolution
dig charliehub.net

# Check WireGuard VPN status (runs on hub2 via SSH)
ssh hub2 sudo wg show

Ceph Operations

# Create manual snapshot
rbd snap create ceph-pool/vm-1111-disk-0@manual-$(date +%Y%m%d-%H%M)

# List all images
rbd ls ceph-pool

# Check OSD performance
ceph osd perf

Useful Aliases

Add to /root/.bashrc:

alias dps='docker ps'
alias dlog='docker logs -f'
alias pct-list='pct list'
alias qm-list='qm list'
alias ceph-health='ceph -s'
alias ha-status='ha-manager status'