Service Health Checks
Overall Cluster Health:
# Check all pods are running
kubectl get pods --all-namespaces
# Verify no pods are in CrashLoopBackOff or Error state.
# Filter on the STATUS column (4th column with --all-namespaces) rather than on
# status.phase: a pod in CrashLoopBackOff typically still reports phase
# "Running", and finished jobs ("Completed") are not failures.
# Any line printed here is a pod that needs investigation; no output means OK.
kubectl get pods --all-namespaces --no-headers | awk '$4 != "Running" && $4 != "Completed"'
# Check service endpoints (current namespace)
kubectl get endpoints
PostgreSQL Health:
# Verify all databases are accessible (lists every database in the cluster)
kubectl exec -it <postgresql-pod> -- psql -U gv -c '\l'
# Review per-database statistics (connections, rollbacks, conflicts, deadlocks).
# NOTE: this view reports activity counters only; it does not scan tables and
# is not a corruption check. On PostgreSQL 12+ with data checksums enabled, the
# checksum_failures column is the closest indicator -- confirm the server version.
kubectl exec -it <postgresql-pod> -- psql -U gv -d <database> -c 'SELECT * FROM pg_stat_database;'
# Verify replication (if applicable); run against the primary -- an empty
# result means no standby is currently connected
kubectl exec -it <postgresql-pod> -- psql -U gv -c 'SELECT * FROM pg_stat_replication;'
Elasticsearch Health:
# NOTE: credentials and URLs are single-quoted so the shell does not interpret
# special characters in the password or treat "?" in the URL as a glob pattern.
# Verify cluster health is green
curl -u 'elastic:<password>' 'http://localhost:9200/_cluster/health'
# Check all indices are present
curl -u 'elastic:<password>' 'http://localhost:9200/_cat/indices?v'
# Verify shard allocation
curl -u 'elastic:<password>' 'http://localhost:9200/_cat/shards?v'
Consul Health:
# Check all members are alive
kubectl exec -it <consul-pod> -- consul members
# Verify service catalog
kubectl exec -it <consul-pod> -- consul catalog services
# Check key-value store accessibility (first 20 entries only).
# No -it here: the output is piped to head, so an interactive TTY is not needed.
kubectl exec <consul-pod> -- consul kv get -recurse | head -20