Service Health Checks

Overall Cluster Health:

# Check all pods are running
kubectl get pods --all-namespaces

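# List pods whose phase is not Running (Pending, Succeeded, Failed, Unknown).
# Note: pods in CrashLoopBackOff usually still report phase Running, so also
# check the STATUS and RESTARTS columns in the output of the previous command.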
kubectl get pods --all-namespaces --field-selector=status.phase!=Running

# Check service endpoints (current namespace only; add --all-namespaces to cover the whole cluster)
kubectl get endpoints

PostgreSQL Health:

# Verify all databases are accessible
kubectl exec -it <postgresql-pod> -- psql -U gv -c '\l'

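# Review database-level statistics (e.g. deadlocks, conflicts, rollbacks).
# Note: pg_stat_database does not by itself detect table corruption; on
# PostgreSQL 12+ with data checksums enabled, check the checksum_failures column.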
kubectl exec -it <postgresql-pod> -- psql -U gv -d <database> -c 'SELECT * FROM pg_stat_database;'

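# Verify replication (if applicable). Run against the primary: each row is a
# connected standby, and an empty result means no standbys are streaming.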
kubectl exec -it <postgresql-pod> -- psql -U gv -c 'SELECT * FROM pg_stat_replication;'

Elasticsearch Health:

# Verify cluster health (the "status" field in the response should be "green")
curl -u elastic:<password> http://localhost:9200/_cluster/health

# Check all indices are present
# (quote the URL in shells such as zsh, where an unquoted ? is a glob character)
curl -u elastic:<password> http://localhost:9200/_cat/indices?v

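# Verify shard allocation (no shards should be in the UNASSIGNED state)
# (quote the URL in shells such as zsh, where an unquoted ? is a glob character)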
curl -u elastic:<password> http://localhost:9200/_cat/shards?v

Consul Health:

# Check all members are alive
kubectl exec -it <consul-pod> -- consul members

# Verify service catalog
kubectl exec -it <consul-pod> -- consul catalog services

# Check key-value store accessibility
kubectl exec -it <consul-pod> -- consul kv get -recurse | head -20