# Health Check (HC)

## Prereqs

* Comando jq
* Comando oc
* Comando redhat-support-tool
* Nro de caso generado por el TAM

## Crear Directorio donde se alojarán los HC

```
$ mkdir -p hc-ocp && cd hc-ocp
```

## Crear scripts

### Script que toma info de OCP y los nodos

```
$ vim hc-nodes.sh
#!/bin/bash
#
# hc-nodes.sh - cluster-wide and per-node health check report.
#
# Requires: oc (with an active session) and jq.
# Output:   the report is written to stdout, one section per check.
# Exit:     2 when there is no active OCP session.
#
# NOTE: "set -e" is deliberately not used: a failing check must not stop the
# remaining checks from being collected.

# Abort early when there is no active OCP session.
if ! oc whoami; then
  echo "ERROR: Valide que se encuentre conectado a OCP" >&2
  exit 2
fi

# Print the separator used between report sections.
sep() {
  echo -e "-------\n\n"
}

# Run a command on a node's host filesystem through a debug pod.
# Arguments: $1 - node name, $2.. - command and its arguments
on_node() {
  local target=$1
  shift
  oc debug -q "node/${target}" -- chroot /host "$@"
}

# Print the first HTTP response line obtained for each URL from a given node.
# Arguments: $1 - node name, $2.. - hosts/URLs to probe
check_urls() {
  local target=$1
  local url status
  shift
  if [[ -z "${target}" ]]; then
    echo "WARN: no master node found, skipping URL checks" >&2
    return 1
  fi
  for url in "$@"; do
    # curl must run on the node: connectivity is checked from the cluster,
    # not from the workstation running this script.
    status=$(on_node "${target}" curl -ksI "${url}" | head -1)
    echo "${url} - ${status:-no response}"
  done
}

oc adm release info | head -20 ; sep
oc get secrets kubeadmin -n kube-system ; sep

echo "identityProviders"
oc get oauth/cluster -o json | jq '.spec.identityProviders' ; sep

echo "Node List"
oc get nodes ; sep

echo "Number of pods per node. < 250 (for OpenShift 4.2) and < 500 (for OpenShift 4.3+):"
oc get nodes -o json | jq '.items[] | .status.allocatable.pods' ; sep

echo "Number of namespaces. < 10,000"
oc get namespaces --no-headers | wc -l ; sep

echo "Number of pods per namespace. < 25,000"
# uniq -c only merges adjacent lines, so sort by namespace first.
oc get pods -A --no-headers | awk '{ print $1 }' | sort | uniq -c | sort -rn | head -10 ; sep

echo "Number of services. < 10,000"
oc get services -A --no-headers | wc -l ; sep

echo "Number of pods. < 150,000"
oc get pods -A --no-headers | wc -l ; sep

echo "Number of deployments per namespace. < 2,000"
oc get deployment -A --no-headers | awk '{ print $1 }' | sort | uniq -c | sort -rn | head -10 ; sep

oc adm top nodes ; sep
oc get csr ; sep
oc get crd ; sep
oc get clusteroperators ; sep
oc get mcp ; sep

# Operator subscriptions, listed only for the projects that have any.
for project in $(oc get projects --no-headers | awk '{print $1}'); do
  subs=$(oc get subs -n "${project}")
  # A header row is only printed when the namespace has subscriptions.
  if [[ "${subs}" == *NAME* ]]; then
    echo "***************************************************************"
    echo "Namespace: ${project}"
    echo "${subs}"
    echo "***************************************************************"
  fi
done ; sep

oc get pods -o wide -n openshift-dns ; sep

# Per-node details: capacity, versions, services, time sync, disk.
for node in $(oc get nodes --no-headers | awk '{print $1}'); do
  echo "${node}"

  # Query the API once per node and reuse the output for every filter below.
  node_desc=$(oc describe node "${node}")
  node_json=$(oc get node "${node}" -o json)
  capacity=$(grep Capacity -A 13 <<<"${node_desc}")
  node_info=$(jq '.status.nodeInfo' <<<"${node_json}")

  # Strip blanks only, so each matching line stays on its own line.
  grep OS <<<"${node_desc}" | tr -d '[:blank:]'
  grep -E 'Capacity|Allocatable|cpu' <<<"${capacity}"
  grep -E 'Capacity|Allocatable|memory' <<<"${capacity}"
  grep -E 'Capacity|Allocatable|storage' <<<"${capacity}"
  jq '.status.conditions' <<<"${node_json}" | grep reason
  for field in kubeProxyVersion kernelVersion containerRuntimeVersion kubeletVersion; do
    grep "${field}" <<<"${node_info}"
  done

  on_node "${node}" systemctl show kubelet --property=ActiveState
  on_node "${node}" systemctl show NetworkManager --property=SubState
  on_node "${node}" systemctl show NetworkManager --property=ActiveState
  on_node "${node}" cat /etc/selinux/config | grep -v '#' | grep SELINUX=
  on_node "${node}" systemctl show kubelet --property=SubState
  on_node "${node}" systemctl show crio --property=ActiveState
  on_node "${node}" systemctl show crio --property=SubState
  on_node "${node}" sudo /bin/top -b -n 5 -d 5 | grep Tasks -A20
  on_node "${node}" timedatectl status
  on_node "${node}" chronyc tracking
  on_node "${node}" cat /proc/sys/kernel/random/entropy_avail
  on_node "${node}" df -hT /var
  sep
done

# The first master node is used as the vantage point for the URL checks.
master=$(oc get nodes --selector=node-role.kubernetes.io/master --no-headers \
  | awk '{print $1}' | head -n1)

echo -e "Check Registry Url\n"
check_urls "${master}" \
  registry.redhat.io quay.io cdn.quay.io sso.redhat.com \
  mirror.openshift.com api.openshift.com
sep

echo -e "Check Telemetry Url\n"
check_urls "${master}" \
  cert-api.access.redhat.com api.access.redhat.com \
  infogw.api.openshift.com cloud.redhat.com
sep
```

### Script que toma info de los pods en falla

```
$ vim hc-pods.sh
#!/bin/bash
#
# hc-pods.sh - report pods that are neither Running nor Completed in the
# projects whose listing line does not mention openshift, kube or default.
#
# Requires: oc (with an active session).
# Output:   per project, the pod list, a describe of every failing pod and
#           the pod / failing-pod counters, all on stdout.
# Exit:     2 when there is no active OCP session.

# Abort early when there is no active OCP session.
if ! oc whoami; then
  echo "ERROR: Valide que se encuentre conectado a OCP" >&2
  exit 2
fi

for project in $(oc get project --no-headers | grep -Ev 'openshift|kube|default' | awk '{print $1}'); do
  echo "***************************************************************"
  echo "Project: ${project}"

  # List the pods once; the same listing is printed and then parsed.
  pods=$(oc get pods -n "${project}")
  if [[ -n "${pods}" ]]; then
    echo "${pods}"
  fi

  podcount=0
  poderror=0
  # Columns are NAME READY STATUS ...; the header row is skipped by tail.
  while read -r pod _ estado _; do
    [[ -n "${pod}" ]] || continue
    podcount=$((podcount + 1))
    if [[ "${estado}" != "Running" && "${estado}" != "Completed" ]]; then
      echo -e "\n"
      echo "${pod} ${estado}"
      poderror=$((poderror + 1))
      # Keep oc from consuming the pod list that feeds this loop.
      oc describe "pod/${pod}" -n "${project}" </dev/null
    fi
  done < <(tail -n +2 <<<"${pods}")

  echo -e "\n"
  echo "Pods Count: ${podcount}"
  echo "Pods with errors:${poderror}"
  echo -e "***************************************************************\n\n\n"
done
```

### Script que toma info de los pods de namespaces de infraestructura en falla

```
$ vim hc-infra-pods.sh
#!/bin/bash
#
# hc-infra-pods.sh - report pods that are neither Running nor Completed in
# the projects whose listing line mentions openshift or kube.
#
# Requires: oc (with an active session).
# Output:   per project, the pod list, a describe of every failing pod and
#           the pod / failing-pod counters, all on stdout.
# Exit:     2 when there is no active OCP session.

# Abort early when there is no active OCP session.
if ! oc whoami; then
  echo "ERROR: Valide que se encuentre conectado a OCP" >&2
  exit 2
fi

for project in $(oc get project --no-headers | grep -E 'openshift|kube' | awk '{print $1}'); do
  echo "***************************************************************"
  echo "Project: ${project}"

  # List the pods once; the same listing is printed and then parsed.
  pods=$(oc get pods -n "${project}")
  if [[ -n "${pods}" ]]; then
    echo "${pods}"
  fi

  podcount=0
  poderror=0
  # Columns are NAME READY STATUS ...; the header row is skipped by tail.
  while read -r pod _ estado _; do
    [[ -n "${pod}" ]] || continue
    podcount=$((podcount + 1))
    if [[ "${estado}" != "Running" && "${estado}" != "Completed" ]]; then
      echo -e "\n"
      echo "${pod} ${estado}"
      poderror=$((poderror + 1))
      # Keep oc from consuming the pod list that feeds this loop.
      oc describe "pod/${pod}" -n "${project}" </dev/null
    fi
  done < <(tail -n +2 <<<"${pods}")

  echo -e "\n"
  echo "Pods Count: ${podcount}"
  echo "Pods with errors:${poderror}"
  echo -e "***************************************************************\n\n\n"
done
```

### Setearle los permisos de ejecución a los scripts

```
$ chmod +x hc-infra-pods.sh hc-nodes.sh hc-pods.sh
```

## Toma de datos

```
$ HC_DATE="$(date -I)"
$ mkdir -p "HC-${HC_DATE}"
$ ./hc-nodes.sh | tee "HC-${HC_DATE}/hc-nodes.log"
$ ./hc-infra-pods.sh | tee "HC-${HC_DATE}/hc-infra-pods.log"
$ ./hc-pods.sh | tee "HC-${HC_DATE}/hc-pods.log"
$ oc adm must-gather --dest-dir="./HC-${HC_DATE}"
$ oc adm must-gather --dest-dir="./HC-${HC_DATE}" --image="$(oc -n openshift-logging get deployment.apps/cluster-logging-operator -o jsonpath='{.spec.template.spec.containers[?(@.name == "cluster-logging-operator")].image}')"
```

### En caso de tener OCS/ODF, colectar el must-gather de OCS

```
$ oc adm must-gather --image=registry.redhat.io/ocs4/ocs-must-gather-rhel8 --dest-dir="./HC-${HC_DATE}"
```

### Comprimir todo

```
$ tar cfJ "HealthCheck-${HC_DATE}.txz" "./HC-${HC_DATE}"
```

### Adjuntar el archivo HealthCheck al caso de soporte

```
$ redhat-support-tool addattachment -c NRO_Caso "./HealthCheck-${HC_DATE}.txz"
```
×
Sign in
Email
Password
Forgot password
or
By clicking below, you agree to our
terms of service
.
Sign in via Facebook
Sign in via Twitter
Sign in via GitHub
Sign in via Dropbox
Sign in with Wallet
Wallet (
)
Connect another wallet
New to HackMD?
Sign up