Scenario: there are multiple armada pods, one stuck in Terminating and one Ready. Selecting the healthy pod (see the patch in reference [1]) instead of using `pod = pods[0]` does not work, because helmv2-cli is called at a later time and reports: `helmv2-cli helmv2-cli[3839105] ERROR Could not find tiller listen port`. The Kubernetes Python client does not support the `--force` flag for pod deletion, so a forced delete must be done by calling kubectl directly: `kubectl delete pods -n armada armada-pod-hash47 --force`
[1] diff --git a/sysinv/sysinv/sysinv/sysinv/conductor/kube_app.py b/sysinv/sysinv/sysinv/sysinv/conductor/kube_app.py index 8f5f5cb0..03a07041 100644 --- a/sysinv/sysinv/sysinv/sysinv/conductor/kube_app.py +++ b/sysinv/sysinv/sysinv/sysinv/conductor/kube_app.py @@ -3027,6 +3027,24 @@ class ArmadaHelper(object): return False return True + def _check_pod_ready_probe(self, pod): + """ Pod is of the form returned by self._kube.kube_get_pods_by_selector + Returns true if last probe shows the container is in `Ready` state + """ + conditions = list(filter(lambda x: x.type == 'Ready', pod.status.conditions)) + return conditions[0].status == 'True' + + def _prefer_select_one_running_ready_pod(self, pods): + """ Find one running and ready pod. + Return found if one, otherwise first pod. + """ + for pod in pods: + if pod.status.phase == 'Running' and \ + self._check_pod_ready_probe(pod): + return pod + + return pods[0] + def _start_armada_service(self): """Armada pod is managed by Kubernetes / Helm. This routine checks and waits for armada to be providing service. 
@@ -3058,7 +3076,7 @@ class ArmadaHelper(object): "application=%s" % ARMADA_APPLICATION, "") if not pods: raise RuntimeError('armada pod not found') - pod = pods[0] + pod = self._prefer_select_one_running_ready_pod(pods) if pod and pod.status.phase != 'Running': # Delete the pod, it should restart if it can @@ -3067,7 +3085,8 @@ class ArmadaHelper(object): LOG.warning("Pod %s/%s deletion unsuccessful...", ARMADA_NAMESPACE, pod.metadata.name) - if pod and pod.status.phase == 'Running': + if pod and pod.status.phase == 'Running' and \ + self._check_pod_ready_probe(pod): # Test that we can copy files into armada-api container src = '/etc/build.info' dest_dir = '{}:{}'.format(pod.metadata.name, '/tmp') @@ -3081,6 +3100,16 @@ class ArmadaHelper(object): else: return True return True + # + # elif pod and pod.status.phase == 'Running': + # LOG.warning("Pod %s/%s running but not ready", + # ARMADA_NAMESPACE, pod.metadata.name) + # + # # Delete the pod, it should restart if it can + # if not self._kube.kube_delete_pod(pod.metadata.name, + # ARMADA_NAMESPACE, grace_periods_seconds=0, force=None): + # LOG.warning("Pod %s/%s deletion unsuccessful...", + # ARMADA_NAMESPACE, pod.metadata.name) except Exception as e: LOG.info("Could not get Armada service : %s " % e) @@ -3151,7 +3180,7 @@ class ArmadaHelper(object): "status.phase=Running") if not pods: raise RuntimeError('armada pod not found') - armada_pod = pods[0].metadata.name + armada_pod = self._prefer_select_one_running_ready_pod(pods).metadata.name if not self.copy_manifests_and_overrides_to_armada(armada_pod, manifest_file): raise RuntimeError('could not access armada pod')