Comment 11 for bug 1827009

Revision history for this message
John A Meinel (jameinel) wrote :

The log has these lines at the end:
2019-05-15 02:21:41 INFO juju.worker.httpserver worker.go:168 shutting down HTTP server
2019-05-15 02:22:15 INFO juju.agent uninstall.go:47 agent already marked ready for uninstall
2019-05-15 02:22:15 INFO juju.cmd.jujud machine.go:1224 uninstalling agent

The "agent already marked ready for uninstall" line is found in (snippet truncated):
func CanUninstall(a Agent) bool {
 if _, err := os.Stat(uninstallFile(a)); err != nil {
  logger.Debugf("agent not marked ready for uninstall")
  return false
 }

and that function is only called in
func (a *MachineAgent) uninstallAgent() error {
 // We should only uninstall if the uninstall file is present.
 if !agent.CanUninstall(a) {
  logger.Infof("ignoring uninstall request")
  return nil
 }
 logger.Infof("uninstalling agent")

which is only called by:
func (a *MachineAgent) Run(*cmd.Context) (err error) {
...
 close(a.workersStarted)
 err = a.runner.Wait()
 switch errors.Cause(err) {
 case jworker.ErrTerminateAgent:
  err = a.uninstallAgent()

However, there are 30 or so references to ErrTerminateAgent. Restricting the list to the places that actually return that error:
agent/machine.go:693
 if err := a.setupContainerSupport(runner, apiConn, agentConfig); err != nil {
  cause := errors.Cause(err)
  if params.IsCodeDead(cause) || cause == jworker.ErrTerminateAgent {
   return nil, jworker.ErrTerminateAgent
  }
  return nil, errors.Annotate(err, "setting up container support")
 }

...
func (a *MachineAgent) setControllerNetworkConfig(apiConn api.Connection) error {
 // TODO(bootstrap): do we need this for k8s???
 machine, err := a.machine(apiConn)
 if errors.IsNotFound(err) || err == nil && machine.Life() == params.Dead {
  return jworker.ErrTerminateAgent
 }

...
func (a *MachineAgent) updateSupportedContainers(
 runner *worker.Runner,
 st api.Connection,
 containers []instance.ContainerType,
 agentConfig agent.Config,
) error {
 pr := apiprovisioner.NewState(st)
 tag := agentConfig.Tag().(names.MachineTag)
 result, err := pr.Machines(tag)
 if err != nil {
  return errors.Annotatef(err, "cannot load machine %s from state", tag)
 }
 if len(result) != 1 {
  return errors.Errorf("expected 1 result, got %d", len(result))
 }
 if errors.IsNotFound(result[0].Err) || (result[0].Err == nil && result[0].Machine.Life() == params.Dead) {
  return jworker.ErrTerminateAgent
 }
...
func openStatePool(
 agentConfig agent.Config,
 dialOpts mongo.DialOpts,
 runTransactionObserver state.RunTransactionObserverFunc,
) (_ *state.StatePool, _ *state.Machine, err error) {

...
 st := pool.SystemState()
 m0, err := st.FindEntity(agentConfig.Tag())
 if err != nil {
  if errors.IsNotFound(err) {
   err = jworker.ErrTerminateAgent
  }
  return nil, nil, err
 }
 m := m0.(*state.Machine)
 if m.Life() == state.Dead {
  return nil, nil, jworker.ErrTerminateAgent
 }
...
 if !m.CheckProvisioned(agentConfig.Nonce()) {
  // The agent is running on a different machine to the one it
  // should be according to state. It must stop immediately.
  logger.Errorf("running machine %v agent on inappropriate instance", m)
  return nil, nil, jworker.ErrTerminateAgent
 }
^- not this one, because we don't see that error message in the log

machiner.go:
func (mr *Machiner) SetUp() (watcher.NotifyWatcher, error) {
 // Find which machine we're responsible for.
 m, err := mr.config.MachineAccessor.Machine(mr.config.Tag)
 if params.IsCodeNotFoundOrCodeUnauthorized(err) {
  return nil, jworker.ErrTerminateAgent
...
func (mr *Machiner) Handle(_ <-chan struct{}) error {
 if err := mr.machine.Refresh(); params.IsCodeNotFoundOrCodeUnauthorized(err) {
  // NOTE(axw) we can distinguish between NotFound and CodeUnauthorized,
  // so we could call NotifyMachineDead here in case the agent failed to
  // call NotifyMachineDead directly after setting the machine Dead in
  // the first place. We're not doing that to be cautious: the machine
  // could be missing from state due to invalid global state.
  return jworker.ErrTerminateAgent
...
 if mr.config.NotifyMachineDead != nil {
  if err := mr.config.NotifyMachineDead(); err != nil {
   return errors.Annotate(err, "reporting machine death")
  }
 }
 return jworker.ErrTerminateAgent
...
manifolds.go:
 connectFilter := func(err error) error {
  cause := errors.Cause(err)
  if cause == apicaller.ErrConnectImpossible {
   err2 := coreagent.SetCanUninstall(config.Agent)
   if err2 != nil {
    return errors.Trace(err2)
   }
   return jworker.ErrTerminateAgent

Note that there are only 2 places where we call SetCanUninstall, which is the only way we actually delete the code (i.e. let the agent uninstall itself):
the code above (connectFilter),
and this one:
...
 accessor := APIMachineAccessor{apimachiner.NewState(apiCaller)}
 w, err := NewMachiner(Config{
  MachineAccessor: accessor,
  Tag: tag.(names.MachineTag),
  ClearMachineAddressesOnStart: ignoreMachineAddresses,
  NotifyMachineDead: func() error {
   return agent.SetCanUninstall(a)
  },
 })

...
// stopUnitError picks the error to return based on the uniter's model type:
// ErrCAASUnitDead when the model type is model.CAAS, and
// jworker.ErrTerminateAgent for every other model type.
func (u *Uniter) stopUnitError() error {
 // Log the model type at debug level so the chosen branch can be traced.
 logger.Debugf("u.modelType: %s", u.modelType)
 if u.modelType == model.CAAS {
  return ErrCAASUnitDead
 }
 // Non-CAAS models fall through to the agent-terminating error.
 return jworker.ErrTerminateAgent
}

...
func (w *RemoteStateWatcher) setUp(unitTag names.UnitTag) error {
 // TODO(axw) move this logic
 var err error
 defer func() {
  cause := errors.Cause(err)
  if params.IsCodeNotFoundOrCodeUnauthorized(cause) {
   // We only want to terminate the agent for IAAS models.
   if w.modelType == model.IAAS {
    err = jworker.ErrTerminateAgent
   }
  }
 }()

terminationworker/worker.go:
// loop blocks until either a receive on c succeeds or the worker's tomb
// reports that it is dying, and returns an error identifying which happened:
// jworker.ErrTerminateAgent for a receive on c, tomb.ErrDying for the tomb.
func (w *terminationWorker) loop(c <-chan os.Signal) (err error) {
 select {
 case <-c:
  // A signal on c is translated into a request to terminate the agent.
  return jworker.ErrTerminateAgent
 case <-w.tomb.Dying():
  // The worker is being stopped; no termination is requested.
  return tomb.ErrDying
 }
}

That seems to be all the references.