So this isn't enough by itself to avoid the failure case listed in c#0 as the call to resume_state_on_host_boot in turn calls _hard_reboot that always deletes the volume secret rendering the optimisation landed above useless.
It's pretty easy to reproduce this using the demo user account in devstack:
$ . openrc demo demo
$ openstack volume create --size 1 --type luks test
$ openstack server create --image cirros-0.5.1-x86_64-disk --flavor 1 --network private test
$ openstack server add volume test test
$ . openrc admin admin
$ openstack server reboot --hard test
$ openstack server event list f65c96c6-f63f-42b3-8e00-fff5b24daa35
+------------------------------------------+--------------------------------------+---------------+----------------------------+
| Request ID | Server ID | Action | Start Time |
+------------------------------------------+--------------------------------------+---------------+----------------------------+
| req-d22d8d5a-a090-4f03-a246-a4c4487319aa | f65c96c6-f63f-42b3-8e00-fff5b24daa35 | reboot | 2021-05-27T09:42:56.000000 |
| req-e8ab2b76-00a4-4c3c-9616-c1437acd17db | f65c96c6-f63f-42b3-8e00-fff5b24daa35 | attach_volume | 2021-05-27T09:41:52.000000 |
| req-2314c5c8-1584-4d7e-9044-78bcececb459 | f65c96c6-f63f-42b3-8e00-fff5b24daa35 | create | 2021-05-27T09:41:43.000000 |
+------------------------------------------+--------------------------------------+---------------+----------------------------+
$ openstack server event show f65c96c6-f63f-42b3-8e00-fff5b24daa35 req-d22d8d5a-a090-4f03-a246-a4c4487319aa -f json -c events | awk '{gsub("\\\\n","\n")};1'
{
"events": [
{
"event": "compute_reboot_instance",
"start_time": "2021-05-27T09:42:56.000000",
"finish_time": "2021-05-27T09:42:59.000000",
"result": "Error",
"traceback": " File \"/opt/stack/nova/nova/compute/utils.py\", line 1434, in decorated_function
return function(self, context, *args, **kwargs)
File \"/opt/stack/nova/nova/compute/manager.py\", line 211, in decorated_function
compute_utils.add_instance_fault_from_exc(context,
File \"/usr/local/lib/python3.8/site-packages/oslo_utils/excutils.py\", line 227, in __exit__
self.force_reraise()
File \"/usr/local/lib/python3.8/site-packages/oslo_utils/excutils.py\", line 200, in force_reraise
raise self.value
File \"/opt/stack/nova/nova/compute/manager.py\", line 200, in decorated_function
return function(self, context, *args, **kwargs)
File \"/opt/stack/nova/nova/compute/manager.py\", line 3709, in reboot_instance
do_reboot_instance(context, instance, block_device_info, reboot_type)
File \"/usr/local/lib/python3.8/site-packages/oslo_concurrency/lockutils.py\", line 360, in inner
return f(*args, **kwargs)
File \"/opt/stack/nova/nova/compute/manager.py\", line 3707, in do_reboot_instance
self._reboot_instance(context, instance, block_device_info,
File \"/opt/stack/nova/nova/compute/manager.py\", line 3801, in _reboot_instance
self._set_instance_obj_error_state(instance)
File \"/usr/local/lib/python3.8/site-packages/oslo_utils/excutils.py\", line 227, in __exit__
self.force_reraise()
File \"/usr/local/lib/python3.8/site-packages/oslo_utils/excutils.py\", line 200, in force_reraise
raise self.value
File \"/opt/stack/nova/nova/compute/manager.py\", line 3771, in _reboot_instance
self.driver.reboot(context, instance,
File \"/opt/stack/nova/nova/virt/libvirt/driver.py\", line 3659, in reboot
return self._hard_reboot(context, instance, network_info,
File \"/opt/stack/nova/nova/virt/libvirt/driver.py\", line 3748, in _hard_reboot
xml = self._get_guest_xml(context, instance, network_info, disk_info,
File \"/opt/stack/nova/nova/virt/libvirt/driver.py\", line 6990, in _get_guest_xml
conf = self._get_guest_config(instance, network_info, image_meta,
File \"/opt/stack/nova/nova/virt/libvirt/driver.py\", line 6612, in _get_guest_config
storage_configs = self._get_guest_storage_config(context,
File \"/opt/stack/nova/nova/virt/libvirt/driver.py\", line 5253, in _get_guest_storage_config
self._connect_volume(context, connection_info, instance)
File \"/opt/stack/nova/nova/virt/libvirt/driver.py\", line 1800, in _connect_volume
vol_driver.disconnect_volume(connection_info, instance)
File \"/usr/local/lib/python3.8/site-packages/oslo_utils/excutils.py\", line 227, in __exit__
self.force_reraise()
File \"/usr/local/lib/python3.8/site-packages/oslo_utils/excutils.py\", line 200, in force_reraise
raise self.value
File \"/opt/stack/nova/nova/virt/libvirt/driver.py\", line 1794, in _connect_volume
self._attach_encryptor(context, connection_info, encryption)
File \"/opt/stack/nova/nova/virt/libvirt/driver.py\", line 1935, in _attach_encryptor
key = keymgr.get(context, encryption['encryption_key_id'])
File \"/usr/local/lib/python3.8/site-packages/castellan/key_manager/migration.py\", line 55, in get
secret = super(MigrationKeyManager, self).get(
File \"/usr/local/lib/python3.8/site-packages/castellan/key_manager/barbican_key_manager.py\", line 588, in get
raise exception.KeyManagerError(reason=e)
"
}
]
}
I'll post a follow up change now and pause the backports for the time being.
So this isn't enough by itself to avoid the failure case listed in c#0 as the call to resume_state_on_host_boot in turn calls _hard_reboot that always deletes the volume secret rendering the optimisation landed above useless.
It's pretty easy to reproduce this using the demo user account in devstack:
$ . openrc admin admin
$ openstack volume type create --encryption-provider luks --encryption-cipher aes-xts-plain64 --encryption-key-size 256 --encryption-control-location front-end LUKS
$ . openrc demo demo
$ openstack volume create --size 1 --type luks test
$ openstack server create --image cirros-0.5.1-x86_64-disk --flavor 1 --network private test
$ openstack server add volume test test
$ . openrc admin admin
$ openstack server reboot --hard test
$ openstack server event list f65c96c6-f63f-42b3-8e00-fff5b24daa35
+------------------------------------------+--------------------------------------+---------------+----------------------------+
| Request ID | Server ID | Action | Start Time |
+------------------------------------------+--------------------------------------+---------------+----------------------------+
| req-d22d8d5a-a090-4f03-a246-a4c4487319aa | f65c96c6-f63f-42b3-8e00-fff5b24daa35 | reboot | 2021-05-27T09:42:56.000000 |
| req-e8ab2b76-00a4-4c3c-9616-c1437acd17db | f65c96c6-f63f-42b3-8e00-fff5b24daa35 | attach_volume | 2021-05-27T09:41:52.000000 |
| req-2314c5c8-1584-4d7e-9044-78bcececb459 | f65c96c6-f63f-42b3-8e00-fff5b24daa35 | create | 2021-05-27T09:41:43.000000 |
+------------------------------------------+--------------------------------------+---------------+----------------------------+
$ openstack server event show f65c96c6-f63f-42b3-8e00-fff5b24daa35 req-d22d8d5a-a090-4f03-a246-a4c4487319aa -f json -c events | awk '{gsub("\\\\n","\n")};1'
{
"events": [
{
"event": "compute_reboot_instance",
"start_time": "2021-05-27T09:42:56.000000",
"finish_time": "2021-05-27T09:42:59.000000",
"result": "Error",
"traceback": " File \"/opt/stack/nova/nova/compute/utils.py\", line 1434, in decorated_function
return function(self, context, *args, **kwargs)
File \"/opt/stack/nova/nova/compute/manager.py\", line 211, in decorated_function
compute_utils.add_instance_fault_from_exc(context,
File \"/usr/local/lib/python3.8/site-packages/oslo_utils/excutils.py\", line 227, in __exit__
self.force_reraise()
File \"/usr/local/lib/python3.8/site-packages/oslo_utils/excutils.py\", line 200, in force_reraise
raise self.value
File \"/opt/stack/nova/nova/compute/manager.py\", line 200, in decorated_function
return function(self, context, *args, **kwargs)
File \"/opt/stack/nova/nova/compute/manager.py\", line 3709, in reboot_instance
do_reboot_instance(context, instance, block_device_info, reboot_type)
File \"/usr/local/lib/python3.8/site-packages/oslo_concurrency/lockutils.py\", line 360, in inner
return f(*args, **kwargs)
File \"/opt/stack/nova/nova/compute/manager.py\", line 3707, in do_reboot_instance
self._reboot_instance(context, instance, block_device_info,
File \"/opt/stack/nova/nova/compute/manager.py\", line 3801, in _reboot_instance
self._set_instance_obj_error_state(instance)
File \"/usr/local/lib/python3.8/site-packages/oslo_utils/excutils.py\", line 227, in __exit__
self.force_reraise()
File \"/usr/local/lib/python3.8/site-packages/oslo_utils/excutils.py\", line 200, in force_reraise
raise self.value
File \"/opt/stack/nova/nova/compute/manager.py\", line 3771, in _reboot_instance
self.driver.reboot(context, instance,
File \"/opt/stack/nova/nova/virt/libvirt/driver.py\", line 3659, in reboot
return self._hard_reboot(context, instance, network_info,
File \"/opt/stack/nova/nova/virt/libvirt/driver.py\", line 3748, in _hard_reboot
xml = self._get_guest_xml(context, instance, network_info, disk_info,
File \"/opt/stack/nova/nova/virt/libvirt/driver.py\", line 6990, in _get_guest_xml
conf = self._get_guest_config(instance, network_info, image_meta,
File \"/opt/stack/nova/nova/virt/libvirt/driver.py\", line 6612, in _get_guest_config
storage_configs = self._get_guest_storage_config(context,
File \"/opt/stack/nova/nova/virt/libvirt/driver.py\", line 5253, in _get_guest_storage_config
self._connect_volume(context, connection_info, instance)
File \"/opt/stack/nova/nova/virt/libvirt/driver.py\", line 1800, in _connect_volume
vol_driver.disconnect_volume(connection_info, instance)
File \"/usr/local/lib/python3.8/site-packages/oslo_utils/excutils.py\", line 227, in __exit__
self.force_reraise()
File \"/usr/local/lib/python3.8/site-packages/oslo_utils/excutils.py\", line 200, in force_reraise
raise self.value
File \"/opt/stack/nova/nova/virt/libvirt/driver.py\", line 1794, in _connect_volume
self._attach_encryptor(context, connection_info, encryption)
File \"/opt/stack/nova/nova/virt/libvirt/driver.py\", line 1935, in _attach_encryptor
key = keymgr.get(context, encryption['encryption_key_id'])
File \"/usr/local/lib/python3.8/site-packages/castellan/key_manager/migration.py\", line 55, in get
secret = super(MigrationKeyManager, self).get(
File \"/usr/local/lib/python3.8/site-packages/castellan/key_manager/barbican_key_manager.py\", line 588, in get
raise exception.KeyManagerError(reason=e)
"
}
]
}
I'll post a follow up change now and pause the backports for the time being.