# this is where the call happens that causes the crash - the crash is coming from ceph though, not the fault of this
# NOTE: running `sudo ceph nfs cluster ls` prints:
# Error ENOENT: No orchestrator configured (try `ceph orch set backend`)
# but does not show a traceback.
# This may be limited to the python api? mgr.remote('nfs', 'cluster_ls')
When the orchestrator is not present, we see this traceback:
```
{
"archived": "2023-11-20 04:58:57.151697",
"backtrace": [
" File \"/usr/share/ceph/mgr/nfs/module.py\", line 169, in cluster_ls\n return available_clusters(self)",
" File \"/usr/share/ceph/mgr/nfs/utils.py\", line 38, in available_clusters\n completion = mgr.describe_service(service_type='nfs')",
" File \"/usr/share/ceph/mgr/orchestrator/_interface.py\", line 1488, in inner\n completion = self._oremote(method_name, args, kwargs)",
" File \"/usr/share/ceph/mgr/orchestrator/_interface.py\", line 1555, in _oremote\n raise NoOrchestrator()",
"orchestrator._interface.NoOrchestrator: No orchestrator configured (try `ceph orch set backend`)"
],
"ceph_version": "17.2.6",
"crash_id": "2023-11-20T04:47:16.737623Z_8a944527-1cc1-4ed5-b58b-86bf97bcf3b1",
"entity_name": "mgr.juju-108031-1-lxd-1",
"mgr_module": "nfs",
"mgr_module_caller": "ActivePyModule::dispatch_remote cluster_ls",
"mgr_python_exception": "NoOrchestrator",
"os_id": "22.04",
"os_name": "Ubuntu 22.04.3 LTS",
"os_version": "22.04.3 LTS (Jammy Jellyfish)",
"os_version_id": "22.04",
"process_name": "ceph-mgr",
"stack_sig": "b01db59d356dd52f69bfb0b128a216e7606f54a60674c3c82711c23cf64832ce",
"timestamp": "2023-11-20T04:47:16.737623Z",
"utsname_hostname": "juju-108031-1-lxd-1",
"utsname_machine": "x86_64",
"utsname_release": "5.15.0-88-generic",
"utsname_sysname": "Linux",
"utsname_version": "#98-Ubuntu SMP Mon Oct 2 15:18:56 UTC 2023"
}
```
I guess this is the part that maps directly to the `cluster_ls` method:
```
"mgr_module_caller": "ActivePyModule::dispatch_remote cluster_ls",
```
This is `cluster_ls`, in `src/pybind/mgr/nfs/module.py`.
```
# this raises an error, causing a module crash, if orchestrator is not available
def cluster_ls(self) -> List[str]:
    return available_clusters(self)
```
^ This is the root of the traceback we're seeing.
I guess the reason we're seeing a crash is that this method doesn't catch any errors thrown from `available_clusters`.
For reference, other methods I've checked here will handle the error.
For example:
(in `src/pybind/mgr/nfs/cluster.py`, called from `ceph nfs cluster ls` handler in `_cmd_nfs_cluster_ls()` in `src/pybind/mgr/nfs/module.py`)
```
def list_nfs_cluster(self) -> List[str]:
    try:
        return available_clusters(self.mgr)
    except Exception as e:
        log.exception("Failed to list NFS Cluster")
        raise ErrorResponse.wrap(e)
```
I tried the same pattern of catching the error, and raising `ErrorResponse` within `cluster_ls`,
but that still resulted in a crash:
```
{
"backtrace": [
" File \"/usr/share/ceph/mgr/nfs/module.py\", line 173, in cluster_ls\n return available_clusters(self)",
" File \"/usr/share/ceph/mgr/nfs/utils.py\", line 38, in available_clusters\n completion = mgr.describe_service(service_type='nfs')",
" File \"/usr/share/ceph/mgr/orchestrator/_interface.py\", line 1488, in inner\n completion = self._oremote(method_name, args, kwargs)",
" File \"/usr/share/ceph/mgr/orchestrator/_interface.py\", line 1555, in _oremote\n raise NoOrchestrator()",
"orchestrator._interface.NoOrchestrator: No orchestrator configured (try `ceph orch set backend`)",
"\nThe above exception was the direct cause of the following exception:\n",
"Traceback (most recent call last):",
" File \"/usr/share/ceph/mgr/nfs/module.py\", line 175, in cluster_ls\n raise ErrorResponse.wrap(e)",
"object_format.ErrorResponse: No orchestrator configured (try `ceph orch set backend`)"
],
"ceph_version": "17.2.6",
"crash_id": "2023-11-20T04:59:04.018086Z_2a16b6a4-85e5-49ee-93f0-c1b552f1df06",
"entity_name": "mgr.juju-108031-1-lxd-1",
"mgr_module": "nfs",
"mgr_module_caller": "ActivePyModule::dispatch_remote cluster_ls",
"mgr_python_exception": "ErrorResponse",
"os_id": "22.04",
"os_name": "Ubuntu 22.04.3 LTS",
"os_version": "22.04.3 LTS (Jammy Jellyfish)",
"os_version_id": "22.04",
"process_name": "ceph-mgr",
"stack_sig": "6a64a2a392fc0ad969c705c51ccec3206fab079f3c53ef566d1ed1d6f5088851",
"timestamp": "2023-11-20T04:59:04.018086Z",
"utsname_hostname": "juju-108031-1-lxd-1",
"utsname_machine": "x86_64",
"utsname_release": "5.15.0-88-generic",
"utsname_sysname": "Linux",
"utsname_version": "#98-Ubuntu SMP Mon Oct 2 15:18:56 UTC 2023"
}
```
I'm not sure what kind of pattern is required here for this kind of remote module method call where it's not a cli command.
We still need to convey an error response to the remote caller (e.g. ceph-dashboard in this case),
but without "crashing".
Definitely an upstream issue, not related to the ceph-dashboard charm.
Exploring the ceph repository:
`src/pybind/mgr/dashboard/controllers/nfs.py`
```
@Endpoint()
@ReadPermission
def status(self):
    status = {'available': True, 'message': None}
    try:
        # this is where the call happens that causes the crash - the crash is coming from ceph though, not the fault of this
        mgr.remote('nfs', 'cluster_ls')
        # NOTE: running `sudo ceph nfs cluster ls` prints:
        # Error ENOENT: No orchestrator configured (try `ceph orch set backend`)
        # but does not show a traceback.
        # This may be limited to the python api?
    except (ImportError, RuntimeError) as error:
        logger.exception(error)
        status['available'] = False
        status['message'] = str(error)  # type: ignore
    return status
```
When the orchestrator is not present, we see this traceback:
```
{
"archived": "2023-11-20 04:58:57.151697",
"backtrace": [
" File \"/usr/share/ceph/mgr/nfs/module.py\", line 169, in cluster_ls\n return available_clusters(self)",
" File \"/usr/share/ceph/mgr/nfs/utils.py\", line 38, in available_clusters\n completion = mgr.describe_service(service_type='nfs')",
" File \"/usr/share/ceph/mgr/orchestrator/_interface.py\", line 1488, in inner\n completion = self._oremote(method_name, args, kwargs)",
" File \"/usr/share/ceph/mgr/orchestrator/_interface.py\", line 1555, in _oremote\n raise NoOrchestrator()",
"orchestrator._interface.NoOrchestrator: No orchestrator configured (try `ceph orch set backend`)"
],
"ceph_version": "17.2.6",
"crash_id": "2023-11-20T04:47:16.737623Z_8a944527-1cc1-4ed5-b58b-86bf97bcf3b1",
"entity_name": "mgr.juju-108031-1-lxd-1",
"mgr_module": "nfs",
"mgr_module_caller": "ActivePyModule::dispatch_remote cluster_ls",
"mgr_python_exception": "NoOrchestrator",
"os_id": "22.04",
"os_name": "Ubuntu 22.04.3 LTS",
"os_version": "22.04.3 LTS (Jammy Jellyfish)",
"os_version_id": "22.04",
"process_name": "ceph-mgr",
"stack_sig": "b01db59d356dd52f69bfb0b128a216e7606f54a60674c3c82711c23cf64832ce",
"timestamp": "2023-11-20T04:47:16.737623Z",
"utsname_hostname": "juju-108031-1-lxd-1",
"utsname_machine": "x86_64",
"utsname_release": "5.15.0-88-generic",
"utsname_sysname": "Linux",
"utsname_version": "#98-Ubuntu SMP Mon Oct 2 15:18:56 UTC 2023"
}
```
I guess this is the part that maps directly to the `cluster_ls` method:
```
"mgr_module_caller": "ActivePyModule::dispatch_remote cluster_ls",
```
This is `cluster_ls`, in `src/pybind/mgr/nfs/module.py`.
```
# this raises an error, causing a module crash, if orchestrator is not available
def cluster_ls(self) -> List[str]:
    return available_clusters(self)
```
^ This is the root of the traceback we're seeing.
I guess the reason we're seeing a crash is that this method doesn't catch any errors thrown from `available_clusters`.
For reference, other methods I've checked here will handle the error.
For example:
(in `src/pybind/mgr/nfs/cluster.py`, called from the `ceph nfs cluster ls` handler `_cmd_nfs_cluster_ls()` in `src/pybind/mgr/nfs/module.py`)
```
def list_nfs_cluster(self) -> List[str]:
    try:
        return available_clusters(self.mgr)
    except Exception as e:
        log.exception("Failed to list NFS Cluster")
        raise ErrorResponse.wrap(e)
```
I tried the same pattern of catching the error, and raising `ErrorResponse` within `cluster_ls`,
but that still resulted in a crash:
```
{
"backtrace": [
" File \"/usr/share/ceph/mgr/nfs/module.py\", line 173, in cluster_ls\n return available_clusters(self)",
" File \"/usr/share/ceph/mgr/nfs/utils.py\", line 38, in available_clusters\n completion = mgr.describe_service(service_type='nfs')",
" File \"/usr/share/ceph/mgr/orchestrator/_interface.py\", line 1488, in inner\n completion = self._oremote(method_name, args, kwargs)",
" File \"/usr/share/ceph/mgr/orchestrator/_interface.py\", line 1555, in _oremote\n raise NoOrchestrator()",
"orchestrator._interface.NoOrchestrator: No orchestrator configured (try `ceph orch set backend`)",
"\nThe above exception was the direct cause of the following exception:\n",
"Traceback (most recent call last):",
" File \"/usr/share/ceph/mgr/nfs/module.py\", line 175, in cluster_ls\n raise ErrorResponse.wrap(e)",
"object_format.ErrorResponse: No orchestrator configured (try `ceph orch set backend`)"
],
"ceph_version": "17.2.6",
"crash_id": "2023-11-20T04:59:04.018086Z_2a16b6a4-85e5-49ee-93f0-c1b552f1df06",
"entity_name": "mgr.juju-108031-1-lxd-1",
"mgr_module": "nfs",
"mgr_module_caller": "ActivePyModule::dispatch_remote cluster_ls",
"mgr_python_exception": "ErrorResponse",
"os_id": "22.04",
"os_name": "Ubuntu 22.04.3 LTS",
"os_version": "22.04.3 LTS (Jammy Jellyfish)",
"os_version_id": "22.04",
"process_name": "ceph-mgr",
"stack_sig": "6a64a2a392fc0ad969c705c51ccec3206fab079f3c53ef566d1ed1d6f5088851",
"timestamp": "2023-11-20T04:59:04.018086Z",
"utsname_hostname": "juju-108031-1-lxd-1",
"utsname_machine": "x86_64",
"utsname_release": "5.15.0-88-generic",
"utsname_sysname": "Linux",
"utsname_version": "#98-Ubuntu SMP Mon Oct 2 15:18:56 UTC 2023"
}
```
I'm not sure what kind of pattern is required here for this kind of remote module method call where it's not a cli command.
We still need to convey an error response to the remote caller (e.g. ceph-dashboard in this case),
but without "crashing".