Comment 20 for bug 619981

Revision history for this message
Michael Casadevall (mcasadevall) wrote :

Update on this bug:

After much debugging work with Jani and others on the ARM team, it looks like we're dealing with a race condition somewhere in the ARM JIT code (likely a missing mutex). Mono upstream has not been able to reproduce it, nor have I been able to successfully isolate the bug. I do, however, have a workaround patch which forces mono on ARM to run on a single processor, which avoids the race condition exposed by banshee and f-spot. While obviously not a true fix, this stabilizes mono on multicore ARM systems.

The current patch is as follows:

mcasadevall@daybreak:~/tmp/mono/natty-mono$ bzr diff
=== modified file 'debian/changelog'
--- debian/changelog 2011-02-19 16:59:03 +0000
+++ debian/changelog 2011-03-25 05:39:07 +0000
@@ -1,3 +1,16 @@
+mono (2.6.7-5ubuntu2) natty; urgency=high
+
+ * Backport ARM OP_MEMORY_BARRIER support and GCC workaround from
+ mono 2.10.1. Partial fix forLP: #619981
+ - Following revisions from git:
+ - 7bd422cfeee3622c4ebfe75ba450ca0d664fedbe - Implement
+ mono_memory_barrier () and OP_MEMORY_BARRIER for ARM.
+ - 9c868e2ee43178c8d05161c92489cc9191cc29c7 - Set cfg->uses_rgctx_reg in
+ another code path too on arm, to fix --regression generics.exe.
+ * Disable SMP on ARM by default (Works around LP: #619981)
+
+ -- Michael Casadevall <email address hidden> Tue, 08 Mar 2011 09:56:36 -0800
+
 mono (2.6.7-5ubuntu1) natty; urgency=low

   * Build packages for ppc64.

=== modified file 'debian/control'
--- debian/control 2011-02-19 16:59:03 +0000
+++ debian/control 2011-03-08 18:08:25 +0000
@@ -1,7 +1,8 @@
 Source: mono
 Section: cli-mono
 Priority: optional
-Maintainer: Debian Mono Group <email address hidden>
+Maintainer: Ubuntu Developers <email address hidden>
+XSBC-Original-Maintainer: Debian Mono Group <email address hidden>
 Uploaders: Mirco Bauer <email address hidden>, Sebastian Dröge <email address hidden>, Jo Shields <email address hidden>
 Build-Depends: debhelper (>= 7),
  dpkg-dev (>= 1.13.19),

=== modified file 'mono/arch/arm/arm-codegen.h'
--- mono/arch/arm/arm-codegen.h 2010-06-06 17:45:35 +0000
+++ mono/arch/arm/arm-codegen.h 2011-03-08 17:40:37 +0000
@@ -1084,6 +1084,16 @@
 #define ARM_MOVT_REG_IMM_COND(p, rd, imm16, cond) ARM_EMIT(p, (((cond) << 28) | (3 << 24) | (4 << 20) | ((((guint32)(imm16)) >> 12) << 16) | ((rd) << 12) | (((guint32)(imm16)) & 0xfff)))
 #define ARM_MOVT_REG_IMM(p, rd, imm16) ARM_MOVT_REG_IMM_COND ((p), (rd), (imm16), ARMCOND_AL)

+/* MCR */
+#define ARM_DEF_MCR_COND(coproc, opc1, rt, crn, crm, opc2, cond) \
+ ARM_DEF_COND ((cond)) | ((0xe << 24) | (((opc1) & 0x7) << 21) | (0 << 20) | (((crn) & 0xf) << 16) | (((rt) & 0xf) << 12) | (((coproc) & 0xf) << 8) | (((opc2) & 0x7) << 5) | (1 << 4) | (((crm) & 0xf) << 0))
+
+#define ARM_MCR_COND(p, coproc, opc1, rt, crn, crm, opc2, cond) \
+ ARM_EMIT(p, ARM_DEF_MCR_COND ((coproc), (opc1), (rt), (crn), (crm), (opc2), (cond)))
+
+#define ARM_MCR(p, coproc, opc1, rt, crn, crm, opc2) \
+ ARM_MCR_COND ((p), (coproc), (opc1), (rt), (crn), (crm), (opc2), ARMCOND_AL)
+
 #ifdef __cplusplus
 }
 #endif

=== modified file 'mono/mini/cpu-arm.md'
--- mono/mini/cpu-arm.md 2010-06-06 17:45:35 +0000
+++ mono/mini/cpu-arm.md 2011-03-08 17:40:37 +0000
@@ -46,7 +46,7 @@
 #
 # See the code in mini-x86.c for more details on how the specifiers are used.
 #
-memory_barrier: len:4
+memory_barrier: len:8 clob:a
 nop: len:4
 relaxed_nop: len:4
 break: len:4

=== modified file 'mono/mini/driver.c'
--- mono/mini/driver.c 2010-06-06 17:45:35 +0000
+++ mono/mini/driver.c 2011-03-25 05:33:36 +0000
@@ -1291,8 +1291,20 @@
  setlocale (LC_ALL, "");

 #if HAVE_SCHED_SETAFFINITY
+
+/**
+ * FIXME: The Mono JIT (mini) is non-SMP safe on ARM currently.
+ * Force us to be non-SMP unless a we have MONO_FORCE_SMP
+ * environmental variable set (to allow us to continue to
+ * debugging efforts
+ **/
+#if defined(__ARM_EABI__)
+ if (!getenv ("MONO_FORCE_SMP")) {
+#else
  if (getenv ("MONO_NO_SMP")) {
+#endif // __ARM_EABI__
   unsigned long proc_mask = 1;
+
 #ifdef GLIBC_BEFORE_2_3_4_SCHED_SETAFFINITY
   sched_setaffinity (getpid(), (gpointer)&proc_mask);
 #else

=== modified file 'mono/mini/mini-arm.c'
--- mono/mini/mini-arm.c 2010-09-14 19:58:16 +0000
+++ mono/mini/mini-arm.c 2011-03-08 17:40:54 +0000
@@ -1055,6 +1055,10 @@

  header = mono_method_get_header (cfg->method);

+ /* See mono_arch_get_global_int_regs () */
+ if (cfg->flags & MONO_CFG_HAS_CALLS)
+ cfg->uses_rgctx_reg = TRUE;
+
  /*
   * We use the frame register also for any method that has
   * exception clauses. This way, when the handlers are called,
@@ -2988,6 +2992,8 @@

   switch (ins->opcode) {
   case OP_MEMORY_BARRIER:
+ ARM_MOV_REG_IMM8 (code, ARMREG_R0, 0);
+ ARM_MCR (code, 15, 0, ARMREG_R0, 7, 10, 5);
    break;
   case OP_TLS_GET:
 #ifdef HAVE_AEABI_READ_TP

=== modified file 'mono/utils/mono-membar.h'
--- mono/utils/mono-membar.h 2009-06-14 16:52:28 +0000
+++ mono/utils/mono-membar.h 2011-03-08 17:40:54 +0000
@@ -129,7 +129,7 @@
 #elif defined(__arm__)
 static inline void mono_memory_barrier (void)
 {
- __asm__ __volatile__ ("" : : : "memory");
+ __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 5" : : "r" (0) : "memory");
 }

My PPA currently has a new revision of mono with this patch in place, and I'm waiting for independent verification that it properly fixes banshee/f-spot before uploading.