From b1957eb7c96002ef7d1c180a78ff5418689cfdea Mon Sep 17 00:00:00 2001
From: Brian Smith <brian@dbsoft.org>
Date: Thu, 5 May 2022 15:53:25 -0500
Subject: Issue #1891 - Fix ASAN and clang crashes on Linux, BSD and MacOS.
 Merge with the existing Solaris fix, by folding Solaris into the Linux/BSD
 section. Add Apple Silicon (ARM64) support. This should also address Issue
 #1884 by moving the offending code into assembly.

---
 xpcom/reflect/xptcall/md/unix/moz.build            |  23 +---
 .../reflect/xptcall/md/unix/xptcinvoke_darwin.cpp  |   2 +
 .../xptcall/md/unix/xptcinvoke_x86_64_solaris.cpp  |  75 ------------
 .../xptcall/md/unix/xptcinvoke_x86_64_unix.cpp     | 129 ++-------------------
 4 files changed, 16 insertions(+), 213 deletions(-)
 delete mode 100644 xpcom/reflect/xptcall/md/unix/xptcinvoke_x86_64_solaris.cpp

diff --git a/xpcom/reflect/xptcall/md/unix/moz.build b/xpcom/reflect/xptcall/md/unix/moz.build
index 776ff13e4f..afab3acd2b 100644
--- a/xpcom/reflect/xptcall/md/unix/moz.build
+++ b/xpcom/reflect/xptcall/md/unix/moz.build
@@ -13,6 +13,10 @@ if CONFIG['OS_ARCH'] == 'Darwin':
             '!xptcstubs_asm_ppc_darwin.s',
             'xptcinvoke_asm_ppc_rhapsody.s',
         ]
+    if CONFIG['OS_TEST'] == 'x86_64':
+        SOURCES += [
+            'xptcinvoke_asm_x86_64_unix.S',
+        ]
     if '86' in CONFIG['OS_TEST'] and CONFIG['OS_TEST'] != 'x86_64':
         DEFINES['MOZ_NEED_LEADING_UNDERSCORE'] = True
 
@@ -23,10 +27,11 @@ if CONFIG['OS_ARCH'] == 'GNU':
             'xptcstubs_gcc_x86_unix.cpp'
         ]
 
-if CONFIG['OS_ARCH'] in ('Linux', 'Bitrig', 'DragonFly', 'FreeBSD', 'NetBSD', 'OpenBSD') or \
+if CONFIG['OS_ARCH'] in ('Linux', 'Bitrig', 'DragonFly', 'FreeBSD', 'NetBSD', 'OpenBSD', 'SunOS') or \
    CONFIG['OS_ARCH'].startswith('GNU_'):
     if CONFIG['OS_TEST'] == 'x86_64':
         SOURCES += [
+            'xptcinvoke_asm_x86_64_unix.S',
             'xptcinvoke_x86_64_unix.cpp',
             'xptcstubs_x86_64_linux.cpp',
         ]
@@ -45,22 +50,6 @@ if CONFIG['OS_ARCH'] in ('Linux', 'FreeBSD'):
             'xptcstubs_ipf64.cpp'
         ]
 
-# Without the need to accomodate the Sun compiler, Solaris/GCC support is pretty
-# standard, actually. Not all that different from Linux.
-
-if CONFIG['OS_ARCH'] == 'SunOS':
-    if CONFIG['OS_TEST'] == 'x86_64':
-            SOURCES += [
-                'xptcinvoke_asm_x86_64_unix.S',
-                'xptcinvoke_x86_64_solaris.cpp',
-                'xptcstubs_x86_64_linux.cpp'
-            ]
-    elif '86' in CONFIG['OS_TEST']:
-            SOURCES += [
-                'xptcinvoke_gcc_x86_unix.cpp',
-                'xptcstubs_gcc_x86_unix.cpp'
-            ]
-
 if CONFIG['OS_TEST'] == 'alpha':
     if CONFIG['OS_ARCH'] in ('Linux', 'FreeBSD', 'NetBSD'):
         SOURCES += [
diff --git a/xpcom/reflect/xptcall/md/unix/xptcinvoke_darwin.cpp b/xpcom/reflect/xptcall/md/unix/xptcinvoke_darwin.cpp
index 04407db396..e6b7932e8d 100644
--- a/xpcom/reflect/xptcall/md/unix/xptcinvoke_darwin.cpp
+++ b/xpcom/reflect/xptcall/md/unix/xptcinvoke_darwin.cpp
@@ -11,6 +11,8 @@
 #include "xptcinvoke_ppc_rhapsody.cpp"
 #elif defined(__arm__)
 #include "xptcinvoke_arm.cpp"
+#elif defined(__aarch64__)
+#include "xptcinvoke_aarch64.cpp"
 #else
 #error unknown cpu architecture
 #endif
diff --git a/xpcom/reflect/xptcall/md/unix/xptcinvoke_x86_64_solaris.cpp b/xpcom/reflect/xptcall/md/unix/xptcinvoke_x86_64_solaris.cpp
deleted file mode 100644
index a9db2a693a..0000000000
--- a/xpcom/reflect/xptcall/md/unix/xptcinvoke_x86_64_solaris.cpp
+++ /dev/null
@@ -1,75 +0,0 @@
-/* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
- *
- * This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-// Platform specific code to invoke XPCOM methods on native objects
-
-#include "xptcprivate.h"
-
-// 6 integral parameters are passed in registers, but 1 is |this| which isn't
-// considered here.
-const uint32_t GPR_COUNT = 5;
-
-// 8 floating point parameters are passed in SSE registers
-const uint32_t FPR_COUNT = 8;
-
-extern "C" void
-InvokeCopyToStack(uint64_t * gpregs, double * fpregs,
-                     uint32_t paramCount, nsXPTCVariant * s,
-                     uint64_t* d)
-{
-    uint32_t nr_gpr = 0u; // skip one GP register for 'that'
-    uint32_t nr_fpr = 0u;
-    uint64_t value = 0u;
-
-    for (uint32_t i = 0; i < paramCount; i++, s++) {
-        if (s->IsPtrData())
-            value = (uint64_t) s->ptr;
-        else {
-            switch (s->type) {
-            case nsXPTType::T_FLOAT:                                break;
-            case nsXPTType::T_DOUBLE:                               break;
-            case nsXPTType::T_I8:     value = s->val.i8;            break;
-            case nsXPTType::T_I16:    value = s->val.i16;           break;
-            case nsXPTType::T_I32:    value = s->val.i32;           break;
-            case nsXPTType::T_I64:    value = s->val.i64;           break;
-            case nsXPTType::T_U8:     value = s->val.u8;            break;
-            case nsXPTType::T_U16:    value = s->val.u16;           break;
-            case nsXPTType::T_U32:    value = s->val.u32;           break;
-            case nsXPTType::T_U64:    value = s->val.u64;           break;
-            case nsXPTType::T_BOOL:   value = s->val.b;             break;
-            case nsXPTType::T_CHAR:   value = s->val.c;             break;
-            case nsXPTType::T_WCHAR:  value = s->val.wc;            break;
-            default:                  value = (uint64_t) s->val.p;  break;
-            }
-        }
-
-        if (!s->IsPtrData() && s->type == nsXPTType::T_DOUBLE) {
-            if (nr_fpr < FPR_COUNT)
-                fpregs[nr_fpr++] = s->val.d;
-            else {
-                *((double *)d) = s->val.d;
-                d++;
-            }
-        }
-        else if (!s->IsPtrData() && s->type == nsXPTType::T_FLOAT) {
-            if (nr_fpr < FPR_COUNT)
-                // The value in %xmm register is already prepared to
-                // be retrieved as a float. Therefore, we pass the
-                // value verbatim, as a double without conversion.
-                fpregs[nr_fpr++] = s->val.d;
-            else {
-                *((float *)d) = s->val.f;
-                d++;
-            }
-        }
-        else {
-            if (nr_gpr < GPR_COUNT)
-                gpregs[nr_gpr++] = value;
-            else
-                *d++ = value;
-        }
-    }
-}
diff --git a/xpcom/reflect/xptcall/md/unix/xptcinvoke_x86_64_unix.cpp b/xpcom/reflect/xptcall/md/unix/xptcinvoke_x86_64_unix.cpp
index 08e5198897..c1b21bf25a 100644
--- a/xpcom/reflect/xptcall/md/unix/xptcinvoke_x86_64_unix.cpp
+++ b/xpcom/reflect/xptcall/md/unix/xptcinvoke_x86_64_unix.cpp
@@ -8,46 +8,19 @@
 
 #include "xptcprivate.h"
 
-// 6 integral parameters are passed in registers
-const uint32_t GPR_COUNT = 6;
+// 6 integral parameters are passed in registers, but 1 is |this| which isn't
+// considered here.
+const uint32_t GPR_COUNT = 5;
 
 // 8 floating point parameters are passed in SSE registers
 const uint32_t FPR_COUNT = 8;
 
-// Remember that these 'words' are 64-bit long
-static inline void
-invoke_count_words(uint32_t paramCount, nsXPTCVariant * s,
-                   uint32_t & nr_stack)
+extern "C" void
+InvokeCopyToStack(uint64_t * gpregs, double * fpregs,
+                     uint32_t paramCount, nsXPTCVariant * s,
+                     uint64_t* d)
 {
-    uint32_t nr_gpr;
-    uint32_t nr_fpr;
-    nr_gpr = 1; // skip one GP register for 'that'
-    nr_fpr = 0;
-    nr_stack = 0;
-
-    /* Compute number of eightbytes of class MEMORY.  */
-    for (uint32_t i = 0; i < paramCount; i++, s++) {
-        if (!s->IsPtrData()
-            && (s->type == nsXPTType::T_FLOAT || s->type == nsXPTType::T_DOUBLE)) {
-            if (nr_fpr < FPR_COUNT)
-                nr_fpr++;
-            else
-                nr_stack++;
-        }
-        else {
-            if (nr_gpr < GPR_COUNT)
-                nr_gpr++;
-            else
-                nr_stack++;
-        }
-    }
-}
-
-static void
-invoke_copy_to_stack(uint64_t * d, uint32_t paramCount, nsXPTCVariant * s,
-                     uint64_t * gpregs, double * fpregs)
-{
-    uint32_t nr_gpr = 1u; // skip one GP register for 'that'
+    uint32_t nr_gpr = 0u;
     uint32_t nr_fpr = 0u;
     uint64_t value = 0u;
 
@@ -100,89 +73,3 @@ invoke_copy_to_stack(uint64_t * d, uint32_t paramCount, nsXPTCVariant * s,
         }
     }
 }
-
-// Disable avx for the next function to allow compilation with
-// -march=native on new machines, or similar hardcoded -march options.
-// Having avx enabled appears to change the alignment behavior of alloca
-// (apparently adding an extra 16 bytes) of padding/alignment (and using
-// 32-byte alignment instead of 16-byte).  This seems to be the best
-// available workaround, given that this code, which should perhaps
-// better be written in assembly, is written in C++.
-#ifndef __clang__
-#pragma GCC push_options
-#pragma GCC target ("no-avx")
-#endif
-
-// Avoid AddressSanitizer instrumentation for the next function because it
-// depends on __builtin_alloca behavior and alignment that cannot be relied on
-// once the function is compiled with a version of ASan that has dynamic-alloca
-// instrumentation enabled.
-
-MOZ_ASAN_BLACKLIST
-EXPORT_XPCOM_API(nsresult)
-NS_InvokeByIndex(nsISupports * that, uint32_t methodIndex,
-                 uint32_t paramCount, nsXPTCVariant * params)
-{
-    uint32_t nr_stack;
-    invoke_count_words(paramCount, params, nr_stack);
-    
-    // Stack, if used, must be 16-bytes aligned
-    if (nr_stack)
-        nr_stack = (nr_stack + 1) & ~1;
-
-    // Load parameters to stack, if necessary
-    uint64_t *stack = (uint64_t *) __builtin_alloca(nr_stack * 8);
-    uint64_t gpregs[GPR_COUNT];
-    double fpregs[FPR_COUNT];
-    invoke_copy_to_stack(stack, paramCount, params, gpregs, fpregs);
-
-    // We used to have switches to make sure we would only load the registers
-    // that are needed for this call. That produced larger code that was
-    // not faster in practice. It also caused compiler warnings about the
-    // variables being used uninitialized.
-    // We now just load every every register. There could still be a warning
-    // from a memory analysis tools that we are loading uninitialized stack
-    // positions.
-
-    // FIXME: this function depends on the above __builtin_alloca placing
-    // the array in the correct spot for the ABI.
-
-    // Load FPR registers from fpregs[]
-    double d0, d1, d2, d3, d4, d5, d6, d7;
-
-    d7 = fpregs[7];
-    d6 = fpregs[6];
-    d5 = fpregs[5];
-    d4 = fpregs[4];
-    d3 = fpregs[3];
-    d2 = fpregs[2];
-    d1 = fpregs[1];
-    d0 = fpregs[0];
-
-    // Load GPR registers from gpregs[]
-    uint64_t a0, a1, a2, a3, a4, a5;
-
-    a5 = gpregs[5];
-    a4 = gpregs[4];
-    a3 = gpregs[3];
-    a2 = gpregs[2];
-    a1 = gpregs[1];
-    a0 = (uint64_t) that;
-
-    // Get pointer to method
-    uint64_t methodAddress = *((uint64_t *)that);
-    methodAddress += 8 * methodIndex;
-    methodAddress = *((uint64_t *)methodAddress);
-    
-    typedef nsresult (*Method)(uint64_t, uint64_t, uint64_t, uint64_t,
-                               uint64_t, uint64_t, double, double, double,
-                               double, double, double, double, double);
-    nsresult result = ((Method)methodAddress)(a0, a1, a2, a3, a4, a5,
-                                              d0, d1, d2, d3, d4, d5,
-                                              d6, d7);
-    return result;
-}
-
-#ifndef __clang__
-#pragma GCC pop_options
-#endif
-- 
cgit v1.2.3