/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/* For overall documentation, see jit/AtomicOperations.h */

#ifndef jit_shared_AtomicOperations_x86_shared_h
#define jit_shared_AtomicOperations_x86_shared_h

#include <string.h>  // for ::memcpy and ::memmove below

#include "mozilla/Assertions.h"
#include "mozilla/Types.h"

#if defined(_MSC_VER)
# include <intrin.h>  // for the _Interlocked*, _ReadWriteBarrier, and _mm_mfence intrinsics
#endif

// Lock-freedom on x86 and x64:
//
// On x86 and x64 there are atomic instructions for 8-byte accesses:
//
// Loads and stores:
// - Loads and stores are single-copy atomic for up to 8 bytes
//   starting with the Pentium; the store requires a post-fence for
//   sequential consistency
//
// CompareExchange:
// - On x64 CMPXCHGQ can always be used
// - On x86 CMPXCHG8B can be used starting with the first Pentium
//
// Exchange:
// - On x64 XCHGQ can always be used
// - On x86 one has to use a CompareExchange loop
//
// Observe also that the JIT will not be enabled unless we have SSE2,
// which was introduced with the Pentium 4.  Ergo the JIT will be able
// to use atomic instructions for up to 8 bytes on all x86 platforms
// for the primitives we care about.
//
// However, C++ compilers and libraries may not provide access to
// those 8-byte instructions directly.  Clang in 32-bit mode does not
// provide 8-byte atomic primitives at all (even with eg -arch i686
// specified).  On Windows 32-bit, MSVC does not provide
// _InterlockedExchange64 since it does not map directly to an
// instruction.
//
// There are thus sundry workarounds below to handle known corner
// cases.
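// The following sketch (illustrative only, not used below) shows how the
// x86 exchange-via-CompareExchange loop mentioned above can be written
// with the GCC/Clang __atomic builtins; 'exchange64' is a hypothetical name:
//
//   int64_t exchange64(int64_t* addr, int64_t val) {
//       int64_t old = *addr;
//       // On failure the builtin refreshes 'old' with the observed value.
//       while (!__atomic_compare_exchange_n(addr, &old, val, false,
//                                           __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
//           ;
//       return old;
//   }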
#if defined(__clang__) || defined(__GNUC__)

// Lock-free 8-byte atomics are assumed on x86 but must be disabled in
// corner cases, see comments below and in isLockfree8().

# define LOCKFREE8

// This pertains to Clang compiling with -m32; in this case the 64-bit
// __atomic builtins are not available (observed on various Mac OS X
// versions with Apple Clang and on Linux with Clang 3.5).
//
// For now just punt: disable lock-free 8-byte data.  The JIT will
// call isLockfree8() to determine what to do and will stay in sync.
// (Bug 1146817 tracks the work to improve on this.)

# if defined(__clang__) && defined(__i386)
#  undef LOCKFREE8
# endif

inline bool
js::jit::AtomicOperations::isLockfree8()
{
    MOZ_ASSERT(__atomic_always_lock_free(sizeof(int8_t), 0));
    MOZ_ASSERT(__atomic_always_lock_free(sizeof(int16_t), 0));
    MOZ_ASSERT(__atomic_always_lock_free(sizeof(int32_t), 0));
# ifdef LOCKFREE8
    MOZ_ASSERT(__atomic_always_lock_free(sizeof(int64_t), 0));
    return true;
# else
    return false;
# endif
}

inline void
js::jit::AtomicOperations::fenceSeqCst()
{
    __atomic_thread_fence(__ATOMIC_SEQ_CST);
}

template<typename T>
inline T
js::jit::AtomicOperations::loadSeqCst(T* addr)
{
    MOZ_ASSERT(sizeof(T) < 8 || isLockfree8());
    T v;
    __atomic_load(addr, &v, __ATOMIC_SEQ_CST);
    return v;
}

# ifndef LOCKFREE8
template<>
inline int64_t
js::jit::AtomicOperations::loadSeqCst(int64_t* addr)
{
    MOZ_CRASH();
}

template<>
inline uint64_t
js::jit::AtomicOperations::loadSeqCst(uint64_t* addr)
{
    MOZ_CRASH();
}
# endif // LOCKFREE8

template<typename T>
inline void
js::jit::AtomicOperations::storeSeqCst(T* addr, T val)
{
    MOZ_ASSERT(sizeof(T) < 8 || isLockfree8());
    __atomic_store(addr, &val, __ATOMIC_SEQ_CST);
}

# ifndef LOCKFREE8
template<>
inline void
js::jit::AtomicOperations::storeSeqCst(int64_t* addr, int64_t val)
{
    MOZ_CRASH();
}

template<>
inline void
js::jit::AtomicOperations::storeSeqCst(uint64_t* addr, uint64_t val)
{
    MOZ_CRASH();
}
# endif // LOCKFREE8

template<typename T>
inline T
js::jit::AtomicOperations::exchangeSeqCst(T* addr, T val)
{
    MOZ_ASSERT(sizeof(T) < 8 || isLockfree8());
    T v;
    __atomic_exchange(addr, &val, &v, __ATOMIC_SEQ_CST);
    return v;
}

# ifndef LOCKFREE8
template<>
inline int64_t
js::jit::AtomicOperations::exchangeSeqCst(int64_t* addr, int64_t val)
{
    MOZ_CRASH();
}

template<>
inline uint64_t
js::jit::AtomicOperations::exchangeSeqCst(uint64_t* addr, uint64_t val)
{
    MOZ_CRASH();
}
# endif // LOCKFREE8

template<typename T>
inline T
js::jit::AtomicOperations::compareExchangeSeqCst(T* addr, T oldval, T newval)
{
    MOZ_ASSERT(sizeof(T) < 8 || isLockfree8());
    __atomic_compare_exchange(addr, &oldval, &newval, false,
                              __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
    return oldval;
}

# ifndef LOCKFREE8
template<>
inline int64_t
js::jit::AtomicOperations::compareExchangeSeqCst(int64_t* addr, int64_t oldval, int64_t newval)
{
    MOZ_CRASH();
}

template<>
inline uint64_t
js::jit::AtomicOperations::compareExchangeSeqCst(uint64_t* addr, uint64_t oldval, uint64_t newval)
{
    MOZ_CRASH();
}
# endif // LOCKFREE8
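// Usage sketch (illustrative only): compareExchangeSeqCst() returns the
// value that was observed at *addr, so a caller tests for success by
// comparing the result against the expected old value.  'flag' is a
// hypothetical shared int32_t location:
//
//   int32_t expected = 0;
//   int32_t observed =
//       js::jit::AtomicOperations::compareExchangeSeqCst(&flag, expected, 1);
//   if (observed == expected) {
//       // the store of 1 took effect
//   }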
template<typename T>
inline T
js::jit::AtomicOperations::fetchAddSeqCst(T* addr, T val)
{
    static_assert(sizeof(T) <= 4, "not available for 8-byte values yet");
    return __atomic_fetch_add(addr, val, __ATOMIC_SEQ_CST);
}

template<typename T>
inline T
js::jit::AtomicOperations::fetchSubSeqCst(T* addr, T val)
{
    static_assert(sizeof(T) <= 4, "not available for 8-byte values yet");
    return __atomic_fetch_sub(addr, val, __ATOMIC_SEQ_CST);
}

template<typename T>
inline T
js::jit::AtomicOperations::fetchAndSeqCst(T* addr, T val)
{
    static_assert(sizeof(T) <= 4, "not available for 8-byte values yet");
    return __atomic_fetch_and(addr, val, __ATOMIC_SEQ_CST);
}

template<typename T>
inline T
js::jit::AtomicOperations::fetchOrSeqCst(T* addr, T val)
{
    static_assert(sizeof(T) <= 4, "not available for 8-byte values yet");
    return __atomic_fetch_or(addr, val, __ATOMIC_SEQ_CST);
}

template<typename T>
inline T
js::jit::AtomicOperations::fetchXorSeqCst(T* addr, T val)
{
    static_assert(sizeof(T) <= 4, "not available for 8-byte values yet");
    return __atomic_fetch_xor(addr, val, __ATOMIC_SEQ_CST);
}

template<typename T>
inline T
js::jit::AtomicOperations::loadSafeWhenRacy(T* addr)
{
    return *addr; // FIXME (1208663): not yet safe
}

template<typename T>
inline void
js::jit::AtomicOperations::storeSafeWhenRacy(T* addr, T val)
{
    *addr = val; // FIXME (1208663): not yet safe
}

inline void
js::jit::AtomicOperations::memcpySafeWhenRacy(void* dest, const void* src, size_t nbytes)
{
    ::memcpy(dest, src, nbytes); // FIXME (1208663): not yet safe
}

inline void
js::jit::AtomicOperations::memmoveSafeWhenRacy(void* dest, const void* src, size_t nbytes)
{
    ::memmove(dest, src, nbytes); // FIXME (1208663): not yet safe
}

template<size_t nbytes>
inline void
js::jit::RegionLock::acquire(void* addr)
{
    uint32_t zero = 0;
    uint32_t one = 1;
    // Spin until the lock word is flipped from 0 to 1.  On failure the
    // compare-exchange writes the observed value back into 'zero', so it
    // must be reset before retrying.
    while (!__atomic_compare_exchange(&spinlock, &zero, &one, false,
                                      __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE))
    {
        zero = 0;
        continue;
    }
}

template<size_t nbytes>
inline void
js::jit::RegionLock::release(void* addr)
{
    MOZ_ASSERT(AtomicOperations::loadSeqCst(&spinlock) == 1, "releasing unlocked region lock");
    uint32_t zero = 0;
    __atomic_store(&spinlock, &zero, __ATOMIC_SEQ_CST);
}

# undef LOCKFREE8
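// Usage sketch (illustrative only): RegionLock is a simple spinlock
// guarding a memory region.  Assuming a shared 'js::jit::RegionLock lock'
// and a region address 'addr', a critical section over an 8-byte region
// looks roughly like:
//
//   lock.acquire<8>(addr);    // spin on the 0 -> 1 compare-exchange
//   // ... access up to 8 bytes at 'addr' ...
//   lock.release<8>(addr);    // publish by storing 0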
#elif defined(_MSC_VER)

// On 32-bit CPUs there is no 64-bit XCHG instruction; one must
// instead use a loop with CMPXCHG8B.  Since MSVC provides
// _InterlockedExchange64 only if it maps directly to XCHG, the
// workaround must be manual.

# define HAVE_EXCHANGE64

# if !_WIN64
#  undef HAVE_EXCHANGE64
# endif

// Below, _ReadWriteBarrier is a compiler directive, preventing
// reordering of instructions and reuse of memory values across it.

inline bool
js::jit::AtomicOperations::isLockfree8()
{
    // See general comments at the start of this file.
    //
    // The MSDN docs suggest very strongly that if code is compiled for
    // Pentium or better the 64-bit primitives will be lock-free, see
    // eg the "Remarks" section of the page for _InterlockedCompareExchange64,
    // currently here:
    // https://msdn.microsoft.com/en-us/library/ttk2z1ws%28v=vs.85%29.aspx
    //
    // But I've found no way to assert that at compile time or run time;
    // there appears to be no WinAPI is_lock_free() test.
    return true;
}

inline void
js::jit::AtomicOperations::fenceSeqCst()
{
    _ReadWriteBarrier();
# if JS_BITS_PER_WORD == 32
    // If configured for SSE2+ we can use the MFENCE instruction, available
    // through the _mm_mfence intrinsic.  But for non-SSE2 systems we have
    // to do something else.  Linux uses "lock add [esp], 0", so why not?
    __asm lock add [esp], 0;
# else
    _mm_mfence();
# endif
}

template<typename T>
inline T
js::jit::AtomicOperations::loadSeqCst(T* addr)
{
    MOZ_ASSERT(sizeof(T) < 8 || isLockfree8());
    _ReadWriteBarrier();
    T v = *addr;
    _ReadWriteBarrier();
    return v;
}

template<typename T>
inline void
js::jit::AtomicOperations::storeSeqCst(T* addr, T val)
{
    MOZ_ASSERT(sizeof(T) < 8 || isLockfree8());
    _ReadWriteBarrier();
    *addr = val;
    fenceSeqCst();
}

# define MSC_EXCHANGEOP(T, U, xchgop)                                  \
    template<> inline T                                                \
    js::jit::AtomicOperations::exchangeSeqCst(T* addr, T val) {        \
        MOZ_ASSERT(sizeof(T) < 8 || isLockfree8());                    \
        return (T)xchgop((U volatile*)addr, (U)val);                   \
    }

// The compare-exchange intrinsic returns the value initially observed at
// *addr; the exchange has taken effect only if that value equals the
// comparand, so retry until the observed value confirms no interference.
# define MSC_EXCHANGEOP_CAS(T, U, cmpxchg)                             \
    template<> inline T                                                \
    js::jit::AtomicOperations::exchangeSeqCst(T* addr, T newval) {     \
        MOZ_ASSERT(sizeof(T) < 8 || isLockfree8());                    \
        T oldval;                                                      \
        do {                                                           \
            _ReadWriteBarrier();                                       \
            oldval = *addr;                                            \
        } while ((T)cmpxchg((U volatile*)addr, (U)newval, (U)oldval) != oldval); \
        return oldval;                                                 \
    }

MSC_EXCHANGEOP(int8_t, char, _InterlockedExchange8)
MSC_EXCHANGEOP(uint8_t, char, _InterlockedExchange8)
MSC_EXCHANGEOP(int16_t, short, _InterlockedExchange16)
MSC_EXCHANGEOP(uint16_t, short, _InterlockedExchange16)
MSC_EXCHANGEOP(int32_t, long, _InterlockedExchange)
MSC_EXCHANGEOP(uint32_t, long, _InterlockedExchange)

# ifdef HAVE_EXCHANGE64
MSC_EXCHANGEOP(int64_t, __int64, _InterlockedExchange64)
MSC_EXCHANGEOP(uint64_t, __int64, _InterlockedExchange64)
# else
MSC_EXCHANGEOP_CAS(int64_t, __int64, _InterlockedCompareExchange64)
MSC_EXCHANGEOP_CAS(uint64_t, __int64, _InterlockedCompareExchange64)
# endif

# undef MSC_EXCHANGEOP
# undef MSC_EXCHANGEOP_CAS

# define MSC_CAS(T, U, cmpxchg)                                        \
    template<> inline T                                                \
    js::jit::AtomicOperations::compareExchangeSeqCst(T* addr, T oldval, T newval) { \
        MOZ_ASSERT(sizeof(T) < 8 || isLockfree8());                    \
        return (T)cmpxchg((U volatile*)addr, (U)newval, (U)oldval);    \
    }

MSC_CAS(int8_t, char, _InterlockedCompareExchange8)
MSC_CAS(uint8_t, char, _InterlockedCompareExchange8)
MSC_CAS(int16_t, short, _InterlockedCompareExchange16)
MSC_CAS(uint16_t, short, _InterlockedCompareExchange16)
MSC_CAS(int32_t, long, _InterlockedCompareExchange)
MSC_CAS(uint32_t, long, _InterlockedCompareExchange)
MSC_CAS(int64_t, __int64, _InterlockedCompareExchange64)
MSC_CAS(uint64_t, __int64, _InterlockedCompareExchange64)

# undef MSC_CAS

# define MSC_FETCHADDOP(T, U, xadd)                                    \
    template<> inline T                                                \
    js::jit::AtomicOperations::fetchAddSeqCst(T* addr, T val) {        \
        static_assert(sizeof(T) <= 4, "not available for 8-byte values yet"); \
        return (T)xadd((U volatile*)addr, (U)val);                     \
    }                                                                  \
    template<> inline T                                                \
    js::jit::AtomicOperations::fetchSubSeqCst(T* addr, T val) {        \
        static_assert(sizeof(T) <= 4, "not available for 8-byte values yet"); \
        return (T)xadd((U volatile*)addr, -(U)val);                    \
    }

MSC_FETCHADDOP(int8_t, char, _InterlockedExchangeAdd8)
MSC_FETCHADDOP(uint8_t, char, _InterlockedExchangeAdd8)
MSC_FETCHADDOP(int16_t, short, _InterlockedExchangeAdd16)
MSC_FETCHADDOP(uint16_t, short, _InterlockedExchangeAdd16)
MSC_FETCHADDOP(int32_t, long, _InterlockedExchangeAdd)
MSC_FETCHADDOP(uint32_t, long, _InterlockedExchangeAdd)

# undef MSC_FETCHADDOP
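// Note on MSC_FETCHADDOP above: MSVC offers no fetch-and-subtract
// intrinsic, so fetchSubSeqCst() is expressed as an atomic add of the
// negated operand; the negation wraps modulo 2^(8*sizeof(U)), which makes
// the subtraction exact in two's-complement arithmetic.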
# define MSC_FETCHBITOP(T, U, andop, orop, xorop)                      \
    template<> inline T                                                \
    js::jit::AtomicOperations::fetchAndSeqCst(T* addr, T val) {        \
        static_assert(sizeof(T) <= 4, "not available for 8-byte values yet"); \
        return (T)andop((U volatile*)addr, (U)val);                    \
    }                                                                  \
    template<> inline T                                                \
    js::jit::AtomicOperations::fetchOrSeqCst(T* addr, T val) {         \
        static_assert(sizeof(T) <= 4, "not available for 8-byte values yet"); \
        return (T)orop((U volatile*)addr, (U)val);                     \
    }                                                                  \
    template<> inline T                                                \
    js::jit::AtomicOperations::fetchXorSeqCst(T* addr, T val) {        \
        static_assert(sizeof(T) <= 4, "not available for 8-byte values yet"); \
        return (T)xorop((U volatile*)addr, (U)val);                    \
    }

MSC_FETCHBITOP(int8_t, char, _InterlockedAnd8, _InterlockedOr8, _InterlockedXor8)
MSC_FETCHBITOP(uint8_t, char, _InterlockedAnd8, _InterlockedOr8, _InterlockedXor8)
MSC_FETCHBITOP(int16_t, short, _InterlockedAnd16, _InterlockedOr16, _InterlockedXor16)
MSC_FETCHBITOP(uint16_t, short, _InterlockedAnd16, _InterlockedOr16, _InterlockedXor16)
MSC_FETCHBITOP(int32_t, long, _InterlockedAnd, _InterlockedOr, _InterlockedXor)
MSC_FETCHBITOP(uint32_t, long, _InterlockedAnd, _InterlockedOr, _InterlockedXor)

# undef MSC_FETCHBITOP

template<typename T>
inline T
js::jit::AtomicOperations::loadSafeWhenRacy(T* addr)
{
    return *addr; // FIXME (1208663): not yet safe
}

template<typename T>
inline void
js::jit::AtomicOperations::storeSafeWhenRacy(T* addr, T val)
{
    *addr = val; // FIXME (1208663): not yet safe
}

inline void
js::jit::AtomicOperations::memcpySafeWhenRacy(void* dest, const void* src, size_t nbytes)
{
    ::memcpy(dest, src, nbytes); // FIXME (1208663): not yet safe
}

inline void
js::jit::AtomicOperations::memmoveSafeWhenRacy(void* dest, const void* src, size_t nbytes)
{
    ::memmove(dest, src, nbytes); // FIXME (1208663): not yet safe
}

template<size_t nbytes>
inline void
js::jit::RegionLock::acquire(void* addr)
{
    while (_InterlockedCompareExchange((long*)&spinlock, /*newval=*/1, /*oldval=*/0) == 1)
        continue;
}

template<size_t nbytes>
inline void
js::jit::RegionLock::release(void* addr)
{
    MOZ_ASSERT(AtomicOperations::loadSeqCst(&spinlock) == 1, "releasing unlocked region lock");
    _InterlockedExchange((long*)&spinlock, 0);
}

# undef HAVE_EXCHANGE64

#elif defined(ENABLE_SHARED_ARRAY_BUFFER)

# error "Either disable JS shared memory at compile time, use GCC, Clang, or MSVC, or add code here"

#endif // platform

#endif // jit_shared_AtomicOperations_x86_shared_h