/*
 * ntp_atomic.h - atomic operations needed for SHM clock handling
 *
 * Written by Juergen Perlinger (perlinger@ntp.org) for the NTP project.
 * The contents of 'html/copyright.html' apply.
 *
 * ---------------------------------------------------------------------
 *
 * The 'old' shared memory driver was truly multi-core agnostic. While
 * the implementation was surely fit for single-core systems, having it
 * running on a modern (as per AD 2013) multi-core system brings up a
 * few topics. Multi-level cache systems and multi-core CPUs make
 * sharing memory a bit more complicated; a fact that is normally buried
 * inside the OS. And using the thread synchronisation primitives of the
 * compiler (if provided) and/or the runtime environment hides the
 * peculiarities of read/write reordering and data visibility.
 *
 * Using the 'pthreads' synchronisation primitives inside NTPD is not
 * the first choice, because it would need mutexes (mutices?) in a
 * shared memory between processes, which is not available under all
 * OS's that support pthreads. Some targets don't even provide
 * pthreads. (e.g. Win32...) And pthreads' mutex-in-shared-memory
 * concept would be the only thing that is standardised.
 *
 * So the current implementation of the shared memory driver uses a
 * handful of primitives to ensure atomic memory access and memory
 * barriers.
 *
 * For high-performance applications the distinction between acquisition
 * barriers, release barriers and full barriers can create quite an
 * impact on the total performance. OTOH, NTPD has a quite slow access
 * rate to the shared memory, and it's much easier to have a few
 * functions that act as full barriers than to create all the special
 * things we would need for optimum performance. The comparably
 * infrequent use should not create a dent in the performance numbers,
 * even if over-fencing memory access is a bad thing in general.
 *
 * Compared to some architectures, x86 and x64 are an island of the
 * fortunate because of the architecture-defined rules for read and
 * write order. Also the lock prefix available to some instructions
 * makes those instructions a full memory barrier.
 *
 * So with GCC on x86/x64 we use some special inline assembly, with GCC
 * for other architectures we rely on some compiler builtins, for MSVC
 * we use interlocked intrinsics, and for everything else we are lost so
 * far. It would be possible to implement dummies that do the needed
 * operations in non-atomic manner, but IMHO that just could create
 * errors that are hard to track. So if unsure we fail.
 *
 * Anyone who wants to contribute other compilers and/or platforms, feel
 * free to do so!
 */

/*
 * Include guard. The macro tested here must be the same one that is
 * defined on the next line, otherwise a second inclusion of this header
 * is not suppressed.
 */
#ifndef NTP_ATOMIC_H
#define NTP_ATOMIC_H

/*
 * Pull in an integer-types header: the system <stdint.h> when
 * HAVE_STDINT_H is defined, otherwise a project-local substitute
 * (a dedicated one when compiling with MSVC, a generic one for
 * everything else).
 */
#if defined(HAVE_STDINT_H)
# include <stdint.h>
#elif defined(_MSC_VER)
# include "stdint_msvc.h"
#else
# include "stdint_ntp.h"
#endif

/*
 * Try to identify compiler and target, then include the specific
 * implementation that matches best:
 *
 *   - GCC-compatible compiler (__GNUC__ >= 3) targeting x86/x64
 *   - GCC-compatible compiler (__GNUC__ >= 3), any other target
 *   - MSVC with _MSC_VER >= 1400
 *
 * Anything else is rejected at compile time rather than silently
 * falling back to a non-atomic implementation.
 */

#if defined(__GNUC__) && __GNUC__ >= 3
# if defined(__i386__) || defined(__x86_64__)
#  include "atomic/gcc_x86_x64.h"
# else
#  include "atomic/gcc_generic.h"
# endif
#elif defined(_MSC_VER) && _MSC_VER >= 1400
# include "atomic/msvc8.h"
#else
# error do not know how to implement atomic ops
#endif

#endif /* !defined(NTP_ATOMIC_H) */
