Remove configurability of PPC spinlock assembly code.

Assume that we can use LWARX hint flags and the LWSYNC instruction
on any PPC machine.  The check on the assembler's behavior was only
needed for Apple's old assembler, which is no longer of interest
now that we've desupported all PPC-era versions of macOS (thanks
to them not having clock_gettime()).  Also, given an up-to-date
assembler, these instructions work even on Apple's old hardware.
It seems quite unlikely that anyone would be interested in running
current Postgres on PPC hardware that's so old as to not have
these instructions.

Hence, rip out the associated configure test and manual configuration
options, and just use the modernized instructions all the time.
Also, update atomics/arch-ppc.h to use these instructions as well.
(It was already using LWSYNC unconditionally in another place,
providing further proof that nobody is using PG on hardware old
enough to have a problem with that.)

Discussion: https://postgr.es/m/166622.1660323391@sss.pgh.pa.us
commit 8ded65682b
parent adee0df127
Tom Lane, 2022-08-13 13:36:39 -04:00
7 changed files with 26 additions and 115 deletions
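As background, the two ISA features the commit now assumes everywhere are the optional EH ("mutex hint") operand of LWARX and the LWSYNC barrier.  A minimal sketch (the function below is invented for illustration, not part of the commit):

/*
 * Illustration only: lwarx's optional fourth operand is the mutex hint
 * bit; lwsync is a lighter-weight barrier than sync.  Per the comments
 * removed from pg_config_manual.h below, both are accepted on POWER4
 * and later, though the hint bit only improves performance on POWER6
 * and up.
 */
void
ppc_feature_sketch(int *p)
{
	int		r;

	__asm__ __volatile__(" lwarx %0,0,%1,1 \n" : "=&r"(r) : "r"(p));
	__asm__ __volatile__(" lwsync \n" ::: "memory");
}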

configure

@@ -15349,39 +15349,7 @@ $as_echo "#define HAVE_X86_64_POPCNTQ 1" >>confdefs.h
     fi
     ;;
   ppc*|powerpc*)
-    # On PPC, check if assembler supports LWARX instruction's mutex hint bit
-    { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether assembler supports lwarx hint bit" >&5
-$as_echo_n "checking whether assembler supports lwarx hint bit... " >&6; }
-if ${pgac_cv_have_ppc_mutex_hint+:} false; then :
-  $as_echo_n "(cached) " >&6
-else
-  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h.  */
-
-int
-main ()
-{
-int a = 0; int *p = &a; int r;
-	 __asm__ __volatile__ (" lwarx %0,0,%1,1\n" : "=&r"(r) : "r"(p));
-  ;
-  return 0;
-}
-_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
-  pgac_cv_have_ppc_mutex_hint=yes
-else
-  pgac_cv_have_ppc_mutex_hint=no
-fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_have_ppc_mutex_hint" >&5
-$as_echo "$pgac_cv_have_ppc_mutex_hint" >&6; }
-if test x"$pgac_cv_have_ppc_mutex_hint" = xyes ; then
-
-$as_echo "#define HAVE_PPC_LWARX_MUTEX_HINT 1" >>confdefs.h
-
-fi
-    # Check if compiler accepts "i"(x) when __builtin_constant_p(x).
+    # On PPC, check if compiler accepts "i"(x) when __builtin_constant_p(x).
     { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether __builtin_constant_p(x) implies \"i\"(x) acceptance" >&5
 $as_echo_n "checking whether __builtin_constant_p(x) implies \"i\"(x) acceptance... " >&6; }
 if ${pgac_cv_have_i_constraint__builtin_constant_p+:} false; then :

configure.ac

@@ -1678,18 +1678,7 @@ case $host_cpu in
     fi
   ;;
   ppc*|powerpc*)
-    # On PPC, check if assembler supports LWARX instruction's mutex hint bit
-    AC_CACHE_CHECK([whether assembler supports lwarx hint bit],
-    [pgac_cv_have_ppc_mutex_hint],
-    [AC_COMPILE_IFELSE([AC_LANG_PROGRAM([],
-	[int a = 0; int *p = &a; int r;
-	 __asm__ __volatile__ (" lwarx %0,0,%1,1\n" : "=&r"(r) : "r"(p));])],
-    [pgac_cv_have_ppc_mutex_hint=yes],
-    [pgac_cv_have_ppc_mutex_hint=no])])
-    if test x"$pgac_cv_have_ppc_mutex_hint" = xyes ; then
-	AC_DEFINE(HAVE_PPC_LWARX_MUTEX_HINT, 1, [Define to 1 if the assembler supports PPC's LWARX mutex hint bit.])
-    fi
-    # Check if compiler accepts "i"(x) when __builtin_constant_p(x).
+    # On PPC, check if compiler accepts "i"(x) when __builtin_constant_p(x).
     AC_CACHE_CHECK([whether __builtin_constant_p(x) implies "i"(x) acceptance],
     [pgac_cv_have_i_constraint__builtin_constant_p],
     [AC_COMPILE_IFELSE([AC_LANG_PROGRAM(
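The removed probe just checked whether a four-operand lwarx assembles.  Roughly, as a standalone file (a reconstruction for illustration, not the literal conftest.c Autoconf generated):

/* If "cc -c conftest.c" succeeded, the assembler understood the mutex
 * hint operand and configure defined HAVE_PPC_LWARX_MUTEX_HINT. */
int
main(void)
{
	int		a = 0;
	int	   *p = &a;
	int		r;

	__asm__ __volatile__(" lwarx %0,0,%1,1\n" : "=&r"(r) : "r"(p));
	return 0;
}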

src/include/pg_config.h.in

@@ -349,9 +349,6 @@
 /* Define to 1 if you have the `posix_fallocate' function. */
 #undef HAVE_POSIX_FALLOCATE
 
-/* Define to 1 if the assembler supports PPC's LWARX mutex hint bit. */
-#undef HAVE_PPC_LWARX_MUTEX_HINT
-
 /* Define to 1 if you have the `ppoll' function. */
 #undef HAVE_PPOLL

src/include/pg_config_manual.h

@@ -227,32 +227,6 @@
  */
 #define DEFAULT_EVENT_SOURCE  "PostgreSQL"
 
-/*
- * On PPC machines, decide whether to use the mutex hint bit in LWARX
- * instructions.  Setting the hint bit will slightly improve spinlock
- * performance on POWER6 and later machines, but does nothing before that,
- * and will result in illegal-instruction failures on some pre-POWER4
- * machines.  By default we use the hint bit when building for 64-bit PPC,
- * which should be safe in nearly all cases.  You might want to override
- * this if you are building 32-bit code for a known-recent PPC machine.
- */
-#ifdef HAVE_PPC_LWARX_MUTEX_HINT	/* must have assembler support in any case */
-#if defined(__ppc64__) || defined(__powerpc64__)
-#define USE_PPC_LWARX_MUTEX_HINT
-#endif
-#endif
-
-/*
- * On PPC machines, decide whether to use LWSYNC instructions in place of
- * ISYNC and SYNC.  This provides slightly better performance, but will
- * result in illegal-instruction failures on some pre-POWER4 machines.
- * By default we use LWSYNC when building for 64-bit PPC, which should be
- * safe in nearly all cases.
- */
-#if defined(__ppc64__) || defined(__powerpc64__)
-#define USE_PPC_LWSYNC
-#endif
-
 /*
  * Assumed cache line size.  This doesn't affect correctness, but can be used
  * for low-level optimizations.  Currently, this is used to pad some data

src/include/port/atomics/arch-ppc.h

@@ -90,12 +90,12 @@ pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
 		(int32) *expected >= PG_INT16_MIN)
 		__asm__ __volatile__(
 		"	sync				\n"
-		"	lwarx   %0,0,%5		\n"
+		"	lwarx   %0,0,%5,1	\n"
 		"	cmpwi   %0,%3		\n"
-		"	bne     $+12		\n"		/* branch to isync */
+		"	bne     $+12		\n"		/* branch to lwsync */
 		"	stwcx.  %4,0,%5		\n"
 		"	bne     $-16		\n"		/* branch to lwarx */
-		"	isync				\n"
+		"	lwsync				\n"
 		"	mfcr    %1          \n"
 		: "=&r"(found), "=r"(condition_register), "+m"(ptr->value)
 		: "i"(*expected), "r"(newval), "r"(&ptr->value)
@@ -104,12 +104,12 @@ pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
 #endif
 	__asm__ __volatile__(
 	"	sync				\n"
-	"	lwarx   %0,0,%5		\n"
+	"	lwarx   %0,0,%5,1	\n"
 	"	cmpw    %0,%3		\n"
-	"	bne     $+12		\n"		/* branch to isync */
+	"	bne     $+12		\n"		/* branch to lwsync */
 	"	stwcx.  %4,0,%5		\n"
 	"	bne     $-16		\n"		/* branch to lwarx */
-	"	isync				\n"
+	"	lwsync				\n"
 	"	mfcr    %1          \n"
 	: "=&r"(found), "=r"(condition_register), "+m"(ptr->value)
 	: "r"(*expected), "r"(newval), "r"(&ptr->value)
@@ -138,11 +138,11 @@ pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
 		add_ <= PG_INT16_MAX && add_ >= PG_INT16_MIN)
 		__asm__ __volatile__(
 		"	sync				\n"
-		"	lwarx   %1,0,%4		\n"
+		"	lwarx   %1,0,%4,1	\n"
 		"	addi    %0,%1,%3	\n"
 		"	stwcx.  %0,0,%4		\n"
 		"	bne     $-12		\n"		/* branch to lwarx */
-		"	isync				\n"
+		"	lwsync				\n"
 		: "=&r"(_t), "=&b"(res), "+m"(ptr->value)
 		: "i"(add_), "r"(&ptr->value)
 		: "memory", "cc");
@@ -150,11 +150,11 @@ pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
 #endif
 	__asm__ __volatile__(
 	"	sync				\n"
-	"	lwarx   %1,0,%4		\n"
+	"	lwarx   %1,0,%4,1	\n"
 	"	add     %0,%1,%3	\n"
 	"	stwcx.  %0,0,%4		\n"
 	"	bne     $-12		\n"		/* branch to lwarx */
-	"	isync				\n"
+	"	lwsync				\n"
 	: "=&r"(_t), "=&r"(res), "+m"(ptr->value)
 	: "r"(add_), "r"(&ptr->value)
 	: "memory", "cc");
@@ -180,12 +180,12 @@ pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
 		(int64) *expected >= PG_INT16_MIN)
 		__asm__ __volatile__(
 		"	sync				\n"
-		"	ldarx   %0,0,%5		\n"
+		"	ldarx   %0,0,%5,1	\n"
 		"	cmpdi   %0,%3		\n"
-		"	bne     $+12		\n"		/* branch to isync */
+		"	bne     $+12		\n"		/* branch to lwsync */
 		"	stdcx.  %4,0,%5		\n"
 		"	bne     $-16		\n"		/* branch to ldarx */
-		"	isync				\n"
+		"	lwsync				\n"
 		"	mfcr    %1          \n"
 		: "=&r"(found), "=r"(condition_register), "+m"(ptr->value)
 		: "i"(*expected), "r"(newval), "r"(&ptr->value)
@@ -194,12 +194,12 @@ pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
 #endif
 	__asm__ __volatile__(
 	"	sync				\n"
-	"	ldarx   %0,0,%5		\n"
+	"	ldarx   %0,0,%5,1	\n"
 	"	cmpd    %0,%3		\n"
-	"	bne     $+12		\n"		/* branch to isync */
+	"	bne     $+12		\n"		/* branch to lwsync */
 	"	stdcx.  %4,0,%5		\n"
 	"	bne     $-16		\n"		/* branch to ldarx */
-	"	isync				\n"
+	"	lwsync				\n"
 	"	mfcr    %1          \n"
 	: "=&r"(found), "=r"(condition_register), "+m"(ptr->value)
 	: "r"(*expected), "r"(newval), "r"(&ptr->value)
@@ -224,11 +224,11 @@ pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
 		add_ <= PG_INT16_MAX && add_ >= PG_INT16_MIN)
 		__asm__ __volatile__(
 		"	sync				\n"
-		"	ldarx   %1,0,%4		\n"
+		"	ldarx   %1,0,%4,1	\n"
 		"	addi    %0,%1,%3	\n"
 		"	stdcx.  %0,0,%4		\n"
 		"	bne     $-12		\n"		/* branch to ldarx */
-		"	isync				\n"
+		"	lwsync				\n"
 		: "=&r"(_t), "=&b"(res), "+m"(ptr->value)
 		: "i"(add_), "r"(&ptr->value)
 		: "memory", "cc");
@@ -236,11 +236,11 @@ pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
 #endif
 	__asm__ __volatile__(
 	"	sync				\n"
-	"	ldarx   %1,0,%4		\n"
+	"	ldarx   %1,0,%4,1	\n"
 	"	add     %0,%1,%3	\n"
 	"	stdcx.  %0,0,%4		\n"
 	"	bne     $-12		\n"		/* branch to ldarx */
-	"	isync				\n"
+	"	lwsync				\n"
 	: "=&r"(_t), "=&r"(res), "+m"(ptr->value)
 	: "r"(add_), "r"(&ptr->value)
 	: "memory", "cc");

src/include/storage/s_lock.h

@@ -435,7 +435,8 @@ typedef unsigned int slock_t;
  *
  * NOTE: per the Enhanced PowerPC Architecture manual, v1.0 dated 7-May-2002,
  * an isync is a sufficient synchronization barrier after a lwarx/stwcx loop.
- * On newer machines, we can use lwsync instead for better performance.
+ * But if the spinlock is in ordinary memory, we can use lwsync instead for
+ * better performance.
  *
  * Ordinarily, we'd code the branches here using GNU-style local symbols, that
  * is "1f" referencing "1:" and so on.  But some people run gcc on AIX with
@@ -450,23 +451,15 @@ tas(volatile slock_t *lock)
 	int			_res;
 
 	__asm__ __volatile__(
-#ifdef USE_PPC_LWARX_MUTEX_HINT
 "	lwarx   %0,0,%3,1	\n"
-#else
-"	lwarx   %0,0,%3		\n"
-#endif
 "	cmpwi   %0,0		\n"
 "	bne     $+16		\n"		/* branch to li %1,1 */
 "	addi    %0,%0,1		\n"
 "	stwcx.  %0,0,%3		\n"
-"	beq     $+12		\n"		/* branch to lwsync/isync */
+"	beq     $+12		\n"		/* branch to lwsync */
 "	li      %1,1		\n"
 "	b       $+12		\n"		/* branch to end of asm sequence */
-#ifdef USE_PPC_LWSYNC
 "	lwsync				\n"
-#else
-"	isync				\n"
-#endif
 "	li      %1,0		\n"
 
 	: "=&b"(_t), "=r"(_res), "+m"(*lock)
@@ -477,23 +470,14 @@ tas(volatile slock_t *lock)
 
 /*
  * PowerPC S_UNLOCK is almost standard but requires a "sync" instruction.
- * On newer machines, we can use lwsync instead for better performance.
+ * But we can use lwsync instead for better performance.
  */
-#ifdef USE_PPC_LWSYNC
 #define S_UNLOCK(lock)	\
 do \
 { \
 	__asm__ __volatile__ ("	lwsync \n" ::: "memory"); \
 	*((volatile slock_t *) (lock)) = 0; \
 } while (0)
-#else
-#define S_UNLOCK(lock)	\
-do \
-{ \
-	__asm__ __volatile__ ("	sync \n" ::: "memory"); \
-	*((volatile slock_t *) (lock)) = 0; \
-} while (0)
-#endif /* USE_PPC_LWSYNC */
 
 #endif	 /* powerpc */
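For reference, a hedged usage sketch of the resulting primitives (the lock variable and function are invented; real callers go through SpinLockAcquire/SpinLockRelease, which add backoff on contention):

#include "storage/s_lock.h"

static slock_t counter_lock;		/* hypothetical; init with S_INIT_LOCK */

void
bump(volatile int *counter)
{
	while (TAS(&counter_lock))		/* tas() above; nonzero means busy */
		;
	(*counter)++;
	S_UNLOCK(&counter_lock);		/* lwsync, then clear the lock word */
}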

src/tools/msvc/Solution.pm

@@ -313,7 +313,6 @@ sub GenerateFiles
 		HAVE_PAM_PAM_APPL_H => undef,
 		HAVE_POSIX_FADVISE => undef,
 		HAVE_POSIX_FALLOCATE => undef,
-		HAVE_PPC_LWARX_MUTEX_HINT => undef,
 		HAVE_PPOLL => undef,
 		HAVE_PS_STRINGS => undef,
 		HAVE_PTHREAD => undef,