Speed up CRC calculation using slicing-by-8 algorithm.
This speeds up WAL generation and replay. The new algorithm is significantly faster with large inputs, like full-page images or when inserting wide rows. It is slower with tiny inputs, i.e. less than 10 bytes or so, but the speedup with longer inputs more than makes up for that. Even small WAL records have at least a 24-byte header at the front. The output is identical to the current byte-at-a-time computation, so this does not affect compatibility. The new algorithm is only used for the CRC-32C variant, not the legacy version used in tsquery or the "traditional" CRC-32 used in hstore and ltree. Those are not as performance critical, and are usually only applied over small inputs, so it seems better to not carry around the extra lookup tables to speed up those rare cases. Abhijit Menon-Sen
This commit is contained in:
parent
cc761b170c
commit
025c02420d
7 changed files with 1246 additions and 85 deletions
|
@ -193,6 +193,23 @@ fi])# PGAC_C_TYPES_COMPATIBLE
|
|||
|
||||
|
||||
|
||||
# PGAC_C_BUILTIN_BSWAP32
|
||||
# -------------------------
|
||||
# Check if the C compiler understands __builtin_bswap32(),
|
||||
# and define HAVE__BUILTIN_BSWAP32 if so.
|
||||
AC_DEFUN([PGAC_C_BUILTIN_BSWAP32],
|
||||
[AC_CACHE_CHECK(for __builtin_bswap32, pgac_cv__builtin_bswap32,
|
||||
[AC_TRY_COMPILE([static unsigned long int x = __builtin_bswap32(0xaabbccdd);],
|
||||
[],
|
||||
[pgac_cv__builtin_bswap32=yes],
|
||||
[pgac_cv__builtin_bswap32=no])])
|
||||
if test x"$pgac_cv__builtin_bswap32" = xyes ; then
|
||||
AC_DEFINE(HAVE__BUILTIN_BSWAP32, 1,
|
||||
[Define to 1 if your compiler understands __builtin_bswap32.])
|
||||
fi])# PGAC_C_BUILTIN_BSWAP32
|
||||
|
||||
|
||||
|
||||
# PGAC_C_BUILTIN_CONSTANT_P
|
||||
# -------------------------
|
||||
# Check if the C compiler understands __builtin_constant_p(),
|
||||
|
|
30
configure
vendored
30
configure
vendored
|
@ -10332,6 +10332,36 @@ if test x"$pgac_cv__types_compatible" = xyes ; then
|
|||
|
||||
$as_echo "#define HAVE__BUILTIN_TYPES_COMPATIBLE_P 1" >>confdefs.h
|
||||
|
||||
fi
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_bswap32" >&5
|
||||
$as_echo_n "checking for __builtin_bswap32... " >&6; }
|
||||
if ${pgac_cv__builtin_bswap32+:} false; then :
|
||||
$as_echo_n "(cached) " >&6
|
||||
else
|
||||
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
|
||||
/* end confdefs.h. */
|
||||
static unsigned long int x = __builtin_bswap32(0xaabbccdd);
|
||||
int
|
||||
main ()
|
||||
{
|
||||
|
||||
;
|
||||
return 0;
|
||||
}
|
||||
_ACEOF
|
||||
if ac_fn_c_try_compile "$LINENO"; then :
|
||||
pgac_cv__builtin_bswap32=yes
|
||||
else
|
||||
pgac_cv__builtin_bswap32=no
|
||||
fi
|
||||
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
|
||||
fi
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_bswap32" >&5
|
||||
$as_echo "$pgac_cv__builtin_bswap32" >&6; }
|
||||
if test x"$pgac_cv__builtin_bswap32" = xyes ; then
|
||||
|
||||
$as_echo "#define HAVE__BUILTIN_BSWAP32 1" >>confdefs.h
|
||||
|
||||
fi
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_constant_p" >&5
|
||||
$as_echo_n "checking for __builtin_constant_p... " >&6; }
|
||||
|
|
|
@ -1185,6 +1185,7 @@ PGAC_C_SIGNED
|
|||
PGAC_C_FUNCNAME_SUPPORT
|
||||
PGAC_C_STATIC_ASSERT
|
||||
PGAC_C_TYPES_COMPATIBLE
|
||||
PGAC_C_BUILTIN_BSWAP32
|
||||
PGAC_C_BUILTIN_CONSTANT_P
|
||||
PGAC_C_BUILTIN_UNREACHABLE
|
||||
PGAC_C_VA_ARGS
|
||||
|
|
1224
src/common/pg_crc.c
1224
src/common/pg_crc.c
File diff suppressed because it is too large
Load diff
|
@ -41,19 +41,38 @@
|
|||
|
||||
typedef uint32 pg_crc32;
|
||||
|
||||
/*
 * BSWAP32(x) reverses the byte order of a 32-bit value.
 *
 * When configure has defined HAVE__BUILTIN_BSWAP32, the compiler builtin is
 * used.  Otherwise we fall back to shifting and masking.  The fallback
 * evaluates its argument several times, so do not pass an expression with
 * side effects.  Each use of the argument is parenthesized so that compound
 * expressions such as "a | b" are swapped as a whole rather than being
 * re-associated with the shift operators.
 */
#ifdef HAVE__BUILTIN_BSWAP32
#define BSWAP32(x) __builtin_bswap32(x)
#else
#define BSWAP32(x) ((((x) << 24) & 0xff000000) | \
					(((x) <<  8) & 0x00ff0000) | \
					(((x) >>  8) & 0x0000ff00) | \
					(((x) >> 24) & 0x000000ff))
#endif
|
||||
|
||||
/*
|
||||
* CRC calculation using the CRC-32C (Castagnoli) polynomial.
|
||||
*
|
||||
* We use all-ones as the initial register contents and final bit inversion.
|
||||
* This is the same algorithm used e.g. in iSCSI. See RFC 3385 for more
|
||||
* details on the choice of polynomial.
|
||||
*
|
||||
* On big-endian systems, the intermediate value is kept in reverse byte
|
||||
* order, to avoid byte-swapping during the calculation. FIN_CRC32C reverses
|
||||
* the bytes to the final order.
|
||||
*/
|
||||
#define INIT_CRC32C(crc) ((crc) = 0xFFFFFFFF)
|
||||
#ifdef WORDS_BIGENDIAN
|
||||
#define FIN_CRC32C(crc) ((crc) = BSWAP32(crc) ^ 0xFFFFFFFF)
|
||||
#else
|
||||
#define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF)
|
||||
#endif
|
||||
#define COMP_CRC32C(crc, data, len) \
|
||||
COMP_CRC32_NORMAL_TABLE(crc, data, len, pg_crc32c_table)
|
||||
((crc) = pg_comp_crc32c((crc), (data), (len)))
|
||||
#define EQ_CRC32C(c1, c2) ((c1) == (c2))
|
||||
|
||||
extern pg_crc32 pg_comp_crc32c(pg_crc32 crc, const void *data, size_t len);
|
||||
|
||||
/*
|
||||
* CRC-32, the same used e.g. in Ethernet.
|
||||
*
|
||||
|
@ -67,6 +86,19 @@ typedef uint32 pg_crc32;
|
|||
COMP_CRC32_NORMAL_TABLE(crc, data, len, pg_crc32_table)
|
||||
#define EQ_TRADITIONAL_CRC32(c1, c2) ((c1) == (c2))
|
||||
|
||||
/* Sarwate's algorithm, for use with a "normal" lookup table */
|
||||
#define COMP_CRC32_NORMAL_TABLE(crc, data, len, table) \
|
||||
do { \
|
||||
const unsigned char *__data = (const unsigned char *) (data); \
|
||||
uint32 __len = (len); \
|
||||
\
|
||||
while (__len-- > 0) \
|
||||
{ \
|
||||
int __tab_index = ((int) (crc) ^ *__data++) & 0xFF; \
|
||||
(crc) = table[__tab_index] ^ ((crc) >> 8); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
* The CRC algorithm used for WAL et al in pre-9.5 versions.
|
||||
*
|
||||
|
@ -88,20 +120,9 @@ typedef uint32 pg_crc32;
|
|||
#define EQ_LEGACY_CRC32(c1, c2) ((c1) == (c2))
|
||||
|
||||
/*
|
||||
* Common code for CRC computation using a lookup table.
|
||||
* Sarwate's algorithm, for use with a "reflected" lookup table (but in the
|
||||
* legacy algorithm, we actually use it on a "normal" table, see above)
|
||||
*/
|
||||
#define COMP_CRC32_NORMAL_TABLE(crc, data, len, table) \
|
||||
do { \
|
||||
const unsigned char *__data = (const unsigned char *) (data); \
|
||||
uint32 __len = (len); \
|
||||
\
|
||||
while (__len-- > 0) \
|
||||
{ \
|
||||
int __tab_index = ((int) (crc) ^ *__data++) & 0xFF; \
|
||||
(crc) = table[__tab_index] ^ ((crc) >> 8); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define COMP_CRC32_REFLECTED_TABLE(crc, data, len, table) \
|
||||
do { \
|
||||
const unsigned char *__data = (const unsigned char *) (data); \
|
||||
|
@ -115,7 +136,7 @@ do { \
|
|||
} while (0)
|
||||
|
||||
/* Constant tables for CRC-32C and CRC-32 polynomials */
|
||||
extern CRCDLLIMPORT const uint32 pg_crc32c_table[];
|
||||
extern CRCDLLIMPORT const uint32 pg_crc32_table[];
|
||||
extern CRCDLLIMPORT const uint32 pg_crc32c_table[8][256];
|
||||
extern CRCDLLIMPORT const uint32 pg_crc32_table[256];
|
||||
|
||||
#endif /* PG_CRC_H */
|
||||
|
|
|
@ -663,6 +663,9 @@
|
|||
/* Define to 1 if you have the <winldap.h> header file. */
|
||||
#undef HAVE_WINLDAP_H
|
||||
|
||||
/* Define to 1 if your compiler understands __builtin_bswap32. */
|
||||
#undef HAVE__BUILTIN_BSWAP32
|
||||
|
||||
/* Define to 1 if your compiler understands __builtin_constant_p. */
|
||||
#undef HAVE__BUILTIN_CONSTANT_P
|
||||
|
||||
|
|
|
@ -517,6 +517,9 @@
|
|||
/* Define to 1 if you have the <winldap.h> header file. */
|
||||
/* #undef HAVE_WINLDAP_H */
|
||||
|
||||
/* Define to 1 if your compiler understands __builtin_bswap32. */
|
||||
/* #undef HAVE__BUILTIN_BSWAP32 */
|
||||
|
||||
/* Define to 1 if your compiler understands __builtin_constant_p. */
|
||||
/* #undef HAVE__BUILTIN_CONSTANT_P */
|
||||
|
||||
|
|
Loading…
Reference in a new issue