Speed up CRC calculation using slicing-by-8 algorithm.

This speeds up WAL generation and replay. The new algorithm is
significantly faster with large inputs, like full-page images or when
inserting wide rows. It is slower with tiny inputs, i.e. less than 10 bytes
or so, but the speedup with longer inputs more than makes up for that. Even
small WAL records have at least a 24-byte header at the front.

The output is identical to the current byte-at-a-time computation, so this
does not affect compatibility. The new algorithm is only used for the
CRC-32C variant, not the legacy version used in tsquery or the
"traditional" CRC-32 used in hstore and ltree. Those are not as performance
critical, and are usually only applied over small inputs, so it seems
better to not carry around the extra lookup tables to speed up those rare
cases.

Abhijit Menon-Sen
This commit is contained in:
Heikki Linnakangas 2015-02-10 10:54:40 +02:00
parent cc761b170c
commit 025c02420d
7 changed files with 1246 additions and 85 deletions

View file

@ -193,6 +193,23 @@ fi])# PGAC_C_TYPES_COMPATIBLE
# PGAC_C_BUILTIN_BSWAP32
# -------------------------
# Check if the C compiler understands __builtin_bswap32(),
# and define HAVE__BUILTIN_BSWAP32 if so.
#
# The probe is compile-only (nothing is linked or run): it compiles a
# file-scope initializer that calls the builtin.  The outcome is cached in
# pgac_cv__builtin_bswap32.
AC_DEFUN([PGAC_C_BUILTIN_BSWAP32],
[AC_CACHE_CHECK(for __builtin_bswap32, pgac_cv__builtin_bswap32,
[AC_TRY_COMPILE([static unsigned long int x = __builtin_bswap32(0xaabbccdd);],
[],
[pgac_cv__builtin_bswap32=yes],
[pgac_cv__builtin_bswap32=no])])
if test x"$pgac_cv__builtin_bswap32" = xyes ; then
AC_DEFINE(HAVE__BUILTIN_BSWAP32, 1,
[Define to 1 if your compiler understands __builtin_bswap32.])
fi])# PGAC_C_BUILTIN_BSWAP32
# PGAC_C_BUILTIN_CONSTANT_P
# -------------------------
# Check if the C compiler understands __builtin_constant_p(),

30
configure vendored
View file

@ -10332,6 +10332,36 @@ if test x"$pgac_cv__types_compatible" = xyes ; then
$as_echo "#define HAVE__BUILTIN_TYPES_COMPATIBLE_P 1" >>confdefs.h
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_bswap32" >&5
$as_echo_n "checking for __builtin_bswap32... " >&6; }
if ${pgac_cv__builtin_bswap32+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
static unsigned long int x = __builtin_bswap32(0xaabbccdd);
int
main ()
{
;
return 0;
}
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
pgac_cv__builtin_bswap32=yes
else
pgac_cv__builtin_bswap32=no
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_bswap32" >&5
$as_echo "$pgac_cv__builtin_bswap32" >&6; }
if test x"$pgac_cv__builtin_bswap32" = xyes ; then
$as_echo "#define HAVE__BUILTIN_BSWAP32 1" >>confdefs.h
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_constant_p" >&5
$as_echo_n "checking for __builtin_constant_p... " >&6; }

View file

@ -1185,6 +1185,7 @@ PGAC_C_SIGNED
PGAC_C_FUNCNAME_SUPPORT
PGAC_C_STATIC_ASSERT
PGAC_C_TYPES_COMPATIBLE
PGAC_C_BUILTIN_BSWAP32
PGAC_C_BUILTIN_CONSTANT_P
PGAC_C_BUILTIN_UNREACHABLE
PGAC_C_VA_ARGS

File diff suppressed because it is too large Load diff

View file

@ -41,19 +41,38 @@
typedef uint32 pg_crc32;
/*
 * BSWAP32(x) reverses the byte order of a 32-bit value.
 *
 * The compiler builtin is used when HAVE__BUILTIN_BSWAP32 is defined;
 * otherwise we fall back to shifts and masks.  The fallback evaluates its
 * argument several times, so do not pass an expression with side effects.
 * Every use of the argument is parenthesized so that compound expressions
 * such as BSWAP32(a | b) are swapped as a whole.
 */
#ifdef HAVE__BUILTIN_BSWAP32
#define BSWAP32(x) __builtin_bswap32(x)
#else
#define BSWAP32(x) ((((x) << 24) & 0xff000000) | \
					(((x) << 8) & 0x00ff0000) | \
					(((x) >> 8) & 0x0000ff00) | \
					(((x) >> 24) & 0x000000ff))
#endif
/*
* CRC calculation using the CRC-32C (Castagnoli) polynomial.
*
* We use all-ones as the initial register contents and final bit inversion.
* This is the same algorithm used e.g. in iSCSI. See RFC 3385 for more
* details on the choice of polynomial.
*
* On big-endian systems, the intermediate value is kept in reverse byte
* order, to avoid byte-swapping during the calculation. FIN_CRC32C reverses
* the bytes to the final order.
*/
/* Start a CRC-32C computation: load the register with all-ones. */
#define INIT_CRC32C(crc) ((crc) = 0xFFFFFFFF)
/*
 * Finish a CRC-32C computation by inverting every bit of the register.
 * When WORDS_BIGENDIAN is defined the register is byte-swapped first.
 */
#ifdef WORDS_BIGENDIAN
#define FIN_CRC32C(crc) ((crc) = BSWAP32(crc) ^ 0xFFFFFFFF)
#else
#define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF)
#endif
#define COMP_CRC32C(crc, data, len) \
COMP_CRC32_NORMAL_TABLE(crc, data, len, pg_crc32c_table)
((crc) = pg_comp_crc32c((crc), (data), (len)))
/* Two CRC-32C values match when they compare equal as integers. */
#define EQ_CRC32C(c1, c2) ((c1) == (c2))
/*
 * Out-of-line CRC-32C routine; the definition is not visible in this header.
 * NOTE(review): presumably folds 'len' bytes starting at 'data' into 'crc'
 * and returns the updated register -- confirm against the implementation.
 */
extern pg_crc32 pg_comp_crc32c(pg_crc32 crc, const void *data, size_t len);
/*
* CRC-32, the same used e.g. in Ethernet.
*
@ -67,6 +86,19 @@ typedef uint32 pg_crc32;
COMP_CRC32_NORMAL_TABLE(crc, data, len, pg_crc32_table)
#define EQ_TRADITIONAL_CRC32(c1, c2) ((c1) == (c2))
/*
 * Sarwate's algorithm, for use with a "normal" lookup table.
 *
 * Folds 'len' bytes starting at 'data' into 'crc', one byte per iteration:
 * the low byte of 'crc' XORed with the next input byte selects an entry of
 * 'table', which is XORed with 'crc' shifted right by 8 bits.
 *
 * 'crc' must be a modifiable lvalue without side effects, since it is read
 * and written on every iteration.  The byte counter is a size_t so that
 * lengths of 4 GB or more are not silently truncated.
 */
#define COMP_CRC32_NORMAL_TABLE(crc, data, len, table) \
do { \
	const unsigned char *pg_crc_data_ = (const unsigned char *) (data); \
	size_t		pg_crc_len_ = (len); \
\
	while (pg_crc_len_-- > 0) \
	{ \
		uint32		pg_crc_idx_ = ((crc) ^ *pg_crc_data_++) & 0xFF; \
\
		(crc) = (table)[pg_crc_idx_] ^ ((crc) >> 8); \
	} \
} while (0)
/*
* The CRC algorithm used for WAL et al in pre-9.5 versions.
*
@ -88,20 +120,9 @@ typedef uint32 pg_crc32;
#define EQ_LEGACY_CRC32(c1, c2) ((c1) == (c2))
/*
* Common code for CRC computation using a lookup table.
* Sarwate's algorithm, for use with a "reflected" lookup table (but in the
* legacy algorithm, we actually use it on a "normal" table, see above)
*/
/*
 * COMP_CRC32_NORMAL_TABLE folds 'len' bytes starting at 'data' into 'crc',
 * one byte per iteration: the low byte of 'crc' XORed with the next input
 * byte selects an entry of 'table', which is XORed with 'crc' shifted right
 * by 8 bits.
 *
 * 'crc' must be a modifiable lvalue without side effects, since it is read
 * and written on every iteration.  The byte counter is a size_t so that
 * lengths of 4 GB or more are not silently truncated.
 */
#define COMP_CRC32_NORMAL_TABLE(crc, data, len, table) \
do { \
	const unsigned char *pg_crc_data_ = (const unsigned char *) (data); \
	size_t		pg_crc_len_ = (len); \
\
	while (pg_crc_len_-- > 0) \
	{ \
		uint32		pg_crc_idx_ = ((crc) ^ *pg_crc_data_++) & 0xFF; \
\
		(crc) = (table)[pg_crc_idx_] ^ ((crc) >> 8); \
	} \
} while (0)
#define COMP_CRC32_REFLECTED_TABLE(crc, data, len, table) \
do { \
const unsigned char *__data = (const unsigned char *) (data); \
@ -115,7 +136,7 @@ do { \
} while (0)
/* Constant tables for CRC-32C and CRC-32 polynomials */
extern CRCDLLIMPORT const uint32 pg_crc32c_table[];
extern CRCDLLIMPORT const uint32 pg_crc32_table[];
extern CRCDLLIMPORT const uint32 pg_crc32c_table[8][256];
extern CRCDLLIMPORT const uint32 pg_crc32_table[256];
#endif /* PG_CRC_H */

View file

@ -663,6 +663,9 @@
/* Define to 1 if you have the <winldap.h> header file. */
#undef HAVE_WINLDAP_H
/* Define to 1 if your compiler understands __builtin_bswap32. */
#undef HAVE__BUILTIN_BSWAP32
/* Define to 1 if your compiler understands __builtin_constant_p. */
#undef HAVE__BUILTIN_CONSTANT_P

View file

@ -517,6 +517,9 @@
/* Define to 1 if you have the <winldap.h> header file. */
/* #undef HAVE_WINLDAP_H */
/* Define to 1 if your compiler understands __builtin_bswap32. */
/* #undef HAVE__BUILTIN_BSWAP32 */
/* Define to 1 if your compiler understands __builtin_constant_p. */
/* #undef HAVE__BUILTIN_CONSTANT_P */