992be8b50a
Copy faster implementation of crc32c from linux kernel as of 6.5-rc7 (x86_64, arch/x86/crypto/crc32c-pcl-intel-asm_64.S). This needs assembler build support, so detect target architecture so cross-compilation still works. Add a special CPU flag so the old and new implementations can be benchmarked and verified separately. Sample benchmark: CPU flags: 0x1ff CPU features: SSE2 SSSE3 SSE41 SSE42 SHA AVX AVX2 CRC32C_PCL Block size: 4096 Iterations: 1000000 Implementation: builtin Units: CPU cycles NULL-NOP: cycles: 77177218, cycles/i 77 NULL-MEMCPY: cycles: 226313072, cycles/i 226, 62133.395 MiB/s CRC32C-ref: cycles: 24418596066, cycles/i 24418, 575.859 MiB/s CRC32C-NI: cycles: 1188335920, cycles/i 1188, 11833.073 MiB/s CRC32C-PCL: cycles: 463193456, cycles/i 463, 30358.037 MiB/s XXHASH: cycles: 851606646, cycles/i 851, 16511.916 MiB/s SHA256-ref: cycles: 74476234956, cycles/i 74476, 188.808 MiB/s SHA256-NI: cycles: 34198637428, cycles/i 34198, 411.177 MiB/s BLAKE2-ref: cycles: 14761411664, cycles/i 14761, 952.597 MiB/s BLAKE2-SSE2: cycles: 18101896796, cycles/i 18101, 776.807 MiB/s BLAKE2-SSE41: cycles: 12599091062, cycles/i 12599, 1116.087 MiB/s BLAKE2-AVX2: cycles: 9668247506, cycles/i 9668, 1454.418 MiB/s The new implementation is about 2.5x faster. Note: the new version does not work on musl because of linkage problems (relocations in .rodata), so it's still using the old implementation. Signed-off-by: David Sterba <dsterba@suse.com>
63 lines
1.7 KiB
C
63 lines
1.7 KiB
C
/*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public
|
|
* License v2 as published by the Free Software Foundation.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public
|
|
* License along with this program; if not, write to the
|
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
|
* Boston, MA 02111-1307, USA.
|
|
*/
|
|
|
|
/*
|
|
* Detect CPU feature bits at runtime (x86_64 only)
|
|
*/
|
|
|
|
#ifndef __CPU_UTILS_H__
|
|
#define __CPU_UTILS_H__
|
|
|
|
#include <stdbool.h>
|
|
|
|
/*
 * Helper used only while defining enum cpu_feature. For every flag it emits
 * three enumerators:
 *
 *   __<flag>_BIT - the bit index; it has no initializer, so it is the
 *                  previous enumerator's value plus one (0 for the first)
 *   <flag>       - the mask with only that bit set
 *   __<flag>_SEQ - the bit index again, emitted last so that the next
 *                  implicit enumerator continues from the index and not
 *                  from the mask value
 */
#define ENUM_CPU_BIT(flag)						\
	__ ## flag ## _BIT,						\
	flag = (1U << __ ## flag ## _BIT),				\
	__ ## flag ## _SEQ = __ ## flag ## _BIT

/*
 * Feature masks, one bit per feature, assigned in declaration order.
 *
 * Note that CPU_FLAG_NONE occupies bit 0, so its mask value is 1 and not 0;
 * CPU_FLAG_SSE2 is 2, CPU_FLAG_SSSE3 is 4 and so on.
 */
enum cpu_feature {
	ENUM_CPU_BIT(CPU_FLAG_NONE),
	ENUM_CPU_BIT(CPU_FLAG_SSE2),
	ENUM_CPU_BIT(CPU_FLAG_SSSE3),
	ENUM_CPU_BIT(CPU_FLAG_SSE41),
	ENUM_CPU_BIT(CPU_FLAG_SSE42),
	ENUM_CPU_BIT(CPU_FLAG_SHA),
	ENUM_CPU_BIT(CPU_FLAG_AVX),
	ENUM_CPU_BIT(CPU_FLAG_AVX2),

	/* Special features */
	ENUM_CPU_BIT(CPU_FLAG_CRC32C_PCL),
};

/* The helper is not meant to be used outside of this definition. */
#undef ENUM_CPU_BIT
|
|
|
|
/* Private but in public header to allow inlining */
|
|
/*
 * Feature mask tested by cpu_has_feature(); only declared here, the
 * definition lives in another translation unit.
 */
extern unsigned long __cpu_flags;
|
|
/*
 * NOTE(review): presumably a saved copy of the originally detected flags so
 * that a changed level can be reset later - confirm in the implementation.
 */
extern unsigned long __cpu_flags_orig;
|
|
|
|
/* Public API */
|
|
/*
 * Detect the CPU feature bits at runtime (x86_64 only).
 * NOTE(review): presumably stores the result in __cpu_flags - confirm in the
 * implementation.
 */
void cpu_detect_flags(void);
|
|
/*
 * NOTE(review): presumably restricts the active feature set to the features
 * up to @topbit so that slower implementations can be selected - the exact
 * meaning of @topbit is not visible here, confirm in the implementation.
 */
void cpu_set_level(unsigned long topbit);
|
|
/*
 * NOTE(review): presumably undoes a previous cpu_set_level() call - confirm
 * in the implementation.
 */
void cpu_reset_level(void);
|
|
/*
 * NOTE(review): presumably prints the CPU flags and feature names - output
 * format and destination are not visible here, confirm in the implementation.
 */
void cpu_print_flags(void);
|
|
|
|
static inline bool cpu_has_feature(enum cpu_feature f)
|
|
{
|
|
return __cpu_flags & f;
|
|
}
|
|
|
|
#endif
|