From 9527bc0649f0815ba66b187ee4f1abff1e6cc1b1 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 1 Jun 2021 21:41:53 +0200 Subject: [PATCH] btrfs-progs: crypto: add perf support to speed test Use perf events to read the cycle count, this should work on all architectures. Enabled by option --perf and the sysctl kernel.perf_event_paranoid must be 0 or 1. The results are roughly the same as for raw cycles on x86_64 but worse because of the additional overhead (read, context switch): Block size: 4096 Iterations: 100000 Implementation: builtin Units: CPU cycles NULL-NOP: cycles: 42719688, cycles/i 427 NULL-MEMCPY: cycles: 72941208, cycles/i 729, 18670.314 MiB/s CRC32C: cycles: 183709926, cycles/i 1837, 7413.009 MiB/s XXHASH: cycles: 136727614, cycles/i 1367, 9960.264 MiB/s SHA256: cycles: 10711594532, cycles/i 107115, 127.137 MiB/s BLAKE2: cycles: 2256957529, cycles/i 22569, 603.398 MiB/s Block size: 4096 Iterations: 100000 Implementation: builtin Units: perf event: CPU cycles NULL-NOP: perf_c: 29649530, perf_c/i 296 NULL-MEMCPY: perf_c: 59954062, perf_c/i 599, 15137.464 MiB/s CRC32C: perf_c: 179009071, perf_c/i 1790, 6929.460 MiB/s XXHASH: perf_c: 136413509, perf_c/i 1364, 9982.950 MiB/s SHA256: perf_c: 10997356664, perf_c/i 109973, 127.046 MiB/s BLAKE2: perf_c: 2379077576, perf_c/i 23790, 588.780 MiB/s Signed-off-by: David Sterba --- configure.ac | 3 ++ crypto/hash-speedtest.c | 116 ++++++++++++++++++++++++++++++++++------ 2 files changed, 104 insertions(+), 15 deletions(-) diff --git a/configure.ac b/configure.ac index 1efee7ed..c4fa461c 100644 --- a/configure.ac +++ b/configure.ac @@ -60,6 +60,9 @@ AC_CHECK_FUNCS([reallocarray]) AC_CHECK_FUNCS([clock_gettime]) +AC_CHECK_HEADERS([linux/perf_event.h]) +AC_CHECK_HEADERS([linux/hw_breakpoint.h]) + m4_ifndef([PKG_PROG_PKG_CONFIG], [m4_fatal([Could not locate the pkg-config autoconf macros. These are usually located in /usr/share/aclocal/pkg.m4. 
diff --git a/crypto/hash-speedtest.c b/crypto/hash-speedtest.c index af6194e3..0e6a3b84 100644 --- a/crypto/hash-speedtest.c +++ b/crypto/hash-speedtest.c @@ -1,6 +1,13 @@ #include "../kerncompat.h" #include <time.h> #include <getopt.h> +#include <unistd.h> +#if HAVE_LINUX_PERF_EVENT_H == 1 && HAVE_LINUX_HW_BREAKPOINT_H == 1 +#include <linux/perf_event.h> +#include <linux/hw_breakpoint.h> +#include <sys/syscall.h> +#define HAVE_PERF +#endif #include "crypto/hash.h" #include "crypto/crc32c.h" #include "crypto/sha.h" @@ -12,6 +19,12 @@ static const int cycles_supported = 1; static const int cycles_supported = 0; #endif +enum { + UNITS_CYCLES, + UNITS_TIME, + UNITS_PERF, +}; + const int blocksize = 4096; int iterations = 100000; @@ -31,14 +44,56 @@ static inline u64 read_tsc(void) return rdtsc(); } -#define get_cycles() read_tsc() +#define cpu_cycles() read_tsc() #else -#define get_cycles() (0) +#define cpu_cycles() (0) #endif +#ifdef HAVE_PERF + +static int perf_fd = -1; +static int perf_init(void) +{ + static struct perf_event_attr attr = { + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_CPU_CYCLES + }; + + perf_fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0); + return perf_fd; +} + +static void perf_finish(void) +{ + close(perf_fd); +} + +static long long perf_cycles(void) +{ + long long cycles; + int ret; + + ret = read(perf_fd, &cycles, sizeof(cycles)); + if (ret != sizeof(cycles)) + return 0; + return cycles; +} + +#else +static int perf_init() +{ + errno = EOPNOTSUPP; + return -1; +} +static void perf_finish() {} +static long long perf_cycles() { + return 0; +} +#endif + static inline u64 get_time(void) { struct timespec ts; @@ -47,6 +102,16 @@ static inline u64 get_time(void) return ts.tv_sec * 1000 * 1000 * 1000 + ts.tv_nsec; } +static inline u64 get_cycles(int units) +{ + switch (units) { + case UNITS_CYCLES: return cpu_cycles(); + case UNITS_TIME: return get_time(); + case UNITS_PERF: return perf_cycles(); + } + return 0; +} + /* Read the input and copy last bytes as the hash */ static int hash_null_memcpy(const u8 *buf, size_t 
length, u8 *out) { @@ -68,11 +133,22 @@ static int hash_null_nop(const u8 *buf, size_t length, u8 *out) return 0; } -const char *units_to_str(int units) +static const char *units_to_desc(int units) { switch (units) { - case 0: return "cycles"; - case 1: return "nsecs"; + case UNITS_CYCLES: return "CPU cycles"; + case UNITS_TIME: return "time: ns"; + case UNITS_PERF: return "perf event: CPU cycles"; + } + return "unknown"; +} + +static const char *units_to_str(int units) +{ + switch (units) { + case UNITS_CYCLES: return "cycles"; + case UNITS_TIME: return "nsecs"; + case UNITS_PERF: return "perf_c"; } return "unknown"; } @@ -96,18 +172,19 @@ int main(int argc, char **argv) { { .name = "SHA256", .digest = hash_sha256, .digest_size = 32 }, { .name = "BLAKE2", .digest = hash_blake2b, .digest_size = 32 }, }; - int units = 0; + int units = UNITS_CYCLES; optind = 0; while (1) { static const struct option long_options[] = { { "cycles", no_argument, NULL, 'c' }, { "time", no_argument, NULL, 't' }, + { "perf", no_argument, NULL, 'p' }, { NULL, 0, NULL, 0} }; int c; - c = getopt_long(argc, argv, "ct", long_options, NULL); + c = getopt_long(argc, argv, "ctp", long_options, NULL); if (c < 0) break; switch (c) { @@ -117,10 +194,18 @@ int main(int argc, char **argv) { "ERROR: cannot measure cycles on this arch, use --time\n"); return 1; } - units = 0; + units = UNITS_CYCLES; break; case 't': - units = 1; + units = UNITS_TIME; + break; + case 'p': + if (perf_init() == -1) { + fprintf(stderr, +"ERROR: cannot initialize perf, please check sysctl kernel.perf_event_paranoid: %m\n"); + return 1; + } + units = UNITS_PERF; break; default: fprintf(stderr, "ERROR: unknown option\n"); @@ -140,33 +225,33 @@ int main(int argc, char **argv) { printf("Block size: %d\n", blocksize); printf("Iterations: %d\n", iterations); printf("Implementation: %s\n", CRYPTOPROVIDER); - printf("Units: %s\n", units_to_str(units)); + printf("Units: %s\n", units_to_desc(units)); printf("\n"); for (idx = 0; idx < 
ARRAY_SIZE(contestants); idx++) { struct contestant *c = &contestants[idx]; u64 start, end; u64 tstart, tend; - u64 total; + u64 total = 0; printf("%12s: ", c->name); fflush(stdout); tstart = get_time(); - start = get_cycles(); + start = get_cycles(units); for (iter = 0; iter < iterations; iter++) { memset(buf, iter & 0xFF, blocksize); memset(hash, 0, 32); c->digest(buf, blocksize, hash); } - end = get_cycles(); + end = get_cycles(units); tend = get_time(); c->cycles = end - start; c->time = tend - tstart; - if (units == 0) + if (units == UNITS_CYCLES || units == UNITS_PERF) total = c->cycles; - if (units == 1) + if (units == UNITS_TIME) total = c->time; printf("%s: %12llu, %s/i %8llu", @@ -182,6 +267,7 @@ int main(int argc, char **argv) { } putchar('\n'); } + perf_finish(); return 0; }