[libc] Simplify implementation of benchmarks

This also allows running the distribution benchmarks in other frameworks,
such as the Google Benchmark facility.
This commit is contained in:
Guillaume Chatelet 2021-07-28 14:52:29 +00:00
parent 40080e7e7f
commit d3c70d9f77
6 changed files with 296 additions and 220 deletions

View file

@ -160,7 +160,7 @@ function(add_libc_multi_impl_benchmark name)
get_target_property(entrypoint_object_file ${fq_config_name} "OBJECT_FILE_RAW")
target_link_libraries(${benchmark_name} PUBLIC json ${entrypoint_object_file})
string(TOUPPER ${name} name_upper)
target_compile_definitions(${benchmark_name} PRIVATE "-DLIBC_BENCHMARK_FUNCTION_${name_upper}=1" "-DLIBC_BENCHMARK_FUNCTION_NAME=\"${fq_config_name}\"")
target_compile_definitions(${benchmark_name} PRIVATE "-DLIBC_BENCHMARK_FUNCTION_${name_upper}=__llvm_libc::${name}" "-DLIBC_BENCHMARK_FUNCTION_NAME=\"${fq_config_name}\"")
else()
message(STATUS "Skipping benchmark for '${fq_config_name}' insufficient host cpu features '${required_cpu_features}'")
endif()

View file

@ -8,6 +8,7 @@
#include "LibcMemoryBenchmark.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
@ -60,5 +61,70 @@ MismatchOffsetDistribution::MismatchOffsetDistribution(size_t BufferSize,
std::uniform_int_distribution<size_t>(0, MismatchIndices.size() - 1);
}
static size_t getL1DataCacheSize() {
const std::vector<CacheInfo> &CacheInfos = HostState::get().Caches;
const auto IsL1DataCache = [](const CacheInfo &CI) {
return CI.Type == "Data" && CI.Level == 1;
};
const auto CacheIt = find_if(CacheInfos, IsL1DataCache);
if (CacheIt != CacheInfos.end())
return CacheIt->Size;
report_fatal_error("Unable to read L1 Cache Data Size");
}
static size_t getAvailableBufferSize() {
static constexpr int64_t KiB = 1024;
static constexpr int64_t ParameterStorageBytes = 4 * KiB;
static constexpr int64_t L1LeftAsideBytes = 1 * KiB;
return getL1DataCacheSize() - L1LeftAsideBytes - ParameterStorageBytes;
}
/// Divides the bytes returned by getAvailableBufferSize() evenly between
/// BufferCount buffers and allocates one ParameterType entry per
/// sizeof(ParameterType) bytes of a single buffer.
/// Aborts through report_fatal_error when the resulting buffer is empty or
/// when fewer than 100 parameters fit in the batch.
ParameterBatch::ParameterBatch(size_t BufferCount)
    : BufferSize(getAvailableBufferSize() / BufferCount),
      BatchSize(BufferSize / sizeof(ParameterType)), Parameters(BatchSize) {
  const bool HasBuffer = BufferSize > 0;
  const bool HasEnoughParameters = BatchSize >= 100;
  if (!(HasBuffer && HasEnoughParameters))
    report_fatal_error("Not enough L1 cache");
}
/// Sums the SizeBytes field of every parameter currently stored in the batch.
size_t ParameterBatch::getBatchBytes() const {
  size_t TotalBytes = 0;
  for (size_t I = 0, E = Parameters.size(); I != E; ++I)
    TotalBytes += Parameters[I].SizeBytes;
  return TotalBytes;
}
/// Aborts through report_fatal_error, quoting the offset, size and buffer
/// size, unless OffsetBytes + SizeBytes of P is strictly smaller than
/// BufferSize.
void ParameterBatch::checkValid(const ParameterType &P) const {
  const size_t EndOffset = P.OffsetBytes + P.SizeBytes;
  if (EndOffset < BufferSize)
    return;
  report_fatal_error(
      llvm::Twine("Call would result in buffer overflow: Offset=") +
      llvm::Twine(P.OffsetBytes) + ", Size=" + llvm::Twine(P.SizeBytes) +
      ", BufferSize=" + llvm::Twine(BufferSize));
}
// Out-of-class definitions of the per-harness size distribution lists.
// Each one stores the ArrayRef returned by the matching
// get*SizeDistributions() function: memcpy distributions for the copy
// harness, memcmp distributions for the comparison harness and memset
// distributions for the set harness.
const ArrayRef<MemorySizeDistribution> CopyHarness::Distributions =
getMemcpySizeDistributions();
const ArrayRef<MemorySizeDistribution> ComparisonHarness::Distributions =
getMemcmpSizeDistributions();
const ArrayRef<MemorySizeDistribution> SetHarness::Distributions =
getMemsetSizeDistributions();
/// Requests a batch laid out for two buffers, then allocates the source and
/// destination buffers, each BufferSize bytes long.
CopyHarness::CopyHarness()
    : ParameterBatch(/*BufferCount=*/2), SrcBuffer(BufferSize),
      DstBuffer(BufferSize) {}
/// Requests a batch laid out for two buffers, allocates both of them with
/// BufferSize bytes and fills them with the same byte value.
ComparisonHarness::ComparisonHarness()
    : ParameterBatch(/*BufferCount=*/2), LhsBuffer(BufferSize),
      RhsBuffer(BufferSize) {
  // Identical content on both sides: every comparison reports equality.
  const int FillByte = 0xF;
  const size_t FillSize = BufferSize;
  memset(LhsBuffer.begin(), FillByte, FillSize);
  memset(RhsBuffer.begin(), FillByte, FillSize);
}
/// Requests a batch laid out for a single buffer and allocates that
/// destination buffer with BufferSize bytes.
SetHarness::SetHarness()
    : ParameterBatch(/*BufferCount=*/1), DstBuffer(BufferSize) {}
} // namespace libc_benchmarks
} // namespace llvm

View file

@ -13,6 +13,7 @@
#define LLVM_LIBC_UTILS_BENCHMARK_MEMORY_BENCHMARK_H
#include "LibcBenchmark.h"
#include "MemorySizeDistributions.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Alignment.h"
#include <cstdint>
@ -162,6 +163,87 @@ public:
}
};
/// Holds a vector of ParameterType together with the size of the buffers the
/// parameters refer to.
/// It makes sure that BufferCount x BufferSize bytes and the vector of
/// ParameterType can all fit in the L1 cache.
struct ParameterBatch {
  /// Describes one call: where it starts in the buffer and how many bytes it
  /// touches. Both fields are 16 bits wide, so each holds values up to 65535.
  struct ParameterType {
    unsigned OffsetBytes : 16; // max : 64 KiB - 1
    unsigned SizeBytes : 16;   // max : 64 KiB - 1
  };

  /// \param BufferCount number of buffers that have to share the cache.
  /// Explicit so that an integer is never silently converted into a batch.
  explicit ParameterBatch(size_t BufferCount);

  /// Verifies that memory accessed through this parameter is valid.
  void checkValid(const ParameterType &) const;

  /// Computes the number of bytes processed within this batch.
  size_t getBatchBytes() const;

  const size_t BufferSize;               ///< Size of one buffer.
  const size_t BatchSize;                ///< Number of entries in Parameters.
  std::vector<ParameterType> Parameters; ///< The parameters of the batch.
};
/// Harness for copy-like functions: owns a source and a destination buffer
/// and exposes the size distributions associated with the Copy operation.
struct CopyHarness : public ParameterBatch {
  CopyHarness();

  static const ArrayRef<MemorySizeDistribution> Distributions;

  /// Forwards to `memcpy`, copying Parameter.SizeBytes bytes from the source
  /// buffer to the destination buffer, both taken at Parameter.OffsetBytes.
  /// Returns whatever `memcpy` returns.
  inline void *Call(ParameterType Parameter,
                    void *(*memcpy)(void *__restrict, const void *__restrict,
                                    size_t)) {
    const unsigned Offset = Parameter.OffsetBytes;
    const unsigned Count = Parameter.SizeBytes;
    return memcpy(DstBuffer + Offset, SrcBuffer + Offset, Count);
  }

private:
  AlignedBuffer SrcBuffer;
  AlignedBuffer DstBuffer;
};
/// Provides destination buffer for the Set operation as well as the associated
/// size distributions.
struct SetHarness : public ParameterBatch {
  SetHarness();

  static const ArrayRef<MemorySizeDistribution> Distributions;

  /// Forwards to `memset`, writing Parameter.SizeBytes bytes at
  /// Parameter.OffsetBytes in the destination buffer. Returns whatever
  /// `memset` returns.
  inline void *Call(ParameterType Parameter,
                    void *(*memset)(void *, int, size_t)) {
    // The fill value is the low byte of the offset. A bit mask (rather than
    // `% 0xFF`) keeps a division out of the measured call and covers the full
    // 0..255 byte range.
    return memset(DstBuffer + Parameter.OffsetBytes,
                  Parameter.OffsetBytes & 0xFF, Parameter.SizeBytes);
  }

  /// Forwards to `bzero` for Parameter.SizeBytes bytes at
  /// Parameter.OffsetBytes in the destination buffer. `bzero` returns
  /// nothing, so the start of the destination buffer is returned instead.
  inline void *Call(ParameterType Parameter, void (*bzero)(void *, size_t)) {
    bzero(DstBuffer + Parameter.OffsetBytes, Parameter.SizeBytes);
    return DstBuffer.begin();
  }

private:
  AlignedBuffer DstBuffer;
};
/// Harness for comparison functions: owns a left-hand and a right-hand buffer
/// and exposes the size distributions associated with the Comparison
/// operation.
struct ComparisonHarness : public ParameterBatch {
  ComparisonHarness();

  static const ArrayRef<MemorySizeDistribution> Distributions;

  /// Forwards to `memcmp`, comparing Parameter.SizeBytes bytes of the two
  /// buffers, both taken at Parameter.OffsetBytes. Returns whatever `memcmp`
  /// returns.
  inline int Call(ParameterType Parameter,
                  int (*memcmp)(const void *, const void *, size_t)) {
    const unsigned Offset = Parameter.OffsetBytes;
    const unsigned Count = Parameter.SizeBytes;
    return memcmp(LhsBuffer + Offset, RhsBuffer + Offset, Count);
  }

private:
  AlignedBuffer LhsBuffer;
  AlignedBuffer RhsBuffer;
};
} // namespace libc_benchmarks
} // namespace llvm

View file

@ -14,10 +14,12 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/JSON.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include <cstring>
#include <unistd.h>
namespace __llvm_libc {
@ -62,172 +64,43 @@ static cl::opt<uint32_t>
NumTrials("num-trials", cl::desc("The number of benchmarks run to perform"),
cl::init(1));
static constexpr int64_t KiB = 1024;
static constexpr int64_t ParameterStorageBytes = 4 * KiB;
static constexpr int64_t L1LeftAsideBytes = 1 * KiB;
struct ParameterType {
unsigned OffsetBytes : 16; // max : 16 KiB - 1
unsigned SizeBytes : 16; // max : 16 KiB - 1
};
#if defined(LIBC_BENCHMARK_FUNCTION_MEMCPY)
struct Benchmark {
static constexpr auto GetDistributions = &getMemcpySizeDistributions;
static constexpr size_t BufferCount = 2;
Benchmark(const size_t BufferSize)
: SrcBuffer(BufferSize), DstBuffer(BufferSize) {}
inline auto functor() {
return [this](ParameterType P) {
__llvm_libc::memcpy(DstBuffer + P.OffsetBytes, SrcBuffer + P.OffsetBytes,
P.SizeBytes);
return DstBuffer[P.OffsetBytes];
};
}
AlignedBuffer SrcBuffer;
AlignedBuffer DstBuffer;
};
#define LIBC_BENCHMARK_FUNCTION LIBC_BENCHMARK_FUNCTION_MEMCPY
using BenchmarkHarness = CopyHarness;
#elif defined(LIBC_BENCHMARK_FUNCTION_MEMSET)
struct Benchmark {
static constexpr auto GetDistributions = &getMemsetSizeDistributions;
static constexpr size_t BufferCount = 1;
Benchmark(const size_t BufferSize) : DstBuffer(BufferSize) {}
inline auto functor() {
return [this](ParameterType P) {
__llvm_libc::memset(DstBuffer + P.OffsetBytes, P.OffsetBytes & 0xFF,
P.SizeBytes);
return DstBuffer[P.OffsetBytes];
};
}
AlignedBuffer DstBuffer;
};
#define LIBC_BENCHMARK_FUNCTION LIBC_BENCHMARK_FUNCTION_MEMSET
using BenchmarkHarness = SetHarness;
#elif defined(LIBC_BENCHMARK_FUNCTION_BZERO)
struct Benchmark {
static constexpr auto GetDistributions = &getMemsetSizeDistributions;
static constexpr size_t BufferCount = 1;
Benchmark(const size_t BufferSize) : DstBuffer(BufferSize) {}
inline auto functor() {
return [this](ParameterType P) {
__llvm_libc::bzero(DstBuffer + P.OffsetBytes, P.SizeBytes);
return DstBuffer[P.OffsetBytes];
};
}
AlignedBuffer DstBuffer;
};
#define LIBC_BENCHMARK_FUNCTION LIBC_BENCHMARK_FUNCTION_BZERO
using BenchmarkHarness = SetHarness;
#elif defined(LIBC_BENCHMARK_FUNCTION_MEMCMP)
struct Benchmark {
static constexpr auto GetDistributions = &getMemcmpSizeDistributions;
static constexpr size_t BufferCount = 2;
Benchmark(const size_t BufferSize)
: BufferA(BufferSize), BufferB(BufferSize) {
// The memcmp buffers always compare equal.
memset(BufferA.begin(), 0xF, BufferSize);
memset(BufferB.begin(), 0xF, BufferSize);
}
inline auto functor() {
return [this](ParameterType P) {
return __llvm_libc::memcmp(BufferA + P.OffsetBytes,
BufferB + P.OffsetBytes, P.SizeBytes);
};
}
AlignedBuffer BufferA;
AlignedBuffer BufferB;
};
#define LIBC_BENCHMARK_FUNCTION LIBC_BENCHMARK_FUNCTION_MEMCMP
using BenchmarkHarness = ComparisonHarness;
#else
#error "Missing LIBC_BENCHMARK_FUNCTION_XXX definition"
#endif
struct Harness : Benchmark {
Harness(const size_t BufferSize, size_t BatchParameterCount,
std::function<unsigned()> SizeSampler,
std::function<unsigned()> OffsetSampler)
: Benchmark(BufferSize), BufferSize(BufferSize),
Parameters(BatchParameterCount), SizeSampler(SizeSampler),
OffsetSampler(OffsetSampler) {}
struct MemfunctionBenchmarkBase : public BenchmarkHarness {
MemfunctionBenchmarkBase() : ReportProgress(isatty(fileno(stdout))) {}
virtual ~MemfunctionBenchmarkBase() {}
CircularArrayRef<ParameterType> generateBatch(size_t Iterations) {
for (auto &P : Parameters) {
P.OffsetBytes = OffsetSampler();
P.SizeBytes = SizeSampler();
if (P.OffsetBytes + P.SizeBytes >= BufferSize)
report_fatal_error("Call would result in buffer overflow");
}
virtual Study run() = 0;
CircularArrayRef<ParameterBatch::ParameterType>
generateBatch(size_t Iterations) {
randomize();
return cycle(makeArrayRef(Parameters), Iterations);
}
private:
const size_t BufferSize;
std::vector<ParameterType> Parameters;
std::function<unsigned()> SizeSampler;
std::function<unsigned()> OffsetSampler;
};
size_t getL1DataCacheSize() {
const std::vector<CacheInfo> &CacheInfos = HostState::get().Caches;
const auto IsL1DataCache = [](const CacheInfo &CI) {
return CI.Type == "Data" && CI.Level == 1;
};
const auto CacheIt = find_if(CacheInfos, IsL1DataCache);
if (CacheIt != CacheInfos.end())
return CacheIt->Size;
report_fatal_error("Unable to read L1 Cache Data Size");
}
struct MemfunctionBenchmark {
MemfunctionBenchmark(int64_t L1Size = getL1DataCacheSize())
: AvailableSize(L1Size - L1LeftAsideBytes - ParameterStorageBytes),
BufferSize(AvailableSize / Benchmark::BufferCount),
BatchParameterCount(BufferSize / sizeof(ParameterType)) {
// Handling command line flags
if (AvailableSize <= 0 || BufferSize <= 0 || BatchParameterCount < 100)
report_fatal_error("Not enough L1 cache");
if (!isPowerOfTwoOrZero(AlignedAccess))
report_fatal_error(AlignedAccess.ArgStr +
Twine(" must be a power of two or zero"));
const bool HasDistributionName = !SizeDistributionName.empty();
if (SweepMode && HasDistributionName)
report_fatal_error("Select only one of `--" + Twine(SweepMode.ArgStr) +
"` or `--" + Twine(SizeDistributionName.ArgStr) + "`");
if (SweepMode) {
MaxSizeValue = SweepMaxSize;
} else {
std::map<StringRef, MemorySizeDistribution> Map;
for (MemorySizeDistribution Distribution : Benchmark::GetDistributions())
Map[Distribution.Name] = Distribution;
if (Map.count(SizeDistributionName) == 0) {
std::string Message;
raw_string_ostream Stream(Message);
Stream << "Unknown --" << SizeDistributionName.ArgStr << "='"
<< SizeDistributionName << "', available distributions:\n";
for (const auto &Pair : Map)
Stream << "'" << Pair.first << "'\n";
report_fatal_error(Stream.str());
}
SizeDistribution = Map[SizeDistributionName];
MaxSizeValue = SizeDistribution.Probabilities.size() - 1;
}
// Setup study.
protected:
Study createStudy() {
Study Study;
// Harness study.
Study.StudyName = StudyName;
Runtime &RI = Study.Runtime;
RI.Host = HostState::get();
RI.BufferSize = BufferSize;
RI.BatchParameterCount = BatchParameterCount;
RI.BatchParameterCount = BatchSize;
BenchmarkOptions &BO = RI.BenchmarkOptions;
BO.MinDuration = std::chrono::milliseconds(1);
@ -241,56 +114,34 @@ struct MemfunctionBenchmark {
StudyConfiguration &SC = Study.Configuration;
SC.NumTrials = NumTrials;
SC.IsSweepMode = SweepMode;
if (SweepMode)
SC.SweepModeMaxSize = SweepMaxSize;
else
SC.SizeDistributionName = SizeDistributionName;
SC.AccessAlignment = MaybeAlign(AlignedAccess);
SC.Function = LIBC_BENCHMARK_FUNCTION_NAME;
}
Study run() {
if (SweepMode)
runSweepMode();
else
runDistributionMode();
return Study;
}
void runTrials(const BenchmarkOptions &Options,
std::vector<Duration> &Measurements) {
for (size_t i = 0; i < NumTrials; ++i) {
const BenchmarkResult Result = benchmark(
Options, *this, [this](ParameterBatch::ParameterType Parameter) {
return Call(Parameter, LIBC_BENCHMARK_FUNCTION);
});
Measurements.push_back(Result.BestGuess);
reportProgress(Measurements);
}
}
virtual void randomize() = 0;
private:
const int64_t AvailableSize;
const int64_t BufferSize;
const size_t BatchParameterCount;
size_t MaxSizeValue = 0;
MemorySizeDistribution SizeDistribution;
Study Study;
std::mt19937_64 Gen;
bool ReportProgress;
static constexpr bool isPowerOfTwoOrZero(size_t Value) {
return (Value & (Value - 1U)) == 0;
}
std::function<unsigned()> geOffsetSampler() {
return [this]() {
static OffsetDistribution OD(BufferSize, MaxSizeValue,
Study.Configuration.AccessAlignment);
return OD(Gen);
};
}
std::function<unsigned()> getSizeSampler() {
return [this]() {
static std::discrete_distribution<unsigned> Distribution(
SizeDistribution.Probabilities.begin(),
SizeDistribution.Probabilities.end());
return Distribution(Gen);
};
}
void reportProgress() {
void reportProgress(const std::vector<Duration> &Measurements) {
if (!ReportProgress)
return;
static size_t LastPercent = -1;
const size_t TotalSteps = Study.Measurements.capacity();
const size_t Steps = Study.Measurements.size();
const size_t TotalSteps = Measurements.capacity();
const size_t Steps = Measurements.size();
const size_t Percent = 100 * Steps / TotalSteps;
if (Percent == LastPercent)
return;
@ -303,40 +154,76 @@ private:
errs() << '_';
errs() << "] " << Percent << '%' << '\r';
}
};
void runTrials(const BenchmarkOptions &Options,
std::function<unsigned()> SizeSampler,
std::function<unsigned()> OffsetSampler) {
Harness B(BufferSize, BatchParameterCount, SizeSampler, OffsetSampler);
for (size_t i = 0; i < NumTrials; ++i) {
const BenchmarkResult Result = benchmark(Options, B, B.functor());
Study.Measurements.push_back(Result.BestGuess);
reportProgress();
struct MemfunctionBenchmarkSweep final : public MemfunctionBenchmarkBase {
MemfunctionBenchmarkSweep()
: OffsetSampler(MemfunctionBenchmarkBase::BufferSize, SweepMaxSize,
MaybeAlign(AlignedAccess)) {}
virtual void randomize() override {
for (auto &P : Parameters) {
P.OffsetBytes = OffsetSampler(Gen);
P.SizeBytes = CurrentSweepSize;
checkValid(P);
}
}
void runSweepMode() {
Study.Measurements.reserve(NumTrials * SweepMaxSize);
virtual Study run() override {
Study Study = createStudy();
Study.Configuration.SweepModeMaxSize = SweepMaxSize;
BenchmarkOptions &BO = Study.Runtime.BenchmarkOptions;
BO.MinDuration = std::chrono::milliseconds(1);
BO.InitialIterations = 100;
auto &Measurements = Study.Measurements;
Measurements.reserve(NumTrials * SweepMaxSize);
for (size_t Size = 0; Size <= SweepMaxSize; ++Size) {
const auto SizeSampler = [Size]() { return Size; };
runTrials(BO, SizeSampler, geOffsetSampler());
CurrentSweepSize = Size;
runTrials(BO, Measurements);
}
return Study;
}
private:
size_t CurrentSweepSize = 0;
OffsetDistribution OffsetSampler;
std::mt19937_64 Gen;
};
struct MemfunctionBenchmarkDistribution final
: public MemfunctionBenchmarkBase {
MemfunctionBenchmarkDistribution(MemorySizeDistribution Distribution)
: Distribution(Distribution), Probabilities(Distribution.Probabilities),
SizeSampler(Probabilities.begin(), Probabilities.end()),
OffsetSampler(MemfunctionBenchmarkBase::BufferSize,
Probabilities.size() - 1, MaybeAlign(AlignedAccess)) {}
virtual void randomize() override {
for (auto &P : Parameters) {
P.OffsetBytes = OffsetSampler(Gen);
P.SizeBytes = SizeSampler(Gen);
checkValid(P);
}
}
void runDistributionMode() {
Study.Measurements.reserve(NumTrials);
virtual Study run() override {
Study Study = createStudy();
Study.Configuration.SizeDistributionName = Distribution.Name.str();
BenchmarkOptions &BO = Study.Runtime.BenchmarkOptions;
BO.MinDuration = std::chrono::milliseconds(10);
BO.InitialIterations = BatchParameterCount * 10;
runTrials(BO, getSizeSampler(), geOffsetSampler());
BO.InitialIterations = BatchSize * 10;
auto &Measurements = Study.Measurements;
Measurements.reserve(NumTrials);
runTrials(BO, Measurements);
return Study;
}
private:
MemorySizeDistribution Distribution;
ArrayRef<double> Probabilities;
std::discrete_distribution<unsigned> SizeSampler;
OffsetDistribution OffsetSampler;
std::mt19937_64 Gen;
};
void writeStudy(const Study &S) {
@ -354,20 +241,33 @@ void writeStudy(const Study &S) {
// Benchmark driver: checks the requirements, validates the command line
// flags, instantiates either the sweep or the distribution benchmark and
// writes the study returned by its run() method.
void main() {
checkRequirements();
// NOTE(review): the next two statements look like the pre-change body; this
// listing appears to interleave removed and added lines - confirm before
// relying on them.
MemfunctionBenchmark MB;
writeStudy(MB.run());
// NOTE(review): isPowerOf2_32 only accepts powers of two greater than zero,
// so this check rejects 0 although the message below says zero is allowed
// (the isPowerOfTwoOrZero helper used previously accepted it) - confirm
// whether 0 should still be a valid value.
if (!isPowerOf2_32(AlignedAccess))
report_fatal_error(AlignedAccess.ArgStr +
Twine(" must be a power of two or zero"));
// Sweep mode and a named size distribution are mutually exclusive.
const bool HasDistributionName = !SizeDistributionName.empty();
if (SweepMode && HasDistributionName)
report_fatal_error("Select only one of `--" + Twine(SweepMode.ArgStr) +
"` or `--" + Twine(SizeDistributionName.ArgStr) + "`");
std::unique_ptr<MemfunctionBenchmarkBase> Benchmark;
if (SweepMode)
Benchmark.reset(new MemfunctionBenchmarkSweep());
else
// getDistributionOrDie looks the name up among the distributions of the
// selected harness.
Benchmark.reset(new MemfunctionBenchmarkDistribution(getDistributionOrDie(
BenchmarkHarness::Distributions, SizeDistributionName)));
writeStudy(Benchmark->run());
}
} // namespace libc_benchmarks
} // namespace llvm
#ifndef NDEBUG
#error For reproducibility benchmarks should not be compiled in DEBUG mode.
#endif
int main(int argc, char **argv) {
llvm::cl::ParseCommandLineOptions(argc, argv);
#ifndef NDEBUG
static_assert(
false,
"For reproducibility benchmarks should not be compiled in DEBUG mode.");
#endif
llvm::libc_benchmarks::main();
return EXIT_SUCCESS;
}

View file

@ -1,5 +1,8 @@
#include "MemorySizeDistributions.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
namespace llvm {
namespace libc_benchmarks {
@ -135,5 +138,24 @@ ArrayRef<MemorySizeDistribution> getMemcmpSizeDistributions() {
};
return kDistributions;
}
/// Returns a copy of the first element of Distributions whose Name equals
/// Name.
/// When no element matches, aborts through report_fatal_error with a message
/// that quotes the requested name and lists the name of every available
/// distribution, one per line.
MemorySizeDistribution
getDistributionOrDie(ArrayRef<MemorySizeDistribution> Distributions,
                     StringRef Name) {
  for (const auto &MSD : Distributions)
    if (MSD.Name == Name)
      return MSD;
  // No match: build the list of valid names for the error message.
  std::string Message;
  raw_string_ostream Stream(Message);
  Stream << "Unknown MemorySizeDistribution '" << Name
         << "', available distributions:\n";
  for (const auto &MSD : Distributions)
    Stream << "'" << MSD.Name << "'\n";
  report_fatal_error(Stream.str());
}
} // namespace libc_benchmarks
} // namespace llvm

View file

@ -38,6 +38,12 @@ ArrayRef<MemorySizeDistribution> getMemsetSizeDistributions();
/// Returns a list of memcmp size distributions.
ArrayRef<MemorySizeDistribution> getMemcmpSizeDistributions();
/// Returns the first MemorySizeDistribution from Distributions with the
/// specified Name.
/// Does not return when no distribution matches: the process is aborted with
/// a fatal error listing the available distribution names.
MemorySizeDistribution
getDistributionOrDie(ArrayRef<MemorySizeDistribution> Distributions,
StringRef Name);
} // namespace libc_benchmarks
} // namespace llvm