[libc] CopyAlignedBlocks can now specify alignment on top of block size
This was requested in D92236. Differential Revision: https://reviews.llvm.org/D94770
This commit is contained in:
parent
cf0173de69
commit
5bf47e142b
|
@ -72,28 +72,35 @@ static void CopyBlockOverlap(char *__restrict dst, const char *__restrict src,
|
|||
|
||||
// Copies `count` bytes by blocks of `kBlockSize` bytes.
|
||||
// Copies at the start and end of the buffer are unaligned.
|
||||
// Copies in the middle of the buffer are aligned to `kBlockSize`.
|
||||
// Copies in the middle of the buffer are aligned to `kAlignment`.
|
||||
//
|
||||
// e.g. with
|
||||
// [12345678123456781234567812345678]
|
||||
// [__XXXXXXXXXXXXXXXXXXXXXXXXXXX___]
|
||||
// [__XXXXXXXX______________________]
|
||||
// [________XXXXXXXX________________]
|
||||
// [________________XXXXXXXX________]
|
||||
// [_____________________XXXXXXXX___]
|
||||
// [__XXXXXXXXXXXXXXXXXXXXXXXXXXX___]
|
||||
// [__XXXX__________________________]
|
||||
// [____XXXXXXXX____________________]
|
||||
// [____________XXXXXXXX____________]
|
||||
// [____________________XXXXXXXX____]
|
||||
// [_____________________XXXXXXXX___]
|
||||
//
|
||||
// Precondition: `count > 2 * kBlockSize` for efficiency.
|
||||
// `count >= kBlockSize` for correctness.
|
||||
template <size_t kBlockSize>
|
||||
// Precondition: `kAlignment <= kBlockSize`
|
||||
// `count > 2 * kBlockSize` for efficiency.
|
||||
// `count >= kBlockSize` for correctness.
|
||||
template <size_t kBlockSize, size_t kAlignment = kBlockSize>
|
||||
static void CopyAlignedBlocks(char *__restrict dst, const char *__restrict src,
|
||||
size_t count) {
|
||||
CopyBlock<kBlockSize>(dst, src); // Copy first block
|
||||
static_assert(is_power2(kAlignment), "kAlignment must be a power of two");
|
||||
static_assert(is_power2(kBlockSize), "kBlockSize must be a power of two");
|
||||
static_assert(kAlignment <= kBlockSize,
|
||||
"kAlignment must be less or equal to block size");
|
||||
CopyBlock<kAlignment>(dst, src); // Copy first block
|
||||
|
||||
// Copy aligned blocks
|
||||
const size_t ofla = offset_from_last_aligned<kBlockSize>(src);
|
||||
const size_t ofla = offset_from_last_aligned<kAlignment>(src);
|
||||
const size_t limit = count + ofla - kBlockSize;
|
||||
for (size_t offset = kBlockSize; offset < limit; offset += kBlockSize)
|
||||
CopyBlock<kBlockSize>(dst - ofla + offset, src - ofla + offset);
|
||||
for (size_t offset = kAlignment; offset < limit; offset += kBlockSize)
|
||||
CopyBlock<kBlockSize>(dst - ofla + offset,
|
||||
assume_aligned<kAlignment>(src - ofla + offset));
|
||||
|
||||
CopyLastBlock<kBlockSize>(dst, src, count); // Copy last block
|
||||
}
|
||||
|
|
|
@ -60,6 +60,10 @@ static inline intptr_t offset_to_next_cache_line(const void *ptr) {
|
|||
return offset_to_next_aligned<LLVM_LIBC_CACHELINE_SIZE>(ptr);
|
||||
}
|
||||
|
||||
// Returns `ptr` unchanged while letting the compiler assume that it is aligned
// to `alignment` bytes (via `__builtin_assume_aligned`).
//
// Precondition: `ptr` really is aligned to `alignment` bytes; the builtin is
// an unchecked promise to the optimizer, not a runtime check.
template <size_t alignment, typename T> static T *assume_aligned(T *ptr) {
  static_assert(alignment != 0 && (alignment & (alignment - 1)) == 0,
                "alignment must be a power of two");
  // The builtin returns `void *`; `static_cast` is enough to get `T *` back.
  return static_cast<T *>(__builtin_assume_aligned(ptr, alignment));
}
|
||||
|
||||
} // namespace __llvm_libc
|
||||
|
||||
#endif // LLVM_LIBC_SRC_MEMORY_UTILS_H
|
||||
|
|
|
@ -211,7 +211,24 @@ TEST(MemcpyUtilsTest, CopyAlignedBlocks) {
|
|||
EXPECT_STREQ(trace.Read(), "011121111111");
|
||||
}
|
||||
|
||||
TEST(MemcpyUtilsTest, MaxReloads) {
|
||||
TEST(MemcpyUtilsTest, CopyAlignedBlocksWithAlignment) {
  // Every case copies `count` bytes from an aligned source using 8-byte blocks
  // and 4-byte alignment. The expected string is compared against both the
  // write trace and the read trace (presumably one access count per byte).
  struct TestCase {
    size_t count;
    const char *expected_trace;
  };
  const TestCase cases[] = {
      // `count` is a multiple of the alignment.
      {8, "22221111"},
      // `count` is not a multiple of the alignment.
      {9, "122211111"},
  };

  auto &trace = GetTrace();
  for (const TestCase &test_case : cases) {
    trace.Clear();
    CopyAlignedBlocks<8, 4>(I(0), I(0), test_case.count);
    EXPECT_STREQ(trace.Write(), test_case.expected_trace);
    EXPECT_STREQ(trace.Read(), test_case.expected_trace);
  }
}
|
||||
|
||||
TEST(MemcpyUtilsTest, CopyAlignedBlocksMaxReloads) {
|
||||
auto &trace = GetTrace();
|
||||
for (size_t alignment = 0; alignment < 32; ++alignment) {
|
||||
for (size_t count = 64; count < 768; ++count) {
|
||||
|
@ -231,4 +248,24 @@ TEST(MemcpyUtilsTest, MaxReloads) {
|
|||
}
|
||||
}
|
||||
|
||||
TEST(MemcpyUtilsTest, CopyAlignedBlocksWithAlignmentMaxReloads) {
  auto &trace = GetTrace();
  for (size_t dst_offset = 0; dst_offset < 32; ++dst_offset) {
    for (size_t size = 64; size < 768; ++size) {
      trace.Clear();
      // From size = 2 x 32 upwards no byte should be touched more than twice.
      CopyAlignedBlocks<32, 16>(I(dst_offset), I(0), size);
      const char *const write_trace = trace.Write();
      size_t index = 0;
      // Everything in front of the destination must stay untouched.
      for (; index < dst_offset; ++index)
        EXPECT_EQ(write_trace[index], '0');
      // The bytes that follow must be written once or twice, never more.
      // NOTE(review): the destination starts at `dst_offset`, so the written
      // range presumably ends at `dst_offset + size`; only indices below
      // `size` are checked here - confirm whether the tail should be covered.
      for (; index < size; ++index) {
        EXPECT_GE(write_trace[index], '1');
        EXPECT_LE(write_trace[index], '2');
      }
    }
  }
}
|
||||
|
||||
} // namespace __llvm_libc
|
||||
|
|
Loading…
Reference in a new issue