[llvm][profile] Add padding after binary IDs

Some tests with binary IDs would fail with error: no profile can be merged.
This is because raw profiles could have unaligned headers when emitting binary
IDs. This means padding should be emitted after binary IDs are emitted to
ensure everything else is aligned. This patch adds padding after each binary ID
to ensure the next binary ID size is 8-byte aligned. This also adds extra
checks to ensure we aren't reading corrupted data when printing binary IDs.

Differential Revision: https://reviews.llvm.org/D110365
This commit is contained in:
Leonard Chan 2021-09-28 11:49:37 -07:00
parent 9b944c1843
commit b9f547e8e5
7 changed files with 256 additions and 8 deletions

View file

@ -95,10 +95,13 @@ static size_t RoundUp(size_t size, size_t align) {
* have a fixed length.
*/
static int WriteOneBinaryId(ProfDataWriter *Writer, uint64_t BinaryIdLen,
const uint8_t *BinaryIdData) {
const uint8_t *BinaryIdData,
uint64_t BinaryIdPadding) {
ProfDataIOVec BinaryIdIOVec[] = {
{&BinaryIdLen, sizeof(uint64_t), 1, 0},
{BinaryIdData, sizeof(uint8_t), BinaryIdLen, 0}};
{BinaryIdData, sizeof(uint8_t), BinaryIdLen, 0},
{NULL, sizeof(uint8_t), BinaryIdPadding, 1},
};
if (Writer->Write(Writer, BinaryIdIOVec,
sizeof(BinaryIdIOVec) / sizeof(*BinaryIdIOVec)))
return -1;
@ -130,11 +133,12 @@ static int WriteBinaryIdForNote(ProfDataWriter *Writer,
uint64_t BinaryIdLen = Note->n_descsz;
const uint8_t *BinaryIdData =
(const uint8_t *)(NoteName + RoundUp(Note->n_namesz, 4));
if (Writer != NULL &&
WriteOneBinaryId(Writer, BinaryIdLen, BinaryIdData) == -1)
uint8_t BinaryIdPadding = __llvm_profile_get_num_padding_bytes(BinaryIdLen);
if (Writer != NULL && WriteOneBinaryId(Writer, BinaryIdLen, BinaryIdData,
BinaryIdPadding) == -1)
return -1;
BinaryIdSize = sizeof(BinaryIdLen) + BinaryIdLen;
BinaryIdSize = sizeof(BinaryIdLen) + BinaryIdLen + BinaryIdPadding;
}
return BinaryIdSize;

View file

@ -0,0 +1,82 @@
// Set these requirements to ensure that we have an 8-byte binary ID.
// REQUIRES: linux
//
// This will generate a 20-byte build ID, which requires 4-byte padding.
// RUN: %clang_profgen -Wl,--build-id=sha1 -o %t %s
// RUN: env LLVM_PROFILE_FILE=%t.profraw %run %t
// RUN: %run %t %t.profraw
#include <errno.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
enum ValueKind {
#define VALUE_PROF_KIND(Enumerator, Value, Descr) Enumerator = Value,
#include "profile/InstrProfData.inc"
};
typedef struct __llvm_profile_header {
#define INSTR_PROF_RAW_HEADER(Type, Name, Initializer) Type Name;
#include "profile/InstrProfData.inc"
} __llvm_profile_header;
typedef void *IntPtrT;
typedef struct __llvm_profile_data {
#define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer) Type Name;
#include "profile/InstrProfData.inc"
} __llvm_profile_data;
void bail(const char* str) {
fprintf(stderr, "%s %s\n", str, strerror(errno));
exit(1);
}
void func() {}
int main(int argc, char** argv) {
if (argc == 2) {
int fd = open(argv[1], O_RDONLY);
if (fd == -1)
bail("open");
struct stat st;
if (stat(argv[1], &st))
bail("stat");
uint64_t FileSize = st.st_size;
char* Buf = (char *)mmap(NULL, FileSize, PROT_READ, MAP_SHARED, fd, 0);
if (Buf == MAP_FAILED)
bail("mmap");
__llvm_profile_header *Header = (__llvm_profile_header *)Buf;
if (Header->BinaryIdsSize != 32)
bail("Invalid binary ID size");
char *BinaryIdsStart = Buf + sizeof(__llvm_profile_header);
uint64_t BinaryIdSize = *((uint64_t *)BinaryIdsStart);
if (BinaryIdSize != 20)
bail("Expected a binary ID of size 20");
// Skip the size and the binary ID itself to check padding.
BinaryIdsStart += 8 + 20;
if (*((uint32_t *)BinaryIdsStart))
bail("Found non-zero binary ID padding");
if (munmap(Buf, FileSize))
bail("munmap");
if (close(fd))
bail("close");
} else {
func();
}
return 0;
}

View file

@ -367,6 +367,9 @@ Error RawInstrProfReader<IntPtrT>::readHeader(
return error(instrprof_error::unsupported_version);
BinaryIdsSize = swap(Header.BinaryIdsSize);
if (BinaryIdsSize % sizeof(uint64_t))
return error(instrprof_error::bad_header);
CountersDelta = swap(Header.CountersDelta);
NamesDelta = swap(Header.NamesDelta);
auto DataSize = swap(Header.DataSize);
@ -402,6 +405,10 @@ Error RawInstrProfReader<IntPtrT>::readHeader(
NamesStart = Start + NamesOffset;
ValueDataStart = reinterpret_cast<const uint8_t *>(Start + ValueDataOffset);
const uint8_t *BufferEnd = (const uint8_t *)DataBuffer->getBufferEnd();
if (BinaryIdsStart + BinaryIdsSize > BufferEnd)
return error(instrprof_error::bad_header);
std::unique_ptr<InstrProfSymtab> NewSymtab = std::make_unique<InstrProfSymtab>();
if (Error E = createSymtab(*NewSymtab.get()))
return E;
@ -520,6 +527,10 @@ Error RawInstrProfReader<IntPtrT>::readNextRecord(NamedInstrProfRecord &Record)
return success();
}
static size_t RoundUp(size_t size, size_t align) {
return (size + align - 1) & ~(align - 1);
}
template <class IntPtrT>
Error RawInstrProfReader<IntPtrT>::printBinaryIds(raw_ostream &OS) {
if (BinaryIdsSize == 0)
@ -527,8 +538,21 @@ Error RawInstrProfReader<IntPtrT>::printBinaryIds(raw_ostream &OS) {
OS << "Binary IDs: \n";
const uint8_t *BI = BinaryIdsStart;
while (BI < BinaryIdsStart + BinaryIdsSize) {
const uint8_t *BIEnd = BinaryIdsStart + BinaryIdsSize;
while (BI < BIEnd) {
size_t Remaining = BIEnd - BI;
// There should be enough left to read the binary ID size field.
if (Remaining < sizeof(uint64_t))
return make_error<InstrProfError>(instrprof_error::malformed);
uint64_t BinaryIdLen = swap(*reinterpret_cast<const uint64_t *>(BI));
// There should be enough left to read the binary ID size field, and the
// binary ID.
if (Remaining < sizeof(BinaryIdLen) + BinaryIdLen)
return make_error<InstrProfError>(instrprof_error::malformed);
// Increment by binary id length data type size.
BI += sizeof(BinaryIdLen);
if (BI > (const uint8_t *)DataBuffer->getBufferEnd())
@ -538,8 +562,9 @@ Error RawInstrProfReader<IntPtrT>::printBinaryIds(raw_ostream &OS) {
OS << format("%02x", BI[I]);
OS << "\n";
// Increment by binary id data length.
BI += BinaryIdLen;
// Increment by binary id data length, rounded to the next 8 bytes. This
// accounts for the zero-padding after each build ID.
BI += RoundUp(BinaryIdLen, sizeof(uint64_t));
if (BI > (const uint8_t *)DataBuffer->getBufferEnd())
return make_error<InstrProfError>(instrprof_error::malformed);
}

View file

@ -0,0 +1,72 @@
// Header
//
// INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic())
// INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version())
// INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL))
// INSTR_PROF_RAW_HEADER(uint64_t, DataSize, DataSize)
// INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize)
// INSTR_PROF_RAW_HEADER(uint64_t, NamesSize, NamesSize)
// INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta, (uintptr_t)CountersBegin)
// INSTR_PROF_RAW_HEADER(uint64_t, NamesDelta, (uintptr_t)NamesBegin)
// INSTR_PROF_RAW_HEADER(uint64_t, ValueKindLast, IPVK_Last)
RUN: printf '\201rforpl\377' > %t.profraw
RUN: printf '\7\0\0\0\0\0\0\0' >> %t.profraw
// There will be 2 20-byte binary IDs, so the total Binary IDs size will be 64 bytes.
// 2 * 8 binary ID sizes
// + 2 * 20 binary IDs (of size 20)
// + 2 * 4 binary ID paddings
// --------
// = 64 bytes
RUN: printf '\100\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\2\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\3\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\20\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\0\0\4\0\1\0\0\0' >> %t.profraw
RUN: printf '\0\0\4\0\2\0\0\0' >> %t.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
// Binary IDs - There are only two in this case that are 20 bytes.
RUN: printf '\24\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\0\1\2\3\4\5\6\7' >> %t.profraw
RUN: printf '\0\1\2\3\4\5\6\7' >> %t.profraw
RUN: printf '\0\1\2\3\0\0\0\0' >> %t.profraw
RUN: printf '\24\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\1\1\1\1\1\1\1\1' >> %t.profraw
RUN: printf '\2\2\2\2\2\2\2\2' >> %t.profraw
RUN: printf '\3\3\3\3\0\0\0\0' >> %t.profraw
// Data Section
//
// struct ProfData {
// #define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer) \
// Type Name;
// #include "llvm/ProfileData/InstrProfData.inc"
// };
RUN: printf '\254\275\030\333\114\302\370\134' >> %t.profraw
RUN: printf '\1\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\0\0\4\0\1\0\0\0' >> %t.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\1\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\067\265\035\031\112\165\023\344' >> %t.profraw
RUN: printf '\02\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\xd8\xff\3\0\1\0\0\0' >> %t.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\02\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\023\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\067\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\101\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\7\0foo\1bar\0\0\0\0\0\0\0' >> %t.profraw
// RUN: llvm-profdata show --binary-ids %t.profraw | FileCheck %s
// CHECK: Instrumentation level: Front-end
// CHECK: Binary IDs:
// CHECK-NEXT: 0001020304050607000102030405060700010203
// CHECK-NEXT: 0101010101010101020202020202020203030303

View file

@ -0,0 +1,20 @@
RUN: printf '\201rforpl\377' > %t.profraw
RUN: printf '\7\0\0\0\0\0\0\0' >> %t.profraw
// We should fail on this because the data buffer (profraw file) is not long
// enough to hold this binary IDs size. NOTE that this (combined with the 8-byte
// alignment requirement for binary IDs size) will ensure we can at least read one
// 8-byte size if the binary IDs are provided.
RUN: printf '\8\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\0\0\0\0\0\0\0' >> %t.profraw
// RUN: not llvm-profdata show --binary-ids %t.profraw 2>&1 | FileCheck %s
// CHECK: invalid instrumentation profile data (file header is corrupt)

View file

@ -0,0 +1,20 @@
RUN: printf '\201rforpl\377' > %t.profraw
RUN: printf '\7\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\40\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
// Check for a corrupted size being too large past the end of the file.
RUN: printf '\7\7\7\7\7\7\7\7' >> %t.profraw
RUN: printf '\0\1\2\3\4\5\6\7' >> %t.profraw
RUN: printf '\0\1\2\3\4\5\6\7' >> %t.profraw
RUN: printf '\0\1\2\3\0\0\0\0' >> %t.profraw
// RUN: not llvm-profdata show --binary-ids %t.profraw 2>&1 | FileCheck %s
// CHECK: malformed instrumentation profile data

View file

@ -0,0 +1,25 @@
RUN: printf '\201rforpl\377' > %t.profraw
RUN: printf '\7\0\0\0\0\0\0\0' >> %t.profraw
// We should fail on this because the binary IDs is not a multiple of 8 bytes.
RUN: printf '\77\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
// Binary IDs - There are only two in this case that are 20 bytes.
RUN: printf '\24\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\0\1\2\3\4\5\6\7' >> %t.profraw
RUN: printf '\0\1\2\3\4\5\6\7' >> %t.profraw
RUN: printf '\0\1\2\3\0\0\0\0' >> %t.profraw
RUN: printf '\24\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\1\1\1\1\1\1\1\1' >> %t.profraw
RUN: printf '\2\2\2\2\2\2\2\2' >> %t.profraw
RUN: printf '\3\3\3\3\0\0\0\0' >> %t.profraw
// RUN: not llvm-profdata show --binary-ids %t.profraw 2>&1 | FileCheck %s
// CHECK: invalid instrumentation profile data (file header is corrupt)