From e471ba3d0122d4c6601029d81c385cb6ebb9d7a4 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Fri, 25 Mar 2022 11:02:29 -0400 Subject: [PATCH] [Object] Add binary format for bundling offloading metadata We need to embed certain metadata along with a binary image when we wish to perform a device-linking job on it. Previously, this metadata was embedded in the section name of the data itself. This worked, but made adding new metadata very difficult and didn't work if the user did any sort of section linking. This patch introduces a custom binary format for bundling offloading metadata with a device object file. This binary format is fundamentally a simple string map table with some additional data and an embedded image. I decided to use a custom format rather than using an existing format (ELF, JSON, etc.) because of the specialty use-case of this. We need a simple binary format that can be concatenated without requiring other external dependencies. This extension will make it easier to extend the linker wrapper's capabilities with whatever data is necessary. Eventually this will allow us to remove all the external arguments passed to the linker wrapper and embed it directly in the host's linker so device linking behaves exactly like host linking. 
Reviewed By: JonChesterfield Differential Revision: https://reviews.llvm.org/D122069 --- llvm/include/llvm/Object/OffloadBinary.h | 148 +++++++++++++++++++++++ llvm/lib/Object/CMakeLists.txt | 1 + llvm/lib/Object/OffloadBinary.cpp | 144 ++++++++++++++++++++++ llvm/unittests/Object/CMakeLists.txt | 1 + llvm/unittests/Object/OffloadingTest.cpp | 65 ++++++++++ 5 files changed, 359 insertions(+) create mode 100644 llvm/include/llvm/Object/OffloadBinary.h create mode 100644 llvm/lib/Object/OffloadBinary.cpp create mode 100644 llvm/unittests/Object/OffloadingTest.cpp diff --git a/llvm/include/llvm/Object/OffloadBinary.h b/llvm/include/llvm/Object/OffloadBinary.h new file mode 100644 index 000000000000..0555c1dc8fb8 --- /dev/null +++ b/llvm/include/llvm/Object/OffloadBinary.h @@ -0,0 +1,148 @@ +//===--- Offloading.h - Utilities for handling offloading code -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the binary format used for bundling device metadata with +// an associated device image. The data can then be stored inside a host object +// file to create a fat binary and read by the linker. This is intended to be a +// thin wrapper around the image itself. If this format becomes sufficiently +// complex it should be moved to a standard binary format like msgpack or ELF. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_BINARYFORMAT_OFFLOADING_H +#define LLVM_BINARYFORMAT_OFFLOADING_H + +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/MemoryBuffer.h" +#include + +namespace llvm { + +/// The producer of the associated offloading image. 
+enum OffloadKind : uint16_t { + OFK_None = 0, + OFK_OpenMP, + OFK_Cuda, + OFK_HIP, +}; + +/// The type of contents the offloading image contains. +enum ImageKind : uint16_t { + IMG_None = 0, + IMG_Object, + IMG_Bitcode, + IMG_Cubin, + IMG_Fatbinary, + IMG_PTX, +}; + +/// A simple binary serialization of an offloading file. We use this format to +/// embed the offloading image into the host executable so it can be extracted +/// and used by the linker. +/// +/// Many of these could be stored in the same section by the time the linker +/// sees it so we mark this information with a header. The version is used to +/// detect ABI stability and the size is used to find other offloading entries +/// that may exist in the same section. All offsets are given as absolute byte +/// offsets from the beginning of the file. +class OffloadBinary { +public: + /// The offloading metadata that will be serialized to a memory buffer. + struct OffloadingImage { + ImageKind TheImageKind; + OffloadKind TheOffloadKind; + uint32_t Flags; + StringMap StringData; + MemoryBufferRef Image; + }; + + /// Attempt to parse the offloading binary stored in \p Data. + static Expected> create(MemoryBufferRef); + + /// Serialize the contents of \p File to a binary buffer to be read later. 
+ static std::unique_ptr write(const OffloadingImage &); + + static uint64_t getAlignment() { return alignof(Header); } + + ImageKind getImageKind() const { return TheEntry->TheImageKind; } + OffloadKind getOffloadKind() const { return TheEntry->TheOffloadKind; } + uint32_t getFlags() const { return TheEntry->Flags; } + uint64_t getSize() const { return TheHeader->Size; } + + StringRef getTriple() const { return getString("triple"); } + StringRef getArch() const { return getString("arch"); } + StringRef getImage() const { + return StringRef(&Buffer[TheEntry->ImageOffset], TheEntry->ImageSize); + } + + StringRef getString(StringRef Key) const { return StringData.lookup(Key); } + +private: + struct Header { + uint8_t Magic[4] = {0x10, 0xFF, 0x10, 0xAD}; // 0x10FF10AD magic bytes. + uint32_t Version = 1; // Version identifier. + uint64_t Size; // Size in bytes of this entire binary. + uint64_t EntryOffset; // Offset of the metadata entry in bytes. + uint64_t EntrySize; // Size of the metadata entry in bytes. + }; + + struct Entry { + ImageKind TheImageKind; // The kind of the image stored. + OffloadKind TheOffloadKind; // The producer of this image. + uint32_t Flags; // Additional flags associated with the image. + uint64_t StringOffset; // Offset in bytes to the string map. + uint64_t NumStrings; // Number of entries in the string map. + uint64_t ImageOffset; // Offset in bytes of the actual binary image. + uint64_t ImageSize; // Size in bytes of the binary image. 
+ }; + + struct StringEntry { + uint64_t KeyOffset; + uint64_t ValueOffset; + }; + + OffloadBinary(const char *Buffer, const Header *TheHeader, + const Entry *TheEntry) + : Buffer(Buffer), TheHeader(TheHeader), TheEntry(TheEntry) { + + const StringEntry *StringMapBegin = + reinterpret_cast(&Buffer[TheEntry->StringOffset]); + for (uint64_t I = 0, E = TheEntry->NumStrings; I != E; ++I) { + StringRef Key = &Buffer[StringMapBegin[I].KeyOffset]; + StringData[Key] = &Buffer[StringMapBegin[I].ValueOffset]; + } + } + + OffloadBinary(const OffloadBinary &Other) = delete; + + /// Map from keys to offsets in the binary. + StringMap StringData; + /// Pointer to the beginning of the memory buffer for convenience. + const char *Buffer; + /// Location of the header within the binary. + const Header *TheHeader; + /// Location of the metadata entries within the binary. + const Entry *TheEntry; +}; + +/// Convert a string \p Name to an image kind. +ImageKind getImageKind(StringRef Name); + +/// Convert an image kind to its string representation. +StringRef getImageKindName(ImageKind Name); + +/// Convert a string \p Name to an offload kind. +OffloadKind getOffloadKind(StringRef Name); + +/// Convert an offload kind to its string representation. 
+StringRef getOffloadKindName(OffloadKind Name); + +} // namespace llvm +#endif diff --git a/llvm/lib/Object/CMakeLists.txt b/llvm/lib/Object/CMakeLists.txt index d5f123576fe2..082521049439 100644 --- a/llvm/lib/Object/CMakeLists.txt +++ b/llvm/lib/Object/CMakeLists.txt @@ -18,6 +18,7 @@ add_llvm_component_library(LLVMObject ModuleSymbolTable.cpp Object.cpp ObjectFile.cpp + OffloadBinary.cpp RecordStreamer.cpp RelocationResolver.cpp SymbolicFile.cpp diff --git a/llvm/lib/Object/OffloadBinary.cpp b/llvm/lib/Object/OffloadBinary.cpp new file mode 100644 index 000000000000..6a64147ecebf --- /dev/null +++ b/llvm/lib/Object/OffloadBinary.cpp @@ -0,0 +1,144 @@ +//===- Offloading.cpp - Utilities for handling offloading code -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/OffloadBinary.h" + +#include "llvm/ADT/StringSwitch.h" +#include "llvm/MC/StringTableBuilder.h" +#include "llvm/Object/Error.h" +#include "llvm/Support/FileOutputBuffer.h" + +using namespace llvm; + +namespace llvm { + +Expected> +OffloadBinary::create(MemoryBufferRef Buf) { + if (Buf.getBufferSize() < sizeof(Header) + sizeof(Entry)) + return errorCodeToError(llvm::object::object_error::parse_failed); + + // Check for 0x10FF10AD magic bytes. 
+ if (!Buf.getBuffer().startswith("\x10\xFF\x10\xAD")) + return errorCodeToError(llvm::object::object_error::parse_failed); + + const char *Start = Buf.getBufferStart(); + const Header *TheHeader = reinterpret_cast(Start); + const Entry *TheEntry = + reinterpret_cast(&Start[TheHeader->EntryOffset]); + + return std::unique_ptr( + new OffloadBinary(Buf.getBufferStart(), TheHeader, TheEntry)); +} + +std::unique_ptr +OffloadBinary::write(const OffloadingImage &OffloadingData) { + // Create a null-terminated string table with all the used strings. + StringTableBuilder StrTab(StringTableBuilder::ELF); + for (auto &KeyAndValue : OffloadingData.StringData) { + StrTab.add(KeyAndValue.getKey()); + StrTab.add(KeyAndValue.getValue()); + } + StrTab.finalize(); + + uint64_t StringEntrySize = + sizeof(StringEntry) * OffloadingData.StringData.size(); + + // Create the header and fill in the offsets. The entry will be directly + // placed after the header in memory. Align the size to the alignment of the + // header so this can be placed contiguously in a single section. + Header TheHeader; + TheHeader.Size = + alignTo(sizeof(Header) + sizeof(Entry) + StringEntrySize + + OffloadingData.Image.getBufferSize() + StrTab.getSize(), + getAlignment()); + TheHeader.EntryOffset = sizeof(Header); + TheHeader.EntrySize = sizeof(Entry); + + // Create the entry using the string table offsets. The string table will be + // placed directly after the entry in memory, and the image after that. 
+ Entry TheEntry; + TheEntry.TheImageKind = OffloadingData.TheImageKind; + TheEntry.TheOffloadKind = OffloadingData.TheOffloadKind; + TheEntry.Flags = OffloadingData.Flags; + TheEntry.StringOffset = sizeof(Header) + sizeof(Entry); + TheEntry.NumStrings = OffloadingData.StringData.size(); + + TheEntry.ImageOffset = + sizeof(Header) + sizeof(Entry) + StringEntrySize + StrTab.getSize(); + TheEntry.ImageSize = OffloadingData.Image.getBufferSize(); + + SmallVector Data; + raw_svector_ostream OS(Data); + OS << StringRef(reinterpret_cast(&TheHeader), sizeof(Header)); + OS << StringRef(reinterpret_cast(&TheEntry), sizeof(Entry)); + for (auto &KeyAndValue : OffloadingData.StringData) { + uint64_t Offset = sizeof(Header) + sizeof(Entry) + StringEntrySize; + StringEntry Map{Offset + StrTab.getOffset(KeyAndValue.getKey()), + Offset + StrTab.getOffset(KeyAndValue.getValue())}; + OS << StringRef(reinterpret_cast(&Map), sizeof(StringEntry)); + } + StrTab.write(OS); + OS << OffloadingData.Image.getBuffer(); + + // Add final padding to required alignment. 
+ assert(TheHeader.Size >= OS.tell() && "Too much data written?"); + OS.write_zeros(TheHeader.Size - OS.tell()); + assert(TheHeader.Size == OS.tell() && "Size mismatch"); + + return MemoryBuffer::getMemBufferCopy(OS.str()); +} + +OffloadKind getOffloadKind(StringRef Name) { + return llvm::StringSwitch(Name) + .Case("openmp", OFK_OpenMP) + .Case("cuda", OFK_Cuda) + .Case("hip", OFK_HIP) + .Default(OFK_None); +} + +StringRef getOffloadKindName(OffloadKind Kind) { + switch (Kind) { + case OFK_OpenMP: + return "openmp"; + case OFK_Cuda: + return "cuda"; + case OFK_HIP: + return "hip"; + default: + return "none"; + } +} + +ImageKind getImageKind(StringRef Name) { + return llvm::StringSwitch(Name) + .Case("o", IMG_Object) + .Case("bc", IMG_Bitcode) + .Case("cubin", IMG_Cubin) + .Case("fatbin", IMG_Fatbinary) + .Case("s", IMG_PTX) + .Default(IMG_None); +} + +StringRef getImageKindName(ImageKind Kind) { + switch (Kind) { + case IMG_Object: + return "o"; + case IMG_Bitcode: + return "bc"; + case IMG_Cubin: + return "cubin"; + case IMG_Fatbinary: + return "fatbin"; + case IMG_PTX: + return "s"; + default: + return ""; + } +} + +} // namespace llvm diff --git a/llvm/unittests/Object/CMakeLists.txt b/llvm/unittests/Object/CMakeLists.txt index 559d02ea0ae5..ea7df2246b4d 100644 --- a/llvm/unittests/Object/CMakeLists.txt +++ b/llvm/unittests/Object/CMakeLists.txt @@ -11,6 +11,7 @@ add_llvm_unittest(ObjectTests ELFTest.cpp MinidumpTest.cpp ObjectFileTest.cpp + OffloadingTest.cpp SymbolSizeTest.cpp SymbolicFileTest.cpp XCOFFObjectFileTest.cpp diff --git a/llvm/unittests/Object/OffloadingTest.cpp b/llvm/unittests/Object/OffloadingTest.cpp new file mode 100644 index 000000000000..c404068c9424 --- /dev/null +++ b/llvm/unittests/Object/OffloadingTest.cpp @@ -0,0 +1,65 @@ +#include "llvm/Object/OffloadBinary.h" + +#include "llvm/Testing/Support/Error.h" +#include "gtest/gtest.h" +#include + +TEST(OffloadingTest, checkOffloadingBinary) { + // Create random data to fill the image. 
+ std::mt19937 Rng(std::random_device{}()); + std::uniform_int_distribution SizeDist(0, 256); + std::uniform_int_distribution KindDist(0); + std::uniform_int_distribution BinaryDist( + std::numeric_limits::min(), std::numeric_limits::max()); + std::uniform_int_distribution StringDist('!', '~'); + std::vector Image(SizeDist(Rng)); + std::generate(Image.begin(), Image.end(), [&]() { return BinaryDist(Rng); }); + std::vector> Strings(SizeDist(Rng)); + for (auto &KeyAndValue : Strings) { + std::string Key(SizeDist(Rng), '\0'); + std::string Value(SizeDist(Rng), '\0'); + + std::generate(Key.begin(), Key.end(), [&]() { return StringDist(Rng); }); + std::generate(Value.begin(), Value.end(), + [&]() { return StringDist(Rng); }); + + KeyAndValue = std::make_pair(Key, Value); + } + + // Create the image. + llvm::StringMap StringData; + for (auto &KeyAndValue : Strings) + StringData[KeyAndValue.first] = KeyAndValue.second; + std::unique_ptr ImageData = + llvm::MemoryBuffer::getMemBuffer( + {reinterpret_cast(Image.data()), Image.size()}, "", false); + + llvm::OffloadBinary::OffloadingImage Data; + Data.TheImageKind = static_cast(KindDist(Rng)); + Data.TheOffloadKind = static_cast(KindDist(Rng)); + Data.Flags = KindDist(Rng); + Data.StringData = StringData; + Data.Image = *ImageData; + + auto BinaryBuffer = llvm::OffloadBinary::write(Data); + + auto BinaryOrErr = llvm::OffloadBinary::create(*BinaryBuffer); + if (!BinaryOrErr) + FAIL(); + + // Make sure we get the same data out. + auto &Binary = **BinaryOrErr; + ASSERT_EQ(Data.TheImageKind, Binary.getImageKind()); + ASSERT_EQ(Data.TheOffloadKind, Binary.getOffloadKind()); + ASSERT_EQ(Data.Flags, Binary.getFlags()); + + for (auto &KeyAndValue : Strings) + ASSERT_TRUE(StringData[KeyAndValue.first] == + Binary.getString(KeyAndValue.first)); + + EXPECT_TRUE(Data.Image.getBuffer() == Binary.getImage()); + + // Ensure the size and alignment of the data is correct. 
+ EXPECT_TRUE(Binary.getSize() % llvm::OffloadBinary::getAlignment() == 0); + EXPECT_TRUE(Binary.getSize() == BinaryBuffer->getBuffer().size()); +}