[pseudo] A basic implementation of compiling cxx grammar at build time.
The main idea is to compile the cxx grammar at build time, and construct the core pieces (Grammar, LRTable) of the pseudoparser based on the compiled data sources. This is a tiny implementation, which is good for a start: - defines what the public API should look like; - integrates the cxx grammar compilation workflow with the cmake system; - only nonterminal symbols of the C++ grammar are compiled, everything else is still compiled at runtime, and we can opt in more bits in the future; - splits the monolithic clangPseudo library for better layering. Reviewed By: sammccall Differential Revision: https://reviews.llvm.org/D125667
This commit is contained in:
parent
8919447c71
commit
cd2292ef82
|
@ -1,5 +1,7 @@
|
|||
include_directories(include)
|
||||
include_directories(${CMAKE_CURRENT_BINARY_DIR}/include)
|
||||
add_subdirectory(include)
|
||||
add_subdirectory(gen)
|
||||
add_subdirectory(lib)
|
||||
add_subdirectory(tool)
|
||||
add_subdirectory(fuzzer)
|
||||
|
|
10
clang-tools-extra/pseudo/gen/CMakeLists.txt
Normal file
10
clang-tools-extra/pseudo/gen/CMakeLists.txt
Normal file
|
@ -0,0 +1,10 @@
|
|||
set(LLVM_LINK_COMPONENTS Support)
|
||||
|
||||
# pseudo-gen is the build-time tool (see Main.cpp) that compiles a BNF grammar
# into the generated data used to construct the parser's core pieces.
add_clang_executable(pseudo-gen
|
||||
Main.cpp
|
||||
)
|
||||
|
||||
# The tool parses the grammar through clang::pseudo::Grammar, which lives in
# the clangPseudoGrammar library.
target_link_libraries(pseudo-gen
|
||||
PRIVATE
|
||||
clangPseudoGrammar
|
||||
)
|
89
clang-tools-extra/pseudo/gen/Main.cpp
Normal file
89
clang-tools-extra/pseudo/gen/Main.cpp
Normal file
|
@ -0,0 +1,89 @@
|
|||
//===--- Main.cpp - Compile BNF grammar -----------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This is a tool to compile a BNF grammar, it is used by the build system to
|
||||
// generate the necessary data bits to statically construct core pieces (Grammar,
|
||||
// LRTable etc) of the LR parser.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "clang-pseudo/Grammar.h"
|
||||
#include "llvm/ADT/StringExtras.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/FormatVariadic.h"
|
||||
#include "llvm/Support/MemoryBuffer.h"
|
||||
#include <algorithm>
|
||||
|
||||
using llvm::cl::desc;
|
||||
using llvm::cl::init;
|
||||
using llvm::cl::opt;
|
||||
using llvm::cl::values;
|
||||
|
||||
namespace {
|
||||
enum EmitType {
|
||||
EmitSymbolList,
|
||||
EmitGrammarContent,
|
||||
};
|
||||
|
||||
opt<std::string> Grammar("grammar", desc("Parse a BNF grammar file."),
|
||||
init(""));
|
||||
opt<EmitType>
|
||||
Emit(desc("which information to emit:"),
|
||||
values(clEnumValN(EmitSymbolList, "emit-symbol-list",
|
||||
"Print nonterminal symbols (default)"),
|
||||
clEnumValN(EmitGrammarContent, "emit-grammar-content",
|
||||
"Print the BNF grammar content as a string")));
|
||||
// Loads the whole file at Path and returns its contents as a string.
// If the file cannot be read, the failure is reported on stderr and the
// process terminates with exit code 1, so callers never see an error value.
std::string readOrDie(llvm::StringRef Path) {
  auto BufferOrErr = llvm::MemoryBuffer::getFile(Path);
  if (!BufferOrErr) {
    const std::error_code EC = BufferOrErr.getError();
    llvm::errs() << "Error: can't read grammar file '" << Path
                 << "': " << EC.message() << "\n";
    ::exit(1);
  }
  // Copy the text out: the buffer itself goes away when this function returns.
  std::unique_ptr<llvm::MemoryBuffer> &Buffer = *BufferOrErr;
  return Buffer->getBuffer().str();
}
|
||||
} // namespace
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
llvm::cl::ParseCommandLineOptions(argc, argv, "");
|
||||
if (!Grammar.getNumOccurrences()) {
|
||||
llvm::errs() << "Grammar file must be provided!\n";
|
||||
return 1;
|
||||
}
|
||||
|
||||
std::string GrammarText = readOrDie(Grammar);
|
||||
std::vector<std::string> Diags;
|
||||
auto G = clang::pseudo::Grammar::parseBNF(GrammarText, Diags);
|
||||
|
||||
if (!Diags.empty()) {
|
||||
llvm::errs() << llvm::join(Diags, "\n");
|
||||
return 1;
|
||||
}
|
||||
switch (Emit) {
|
||||
|
||||
case EmitSymbolList:
|
||||
for (clang::pseudo::SymbolID ID = 0; ID < G->table().Nonterminals.size();
|
||||
++ID) {
|
||||
std::string Name = G->symbolName(ID).str();
|
||||
// translation-unit -> translation_unit
|
||||
std::replace(Name.begin(), Name.end(), '-', '_');
|
||||
llvm::outs() << (llvm::formatv("NONTERMINAL({0}, {1})\n", Name, ID));
|
||||
}
|
||||
break;
|
||||
case EmitGrammarContent:
|
||||
for (llvm::StringRef Line : llvm::split(GrammarText, '\n')) {
|
||||
llvm::outs() << '"';
|
||||
llvm::outs().write_escaped((Line + "\n").str());
|
||||
llvm::outs() << "\"\n";
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
29
clang-tools-extra/pseudo/include/CMakeLists.txt
Normal file
29
clang-tools-extra/pseudo/include/CMakeLists.txt
Normal file
|
@ -0,0 +1,29 @@
|
|||
# The cxx.bnf grammar file
|
||||
set(cxx_bnf ${CMAKE_CURRENT_SOURCE_DIR}/../lib/cxx.bnf)
|
||||
|
||||
# Generate inc files.
|
||||
# CXXSymbols.inc: the nonterminals of the grammar as NONTERMINAL(name, id)
# entries. The grammar file is listed as a dependency next to the generator,
# otherwise editing cxx.bnf would not regenerate the output.
set(cxx_symbols_inc ${CMAKE_CURRENT_BINARY_DIR}/CXXSymbols.inc)
add_custom_command(OUTPUT ${cxx_symbols_inc}
   COMMAND "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/pseudo-gen"
     --grammar ${cxx_bnf}
     --emit-symbol-list
     > ${cxx_symbols_inc}
   COMMENT "Generating nonterminal symbol file for cxx grammar..."
   DEPENDS pseudo-gen ${cxx_bnf}
   VERBATIM)
|
||||
|
||||
# CXXBNF.inc: the grammar text encoded as C string literals. The grammar file
# is listed as a dependency next to the generator, otherwise editing cxx.bnf
# would not regenerate the output.
set(cxx_bnf_inc ${CMAKE_CURRENT_BINARY_DIR}/CXXBNF.inc)
add_custom_command(OUTPUT ${cxx_bnf_inc}
   COMMAND "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/pseudo-gen"
     --grammar ${cxx_bnf}
     --emit-grammar-content
     > ${cxx_bnf_inc}
   COMMENT "Generating bnf string file for cxx grammar..."
   DEPENDS pseudo-gen ${cxx_bnf}
   VERBATIM)
|
||||
|
||||
# add_custom_command does not create a new target, so we need to define a target
|
||||
# explicitly, so that other targets can depend on it.
|
||||
add_custom_target(cxx_gen
|
||||
DEPENDS ${cxx_symbols_inc} ${cxx_bnf_inc}
|
||||
VERBATIM)
|
51
clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h
Normal file
51
clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h
Normal file
|
@ -0,0 +1,51 @@
|
|||
//===--- CXX.h - Public interfaces for the C++ grammar -----------*- C++-*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file defines public interfaces for the C++ grammar
|
||||
// (pseudo/lib/cxx.bnf). It provides a fast way to access core building pieces
|
||||
// of the LR parser, e.g. Grammar, LRTable, rather than parsing the grammar
|
||||
// file at the runtime.
|
||||
//
|
||||
// We do a compilation of the C++ BNF grammar at build time, and generate
|
||||
// critical data sources. The implementation of the interfaces are based on the
|
||||
// generated data sources.
|
||||
//
|
||||
// FIXME: not everything is fully compiled yet. The implementation of the
|
||||
// interfaces are still parsing the grammar file at the runtime.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef CLANG_PSEUDO_CXX_CXX_H
|
||||
#define CLANG_PSEUDO_CXX_CXX_H
|
||||
|
||||
#include "clang-pseudo/Grammar.h"
|
||||
|
||||
namespace clang {
|
||||
namespace pseudo {
|
||||
class LRTable;
|
||||
|
||||
namespace cxx {
|
||||
// Symbol represents nonterminal symbols in the C++ grammar.
|
||||
// It provides a simple uniform way to access a particular nonterminal.
// The enumerators are not written by hand: CXXSymbols.inc is generated at
// build time and holds one NONTERMINAL(name, id) entry per nonterminal, where
// name is the grammar name with '-' replaced by '_' (e.g. translation-unit
// becomes translation_unit) and id is the nonterminal's SymbolID.
|
||||
enum class Symbol : SymbolID {
|
||||
// Expand each NONTERMINAL(X, Y) entry into the enumerator `X = Y,`.
#define NONTERMINAL(X, Y) X = Y,
|
||||
#include "CXXSymbols.inc"
|
||||
#undef NONTERMINAL
|
||||
};
|
||||
|
||||
// Returns the C++ grammar.
// The grammar is parsed from the embedded cxx.bnf text the first time this is
// called; later calls return the same object.
|
||||
const Grammar &getGrammar();
|
||||
// Returns the corresponding LRTable for the C++ grammar.
// The table is built (as an SLR table, from getGrammar()) the first time this
// is called; later calls return the same object.
|
||||
const LRTable &getLRTable();
|
||||
|
||||
} // namespace cxx
|
||||
|
||||
} // namespace pseudo
|
||||
} // namespace clang
|
||||
|
||||
#endif // CLANG_PSEUDO_CXX_CXX_H
|
|
@ -1,3 +1,6 @@
|
|||
add_subdirectory(cxx)
|
||||
add_subdirectory(grammar)
|
||||
|
||||
set(LLVM_LINK_COMPONENTS Support)
|
||||
|
||||
add_clang_library(clangPseudo
|
||||
|
@ -5,15 +8,11 @@ add_clang_library(clangPseudo
|
|||
DirectiveTree.cpp
|
||||
Forest.cpp
|
||||
GLR.cpp
|
||||
Grammar.cpp
|
||||
GrammarBNF.cpp
|
||||
Lex.cpp
|
||||
LRGraph.cpp
|
||||
LRTable.cpp
|
||||
LRTableBuild.cpp
|
||||
Token.cpp
|
||||
|
||||
LINK_LIBS
|
||||
clangBasic
|
||||
clangLex
|
||||
clangPseudoGrammar
|
||||
)
|
||||
|
|
9
clang-tools-extra/pseudo/lib/cxx/CMakeLists.txt
Normal file
9
clang-tools-extra/pseudo/lib/cxx/CMakeLists.txt
Normal file
|
@ -0,0 +1,9 @@
|
|||
# clangPseudoCXX provides the C++ grammar entry points implemented in CXX.cpp.
add_clang_library(clangPseudoCXX
|
||||
CXX.cpp
|
||||
|
||||
# CXX.cpp includes the generated CXXBNF.inc, and CXX.h the generated
# CXXSymbols.inc, so the cxx_gen target that produces them must be built first.
DEPENDS
|
||||
cxx_gen
|
||||
|
||||
LINK_LIBS
|
||||
clangPseudoGrammar
|
||||
)
|
34
clang-tools-extra/pseudo/lib/cxx/CXX.cpp
Normal file
34
clang-tools-extra/pseudo/lib/cxx/CXX.cpp
Normal file
|
@ -0,0 +1,34 @@
|
|||
//===--- CXX.cpp - Define public interfaces for C++ grammar ---------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "clang-pseudo/cxx/CXX.h"
|
||||
#include "clang-pseudo/LRTable.h"
|
||||
|
||||
namespace clang {
|
||||
namespace pseudo {
|
||||
namespace cxx {
|
||||
|
||||
static const char *CXXBNF =
|
||||
#include "CXXBNF.inc"
|
||||
;
|
||||
|
||||
// Returns the grammar described by the embedded cxx.bnf text.
// The grammar is parsed on the first call and kept for the rest of the
// program (it is never deleted). Parsing is expected to yield no diagnostics.
const Grammar &getGrammar() {
  static std::vector<std::string> ParseDiags;
  static const Grammar &Parsed =
      *Grammar::parseBNF(CXXBNF, ParseDiags).release();
  assert(ParseDiags.empty());
  return Parsed;
}
|
||||
|
||||
// Returns the LR table for the C++ grammar.
// The table is built with LRTable::buildSLR from getGrammar() on the first
// call and kept for the rest of the program (it is never deleted).
const LRTable &getLRTable() {
  static const LRTable &SLRTable =
      *new LRTable(LRTable::buildSLR(getGrammar()));
  return SLRTable;
}
|
||||
|
||||
} // namespace cxx
|
||||
} // namespace pseudo
|
||||
} // namespace clang
|
18
clang-tools-extra/pseudo/lib/grammar/CMakeLists.txt
Normal file
18
clang-tools-extra/pseudo/lib/grammar/CMakeLists.txt
Normal file
|
@ -0,0 +1,18 @@
|
|||
set(LLVM_LINK_COMPONENTS Support)
|
||||
|
||||
# This library intends to keep its dependencies as minimal as possible; it is a base
|
||||
# library of the cxx generator, to avoid creating long dep paths in the build
|
||||
# graph.
|
||||
add_clang_library(clangPseudoGrammar
|
||||
Grammar.cpp
|
||||
GrammarBNF.cpp
|
||||
LRGraph.cpp
|
||||
LRTable.cpp
|
||||
LRTableBuild.cpp
|
||||
|
||||
# FIXME: can we get rid of the clangBasic dependency? We need it for the
|
||||
# clang::tok::getTokenName and clang::tok::getPunctuatorSpelling functions, we
|
||||
# could consider reimplementing these functions.
|
||||
LINK_LIBS
|
||||
clangBasic
|
||||
)
|
Loading…
Reference in a new issue