llvm/lld/ELF/ScriptLexer.h
Colin Cross e387778722 [ELF] Optimize ScriptLexer::getLineNumber by caching the previous line number and offset
getLineNumber() was counting the number of line feeds from the start of
the buffer to the current token. For large linker scripts this became a
performance bottleneck. For one 4MB linker script over 4 minutes was
spent in getLineNumber's StringRef::count.

Store the line number from the last token, and only count the additional
line feeds since the last token.

Reviewed By: MaskRay

Differential Revision: https://reviews.llvm.org/D104137
2021-06-22 15:35:24 -07:00

60 lines
1.4 KiB
C++

//===- ScriptLexer.h --------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLD_ELF_SCRIPT_LEXER_H
#define LLD_ELF_SCRIPT_LEXER_H
#include "lld/Common/LLVM.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/MemoryBuffer.h"
#include <utility>
#include <vector>
namespace lld {
namespace elf {
// Lexer for linker scripts. It exposes a list of tokens (`tokens`), a cursor
// into that list (`pos`), and helpers to inspect and consume tokens.
//
// NOTE(review): only declarations are visible in this header. Unless stated
// otherwise, the per-member descriptions below are inferred from names and
// signatures and should be confirmed against the definitions.
class ScriptLexer {
public:
// Constructs a lexer for the buffer `mb`. `explicit` so that a
// MemoryBufferRef is never implicitly converted into a lexer.
explicit ScriptLexer(MemoryBufferRef mb);
// Reports `msg` as an error. NOTE(review): how the message is decorated
// (e.g. with a location prefix) is decided by the definition -- confirm.
void setError(const Twine &msg);
// Presumably splits `mb` into tokens and records them in `tokens` (and the
// buffer in `mbs`) -- TODO confirm against the definition.
void tokenize(MemoryBufferRef mb);
// Takes a string and returns a string; presumably `s` with leading
// whitespace (and possibly comments) removed -- TODO confirm.
StringRef skipSpace(StringRef s);
// Presumably true once the cursor has run past the last token -- confirm.
bool atEOF();
// Token access helpers. By their names: next() presumably returns the
// token at the cursor and advances, peek()/peek2() presumably look ahead one
// and two tokens without consuming, and skip() presumably discards one
// token -- TODO confirm.
StringRef next();
StringRef peek();
StringRef peek2();
void skip();
// Presumably consumes the next token only if it equals `tok`, returning
// whether it did so -- TODO confirm.
bool consume(StringRef tok);
// Presumably requires the next token to be `expect` and reports an error
// otherwise -- TODO confirm.
void expect(StringRef expect);
// Presumably like consume(), but for a label token built from `tok`
// -- TODO confirm the exact matching rules.
bool consumeLabel(StringRef tok);
// Returns a string describing the current location; presumably used as a
// prefix for diagnostics -- confirm the exact format in the definition.
std::string getCurrentLocation();
// Buffers known to the lexer. NOTE(review): presumably one entry per
// tokenize() call -- confirm.
std::vector<MemoryBufferRef> mbs;
// The token list. NOTE(review): StringRef does not own its data, so these
// presumably point into the buffers in `mbs` -- confirm lifetime.
std::vector<StringRef> tokens;
// Starts out false. NOTE(review): presumably set while an expression is
// being parsed; see maybeSplitExpr() -- confirm.
bool inExpr = false;
// Cursor state, starts at 0. NOTE(review): presumably an index into
// `tokens` -- confirm.
size_t pos = 0;
// Cache used by getLineNumber(): the line number computed for the previous
// token and the offset it was computed at. With these, getLineNumber() only
// has to count the line feeds added since the previous token rather than
// rescanning from the start of the buffer, which was a bottleneck for very
// large linker scripts.
size_t lastLineNumber = 0;
size_t lastLineNumberOffset = 0;
protected:
// Returns a buffer reference; presumably the buffer that contains the
// current token -- TODO confirm. Protected, so available to subclasses.
MemoryBufferRef getCurrentMB();
private:
// NOTE(review): presumably splits the upcoming token into finer-grained
// tokens when `inExpr` is set -- confirm against the definition.
void maybeSplitExpr();
// Presumably returns the text of the line containing the current token
// -- TODO confirm.
StringRef getLine();
// Returns the line number for the current token by counting line feeds,
// resuming from the cached lastLineNumber / lastLineNumberOffset instead of
// counting from the start of the buffer each time.
size_t getLineNumber();
// Presumably returns the column of the current token within its line
// -- TODO confirm.
size_t getColumnNumber();
};
} // namespace elf
} // namespace lld
#endif