2018-02-16 20:42:17 +01:00
|
|
|
#ifndef FORTRAN_PARSER_TOKEN_SEQUENCE_H_
|
|
|
|
#define FORTRAN_PARSER_TOKEN_SEQUENCE_H_
|
2018-02-13 21:50:47 +01:00
|
|
|
|
|
|
|
// A buffer class capable of holding a contiguous sequence of characters
|
|
|
|
// that have been partitioned into preprocessing tokens.
|
|
|
|
|
|
|
|
#include "provenance.h"
|
|
|
|
#include <cstring>
|
|
|
|
#include <string>
|
|
|
|
#include <utility>
|
|
|
|
#include <vector>
|
|
|
|
|
|
|
|
namespace Fortran {
|
|
|
|
namespace parser {
|
|
|
|
|
|
|
|
// Just a const char pointer with an associated length; does not presume
|
|
|
|
// to own the referenced data. Used to describe buffered tokens and hash
|
|
|
|
// table keys.
|
2018-02-27 23:02:10 +01:00
|
|
|
class ContiguousChars {
|
2018-02-13 21:50:47 +01:00
|
|
|
public:
|
2018-02-27 23:02:10 +01:00
|
|
|
ContiguousChars() {}
|
|
|
|
ContiguousChars(const char *x, size_t n) : interval_{x, n} {}
|
|
|
|
ContiguousChars(const std::string &s) : interval_{s.data(), s.size()} {}
|
|
|
|
ContiguousChars(const ContiguousChars &that) = default;
|
|
|
|
ContiguousChars &operator=(const ContiguousChars &that) = default;
|
2018-02-13 21:50:47 +01:00
|
|
|
|
2018-02-27 23:02:10 +01:00
|
|
|
bool empty() const { return interval_.empty(); }
|
|
|
|
size_t size() const { return interval_.size(); }
|
|
|
|
const char &operator[](size_t j) const { return interval_.start()[j]; }
|
2018-02-13 21:50:47 +01:00
|
|
|
|
|
|
|
bool IsBlank() const;
|
2018-02-27 23:02:10 +01:00
|
|
|
std::string ToString() const {
|
|
|
|
return std::string{interval_.start(), interval_.size()};
|
|
|
|
}
|
2018-02-13 21:50:47 +01:00
|
|
|
|
|
|
|
private:
|
2018-02-27 23:02:10 +01:00
|
|
|
Interval<const char *> interval_{nullptr, 0};
|
2018-02-13 21:50:47 +01:00
|
|
|
};
|
|
|
|
} // namespace parser
|
|
|
|
} // namespace Fortran
|
|
|
|
|
2018-02-27 23:02:10 +01:00
|
|
|
// Specializations to enable std::unordered_map<ContiguousChars, ...>
|
|
|
|
template<> struct std::hash<Fortran::parser::ContiguousChars> {
|
|
|
|
size_t operator()(const Fortran::parser::ContiguousChars &x) const {
|
2018-02-13 21:50:47 +01:00
|
|
|
size_t hash{0}, bytes{x.size()};
|
|
|
|
for (size_t j{0}; j < bytes; ++j) {
|
|
|
|
hash = (hash * 31) ^ x[j];
|
|
|
|
}
|
|
|
|
return hash;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2018-02-27 23:02:10 +01:00
|
|
|
template<> struct std::equal_to<Fortran::parser::ContiguousChars> {
|
|
|
|
bool operator()(const Fortran::parser::ContiguousChars &x,
|
|
|
|
const Fortran::parser::ContiguousChars &y) const {
|
2018-02-13 21:50:47 +01:00
|
|
|
return x.size() == y.size() &&
|
|
|
|
std::memcmp(static_cast<const void *>(&x[0]),
|
|
|
|
static_cast<const void *>(&y[0]), x.size()) == 0;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
namespace Fortran {
|
|
|
|
namespace parser {
|
|
|
|
|
|
|
|
// Buffers a contiguous sequence of characters that has been partitioned into
|
|
|
|
// a sequence of preprocessing tokens with provenances.
|
|
|
|
class TokenSequence {
|
|
|
|
public:
|
|
|
|
TokenSequence() {}
|
|
|
|
TokenSequence(const TokenSequence &that) { Put(that); }
|
|
|
|
TokenSequence(const TokenSequence &that, size_t at, size_t count = 1) {
|
|
|
|
Put(that, at, count);
|
|
|
|
}
|
|
|
|
TokenSequence(TokenSequence &&that)
|
|
|
|
: start_{std::move(that.start_)}, nextStart_{that.nextStart_},
|
|
|
|
char_{std::move(that.char_)}, provenances_{std::move(that.provenances_)} {
|
|
|
|
}
|
|
|
|
TokenSequence(const std::string &s, Provenance p) { Put(s, p); }
|
|
|
|
|
|
|
|
TokenSequence &operator=(const TokenSequence &that) {
|
|
|
|
clear();
|
|
|
|
Put(that);
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
TokenSequence &operator=(TokenSequence &&that) {
|
|
|
|
start_ = std::move(that.start_);
|
|
|
|
nextStart_ = that.nextStart_;
|
|
|
|
char_ = std::move(that.char_);
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
|
2018-02-27 23:02:10 +01:00
|
|
|
ContiguousChars operator[](size_t token) const {
|
2018-02-13 21:50:47 +01:00
|
|
|
return {&char_[start_[token]], TokenBytes(token)};
|
|
|
|
}
|
|
|
|
|
|
|
|
bool empty() const { return start_.empty(); }
|
|
|
|
size_t size() const { return start_.size(); }
|
|
|
|
const char *data() const { return &char_[0]; }
|
|
|
|
void clear();
|
|
|
|
void pop_back();
|
|
|
|
void shrink_to_fit();
|
|
|
|
|
|
|
|
void PutNextTokenChar(char ch, Provenance provenance) {
|
|
|
|
char_.emplace_back(ch);
|
|
|
|
provenances_.Put({provenance, 1});
|
|
|
|
}
|
|
|
|
|
|
|
|
void CloseToken() {
|
|
|
|
start_.emplace_back(nextStart_);
|
|
|
|
nextStart_ = char_.size();
|
|
|
|
}
|
|
|
|
|
|
|
|
void ReopenLastToken() {
|
|
|
|
nextStart_ = start_.back();
|
|
|
|
start_.pop_back();
|
|
|
|
}
|
|
|
|
|
|
|
|
void Put(const TokenSequence &);
|
2018-02-15 22:13:28 +01:00
|
|
|
void Put(const TokenSequence &, ProvenanceRange);
|
2018-02-13 21:50:47 +01:00
|
|
|
void Put(const TokenSequence &, size_t at, size_t tokens = 1);
|
|
|
|
void Put(const char *, size_t, Provenance);
|
2018-02-27 23:02:10 +01:00
|
|
|
void Put(const ContiguousChars &, Provenance);
|
2018-02-13 21:50:47 +01:00
|
|
|
void Put(const std::string &, Provenance);
|
|
|
|
void Put(const std::stringstream &, Provenance);
|
2018-03-01 01:56:10 +01:00
|
|
|
void Emit(CookedSource *) const;
|
2018-02-13 21:50:47 +01:00
|
|
|
std::string ToString() const;
|
2018-02-15 22:13:28 +01:00
|
|
|
Provenance GetTokenProvenance(size_t token, size_t offset = 0) const;
|
|
|
|
ProvenanceRange GetTokenProvenanceRange(
|
|
|
|
size_t token, size_t offset = 0) const;
|
|
|
|
ProvenanceRange GetIntervalProvenanceRange(
|
|
|
|
size_t token, size_t tokens = 1) const;
|
|
|
|
ProvenanceRange GetProvenanceRange() const;
|
2018-02-13 21:50:47 +01:00
|
|
|
|
|
|
|
private:
|
|
|
|
size_t TokenBytes(size_t token) const {
|
|
|
|
return (token + 1 >= start_.size() ? char_.size() : start_[token + 1]) -
|
|
|
|
start_[token];
|
|
|
|
}
|
|
|
|
|
|
|
|
std::vector<size_t> start_;
|
|
|
|
size_t nextStart_{0};
|
|
|
|
std::vector<char> char_;
|
|
|
|
OffsetToProvenanceMappings provenances_;
|
|
|
|
};
|
|
|
|
} // namespace parser
|
|
|
|
} // namespace Fortran
|
2018-02-16 20:42:17 +01:00
|
|
|
#endif // FORTRAN_PARSER_TOKEN_SEQUENCE_H_
|