dbb202c5be
Original-commit: flang-compiler/f18@10a592a591 Reviewed-on: https://github.com/flang-compiler/f18/pull/535 Tree-same-pre-rewrite: false
85 lines
3 KiB
C++
85 lines
3 KiB
C++
// Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
|
|
#ifndef FORTRAN_PARSER_CHAR_SET_H_
|
|
#define FORTRAN_PARSER_CHAR_SET_H_
|
|
|
|
// Sets of distinct characters that are valid in Fortran programs outside
// character literals are encoded as 64-bit integers by mapping them to a 6-bit
// character set encoding in which the case of letters is lost (even if
// mixed case input reached the parser, which it does not). These sets
// need to be suitable for constexprs, so std::bitset<> was not eligible.
|
|
|
|
#include <cinttypes>
|
|
#include <string>
|
|
|
|
namespace Fortran::parser {

// A set of characters packed one bit per character into a 64-bit word.
// Every operation is constexpr so that sets can be built and combined at
// compile time.
//
// Each character is first reduced to a 6-bit code in [0..63]; bit
// (1 << code) then marks membership.  The reduction is lossy, so distinct
// characters can share a bit:
//   - ASCII codes 32..95 map to themselves minus 32;
//   - newline shares the bit of '^';
//   - other control characters, DEL, and 8-bit characters share the bit
//     of '?';
//   - ASCII codes 96..126 fold onto 64..94, so lower-case letters match
//     their upper-case forms, and '`', '{', '|', '}', '~' alias
//     '@', '[', '\', ']', '^' (resp.).  Note that '~' therefore also
//     collides with newline.
struct SetOfChars {
  // Constructs the empty set.
  constexpr SetOfChars() {}

  // Constructs the one-element set holding the encoded form of c.
  // The constructor is not explicit, so a char converts implicitly to a
  // singleton set.
  constexpr SetOfChars(char c) {
    // This is basically the old DECSIX encoding, which maps the
    // 7-bit ASCII codes [32..95] to [0..63].  Only '#', '&', '?', '\', and '^'
    // in that range are unused in Fortran after preprocessing outside
    // character literals.  We repurpose '^' and '?' for newline and unknown
    // characters (resp.), leaving the others alone in case this code might
    // be useful in preprocessing.
    if (c == '\n') {
      // map newline to '^'
      c = '^';
    } else if (c < 32 || c >= 127) {
      // Map other control characters, DEL, and 8-bit characters to '?'.
      // Both comparisons are needed: 8-bit characters are negative when
      // char is signed and >= 128 when it is unsigned.
      c = '?';
    } else if (c >= 96) {
      // Fold [96..126] onto [64..94]: lower-case letters become upper-case
      // (and the five punctuation characters in that range are aliased).
      c -= 32;
    }
    // range is now [32..95]; reduce to [0..63] and use as a shift count
    bits_ = static_cast<std::uint64_t>(1) << (c - 32);
  }

  // Constructs the set of the first n characters of str.  The length is
  // explicit, so str need not be NUL-terminated; n == 0 yields the empty set.
  constexpr SetOfChars(const char str[], std::size_t n) {
    for (std::size_t j{0}; j < n; ++j) {
      bits_ |= SetOfChars{str[j]}.bits_;
    }
  }

  constexpr SetOfChars(const SetOfChars &) = default;
  constexpr SetOfChars(SetOfChars &&) = default;
  constexpr SetOfChars &operator=(const SetOfChars &) = default;
  constexpr SetOfChars &operator=(SetOfChars &&) = default;

  // True when the set has no members.
  constexpr bool empty() const { return bits_ == 0; }

  // True when every member of that is also a member of this set (subset
  // test); consequently Has() of the empty set is always true.
  constexpr bool Has(SetOfChars that) const {
    return (that.bits_ & ~bits_) == 0;
  }

  // Members of either set.
  constexpr SetOfChars Union(SetOfChars that) const {
    return SetOfChars{bits_ | that.bits_};
  }

  // Members common to both sets.
  constexpr SetOfChars Intersection(SetOfChars that) const {
    return SetOfChars{bits_ & that.bits_};
  }

  // Members of this set that are not members of that.
  constexpr SetOfChars Difference(SetOfChars that) const {
    return SetOfChars{bits_ & ~that.bits_};
  }

  // Defined out of line; not visible from this header.
  std::string ToString() const;

private:
  // Wraps an already-encoded bit mask; used by the set-algebra members.
  constexpr SetOfChars(std::uint64_t b) : bits_{b} {}

  // Bit k is set when the character with 6-bit code k is a member.
  std::uint64_t bits_{0};
};
}
|
|
#endif // FORTRAN_PARSER_CHAR_SET_H_
|