// Copyright (c) 2018, NVIDIA CORPORATION.  All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef FORTRAN_PARSER_CHAR_SET_H_
#define FORTRAN_PARSER_CHAR_SET_H_

// Sets of distinct characters that are valid in Fortran programs outside
// character literals are encoded as 64-bit integers by mapping them to a 6-bit
// character set encoding in which the case of letters is lost (even if
// mixed case input reached the parser, which it does not).  These sets
// need to be suitable for constexprs, so std::bitset<> was not eligible.

#include <cstddef>
#include <cstdint>
#include <string>

namespace Fortran::parser {

// A set of characters stored as one bit per 6-bit character code in a
// 64-bit word.  Every operation except ToString() is constexpr.
struct SetOfChars {
  // Constructs the empty set.
  constexpr SetOfChars() {}

  // Constructs the singleton set that holds the 6-bit encoding of c.
  // This constructor is not marked explicit, so a plain char converts
  // implicitly wherever a SetOfChars is expected, e.g. set.Has('x').
  constexpr SetOfChars(char c) {
    // This is basically the old DECSIX encoding, which maps the
    // 7-bit ASCII codes [32..95] to [0..63].  Only '#', '&', '?', '\', and '^'
    // in that range are unused in Fortran after preprocessing outside
    // character literals.  We repurpose '^' and '?' for newline and unknown
    // characters (resp.), leaving the others alone in case this code might
    // be useful in preprocessing.
    // TODO: EBCDIC?
    if (c == '\n') {
      // map newline to '^'
      c = '^';
    } else if (c < 32 || c >= 127) {
      // map other control characters, DEL, and 8-bit characters to '?'
      // (8-bit characters are negative when char is signed and are
      // therefore caught by the first comparison on such targets)
      c = '?';
    } else if (c >= 96) {
      // map lower-case letters to upper-case
      // NOTE: the same offset is applied to every code in [96..126], so
      // the five non-letters in that range (codes 96 and 123..126) are
      // folded onto '@', '[', '\', ']', and '^' respectively.
      c -= 32;
    }
    // range is now [32..95]; reduce to [0..63] and use as a shift count
    // (the 1 must be 64 bits wide, since the shift count can reach 63)
    bits_ = static_cast<std::uint64_t>(1) << (c - 32);
  }

  // Constructs the set of the encodings of the first n characters of str.
  // Exactly n characters are read; a NUL is not a terminator here and is
  // encoded like any other control character.
  constexpr SetOfChars(const char str[], std::size_t n) {
    for (std::size_t j{0}; j < n; ++j) {
      bits_ |= SetOfChars{str[j]}.bits_;
    }
  }

  constexpr SetOfChars(const SetOfChars &) = default;
  constexpr SetOfChars(SetOfChars &&) = default;
  constexpr SetOfChars &operator=(const SetOfChars &) = default;
  constexpr SetOfChars &operator=(SetOfChars &&) = default;

  // True when the set has no members.
  constexpr bool empty() const { return bits_ == 0; }

  // True when every member of `that` is also a member of this set, i.e.
  // `that` is a subset of *this.  An empty `that` always yields true.
  constexpr bool Has(SetOfChars that) const {
    return (that.bits_ & ~bits_) == 0;
  }

  // Members of either set.
  constexpr SetOfChars Union(SetOfChars that) const {
    return SetOfChars{bits_ | that.bits_};
  }

  // Members common to both sets.
  constexpr SetOfChars Intersection(SetOfChars that) const {
    return SetOfChars{bits_ & that.bits_};
  }

  // Members of this set that are not members of `that`.
  constexpr SetOfChars Difference(SetOfChars that) const {
    return SetOfChars{bits_ & ~that.bits_};
  }

  // Declared here only; the definition is not in this header.
  // NOTE(review): presumably renders the members as text -- confirm against
  // the out-of-line definition.
  std::string ToString() const;

private:
  // Wraps an already-encoded bit mask; used by the set-algebra members.
  constexpr SetOfChars(std::uint64_t b) : bits_{b} {}

  // Bit k is set when the character with 6-bit code k is a member.
  std::uint64_t bits_{0};
};
}  // namespace Fortran::parser
#endif  // FORTRAN_PARSER_CHAR_SET_H_