2018-05-01 21:50:34 +02:00
|
|
|
// Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
|
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
2018-02-27 23:02:10 +01:00
|
|
|
#ifndef FORTRAN_PARSER_CHARACTERS_H_
|
|
|
|
#define FORTRAN_PARSER_CHARACTERS_H_
|
|
|
|
|
|
|
|
// Define some character classification predicates and
// conversions here to avoid dependences upon <cctype> and
// also to accommodate Fortran tokenization.
|
2018-04-19 22:51:25 +02:00
|
|
|
// TODO: EBCDIC?
|
2018-02-27 23:02:10 +01:00
|
|
|
|
2018-03-20 18:59:07 +01:00
|
|
|
#include <cstddef>
|
2018-02-27 23:02:10 +01:00
|
|
|
#include <optional>
|
|
|
|
#include <string>
|
|
|
|
|
2018-05-02 22:48:12 +02:00
|
|
|
namespace Fortran::parser {
|
2018-02-27 23:02:10 +01:00
|
|
|
|
2018-03-01 01:56:10 +01:00
|
|
|
// Source character encodings named by this header.
enum class Encoding {
  UTF8,
  EUC_JP,
};
|
|
|
|
|
2018-04-03 00:51:04 +02:00
|
|
|
// True when ch lies in the range 'A' through 'Z'.
// The range test relies on those letters being contiguous in the
// execution character set.
inline constexpr bool IsUpperCaseLetter(char ch) {
  return ch >= 'A' && ch <= 'Z';
}
|
|
|
|
|
2018-04-03 00:51:04 +02:00
|
|
|
// True when ch lies in the range 'a' through 'z'.
// The range test relies on those letters being contiguous in the
// execution character set.
inline constexpr bool IsLowerCaseLetter(char ch) {
  return ch >= 'a' && ch <= 'z';
}
|
|
|
|
|
2018-04-03 00:51:04 +02:00
|
|
|
inline constexpr bool IsLetter(char ch) {
|
2018-02-27 23:02:10 +01:00
|
|
|
return IsUpperCaseLetter(ch) || IsLowerCaseLetter(ch);
|
|
|
|
}
|
|
|
|
|
2018-04-03 00:51:04 +02:00
|
|
|
// True when ch lies in the range '0' through '9'.
inline constexpr bool IsDecimalDigit(char ch) {
  return '0' <= ch && ch <= '9';
}
|
2018-02-27 23:02:10 +01:00
|
|
|
|
2018-04-03 00:51:04 +02:00
|
|
|
// True when ch is a hexadecimal digit: '0'..'9', 'A'..'F', or 'a'..'f'.
inline constexpr bool IsHexadecimalDigit(char ch) {
  return (ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'F') ||
      (ch >= 'a' && ch <= 'f');
}
|
|
|
|
|
2018-04-03 00:51:04 +02:00
|
|
|
// True when ch lies in the range '0' through '7'.
inline constexpr bool IsOctalDigit(char ch) {
  return '0' <= ch && ch <= '7';
}
|
2018-02-27 23:02:10 +01:00
|
|
|
|
2018-04-03 00:51:04 +02:00
|
|
|
inline constexpr bool IsLegalIdentifierStart(char ch) {
|
2018-02-27 23:02:10 +01:00
|
|
|
return IsLetter(ch) || ch == '_' || ch == '@' || ch == '$';
|
|
|
|
}
|
|
|
|
|
2018-04-03 00:51:04 +02:00
|
|
|
inline constexpr bool IsLegalInIdentifier(char ch) {
|
2018-02-27 23:02:10 +01:00
|
|
|
return IsLegalIdentifierStart(ch) || IsDecimalDigit(ch);
|
|
|
|
}
|
|
|
|
|
2018-04-03 00:51:04 +02:00
|
|
|
inline constexpr char ToLowerCaseLetter(char ch) {
|
2018-02-27 23:02:10 +01:00
|
|
|
return IsUpperCaseLetter(ch) ? ch - 'A' + 'a' : ch;
|
|
|
|
}
|
|
|
|
|
2018-04-03 00:51:04 +02:00
|
|
|
inline constexpr char ToLowerCaseLetter(char &&ch) {
|
2018-02-27 23:02:10 +01:00
|
|
|
return IsUpperCaseLetter(ch) ? ch - 'A' + 'a' : ch;
|
|
|
|
}
|
|
|
|
|
2018-04-03 00:51:04 +02:00
|
|
|
inline std::string ToLowerCaseLetters(const std::string &str) {
|
2018-02-27 23:02:10 +01:00
|
|
|
std::string lowered{str};
|
|
|
|
for (char &ch : lowered) {
|
|
|
|
ch = ToLowerCaseLetter(ch);
|
|
|
|
}
|
|
|
|
return lowered;
|
|
|
|
}
|
|
|
|
|
2018-04-03 00:51:04 +02:00
|
|
|
inline constexpr char ToUpperCaseLetter(char ch) {
|
2018-03-01 01:56:10 +01:00
|
|
|
return IsLowerCaseLetter(ch) ? ch - 'a' + 'A' : ch;
|
|
|
|
}
|
|
|
|
|
2018-04-03 00:51:04 +02:00
|
|
|
inline constexpr char ToUpperCaseLetter(char &&ch) {
|
2018-03-01 01:56:10 +01:00
|
|
|
return IsLowerCaseLetter(ch) ? ch - 'a' + 'A' : ch;
|
|
|
|
}
|
|
|
|
|
2018-04-19 22:51:25 +02:00
|
|
|
inline std::string ToUpperCaseLetters(const std::string &str) {
|
2018-03-01 01:56:10 +01:00
|
|
|
std::string raised{str};
|
|
|
|
for (char &ch : raised) {
|
|
|
|
ch = ToUpperCaseLetter(ch);
|
|
|
|
}
|
|
|
|
return raised;
|
|
|
|
}
|
|
|
|
|
2018-04-03 00:51:04 +02:00
|
|
|
inline constexpr bool IsSameApartFromCase(char x, char y) {
|
2018-03-01 01:56:10 +01:00
|
|
|
return ToLowerCaseLetter(x) == ToLowerCaseLetter(y);
|
|
|
|
}
|
|
|
|
|
2018-04-03 00:51:04 +02:00
|
|
|
// Offset of ch from '0'.  No check is made that ch is actually a digit.
inline constexpr char DecimalDigitValue(char ch) {
  return ch - '0';
}
|
2018-02-27 23:02:10 +01:00
|
|
|
|
2018-04-03 00:51:04 +02:00
|
|
|
inline constexpr char HexadecimalDigitValue(char ch) {
|
2018-02-27 23:02:10 +01:00
|
|
|
return IsUpperCaseLetter(ch)
|
|
|
|
? ch - 'A' + 10
|
|
|
|
: IsLowerCaseLetter(ch) ? ch - 'a' + 10 : DecimalDigitValue(ch);
|
|
|
|
}
|
|
|
|
|
2018-04-19 22:51:25 +02:00
|
|
|
// Given the character that follows a backslash, returns the character that
// the escape sequence denotes: b, f, n, r, t, and v map to the matching
// control characters, while a double quote, an apostrophe, and a backslash
// map to themselves.  Any other character yields std::nullopt.
inline constexpr std::optional<char> BackslashEscapeValue(char ch) {
  switch (ch) {
  // case 'a': return {'\a'};  // pgf90 has no \a
  case 'b': return {'\b'};
  case 'f': return {'\f'};
  case 'n': return {'\n'};
  case 'r': return {'\r'};
  case 't': return {'\t'};
  case 'v': return {'\v'};
  case '"':
  case '\'':
  case '\\': return {ch};
  default: return std::nullopt;
  }
}
|
|
|
|
|
2018-04-19 22:51:25 +02:00
|
|
|
// Inverse of BackslashEscapeValue(): given a character value, returns the
// character to write after a backslash to denote it.  The control characters
// \b, \f, \n, \r, \t, and \v map to their letters, while a double quote, an
// apostrophe, and a backslash map to themselves.  Any other character yields
// std::nullopt.
inline constexpr std::optional<char> BackslashEscapeChar(char ch) {
  switch (ch) {
  // case '\a': return {'a'};  // pgf90 has no \a
  case '\b': return {'b'};
  case '\f': return {'f'};
  case '\n': return {'n'};
  case '\r': return {'r'};
  case '\t': return {'t'};
  case '\v': return {'v'};
  case '"':
  case '\'':
  case '\\': return {ch};
  default: return std::nullopt;
  }
}
|
2018-03-01 01:56:10 +01:00
|
|
|
|
|
|
|
template<typename NORMAL, typename INSERTED>
|
2018-11-01 19:18:12 +01:00
|
|
|
void EmitQuotedChar(char32_t ch, const NORMAL &emit, const INSERTED &insert,
|
2018-03-01 01:56:10 +01:00
|
|
|
bool doubleDoubleQuotes = true, bool doubleBackslash = true) {
|
|
|
|
if (ch == '"') {
|
|
|
|
if (doubleDoubleQuotes) {
|
|
|
|
insert('"');
|
|
|
|
}
|
|
|
|
emit('"');
|
|
|
|
} else if (ch == '\\') {
|
|
|
|
if (doubleBackslash) {
|
|
|
|
insert('\\');
|
|
|
|
}
|
|
|
|
emit('\\');
|
2018-11-14 20:12:09 +01:00
|
|
|
} else if (ch < ' ' || (ch >= 0x80 && ch <= 0xff)) {
|
2018-03-01 01:56:10 +01:00
|
|
|
insert('\\');
|
2018-12-05 22:03:39 +01:00
|
|
|
if (std::optional<char> escape{BackslashEscapeChar(ch)}) {
|
2018-03-01 01:56:10 +01:00
|
|
|
emit(*escape);
|
|
|
|
} else {
|
|
|
|
// octal escape sequence
|
|
|
|
insert('0' + ((ch >> 6) & 3));
|
|
|
|
insert('0' + ((ch >> 3) & 7));
|
|
|
|
insert('0' + (ch & 7));
|
|
|
|
}
|
2018-11-01 19:18:12 +01:00
|
|
|
} else if (ch <= 0x7f) {
|
2018-03-01 01:56:10 +01:00
|
|
|
emit(ch);
|
2018-11-01 19:18:12 +01:00
|
|
|
} else if (ch <= 0x7ff) {
|
|
|
|
emit(0xc0 | ((ch >> 6) & 0x1f));
|
|
|
|
emit(0x80 | (ch & 0x3f));
|
|
|
|
} else if (ch <= 0xffff) {
|
|
|
|
emit(0xe0 | ((ch >> 12) & 0x0f));
|
|
|
|
emit(0x80 | ((ch >> 6) & 0x3f));
|
|
|
|
emit(0x80 | (ch & 0x3f));
|
|
|
|
} else {
|
|
|
|
emit(0xf0 | ((ch >> 18) & 0x07));
|
|
|
|
emit(0x80 | ((ch >> 12) & 0x3f));
|
|
|
|
emit(0x80 | ((ch >> 6) & 0x3f));
|
|
|
|
emit(0x80 | (ch & 0x3f));
|
2018-03-01 01:56:10 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-07-20 00:35:55 +02:00
|
|
|
// Overloads for 8-bit, 16-bit, and 32-bit strings; declared here and defined
// outside this header.  The two flags default to true.
// NOTE(review): presumably each returns the quoted form of its argument and
// the flags carry the same meaning as in EmitQuotedChar() -- confirm against
// the definitions.
std::string QuoteCharacterLiteral(const std::string &,
    bool doubleDoubleQuotes = true, bool doubleBackslash = true);
std::string QuoteCharacterLiteral(const std::u16string &,
    bool doubleDoubleQuotes = true, bool doubleBackslash = true);
std::string QuoteCharacterLiteral(const std::u32string &,
    bool doubleDoubleQuotes = true, bool doubleBackslash = true);
|
2018-07-07 00:12:33 +02:00
|
|
|
|
2018-03-01 01:56:10 +01:00
|
|
|
// Declarations only; the definitions live outside this header.
// NOTE(review): the two *CharacterBytes functions presumably report how many
// bytes the encoded character at the pointer occupies, with no value for an
// invalid sequence -- confirm against the definitions.
std::optional<int> UTF8CharacterBytes(const char *);
std::optional<int> EUC_JPCharacterBytes(const char *);

// Takes a byte buffer, its length in bytes, and a pointer to a function with
// the signature of the two *CharacterBytes functions above.
std::optional<std::size_t> CountCharacters(
    const char *, std::size_t bytes, std::optional<int> (*)(const char *));

// NOTE(review): presumably yields no value when the input is not valid
// UTF-8 -- confirm against the definition.
std::optional<std::u32string> DecodeUTF8(const std::string &);
|
2018-10-25 14:55:23 +02:00
|
|
|
}
|
2018-02-27 23:02:10 +01:00
|
|
|
#endif // FORTRAN_PARSER_CHARACTERS_H_
|