llvm/flang/lib/parser/characters.cc
peter klausler 424ec7b35b [flang] Handle empty files gracefully.
Create interval.h.  Use std::size_t instead of bare size_t.  Redefine parser::Name to not be just a bare string.

Break out and rename CharBlock from token-sequence.h for use in the parse tree.

Incremental replacement of name strings with pointers to cooked characters.

Fix case sensitivity problem.

Use new CharBlock encoding to replace strings for real literal constants.

Normalized cooked character stream to lower case.

Simplify parsing now that cooked stream is lower case.  Replace Keyword in parse tree.

Add static_asserts to || and recovery parsers to enforce same result types.

Remove needless TODO comment inserted earlier.

Fix case conversion on prefixed character literals (f90_correct/dc04.f90).

Use CharBlock in user-state.h.

Complete transition from nextChar to nextCh (i.e., always use pointers).

Document extensions.  Begin work on compiler directive lines.

More documentation work.

Reformat prescan.cc.

More work on compiler directive scanning.

Original-commit: flang-compiler/f18@38d0404e16
Reviewed-on: https://github.com/flang-compiler/f18/pull/29
Tree-same-pre-rewrite: false
2018-03-23 13:32:55 -07:00

72 lines
1.6 KiB
C++

#include "characters.h"
#include <cstddef>
#include <optional>
namespace Fortran {
namespace parser {
// Determines how many bytes the UTF-8 encoded character starting at p
// occupies.  Returns 1 for a 7-bit byte, or 2, 3, or 4 when the lead byte
// announces a multi-byte sequence and each following byte has the
// continuation form 10xxxxxx.  Returns an empty optional when the lead byte
// matches none of those patterns or a continuation byte is malformed.
// Continuation bytes are examined in order and the scan stops at the first
// bad one, so nothing after a mismatching byte (e.g., a NUL terminator) is
// read.
std::optional<int> UTF8CharacterBytes(const char *p) {
  const int lead{*p & 0xff};
  if (lead < 0x80) {
    return {1};  // 0xxxxxxx
  }
  int length{0};
  if ((lead & 0xe0) == 0xc0) {
    length = 2;  // 110xxxxx
  } else if ((lead & 0xf0) == 0xe0) {
    length = 3;  // 1110xxxx
  } else if ((lead & 0xf8) == 0xf0) {
    length = 4;  // 11110xxx
  } else {
    return {};  // stray continuation byte or unsupported lead byte
  }
  for (int j{1}; j < length; ++j) {
    if ((p[j] & 0xc0) != 0x80) {
      return {};
    }
  }
  return {length};
}
// Determines how many bytes the EUC-JP encoded character starting at p
// occupies:
//   1  for a 7-bit byte (0x00-0x7f)
//   2  for a lead byte in 0xa1-0xfe followed by a byte in 0xa1-0xfe
//   2  for the lead byte 0x8e followed by a byte in 0xa1-0xdf
//   3  for the lead byte 0x8f followed by two bytes in 0xa1-0xfe
// Returns an empty optional for any other byte sequence.
// Each trailing byte is read only after the bytes before it have been
// validated, so a sequence cut short by a NUL terminator never causes a
// read beyond that terminator.
std::optional<int> EUC_JPCharacterBytes(const char *p) {
  int b1{*p & 0xff};
  if (b1 <= 0x7f) {
    return {1};
  }
  if (b1 >= 0xa1 && b1 <= 0xfe) {
    int b2{p[1] & 0xff};
    if (b2 >= 0xa1 && b2 <= 0xfe) {
      // JIS X 0208 (code set 1)
      return {2};
    }
  } else if (b1 == 0x8e) {
    int b2{p[1] & 0xff};
    if (b2 >= 0xa1 && b2 <= 0xdf) {
      // upper half JIS 0201 (half-width kana, code set 2)
      return {2};
    }
  } else if (b1 == 0x8f) {
    int b2{p[1] & 0xff};
    if (b2 >= 0xa1 && b2 <= 0xfe) {
      // Only look at the third byte once the second is known to be valid;
      // otherwise p[1] may be the terminator and p[2] out of bounds.
      int b3{p[2] & 0xff};
      if (b3 >= 0xa1 && b3 <= 0xfe) {
        // JIS X 0212 (code set 3)
        return {3};
      }
    }
  }
  return {};
}
// Counts the encoded characters in the bytes-long buffer starting at p.
// cbf is called at the start of each character and must return that
// character's width in bytes, or an empty optional if the bytes there do
// not form a valid character.
// Returns the number of characters, or an empty optional when
//   - cbf rejects a character,
//   - cbf reports a non-positive width (which would otherwise prevent the
//     scan from making forward progress), or
//   - the final character's reported width extends past the end of the
//     buffer (a truncated multi-byte sequence).
// An empty buffer (bytes == 0) contains zero characters.
// Note: cbf receives no length limit, so this function cannot by itself stop
// cbf from examining bytes beyond the buffer.
std::optional<std::size_t> CountCharacters(
    const char *p, std::size_t bytes, std::optional<int> (*cbf)(const char *)) {
  std::size_t chars{0};
  std::size_t remaining{bytes};
  while (remaining > 0) {
    std::optional<int> cb{cbf(p)};
    if (!cb.has_value() || *cb <= 0) {
      return {};
    }
    std::size_t width{static_cast<std::size_t>(*cb)};
    if (width > remaining) {
      return {};  // character runs off the end of the buffer
    }
    p += width;
    remaining -= width;
    ++chars;
  }
  return {chars};
}
} // namespace parser
} // namespace Fortran