llvm/flang/runtime/format-implementation.h
Peter Klausler 44ff4df6de [flang] Extension: don't require commas between most edit descriptors in formats
The standard explicitly allows a comma to be omitted between a 'P'
edit descriptor and a following numeric edit descriptor (e.g., 1PE10.1),
and before and after a '/' edit descriptor, but otherwise requires them
between edit descriptors.  Most implementations, however, only require
commas where they prevent ambiguity, and accept things like 1XI10.
This extension is already assumed by the static FORMAT checker in
semantics.  Patch the runtime to behave accordingly.

Differential Revision: https://reviews.llvm.org/D120747
2022-03-01 15:15:59 -08:00

448 lines
14 KiB
C++

//===-- runtime/format-implementation.h -------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Implements out-of-line member functions of template class FormatControl
#ifndef FORTRAN_RUNTIME_FORMAT_IMPLEMENTATION_H_
#define FORTRAN_RUNTIME_FORMAT_IMPLEMENTATION_H_
#include "format.h"
#include "io-stmt.h"
#include "flang/Common/format.h"
#include "flang/Decimal/decimal.h"
#include "flang/Runtime/main.h"
#include <algorithm>
#include <limits>
namespace Fortran::runtime::io {
template <typename CONTEXT>
FormatControl<CONTEXT>::FormatControl(const Terminator &terminator,
const CharType *format, std::size_t formatLength, int maxHeight)
: maxHeight_{static_cast<std::uint8_t>(maxHeight)}, format_{format},
formatLength_{static_cast<int>(formatLength)} {
RUNTIME_CHECK(terminator, maxHeight == maxHeight_);
RUNTIME_CHECK(
terminator, formatLength == static_cast<std::size_t>(formatLength_));
stack_[0].start = offset_;
stack_[0].remaining = Iteration::unlimited; // 13.4(8)
}
template <typename CONTEXT>
int FormatControl<CONTEXT>::GetIntField(
IoErrorHandler &handler, CharType firstCh) {
CharType ch{firstCh ? firstCh : PeekNext()};
if (ch != '-' && ch != '+' && (ch < '0' || ch > '9')) {
handler.SignalError(IostatErrorInFormat,
"Invalid FORMAT: integer expected at '%c'", static_cast<char>(ch));
return 0;
}
int result{0};
bool negate{ch == '-'};
if (negate || ch == '+') {
if (firstCh) {
firstCh = '\0';
} else {
++offset_;
}
ch = PeekNext();
}
while (ch >= '0' && ch <= '9') {
if (result >
std::numeric_limits<int>::max() / 10 - (static_cast<int>(ch) - '0')) {
handler.SignalError(
IostatErrorInFormat, "FORMAT integer field out of range");
return result;
}
result = 10 * result + ch - '0';
if (firstCh) {
firstCh = '\0';
} else {
++offset_;
}
ch = PeekNext();
}
if (negate && (result *= -1) > 0) {
handler.SignalError(
IostatErrorInFormat, "FORMAT integer field out of range");
}
return result;
}
template <typename CONTEXT>
static void HandleControl(CONTEXT &context, char ch, char next, int n) {
MutableModes &modes{context.mutableModes()};
switch (ch) {
case 'B':
if (next == 'Z') {
modes.editingFlags |= blankZero;
return;
}
if (next == 'N') {
modes.editingFlags &= ~blankZero;
return;
}
break;
case 'D':
if (next == 'C') {
modes.editingFlags |= decimalComma;
return;
}
if (next == 'P') {
modes.editingFlags &= ~decimalComma;
return;
}
break;
case 'P':
if (!next) {
modes.scale = n; // kP - decimal scaling by 10**k
return;
}
break;
case 'R':
switch (next) {
case 'N':
modes.round = decimal::RoundNearest;
return;
case 'Z':
modes.round = decimal::RoundToZero;
return;
case 'U':
modes.round = decimal::RoundUp;
return;
case 'D':
modes.round = decimal::RoundDown;
return;
case 'C':
modes.round = decimal::RoundCompatible;
return;
case 'P':
modes.round = executionEnvironment.defaultOutputRoundingMode;
return;
default:
break;
}
break;
case 'X':
if (!next) {
context.HandleRelativePosition(n);
return;
}
break;
case 'S':
if (next == 'P') {
modes.editingFlags |= signPlus;
return;
}
if (!next || next == 'S') {
modes.editingFlags &= ~signPlus;
return;
}
break;
case 'T': {
if (!next) { // Tn
context.HandleAbsolutePosition(n - 1); // convert 1-based to 0-based
return;
}
if (next == 'L' || next == 'R') { // TLn & TRn
context.HandleRelativePosition(next == 'L' ? -n : n);
return;
}
} break;
default:
break;
}
if (next) {
context.SignalError(IostatErrorInFormat,
"Unknown '%c%c' edit descriptor in FORMAT", ch, next);
} else {
context.SignalError(
IostatErrorInFormat, "Unknown '%c' edit descriptor in FORMAT", ch);
}
}
// Locates the next data edit descriptor in the format.
// Handles all repetition counts and control edit descriptors.
// Generally assumes that the format string has survived the common
// format validator gauntlet.
template <typename CONTEXT>
int FormatControl<CONTEXT>::CueUpNextDataEdit(Context &context, bool stop) {
int unlimitedLoopCheck{-1};
// Do repetitions remain on an unparenthesized data edit?
while (height_ > 1 && format_[stack_[height_ - 1].start] != '(') {
offset_ = stack_[height_ - 1].start;
int repeat{stack_[height_ - 1].remaining};
--height_;
if (repeat > 0) {
return repeat;
}
}
while (true) {
std::optional<int> repeat;
bool unlimited{false};
auto maybeReversionPoint{offset_};
CharType ch{GetNextChar(context)};
while (ch == ',' || ch == ':') {
// Skip commas, and don't complain if they're missing; the format
// validator does that.
if (stop && ch == ':') {
return 0;
}
ch = GetNextChar(context);
}
if (ch == '-' || ch == '+' || (ch >= '0' && ch <= '9')) {
repeat = GetIntField(context, ch);
ch = GetNextChar(context);
} else if (ch == '*') {
unlimited = true;
ch = GetNextChar(context);
if (ch != '(') {
ReportBadFormat(context,
"Invalid FORMAT: '*' may appear only before '('",
maybeReversionPoint);
return 0;
}
}
ch = Capitalize(ch);
if (ch == '(') {
if (height_ >= maxHeight_) {
ReportBadFormat(context,
"FORMAT stack overflow: too many nested parentheses",
maybeReversionPoint);
return 0;
}
stack_[height_].start = offset_ - 1; // the '('
RUNTIME_CHECK(context, format_[stack_[height_].start] == '(');
if (unlimited || height_ == 0) {
stack_[height_].remaining = Iteration::unlimited;
unlimitedLoopCheck = offset_ - 1;
} else if (repeat) {
if (*repeat <= 0) {
*repeat = 1; // error recovery
}
stack_[height_].remaining = *repeat - 1;
} else {
stack_[height_].remaining = 0;
}
if (height_ == 1) {
// Subtle point (F'2018 13.4 para 9): tha last parenthesized group
// at height 1 becomes the restart point after control reaches the
// end of the format, including its repeat count.
stack_[0].start = maybeReversionPoint;
}
++height_;
} else if (height_ == 0) {
ReportBadFormat(context, "FORMAT lacks initial '('", maybeReversionPoint);
return 0;
} else if (ch == ')') {
if (height_ == 1) {
if (stop) {
return 0; // end of FORMAT and no data items remain
}
context.AdvanceRecord(); // implied / before rightmost )
}
auto restart{stack_[height_ - 1].start};
if (format_[restart] == '(') {
++restart;
}
if (stack_[height_ - 1].remaining == Iteration::unlimited) {
offset_ = restart;
if (offset_ == unlimitedLoopCheck) {
ReportBadFormat(context,
"Unlimited repetition in FORMAT lacks data edit descriptors",
restart);
}
} else if (stack_[height_ - 1].remaining-- > 0) {
offset_ = restart;
} else {
--height_;
}
} else if (ch == '\'' || ch == '"') {
// Quoted 'character literal'
CharType quote{ch};
auto start{offset_};
while (offset_ < formatLength_ && format_[offset_] != quote) {
++offset_;
}
if (offset_ >= formatLength_) {
ReportBadFormat(context,
"FORMAT missing closing quote on character literal",
maybeReversionPoint);
return 0;
}
++offset_;
std::size_t chars{
static_cast<std::size_t>(&format_[offset_] - &format_[start])};
if (PeekNext() == quote) {
// subtle: handle doubled quote character in a literal by including
// the first in the output, then treating the second as the start
// of another character literal.
} else {
--chars;
}
context.Emit(format_ + start, chars);
} else if (ch == 'H') {
// 9HHOLLERITH
if (!repeat || *repeat < 1 || offset_ + *repeat > formatLength_) {
ReportBadFormat(context, "Invalid width on Hollerith in FORMAT",
maybeReversionPoint);
return 0;
}
context.Emit(format_ + offset_, static_cast<std::size_t>(*repeat));
offset_ += *repeat;
} else if (ch >= 'A' && ch <= 'Z') {
int start{offset_ - 1};
CharType next{'\0'};
if (ch != 'P') { // 1PE5.2 - comma not required (C1302)
CharType peek{Capitalize(PeekNext())};
if (peek >= 'A' && peek <= 'Z') {
if (ch == 'A' /* anticipate F'202X AT editing */ || ch == 'B' ||
ch == 'D' || ch == 'E' || ch == 'R' || ch == 'S' || ch == 'T') {
// Assume a two-letter edit descriptor
next = peek;
++offset_;
} else {
// extension: assume a comma between 'ch' and 'peek'
}
}
}
if ((!next &&
(ch == 'A' || ch == 'I' || ch == 'B' || ch == 'E' || ch == 'D' ||
ch == 'O' || ch == 'Z' || ch == 'F' || ch == 'G' ||
ch == 'L')) ||
(ch == 'E' && (next == 'N' || next == 'S' || next == 'X')) ||
(ch == 'D' && next == 'T')) {
// Data edit descriptor found
offset_ = start;
return repeat && *repeat > 0 ? *repeat : 1;
} else {
// Control edit descriptor
if (ch == 'T') { // Tn, TLn, TRn
repeat = GetIntField(context);
}
HandleControl(context, static_cast<char>(ch), static_cast<char>(next),
repeat ? *repeat : 1);
}
} else if (ch == '/') {
context.AdvanceRecord(repeat && *repeat > 0 ? *repeat : 1);
} else if (ch == '$' || ch == '\\') {
context.mutableModes().nonAdvancing = true;
} else if (ch == '\t' || ch == '\v') {
// Tabs (extension)
// TODO: any other raw characters?
context.Emit(format_ + offset_ - 1, 1);
} else {
ReportBadFormat(
context, "Invalid character in FORMAT", maybeReversionPoint);
return 0;
}
}
}
// Returns the next data edit descriptor
template <typename CONTEXT>
DataEdit FormatControl<CONTEXT>::GetNextDataEdit(
Context &context, int maxRepeat) {
int repeat{CueUpNextDataEdit(context)};
auto start{offset_};
DataEdit edit;
edit.descriptor = static_cast<char>(Capitalize(GetNextChar(context)));
if (edit.descriptor == 'E') {
if (auto next{static_cast<char>(Capitalize(PeekNext()))};
next == 'N' || next == 'S' || next == 'X') {
edit.variation = next;
++offset_;
}
} else if (edit.descriptor == 'D' && Capitalize(PeekNext()) == 'T') {
// DT['iotype'][(v_list)] user-defined derived type I/O
edit.descriptor = DataEdit::DefinedDerivedType;
++offset_;
if (auto quote{static_cast<char>(PeekNext())};
quote == '\'' || quote == '"') {
// Capture the quoted 'iotype'
bool ok{false}, tooLong{false};
for (++offset_; offset_ < formatLength_;) {
auto ch{static_cast<char>(format_[offset_++])};
if (ch == quote &&
(offset_ == formatLength_ ||
static_cast<char>(format_[offset_]) != quote)) {
ok = true;
break; // that was terminating quote
} else if (edit.ioTypeChars >= edit.maxIoTypeChars) {
tooLong = true;
} else {
edit.ioType[edit.ioTypeChars++] = ch;
if (ch == quote) {
++offset_;
}
}
}
if (!ok) {
ReportBadFormat(context, "Unclosed DT'iotype' in FORMAT", start);
} else if (tooLong) {
ReportBadFormat(context, "Excessive DT'iotype' in FORMAT", start);
}
}
if (PeekNext() == '(') {
// Capture the v_list arguments
bool ok{false}, tooLong{false};
for (++offset_; offset_ < formatLength_;) {
int n{GetIntField(context)};
if (edit.vListEntries >= edit.maxVListEntries) {
tooLong = true;
} else {
edit.vList[edit.vListEntries++] = n;
}
auto ch{static_cast<char>(GetNextChar(context))};
if (ch != ',') {
ok = ch == ')';
break;
}
}
if (!ok) {
ReportBadFormat(context, "Unclosed DT(v_list) in FORMAT", start);
} else if (tooLong) {
ReportBadFormat(context, "Excessive DT(v_list) in FORMAT", start);
}
}
}
if (edit.descriptor == 'A') { // width is optional for A[w]
auto ch{PeekNext()};
if (ch >= '0' && ch <= '9') {
edit.width = GetIntField(context);
}
} else if (edit.descriptor != DataEdit::DefinedDerivedType) {
edit.width = GetIntField(context);
}
if (edit.descriptor != DataEdit::DefinedDerivedType && PeekNext() == '.') {
++offset_;
edit.digits = GetIntField(context);
CharType ch{PeekNext()};
if (ch == 'e' || ch == 'E' || ch == 'd' || ch == 'D') {
++offset_;
edit.expoDigits = GetIntField(context);
}
}
edit.modes = context.mutableModes();
// Handle repeated nonparenthesized edit descriptors
edit.repeat = std::min(repeat, maxRepeat); // 0 if maxRepeat==0
if (repeat > maxRepeat) {
stack_[height_].start = start; // after repeat count
stack_[height_].remaining = repeat - edit.repeat;
++height_;
}
return edit;
}
template <typename CONTEXT>
void FormatControl<CONTEXT>::Finish(Context &context) {
CueUpNextDataEdit(context, true /* stop at colon or end of FORMAT */);
}
} // namespace Fortran::runtime::io
#endif // FORTRAN_RUNTIME_FORMAT_IMPLEMENTATION_H_