llvm/flang/lib/parser/source.cc
Tim Keith 18cee3e8e6 [flang] Add copyright notices.
For source files (C++, Fortran, CMake) add copyright and license.
For documentation files add just copyright.

Original-commit: flang-compiler/f18@38381aed83
Reviewed-on: https://github.com/flang-compiler/f18/pull/74
2018-05-01 12:50:34 -07:00

262 lines
7.5 KiB
C++

// Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "source.h"
#include "char-buffer.h"
#include "idioms.h"
#include <algorithm>
#include <cerrno>
#include <cstddef>
#include <cstring>
#include <fcntl.h>
#include <memory>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <vector>
// TODO: Port to Windows &c.
namespace Fortran {
namespace parser {
// When true, ReadFile() first tries to mmap() regular files instead of
// read()ing them into a heap buffer.
static constexpr bool useMMap{true};
// Smallest file size (in bytes) for which mapping is attempted.  It is a byte
// count compared against a std::size_t, so it is declared as std::size_t.
// A value of 1 imposes no real minimum but still keeps zero-length files
// (whose last byte cannot be inspected) out of the mmap() path.
static constexpr std::size_t minMapFileBytes{1};
// Mapping is attempted only while openFileDescriptors does not exceed this.
static constexpr int maxMapOpenFileDescriptors{100};
// Count of descriptors opened by SourceFile::Open() and not yet closed.
static int openFileDescriptors{0};
// Destruction releases whatever Close() releases: the content buffer or
// mapping, and the file descriptor if one is still open.
SourceFile::~SourceFile() {
  Close();
}
// Builds the table of byte offsets at which each line of source[0..bytes)
// begins.  An empty input yields an empty table; otherwise the input must
// end with a newline, which guarantees that every memchr() below succeeds.
static std::vector<std::size_t> FindLineStarts(
    const char *source, std::size_t bytes) {
  std::vector<std::size_t> starts;
  if (bytes == 0) {
    return starts;
  }
  CHECK(source[bytes - 1] == '\n' && "missing ultimate newline");
  const char *const end{source + bytes};
  const char *line{source};
  while (true) {
    starts.push_back(static_cast<std::size_t>(line - source));
    const void *found{std::memchr(static_cast<const void *>(line), '\n',
        static_cast<std::size_t>(end - line))};
    // The next line begins just past the newline that ends this one.
    line = static_cast<const char *>(found) + 1;
    if (!(line < end)) {
      break;
    }
  }
  starts.shrink_to_fit();
  return starts;
}
// Returns the directory portion of path: everything before its last '/'.
// A path that contains no '/' names a file in the current directory, so "."
// is returned (rather than the file name itself, which is not a directory).
// A path whose only '/' is the leading one (e.g. "/file") yields an empty
// string, so that directory + '/' + name is still rooted at "/".
std::string DirectoryName(std::string path) {
  const auto lastSlash = path.rfind('/');
  if (lastSlash == std::string::npos) {
    return ".";
  }
  return path.substr(0, lastSlash);
}
// Resolves name against the directories in searchPath, in order, and returns
// the first "<dir>/<name>" that stat() reports as existing and not being a
// directory.  Empty names, "-", and absolute paths are returned untouched,
// as is any name for which no candidate is found.
std::string LocateSourceFile(
    std::string name, const std::vector<std::string> &searchPath) {
  const bool searchable{!name.empty() && name != "-" && name[0] != '/'};
  if (searchable) {
    for (const std::string &directory : searchPath) {
      std::string candidate{directory};
      candidate += '/';
      candidate += name;
      struct stat info;
      if (stat(candidate.c_str(), &info) != 0) {
        continue;  // nothing there (or not accessible)
      }
      if (S_ISDIR(info.st_mode)) {
        continue;  // a directory is never a source file
      }
      return candidate;
    }
  }
  return name;
}
// Deletes every '\r' from buffer[0..bytes) by compacting the remaining
// characters toward the front of the buffer, and returns the number of
// characters that remain.  Bytes past the returned length are unspecified.
static std::size_t RemoveCarriageReturns(char *buffer, std::size_t bytes) {
  std::size_t wrote{0};
  char *p{buffer};
  while (bytes > 0) {
    void *vp{static_cast<void *>(p)};
    void *crvp{std::memchr(vp, '\r', bytes)};
    char *crcp{static_cast<char *>(crvp)};
    if (crcp == nullptr) {
      // No more carriage returns: slide the tail down and finish.
      // The source and destination ranges can overlap (or coincide), so
      // memmove() is required here; memcpy() would be undefined behavior.
      std::memmove(buffer + wrote, p, bytes);
      wrote += bytes;
      break;
    }
    // Keep the characters that precede the carriage return, then skip it.
    std::size_t chunk = crcp - p;
    std::memmove(buffer + wrote, p, chunk);
    wrote += chunk;
    p += chunk + 1;
    bytes -= chunk + 1;
  }
  return wrote;
}
bool SourceFile::Open(std::string path, std::stringstream *error) {
Close();
path_ = path;
std::string errorPath{"'"s + path + "'"};
errno = 0;
fileDescriptor_ = open(path.c_str(), O_RDONLY);
if (fileDescriptor_ < 0) {
*error << "could not open " << errorPath << ": " << std::strerror(errno);
return false;
}
++openFileDescriptors;
return ReadFile(errorPath, error);
}
// Loads the content of standard input via ReadFile().  Any previously held
// file is closed first.  Descriptor 0 is never counted in
// openFileDescriptors, and Close() only closes descriptors greater than 0.
bool SourceFile::ReadStandardInput(std::stringstream *error) {
  Close();
  fileDescriptor_ = STDIN_FILENO;
  path_ = "standard input";
  return ReadFile(path_, error);
}
// Loads the content of the already-open fileDescriptor_ into this object,
// normalizing it so that it contains no carriage returns and (unless empty)
// ends with a newline, and then builds the line-start table.
//
// errorPath is the name to use for the file in diagnostics.
// Returns true on success.  On failure, a message is written to *error,
// Close() is called, and false is returned.
//
// Regular files are mapped into memory when possible; everything else (and
// any file for which mapping does not work out) is read into a heap buffer.
bool SourceFile::ReadFile(std::string errorPath, std::stringstream *error) {
  struct stat statbuf;
  if (fstat(fileDescriptor_, &statbuf) != 0) {
    *error << "fstat failed on " << errorPath << ": " << std::strerror(errno);
    Close();
    return false;
  }
  if (S_ISDIR(statbuf.st_mode)) {
    *error << errorPath << " is a directory";
    Close();
    return false;
  }

  // Try to map the source file into the process' address space, provided
  // that it is a regular file of at least minMapFileBytes bytes and that
  // not too many file descriptors are already open (a mapped file keeps
  // its descriptor open until Close()).
  if (useMMap && S_ISREG(statbuf.st_mode)) {
    bytes_ = static_cast<std::size_t>(statbuf.st_size);
    if (bytes_ >= minMapFileBytes &&
        openFileDescriptors <= maxMapOpenFileDescriptors) {
      // bytes_ may shrink below when carriage returns are removed, so the
      // length that was actually mapped is remembered separately.
      const std::size_t mappedBytes{bytes_};
      void *vp = mmap(
          nullptr, mappedBytes, PROT_READ, MAP_SHARED, fileDescriptor_, 0);
      if (vp != MAP_FAILED) {
        const char *mapped{static_cast<const char *>(vp)};
        if (mapped[mappedBytes - 1] == '\n' &&
            std::memchr(vp, '\r', mappedBytes) == nullptr) {
          // Already in normal form: use the read-only mapping directly.
          content_ = mapped;
          isMemoryMapped_ = true;
          lineStart_ = FindLineStarts(content_, bytes_);
          return true;
        }
        // The file needs to have its line endings normalized to simple
        // newlines.  Release the shared read-only mapping and map the file
        // again privately so that it can be rewritten in place.  (Without
        // MAP_FIXED a new mmap() does not replace an existing mapping, so
        // the first one must be unmapped explicitly or it would leak.)
        munmap(vp, mappedBytes);
        vp = mmap(nullptr, mappedBytes, PROT_READ | PROT_WRITE, MAP_PRIVATE,
            fileDescriptor_, 0);
        if (vp != MAP_FAILED) {
          auto mutableContent = static_cast<char *>(vp);
          bytes_ = RemoveCarriageReturns(mutableContent, mappedBytes);
          if (bytes_ > 0) {
            if (mutableContent[bytes_ - 1] == '\n' ||
                (bytes_ & 0xfff) != 0 /* don't cross into next page */) {
              if (mutableContent[bytes_ - 1] != '\n') {
                // Append a final newline.
                mutableContent[bytes_++] = '\n';
              }
              // An appended newline stays within the last mapped page, so
              // protecting the mapped length covers all of the content.
              bool isNowReadOnly{mprotect(vp, mappedBytes, PROT_READ) == 0};
              CHECK(isNowReadOnly);
              content_ = mutableContent;
              isMemoryMapped_ = true;
              // NOTE(review): Close() unmaps using bytes_, which can be
              // smaller than mappedBytes here; recording the mapped length
              // would need a new data member.
              lineStart_ = FindLineStarts(content_, bytes_);
              return true;
            }
          }
          // Could not normalize in place; fall back to reading the file.
          munmap(vp, mappedBytes);
        }
        content_ = nullptr;
      }
    }
  }

  // Read it into an expandable buffer, then marshal its content into a single
  // contiguous block.
  CharBuffer buffer;
  while (true) {
    std::size_t count;
    char *to{buffer.FreeSpace(&count)};
    ssize_t got{read(fileDescriptor_, to, count)};
    if (got < 0) {
      *error << "could not read " << errorPath << ": " << std::strerror(errno);
      Close();
      return false;
    }
    if (got == 0) {
      break;
    }
    buffer.Claim(got);
  }
  // The descriptor is no longer needed once everything has been read.
  // Standard input (descriptor 0) is left open.
  if (fileDescriptor_ > 0) {
    close(fileDescriptor_);
    --openFileDescriptors;
  }
  fileDescriptor_ = -1;
  bytes_ = buffer.size();
  if (bytes_ == 0) {
    // empty file
    content_ = nullptr;
    return true;
  }
  char *contig{new char[bytes_ + 1 /* for extra newline if needed */]};
  content_ = contig;
  char *to{contig};
  for (char ch : buffer) {
    if (ch != '\r') {
      *to++ = ch;
    }
  }
  if (to == contig || to[-1] != '\n') {
    *to++ = '\n';  // supply a missing terminal newline
  }
  bytes_ = to - contig;
  lineStart_ = FindLineStarts(content_, bytes_);
  return true;
}
void SourceFile::Close() {
if (useMMap && isMemoryMapped_) {
munmap(reinterpret_cast<void *>(const_cast<char *>(content_)), bytes_);
isMemoryMapped_ = false;
} else if (content_ != nullptr) {
delete[] content_;
}
content_ = nullptr;
bytes_ = 0;
if (fileDescriptor_ > 0) {
close(fileDescriptor_);
--openFileDescriptors;
}
fileDescriptor_ = -1;
path_.clear();
}
// Translates a byte offset within the content into a 1-based
// {line, column} pair.  The offset must be less than bytes_.  The line is
// the last one whose starting offset does not exceed the given offset; with
// no line table, everything is treated as being on line 1.
std::pair<int, int> SourceFile::FindOffsetLineAndColumn(std::size_t at) const {
  CHECK(at < bytes_);
  std::size_t line{0};
  std::size_t lineBegin{0};
  if (!lineStart_.empty()) {
    // The first line that starts beyond the offset; the one before it is
    // the line that contains the offset.
    const auto next =
        std::upper_bound(lineStart_.begin(), lineStart_.end(), at);
    if (next != lineStart_.begin()) {
      line = static_cast<std::size_t>(next - lineStart_.begin()) - 1;
    }
    lineBegin = lineStart_[line];
  }
  return {static_cast<int>(line + 1), static_cast<int>(at - lineBegin + 1)};
}
} // namespace parser
} // namespace Fortran