// Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "source.h" #include "char-buffer.h" #include "../common/idioms.h" #include #include #include #include #include #include #include #include #include #include #include // TODO: Port to Windows &c. namespace Fortran::parser { static constexpr bool useMMap{true}; static constexpr int minMapFileBytes{1}; // i.e., no minimum requirement static constexpr int maxMapOpenFileDescriptors{100}; static int openFileDescriptors{0}; SourceFile::~SourceFile() { Close(); } static std::vector FindLineStarts( const char *source, std::size_t bytes) { if (bytes == 0) { return {}; } CHECK(source[bytes - 1] == '\n' && "missing ultimate newline"); std::vector result; std::size_t at{0}; do { result.push_back(at); const void *vp{static_cast(&source[at])}; const void *vnl{std::memchr(vp, '\n', bytes - at)}; const char *nl{static_cast(vnl)}; at = nl + 1 - source; } while (at < bytes); result.shrink_to_fit(); return result; } std::string DirectoryName(std::string path) { auto lastSlash{path.rfind("/")}; return lastSlash == std::string::npos ? path : path.substr(0, lastSlash); } std::string LocateSourceFile( std::string name, const std::vector &searchPath) { if (name.empty() || name == "-" || name[0] == '/') { return name; } for (const std::string &dir : searchPath) { std::string path{dir + '/' + name}; struct stat statbuf; if (stat(path.c_str(), &statbuf) == 0 && !S_ISDIR(statbuf.st_mode)) { return path; } } return name; } static std::size_t RemoveCarriageReturns(char *buffer, std::size_t bytes) { std::size_t wrote{0}; char *p{buffer}; while (bytes > 0) { void *vp{static_cast(p)}; void *crvp{std::memchr(vp, '\r', bytes)}; char *crcp{static_cast(crvp)}; if (crcp == nullptr) { std::memmove(buffer + wrote, p, bytes); wrote += bytes; break; } std::size_t chunk = crcp - p; std::memmove(buffer + wrote, p, chunk); wrote += chunk; p += chunk + 1; bytes -= chunk + 1; } return wrote; } bool SourceFile::Open(std::string path, std::stringstream *error) { Close(); path_ = path; std::string errorPath{"'"s + path + "'"}; errno = 0; fileDescriptor_ = open(path.c_str(), O_RDONLY); if (fileDescriptor_ < 0) { *error << "could not open " << errorPath << ": " << std::strerror(errno); return false; } ++openFileDescriptors; return ReadFile(errorPath, error); } bool SourceFile::ReadStandardInput(std::stringstream *error) { Close(); path_ = "standard input"; fileDescriptor_ = 0; return ReadFile(path_, error); } bool SourceFile::ReadFile(std::string errorPath, std::stringstream *error) { struct stat statbuf; if (fstat(fileDescriptor_, &statbuf) != 0) { *error << "fstat failed on " << errorPath << ": " << std::strerror(errno); Close(); return false; } if (S_ISDIR(statbuf.st_mode)) { *error << errorPath << " is a directory"; Close(); return false; } // Try to map a large source file into the process' address space. // Don't bother with small ones. This also helps keep the number // of open file descriptors from getting out of hand. if (useMMap && S_ISREG(statbuf.st_mode)) { bytes_ = static_cast(statbuf.st_size); if (bytes_ >= minMapFileBytes && openFileDescriptors <= maxMapOpenFileDescriptors) { void *vp = mmap(0, bytes_, PROT_READ, MAP_SHARED, fileDescriptor_, 0); if (vp != MAP_FAILED) { content_ = static_cast(const_cast(vp)); if (content_[bytes_ - 1] == '\n' && std::memchr(vp, '\r', bytes_) == nullptr) { isMemoryMapped_ = true; lineStart_ = FindLineStarts(content_, bytes_); return true; } // The file needs to have its line endings normalized to simple // newlines. Remap it for a private rewrite in place. vp = mmap(vp, bytes_, PROT_READ | PROT_WRITE, MAP_PRIVATE, fileDescriptor_, 0); if (vp != MAP_FAILED) { auto mutableContent{static_cast(vp)}; bytes_ = RemoveCarriageReturns(mutableContent, bytes_); if (bytes_ > 0) { if (mutableContent[bytes_ - 1] == '\n' || (bytes_ & 0xfff) != 0 /* don't cross into next page */) { if (mutableContent[bytes_ - 1] != '\n') { // Append a final newline. mutableContent[bytes_++] = '\n'; } bool isNowReadOnly{mprotect(vp, bytes_, PROT_READ) == 0}; CHECK(isNowReadOnly); content_ = mutableContent; isMemoryMapped_ = true; lineStart_ = FindLineStarts(content_, bytes_); return true; } } } munmap(vp, bytes_); content_ = nullptr; } } } // Read it into an expandable buffer, then marshal its content into a single // contiguous block. CharBuffer buffer; while (true) { std::size_t count; char *to{buffer.FreeSpace(&count)}; ssize_t got{read(fileDescriptor_, to, count)}; if (got < 0) { *error << "could not read " << errorPath << ": " << std::strerror(errno); Close(); return false; } if (got == 0) { break; } buffer.Claim(got); } if (fileDescriptor_ > 0) { close(fileDescriptor_); --openFileDescriptors; } fileDescriptor_ = -1; bytes_ = buffer.size(); if (bytes_ == 0) { // empty file content_ = nullptr; } else { normalized_ = buffer.MarshalNormalized(); content_ = normalized_.data(); bytes_ = normalized_.size(); lineStart_ = FindLineStarts(content_, bytes_); } return true; } void SourceFile::Close() { if (useMMap && isMemoryMapped_) { munmap(reinterpret_cast(const_cast(content_)), bytes_); isMemoryMapped_ = false; } else if (!normalized_.empty()) { normalized_.clear(); } else if (content_ != nullptr) { delete[] content_; } content_ = nullptr; bytes_ = 0; if (fileDescriptor_ > 0) { close(fileDescriptor_); --openFileDescriptors; } fileDescriptor_ = -1; path_.clear(); } std::pair SourceFile::FindOffsetLineAndColumn(std::size_t at) const { CHECK(at < bytes_); if (lineStart_.empty()) { return {1, static_cast(at + 1)}; } std::size_t low{0}, count{lineStart_.size()}; while (count > 1) { std::size_t mid{low + (count >> 1)}; if (lineStart_[mid] > at) { count = mid - low; } else { count -= mid - low; low = mid; } } return { static_cast(low + 1), static_cast(at - lineStart_[low] + 1)}; } } // namespace Fortran::parser