Auto merge of #42593 - ibabushkin:on-demand-external-source, r=eddyb

Implement lazy loading of external crates' sources. Fixes #38875

Fixes #38875. This is a follow-up to #42507. When a (now correctly translated) span from an external crate is referenced in an error, warning, or info message, we still don't have the source code being referenced.
Since stuffing the source in the serialized metadata of an rlib is extremely wasteful, the following scheme has been implemented:

* File maps now contain a source hash that gets serialized as well.
* When a span is rendered in a message, the source hash in the corresponding file map(s) is used to try and load the source from the corresponding file on disk. If the file is not found or the hashes don't match, the failed attempt is recorded (and not retried).
* The machinery fetching source lines from file maps is augmented to use the lazily loaded external source as a secondary fallback for file maps belonging to external crates.

This required a small change to the expected stderr of one UI test (it now renders a span where previously there was none).

Further work can be done based on this - some of the machinery previously used to hide external spans is possibly obsolete and the hashing code can be reused in different places as well.

r? @eddyb
This commit is contained in:
bors 2017-06-18 10:41:05 +00:00
commit 28cc0c5a7b
14 changed files with 255 additions and 79 deletions

1
src/Cargo.lock generated
View file

@ -1682,6 +1682,7 @@ dependencies = [
name = "syntax_pos"
version = "0.0.0"
dependencies = [
"rustc_data_structures 0.0.0",
"serialize 0.0.0",
]

View file

@ -336,6 +336,8 @@ impl<'a, 'gcx, 'tcx> HashStable<StableHashingContext<'a, 'gcx, 'tcx>> for FileMa
crate_of_origin,
// Do not hash the source as it is not encoded
src: _,
src_hash,
external_src: _,
start_pos,
end_pos: _,
ref lines,
@ -350,6 +352,8 @@ impl<'a, 'gcx, 'tcx> HashStable<StableHashingContext<'a, 'gcx, 'tcx>> for FileMa
index: CRATE_DEF_INDEX,
}.hash_stable(hcx, hasher);
src_hash.hash_stable(hcx, hasher);
// We only hash the relative position within this filemap
let lines = lines.borrow();
lines.len().hash_stable(hcx, hasher);

View file

@ -78,6 +78,17 @@ impl StableHasherResult for [u8; 20] {
}
}
// Lets a `StableHasher` be finished into a 128-bit value by reading the
// leading bytes of the finalized digest as a `u128`.
impl StableHasherResult for u128 {
fn finish(mut hasher: StableHasher<Self>) -> Self {
let hash_bytes: &[u8] = hasher.finalize();
// Guard for the raw read below: the digest must hold at least 16 bytes.
assert!(hash_bytes.len() >= mem::size_of::<u128>());
// SAFETY: the assert above guarantees `size_of::<u128>()` readable bytes
// behind the pointer, and `read_unaligned` imposes no alignment requirement.
// NOTE(review): the bytes are reinterpreted in native byte order, so the
// resulting value presumably differs between little- and big-endian hosts
// -- confirm whether that matters for hashes stored in crate metadata.
unsafe {
::std::ptr::read_unaligned(hash_bytes.as_ptr() as *const u128)
}
}
}
impl StableHasherResult for u64 {
fn finish(mut hasher: StableHasher<Self>) -> Self {
hasher.state.finalize();

View file

@ -17,6 +17,7 @@ use RenderSpan::*;
use snippet::{Annotation, AnnotationType, Line, MultilineAnnotation, StyledString, Style};
use styled_buffer::StyledBuffer;
use std::borrow::Cow;
use std::io::prelude::*;
use std::io;
use std::rc::Rc;
@ -131,7 +132,7 @@ impl EmitterWriter {
}
}
fn preprocess_annotations(&self, msp: &MultiSpan) -> Vec<FileWithAnnotatedLines> {
fn preprocess_annotations(&mut self, msp: &MultiSpan) -> Vec<FileWithAnnotatedLines> {
fn add_annotation_to_file(file_vec: &mut Vec<FileWithAnnotatedLines>,
file: Rc<FileMap>,
line_index: usize,
@ -175,6 +176,7 @@ impl EmitterWriter {
if span_label.span == DUMMY_SP {
continue;
}
let lo = cm.lookup_char_pos(span_label.span.lo);
let mut hi = cm.lookup_char_pos(span_label.span.hi);
@ -890,10 +892,10 @@ impl EmitterWriter {
let mut annotated_files = self.preprocess_annotations(msp);
// Make sure our primary file comes first
let primary_lo = if let (Some(ref cm), Some(ref primary_span)) =
let (primary_lo, cm) = if let (Some(cm), Some(ref primary_span)) =
(self.cm.as_ref(), msp.primary_span().as_ref()) {
if primary_span != &&DUMMY_SP {
cm.lookup_char_pos(primary_span.lo)
(cm.lookup_char_pos(primary_span.lo), cm)
} else {
emit_to_destination(&buffer.render(), level, &mut self.dst)?;
return Ok(());
@ -911,7 +913,7 @@ impl EmitterWriter {
// Print out the annotate source lines that correspond with the error
for annotated_file in annotated_files {
// we can't annotate anything if the source is unavailable.
if annotated_file.file.src.is_none() {
if !cm.ensure_filemap_source_present(annotated_file.file.clone()) {
continue;
}
@ -1012,7 +1014,7 @@ impl EmitterWriter {
} else if line_idx_delta == 2 {
let unannotated_line = annotated_file.file
.get_line(annotated_file.lines[line_idx].line_index)
.unwrap_or("");
.unwrap_or_else(|| Cow::from(""));
let last_buffer_line_num = buffer.num_lines();

View file

@ -37,6 +37,7 @@ use self::Level::*;
use emitter::{Emitter, EmitterWriter};
use std::borrow::Cow;
use std::cell::{RefCell, Cell};
use std::{error, fmt};
use std::rc::Rc;
@ -49,7 +50,7 @@ pub mod registry;
pub mod styled_buffer;
mod lock;
use syntax_pos::{BytePos, Loc, FileLinesResult, FileName, MultiSpan, Span, NO_EXPANSION};
use syntax_pos::{BytePos, Loc, FileLinesResult, FileMap, FileName, MultiSpan, Span, NO_EXPANSION};
#[derive(Clone, Debug, PartialEq, RustcEncodable, RustcDecodable)]
pub enum RenderSpan {
@ -103,6 +104,7 @@ pub trait CodeMapper {
fn span_to_filename(&self, sp: Span) -> FileName;
fn merge_spans(&self, sp_lhs: Span, sp_rhs: Span) -> Option<Span>;
fn call_span_if_macro(&self, sp: Span) -> Span;
fn ensure_filemap_source_present(&self, file_map: Rc<FileMap>) -> bool;
}
impl CodeSuggestion {
@ -121,7 +123,7 @@ impl CodeSuggestion {
use syntax_pos::{CharPos, Loc, Pos};
fn push_trailing(buf: &mut String,
line_opt: Option<&str>,
line_opt: Option<&Cow<str>>,
lo: &Loc,
hi_opt: Option<&Loc>) {
let (lo, hi_opt) = (lo.col.to_usize(), hi_opt.map(|hi| hi.col.to_usize()));
@ -183,13 +185,13 @@ impl CodeSuggestion {
let cur_lo = cm.lookup_char_pos(sp.lo);
for (buf, substitute) in bufs.iter_mut().zip(substitutes) {
if prev_hi.line == cur_lo.line {
push_trailing(buf, prev_line, &prev_hi, Some(&cur_lo));
push_trailing(buf, prev_line.as_ref(), &prev_hi, Some(&cur_lo));
} else {
push_trailing(buf, prev_line, &prev_hi, None);
push_trailing(buf, prev_line.as_ref(), &prev_hi, None);
// push lines between the previous and current span (if any)
for idx in prev_hi.line..(cur_lo.line - 1) {
if let Some(line) = fm.get_line(idx) {
buf.push_str(line);
buf.push_str(line.as_ref());
buf.push('\n');
}
}
@ -205,7 +207,7 @@ impl CodeSuggestion {
for buf in &mut bufs {
// if the replacement already ends with a newline, don't print the next line
if !buf.ends_with('\n') {
push_trailing(buf, prev_line, &prev_hi, None);
push_trailing(buf, prev_line.as_ref(), &prev_hi, None);
}
// remove trailing newline
buf.pop();

View file

@ -765,7 +765,7 @@ impl<'a, 'tcx> CrateMetadata {
assert!(!self.is_proc_macro(id));
let ast = self.entry(id).ast.unwrap();
let def_id = self.local_def_id(id);
let body = ast.decode(self).body.decode(self);
let body = ast.decode((self, tcx)).body.decode((self, tcx));
tcx.hir.intern_inlined_body(def_id, body)
}
@ -1149,6 +1149,7 @@ impl<'a, 'tcx> CrateMetadata {
// containing the information we need.
let syntax_pos::FileMap { name,
name_was_remapped,
src_hash,
start_pos,
end_pos,
lines,
@ -1174,6 +1175,7 @@ impl<'a, 'tcx> CrateMetadata {
let local_version = local_codemap.new_imported_filemap(name,
name_was_remapped,
self.cnum.as_u32(),
src_hash,
source_length,
lines,
multibyte_chars);

View file

@ -158,29 +158,13 @@ impl CodeMap {
/// Creates a new filemap without setting its line information. If you don't
/// intend to set the line information yourself, you should use new_filemap_and_lines.
pub fn new_filemap(&self, filename: FileName, mut src: String) -> Rc<FileMap> {
pub fn new_filemap(&self, filename: FileName, src: String) -> Rc<FileMap> {
let start_pos = self.next_start_pos();
let mut files = self.files.borrow_mut();
// Remove utf-8 BOM if any.
if src.starts_with("\u{feff}") {
src.drain(..3);
}
let end_pos = start_pos + src.len();
let (filename, was_remapped) = self.path_mapping.map_prefix(filename);
let filemap = Rc::new(FileMap {
name: filename,
name_was_remapped: was_remapped,
crate_of_origin: 0,
src: Some(Rc::new(src)),
start_pos: Pos::from_usize(start_pos),
end_pos: Pos::from_usize(end_pos),
lines: RefCell::new(Vec::new()),
multibyte_chars: RefCell::new(Vec::new()),
});
let filemap =
Rc::new(FileMap::new(filename, was_remapped, src, Pos::from_usize(start_pos)));
files.push(filemap.clone());
@ -210,6 +194,7 @@ impl CodeMap {
filename: FileName,
name_was_remapped: bool,
crate_of_origin: u32,
src_hash: u128,
source_len: usize,
mut file_local_lines: Vec<BytePos>,
mut file_local_multibyte_chars: Vec<MultiByteChar>)
@ -233,6 +218,8 @@ impl CodeMap {
name_was_remapped: name_was_remapped,
crate_of_origin: crate_of_origin,
src: None,
src_hash: src_hash,
external_src: RefCell::new(ExternalSource::AbsentOk),
start_pos: start_pos,
end_pos: end_pos,
lines: RefCell::new(file_local_lines),
@ -428,30 +415,31 @@ impl CodeMap {
local_end.fm.start_pos)
}));
} else {
match local_begin.fm.src {
Some(ref src) => {
let start_index = local_begin.pos.to_usize();
let end_index = local_end.pos.to_usize();
let source_len = (local_begin.fm.end_pos -
local_begin.fm.start_pos).to_usize();
self.ensure_filemap_source_present(local_begin.fm.clone());
if start_index > end_index || end_index > source_len {
return Err(SpanSnippetError::MalformedForCodemap(
MalformedCodemapPositions {
name: local_begin.fm.name.clone(),
source_len: source_len,
begin_pos: local_begin.pos,
end_pos: local_end.pos,
}));
}
let start_index = local_begin.pos.to_usize();
let end_index = local_end.pos.to_usize();
let source_len = (local_begin.fm.end_pos -
local_begin.fm.start_pos).to_usize();
return Ok((&src[start_index..end_index]).to_string())
}
None => {
return Err(SpanSnippetError::SourceNotAvailable {
filename: local_begin.fm.name.clone()
});
}
if start_index > end_index || end_index > source_len {
return Err(SpanSnippetError::MalformedForCodemap(
MalformedCodemapPositions {
name: local_begin.fm.name.clone(),
source_len: source_len,
begin_pos: local_begin.pos,
end_pos: local_end.pos,
}));
}
if let Some(ref src) = local_begin.fm.src {
return Ok((&src[start_index..end_index]).to_string());
} else if let Some(src) = local_begin.fm.external_src.borrow().get_source() {
return Ok((&src[start_index..end_index]).to_string());
} else {
return Err(SpanSnippetError::SourceNotAvailable {
filename: local_begin.fm.name.clone()
});
}
}
}
@ -572,6 +560,10 @@ impl CodeMapper for CodeMap {
}
sp
}
/// Tries to read the file named by `file_map` through the code map's file
/// loader and hands the result (or `None` if the read failed) to
/// `FileMap::add_external_src`, whose return value is passed through.
fn ensure_filemap_source_present(&self, file_map: Rc<FileMap>) -> bool {
    let path = Path::new(&file_map.name);
    // A failed read is not an error here; it is forwarded as "no source".
    let loaded = self.file_loader.read_file(path).ok();
    file_map.add_external_src(loaded)
}
}
#[derive(Clone)]
@ -617,6 +609,7 @@ impl FilePathMapping {
#[cfg(test)]
mod tests {
use super::*;
use std::borrow::Cow;
use std::rc::Rc;
#[test]
@ -626,12 +619,12 @@ mod tests {
"first line.\nsecond line".to_string());
fm.next_line(BytePos(0));
// Test we can get lines with partial line info.
assert_eq!(fm.get_line(0), Some("first line."));
assert_eq!(fm.get_line(0), Some(Cow::from("first line.")));
// TESTING BROKEN BEHAVIOR: line break declared before actual line break.
fm.next_line(BytePos(10));
assert_eq!(fm.get_line(1), Some("."));
assert_eq!(fm.get_line(1), Some(Cow::from(".")));
fm.next_line(BytePos(12));
assert_eq!(fm.get_line(2), Some("second line"));
assert_eq!(fm.get_line(2), Some(Cow::from("second line")));
}
#[test]

View file

@ -314,7 +314,7 @@ impl DiagnosticSpanLine {
h_end: usize)
-> DiagnosticSpanLine {
DiagnosticSpanLine {
text: fm.get_line(index).unwrap_or("").to_owned(),
text: fm.get_line(index).map_or(String::new(), |l| l.into_owned()),
highlight_start: h_start,
highlight_end: h_end,
}

View file

@ -10,3 +10,4 @@ crate-type = ["dylib"]
[dependencies]
serialize = { path = "../libserialize" }
rustc_data_structures = { path = "../librustc_data_structures" }

View file

@ -24,6 +24,7 @@
#![feature(const_fn)]
#![feature(custom_attribute)]
#![feature(i128_type)]
#![feature(optin_builtin_traits)]
#![allow(unused_attributes)]
#![feature(specialization)]
@ -32,12 +33,17 @@
#![cfg_attr(stage0, feature(rustc_private))]
#![cfg_attr(stage0, feature(staged_api))]
use std::borrow::Cow;
use std::cell::{Cell, RefCell};
use std::ops::{Add, Sub};
use std::rc::Rc;
use std::cmp;
use std::fmt;
use std::hash::Hasher;
use rustc_data_structures::stable_hasher::StableHasher;
extern crate rustc_data_structures;
use serialize::{Encodable, Decodable, Encoder, Decoder};
@ -369,6 +375,35 @@ pub struct MultiByteChar {
pub bytes: usize,
}
/// Tracks how far the lazy loading of a `FileMap`'s external source has got.
#[derive(PartialEq, Eq, Clone)]
pub enum ExternalSource {
    /// The source text was loaded and is stored here.
    Present(String),
    /// Loading has not been attempted yet.
    AbsentOk,
    /// Loading was attempted and failed.
    AbsentErr,
    /// Nothing needs loading because the FileMap belongs to the local crate.
    Unneeded,
}

impl ExternalSource {
    /// Returns `true` for every state except `Present`.
    pub fn is_absent(&self) -> bool {
        if let ExternalSource::Present(_) = *self {
            false
        } else {
            true
        }
    }

    /// Returns the loaded source text, or `None` when the state is not `Present`.
    pub fn get_source(&self) -> Option<&str> {
        if let ExternalSource::Present(ref text) = *self {
            Some(text.as_str())
        } else {
            None
        }
    }
}
/// A single source in the CodeMap.
#[derive(Clone)]
pub struct FileMap {
@ -382,6 +417,11 @@ pub struct FileMap {
pub crate_of_origin: u32,
/// The complete source code
pub src: Option<Rc<String>>,
/// The source code's hash
pub src_hash: u128,
/// The external source code (used for external crates, which will have a `None`
/// value as `self.src`).
pub external_src: RefCell<ExternalSource>,
/// The start position of this source in the CodeMap
pub start_pos: BytePos,
/// The end position of this source in the CodeMap
@ -394,9 +434,10 @@ pub struct FileMap {
impl Encodable for FileMap {
fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
s.emit_struct("FileMap", 6, |s| {
s.emit_struct("FileMap", 7, |s| {
s.emit_struct_field("name", 0, |s| self.name.encode(s))?;
s.emit_struct_field("name_was_remapped", 1, |s| self.name_was_remapped.encode(s))?;
s.emit_struct_field("src_hash", 6, |s| self.src_hash.encode(s))?;
s.emit_struct_field("start_pos", 2, |s| self.start_pos.encode(s))?;
s.emit_struct_field("end_pos", 3, |s| self.end_pos.encode(s))?;
s.emit_struct_field("lines", 4, |s| {
@ -459,7 +500,10 @@ impl Decodable for FileMap {
let name: String = d.read_struct_field("name", 0, |d| Decodable::decode(d))?;
let name_was_remapped: bool =
d.read_struct_field("name_was_remapped", 1, |d| Decodable::decode(d))?;
let start_pos: BytePos = d.read_struct_field("start_pos", 2, |d| Decodable::decode(d))?;
let src_hash: u128 =
d.read_struct_field("src_hash", 6, |d| Decodable::decode(d))?;
let start_pos: BytePos =
d.read_struct_field("start_pos", 2, |d| Decodable::decode(d))?;
let end_pos: BytePos = d.read_struct_field("end_pos", 3, |d| Decodable::decode(d))?;
let lines: Vec<BytePos> = d.read_struct_field("lines", 4, |d| {
let num_lines: u32 = Decodable::decode(d)?;
@ -501,6 +545,8 @@ impl Decodable for FileMap {
start_pos: start_pos,
end_pos: end_pos,
src: None,
src_hash: src_hash,
external_src: RefCell::new(ExternalSource::AbsentOk),
lines: RefCell::new(lines),
multibyte_chars: RefCell::new(multibyte_chars)
})
@ -515,6 +561,32 @@ impl fmt::Debug for FileMap {
}
impl FileMap {
/// Builds a `FileMap` for source text that is available in memory.
///
/// A leading BOM is stripped via `remove_bom` before anything else, so both
/// the stored hash and `end_pos` describe the text without it. The map
/// starts with empty line and multibyte-char tables, `crate_of_origin` set
/// to 0 and `external_src` set to `Unneeded`.
pub fn new(name: FileName,
name_was_remapped: bool,
mut src: String,
start_pos: BytePos) -> FileMap {
remove_bom(&mut src);
// Hash the source bytes into the 128-bit value stored as `src_hash`.
let mut hasher: StableHasher<u128> = StableHasher::new();
hasher.write(src.as_bytes());
let src_hash = hasher.finish();
// The map covers `src.len()` bytes starting at `start_pos`.
let end_pos = start_pos.to_usize() + src.len();
FileMap {
name: name,
name_was_remapped: name_was_remapped,
crate_of_origin: 0,
src: Some(Rc::new(src)),
src_hash: src_hash,
external_src: RefCell::new(ExternalSource::Unneeded),
start_pos: start_pos,
end_pos: Pos::from_usize(end_pos),
lines: RefCell::new(Vec::new()),
multibyte_chars: RefCell::new(Vec::new()),
}
}
/// EFFECT: register a start-of-line offset in the
/// table of line-beginnings.
/// UNCHECKED INVARIANT: these offsets must be added in the right
@ -532,26 +604,60 @@ impl FileMap {
lines.push(pos);
}
/// get a line from the list of pre-computed line-beginnings.
/// line-number here is 0-based.
pub fn get_line(&self, line_number: usize) -> Option<&str> {
match self.src {
Some(ref src) => {
let lines = self.lines.borrow();
lines.get(line_number).map(|&line| {
let begin: BytePos = line - self.start_pos;
let begin = begin.to_usize();
// We can't use `lines.get(line_number+1)` because we might
// be parsing when we call this function and thus the current
// line is the last one we have line info for.
let slice = &src[begin..];
match slice.find('\n') {
Some(e) => &slice[..e],
None => slice
}
})
/// Add externally loaded source.
/// If the hash of the input doesn't match or no input is supplied via None,
/// it is interpreted as an error and the corresponding enum variant is set.
/// The return value signifies whether some kind of source is present.
pub fn add_external_src(&self, src: Option<String>) -> bool {
if *self.external_src.borrow() == ExternalSource::AbsentOk {
let mut external_src = self.external_src.borrow_mut();
if let Some(src) = src {
let mut hasher: StableHasher<u128> = StableHasher::new();
hasher.write(src.as_bytes());
if hasher.finish() == self.src_hash {
*external_src = ExternalSource::Present(src);
return true;
}
} else {
*external_src = ExternalSource::AbsentErr;
}
None => None
false
} else {
self.src.is_some() || self.external_src.borrow().get_source().is_some()
}
}
/// Get a line from the list of pre-computed line-beginnings.
/// The line number here is 0-based.
///
/// Returns `None` when no line start is recorded for `line_number`, or when
/// neither `self.src` nor a loaded external source is available. The
/// returned text runs up to, but does not include, the next `'\n'`.
pub fn get_line(&self, line_number: usize) -> Option<Cow<str>> {
// Returns the slice of `src` from byte offset `begin` up to the next
// newline, or to the end of `src` if there is none.
fn get_until_newline(src: &str, begin: usize) -> &str {
// We can't use `lines.get(line_number+1)` because we might
// be parsing when we call this function and thus the current
// line is the last one we have line info for.
let slice = &src[begin..];
match slice.find('\n') {
Some(e) => &slice[..e],
None => slice
}
}
let lines = self.lines.borrow();
let line = if let Some(line) = lines.get(line_number) {
line
} else {
return None;
};
// Recorded line starts are positions in the CodeMap; subtracting
// `start_pos` turns them into byte offsets into this file's text.
let begin: BytePos = *line - self.start_pos;
let begin = begin.to_usize();
if let Some(ref src) = self.src {
Some(Cow::from(get_until_newline(src, begin)))
} else if let Some(src) = self.external_src.borrow().get_source() {
// The external text is only reachable through a temporary `RefCell`
// borrow, so the line is copied into an owned `String`.
Some(Cow::Owned(String::from(get_until_newline(src, begin))))
} else {
None
}
}
@ -614,6 +720,13 @@ impl FileMap {
}
}
/// Strips one leading byte-order mark (U+FEFF) from `src`, if there is one.
fn remove_bom(src: &mut String) {
    // U+FEFF is three bytes long when encoded as UTF-8.
    const BOM: &str = "\u{feff}";
    if src.starts_with(BOM) {
        src.drain(..BOM.len());
    }
}
// _____________________________________________________________________________
// Pos, BytePos, CharPos
//

View file

@ -0,0 +1,11 @@
// Copyright 2017 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
pub const FOO: usize = *&0;

View file

@ -0,0 +1,17 @@
// Copyright 2017 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// aux-build:issue_38875_b.rs
extern crate issue_38875_b;
fn main() {
let test_x = [0; issue_38875_b::FOO];
}

View file

@ -0,0 +1,14 @@
error[E0080]: constant evaluation error
--> $DIR/auxiliary/issue_38875_b.rs:11:24
|
11 | pub const FOO: usize = *&0;
| ^^^ unimplemented constant expression: deref operation
|
note: for repeat count here
--> $DIR/issue_38875.rs:16:22
|
16 | let test_x = [0; issue_38875_b::FOO];
| ^^^^^^^^^^^^^^^^^^
error: aborting due to previous error(s)

View file

@ -6,6 +6,11 @@ error[E0599]: no method named `f` found for type `{integer}` in the current scop
|
= note: found the following associated functions; to be used as methods, functions must have a `self` parameter
note: candidate #1 is defined in the trait `issue_41652_b::Tr`
--> $DIR/auxiliary/issue_41652_b.rs:14:5
|
14 | / fn f()
15 | | where Self: Sized;
| |__________________________^
= help: to disambiguate the method call, write `issue_41652_b::Tr::f(3)` instead
error: aborting due to previous error(s)