From f20ceb1c6fd7fa29a91677be548d6f8ca6c4b172 Mon Sep 17 00:00:00 2001 From: Camille GILLOT Date: Sat, 6 Aug 2022 22:33:06 +0200 Subject: [PATCH 1/8] Encode index of SourceFile along with span. --- compiler/rustc_metadata/src/rmeta/decoder.rs | 33 +++---- compiler/rustc_metadata/src/rmeta/encoder.rs | 96 ++++++++++---------- compiler/rustc_span/src/lib.rs | 2 + compiler/rustc_span/src/source_map.rs | 2 + compiler/rustc_span/src/source_map/tests.rs | 1 + 5 files changed, 65 insertions(+), 69 deletions(-) diff --git a/compiler/rustc_metadata/src/rmeta/decoder.rs b/compiler/rustc_metadata/src/rmeta/decoder.rs index 40dc4fb052d..444b5884187 100644 --- a/compiler/rustc_metadata/src/rmeta/decoder.rs +++ b/compiler/rustc_metadata/src/rmeta/decoder.rs @@ -527,6 +527,9 @@ impl<'a, 'tcx> Decodable> for Span { bug!("Cannot decode Span without Session.") }; + // Index of the file in the corresponding crate's list of encoded files. + let metadata_index = usize::decode(decoder); + // There are two possibilities here: // 1. This is a 'local span', which is located inside a `SourceFile` // that came from this crate. In this case, we use the source map data @@ -587,27 +590,9 @@ impl<'a, 'tcx> Decodable> for Span { foreign_data.imported_source_files(sess) }; - let source_file = { - // Optimize for the case that most spans within a translated item - // originate from the same source_file. - let last_source_file = &imported_source_files[decoder.last_source_file_index]; - - if lo >= last_source_file.original_start_pos && lo <= last_source_file.original_end_pos - { - last_source_file - } else { - let index = imported_source_files - .binary_search_by_key(&lo, |source_file| source_file.original_start_pos) - .unwrap_or_else(|index| index - 1); - - // Don't try to cache the index for foreign spans, - // as this would require a map from CrateNums to indices - if tag == TAG_VALID_SPAN_LOCAL { - decoder.last_source_file_index = index; - } - &imported_source_files[index] - } - }; + // Optimize for the case that most spans within a translated item + // originate from the same source_file. + let source_file = &imported_source_files[metadata_index]; // Make sure our binary search above is correct. debug_assert!( @@ -1545,7 +1530,8 @@ impl<'a, 'tcx> CrateMetadataRef<'a> { let external_source_map = self.root.source_map.decode(self); external_source_map - .map(|source_file_to_import| { + .enumerate() + .map(|(source_file_index, source_file_to_import)| { // We can't reuse an existing SourceFile, so allocate a new one // containing the information we need. let rustc_span::SourceFile { @@ -1605,6 +1591,9 @@ impl<'a, 'tcx> CrateMetadataRef<'a> { normalized_pos, start_pos, end_pos, + source_file_index + .try_into() + .expect("cannot import more than U32_MAX files"), ); debug!( "CrateMetaData::imported_source_files alloc \ diff --git a/compiler/rustc_metadata/src/rmeta/encoder.rs b/compiler/rustc_metadata/src/rmeta/encoder.rs index 33278367ce3..de78ab41f05 100644 --- a/compiler/rustc_metadata/src/rmeta/encoder.rs +++ b/compiler/rustc_metadata/src/rmeta/encoder.rs @@ -17,7 +17,6 @@ use rustc_hir::definitions::DefPathData; use rustc_hir::intravisit::{self, Visitor}; use rustc_hir::lang_items; use rustc_hir::{AnonConst, GenericParamKind}; -use rustc_index::bit_set::GrowableBitSet; use rustc_middle::hir::nested_filter; use rustc_middle::middle::dependency_format::Linkage; use rustc_middle::middle::exported_symbols::{ @@ -66,13 +65,10 @@ pub(super) struct EncodeContext<'a, 'tcx> { // The indices (into the `SourceMap`'s `MonotonicVec`) // of all of the `SourceFiles` that we need to serialize. // When we serialize a `Span`, we insert the index of its - // `SourceFile` into the `GrowableBitSet`. - // - // This needs to be a `GrowableBitSet` and not a - // regular `BitSet` because we may actually import new `SourceFiles` - // during metadata encoding, due to executing a query - // with a result containing a foreign `Span`. - required_source_files: Option>, + // `SourceFile` into the `FxIndexSet`. + // The order inside the `FxIndexSet` is used as on-disk + // order of `SourceFiles`, and encoded inside `Span`s. + required_source_files: Option>, is_proc_macro: bool, hygiene_ctxt: &'a HygieneEncodeContext, } @@ -245,10 +241,6 @@ impl<'a, 'tcx> Encodable> for Span { return TAG_PARTIAL_SPAN.encode(s); } - let source_files = s.required_source_files.as_mut().expect("Already encoded SourceMap!"); - // Record the fact that we need to encode the data for this `SourceFile` - source_files.insert(s.source_file_cache.1); - // There are two possible cases here: // 1. This span comes from a 'foreign' crate - e.g. some crate upstream of the // crate we are writing metadata for. When the metadata for *this* crate gets @@ -265,30 +257,38 @@ impl<'a, 'tcx> Encodable> for Span { // if we're a proc-macro crate. // This allows us to avoid loading the dependencies of proc-macro crates: all of // the information we need to decode `Span`s is stored in the proc-macro crate. - let (tag, lo, hi) = if s.source_file_cache.0.is_imported() && !s.is_proc_macro { - // To simplify deserialization, we 'rebase' this span onto the crate it originally came from - // (the crate that 'owns' the file it references. These rebased 'lo' and 'hi' values - // are relative to the source map information for the 'foreign' crate whose CrateNum - // we write into the metadata. This allows `imported_source_files` to binary - // search through the 'foreign' crate's source map information, using the - // deserialized 'lo' and 'hi' values directly. - // - // All of this logic ensures that the final result of deserialization is a 'normal' - // Span that can be used without any additional trouble. - let external_start_pos = { - // Introduce a new scope so that we drop the 'lock()' temporary - match &*s.source_file_cache.0.external_src.lock() { - ExternalSource::Foreign { original_start_pos, .. } => *original_start_pos, - src => panic!("Unexpected external source {:?}", src), - } - }; - let lo = (span.lo - s.source_file_cache.0.start_pos) + external_start_pos; - let hi = (span.hi - s.source_file_cache.0.start_pos) + external_start_pos; + let (tag, lo, hi, metadata_index) = + if s.source_file_cache.0.is_imported() && !s.is_proc_macro { + // To simplify deserialization, we 'rebase' this span onto the crate it originally came from + // (the crate that 'owns' the file it references. These rebased 'lo' and 'hi' values + // are relative to the source map information for the 'foreign' crate whose CrateNum + // we write into the metadata. This allows `imported_source_files` to binary + // search through the 'foreign' crate's source map information, using the + // deserialized 'lo' and 'hi' values directly. + // + // All of this logic ensures that the final result of deserialization is a 'normal' + // Span that can be used without any additional trouble. + let (external_start_pos, metadata_index) = { + // Introduce a new scope so that we drop the 'lock()' temporary + match &*s.source_file_cache.0.external_src.lock() { + ExternalSource::Foreign { original_start_pos, metadata_index, .. } => { + (*original_start_pos, *metadata_index as usize) + } + src => panic!("Unexpected external source {:?}", src), + } + }; + let lo = (span.lo - s.source_file_cache.0.start_pos) + external_start_pos; + let hi = (span.hi - s.source_file_cache.0.start_pos) + external_start_pos; - (TAG_VALID_SPAN_FOREIGN, lo, hi) - } else { - (TAG_VALID_SPAN_LOCAL, span.lo, span.hi) - }; + (TAG_VALID_SPAN_FOREIGN, lo, hi, metadata_index) + } else { + // Record the fact that we need to encode the data for this `SourceFile` + let source_files = + s.required_source_files.as_mut().expect("Already encoded SourceMap!"); + let (source_file_index, _) = source_files.insert_full(s.source_file_cache.1); + + (TAG_VALID_SPAN_LOCAL, span.lo, span.hi, source_file_index) + }; tag.encode(s); lo.encode(s); @@ -298,6 +298,9 @@ impl<'a, 'tcx> Encodable> for Span { let len = hi - lo; len.encode(s); + // Encode the index of the `SourceFile` for the span, in order to make decoding faster. + metadata_index.encode(s); + if tag == TAG_VALID_SPAN_FOREIGN { // This needs to be two lines to avoid holding the `s.source_file_cache` // while calling `cnum.encode(s)` @@ -460,18 +463,17 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> { let working_directory = &self.tcx.sess.opts.working_dir; - let adapted = all_source_files + // Only serialize `SourceFile`s that were used during the encoding of a `Span`. + // + // The order in which we encode source files is important here: the on-disk format for + // `Span` contains the index of the corresponding `SourceFile`. + let adapted = required_source_files .iter() - .enumerate() - .filter(|(idx, source_file)| { - // Only serialize `SourceFile`s that were used - // during the encoding of a `Span` - required_source_files.contains(*idx) && - // Don't serialize imported `SourceFile`s, unless - // we're in a proc-macro crate. - (!source_file.is_imported() || self.is_proc_macro) - }) - .map(|(_, source_file)| { + .map(|&source_file_index| &all_source_files[source_file_index]) + .map(|source_file| { + // Don't serialize imported `SourceFile`s, unless we're in a proc-macro crate. + assert!(!source_file.is_imported() || self.is_proc_macro); + // At export time we expand all source file paths to absolute paths because // downstream compilation sessions can have a different compiler working // directory, so relative paths from this or any other upstream crate @@ -2228,7 +2230,7 @@ fn encode_metadata_impl(tcx: TyCtxt<'_>, path: &Path) { let source_map_files = tcx.sess.source_map().files(); let source_file_cache = (source_map_files[0].clone(), 0); - let required_source_files = Some(GrowableBitSet::with_capacity(source_map_files.len())); + let required_source_files = Some(FxIndexSet::default()); drop(source_map_files); let hygiene_ctxt = HygieneEncodeContext::default(); diff --git a/compiler/rustc_span/src/lib.rs b/compiler/rustc_span/src/lib.rs index cf306928151..bae05f2f02e 100644 --- a/compiler/rustc_span/src/lib.rs +++ b/compiler/rustc_span/src/lib.rs @@ -1098,6 +1098,8 @@ pub enum ExternalSource { original_start_pos: BytePos, /// The end of this SourceFile within the source_map of its original crate. original_end_pos: BytePos, + /// Index of the file inside metadata. + metadata_index: u32, }, } diff --git a/compiler/rustc_span/src/source_map.rs b/compiler/rustc_span/src/source_map.rs index 28381157d50..387777f7af6 100644 --- a/compiler/rustc_span/src/source_map.rs +++ b/compiler/rustc_span/src/source_map.rs @@ -337,6 +337,7 @@ impl SourceMap { mut file_local_normalized_pos: Vec, original_start_pos: BytePos, original_end_pos: BytePos, + metadata_index: u32, ) -> Lrc { let start_pos = self .allocate_address_space(source_len) @@ -383,6 +384,7 @@ impl SourceMap { kind: ExternalSourceKind::AbsentOk, original_start_pos, original_end_pos, + metadata_index, }), start_pos, end_pos, diff --git a/compiler/rustc_span/src/source_map/tests.rs b/compiler/rustc_span/src/source_map/tests.rs index be827cea874..2cada019b7f 100644 --- a/compiler/rustc_span/src/source_map/tests.rs +++ b/compiler/rustc_span/src/source_map/tests.rs @@ -252,6 +252,7 @@ fn t10() { normalized_pos, start_pos, end_pos, + 0, ); assert!( From a09e9c99a4c58ab6208aa44c4061df9aa2e40197 Mon Sep 17 00:00:00 2001 From: Camille GILLOT Date: Sat, 6 Aug 2022 23:00:49 +0200 Subject: [PATCH 2/8] Decode SourceFile out of order. --- compiler/rustc_metadata/src/rmeta/decoder.rs | 179 +++++++++--------- .../src/rmeta/decoder/cstore_impl.rs | 5 +- compiler/rustc_metadata/src/rmeta/encoder.rs | 105 +++++----- compiler/rustc_metadata/src/rmeta/mod.rs | 2 +- 4 files changed, 148 insertions(+), 143 deletions(-) diff --git a/compiler/rustc_metadata/src/rmeta/decoder.rs b/compiler/rustc_metadata/src/rmeta/decoder.rs index 444b5884187..29acacbf3b3 100644 --- a/compiler/rustc_metadata/src/rmeta/decoder.rs +++ b/compiler/rustc_metadata/src/rmeta/decoder.rs @@ -37,6 +37,7 @@ use rustc_span::symbol::{sym, Ident, Symbol}; use rustc_span::{self, BytePos, ExpnId, Pos, Span, SyntaxContext, DUMMY_SP}; use proc_macro::bridge::client::ProcMacro; +use std::cell::RefCell; use std::io; use std::iter::TrustedLen; use std::mem; @@ -99,7 +100,7 @@ pub(crate) struct CrateMetadata { /// Proc macro descriptions for this crate, if it's a proc macro crate. raw_proc_macros: Option<&'static [ProcMacro]>, /// Source maps for code from the crate. - source_map_import_info: OnceCell>, + source_map_import_info: RefCell>>, /// For every definition in this crate, maps its `DefPathHash` to its `DefIndex`. def_path_hash_map: DefPathHashMapRef<'static>, /// Likewise for ExpnHash. @@ -143,7 +144,8 @@ pub(crate) struct CrateMetadata { } /// Holds information about a rustc_span::SourceFile imported from another crate. -/// See `imported_source_files()` for more information. +/// See `imported_source_file()` for more information. +#[derive(Clone)] struct ImportedSourceFile { /// This SourceFile's byte-offset within the source_map of its original crate original_start_pos: rustc_span::BytePos, @@ -528,7 +530,7 @@ impl<'a, 'tcx> Decodable> for Span { }; // Index of the file in the corresponding crate's list of encoded files. - let metadata_index = usize::decode(decoder); + let metadata_index = u32::decode(decoder); // There are two possibilities here: // 1. This is a 'local span', which is located inside a `SourceFile` @@ -556,10 +558,10 @@ impl<'a, 'tcx> Decodable> for Span { // to be based on the *foreign* crate (e.g. crate C), not the crate // we are writing metadata for (e.g. crate B). This allows us to // treat the 'local' and 'foreign' cases almost identically during deserialization: - // we can call `imported_source_files` for the proper crate, and binary search + // we can call `imported_source_file` for the proper crate, and binary search // through the returned slice using our span. - let imported_source_files = if tag == TAG_VALID_SPAN_LOCAL { - decoder.cdata().imported_source_files(sess) + let source_file = if tag == TAG_VALID_SPAN_LOCAL { + decoder.cdata().imported_source_file(metadata_index, sess) } else { // When we encode a proc-macro crate, all `Span`s should be encoded // with `TAG_VALID_SPAN_LOCAL` @@ -587,13 +589,9 @@ impl<'a, 'tcx> Decodable> for Span { decoder.last_source_file_index = 0; let foreign_data = decoder.cdata().cstore.get_crate_data(cnum); - foreign_data.imported_source_files(sess) + foreign_data.imported_source_file(metadata_index, sess) }; - // Optimize for the case that most spans within a translated item - // originate from the same source_file. - let source_file = &imported_source_files[metadata_index]; - // Make sure our binary search above is correct. debug_assert!( lo >= source_file.original_start_pos && lo <= source_file.original_end_pos, @@ -1438,7 +1436,7 @@ impl<'a, 'tcx> CrateMetadataRef<'a> { /// /// Proc macro crates don't currently export spans, so this function does not have /// to work for them. - fn imported_source_files(self, sess: &Session) -> &'a [ImportedSourceFile] { + fn imported_source_file(self, source_file_index: u32, sess: &Session) -> ImportedSourceFile { fn filter<'a>(sess: &Session, path: Option<&'a Path>) -> Option<&'a Path> { path.filter(|_| { // Only spend time on further checks if we have what to translate *to*. @@ -1526,94 +1524,97 @@ impl<'a, 'tcx> CrateMetadataRef<'a> { } }; - self.cdata.source_map_import_info.get_or_init(|| { - let external_source_map = self.root.source_map.decode(self); + let mut import_info = self.cdata.source_map_import_info.borrow_mut(); + for _ in import_info.len()..=(source_file_index as usize) { + import_info.push(None); + } + import_info[source_file_index as usize] + .get_or_insert_with(|| { + let source_file_to_import = self + .root + .source_map + .get(self, source_file_index) + .expect("missing source file") + .decode(self); - external_source_map - .enumerate() - .map(|(source_file_index, source_file_to_import)| { - // We can't reuse an existing SourceFile, so allocate a new one - // containing the information we need. - let rustc_span::SourceFile { - mut name, - src_hash, - start_pos, - end_pos, - lines, - multibyte_chars, - non_narrow_chars, - normalized_pos, - name_hash, - .. - } = source_file_to_import; + // We can't reuse an existing SourceFile, so allocate a new one + // containing the information we need. + let rustc_span::SourceFile { + mut name, + src_hash, + start_pos, + end_pos, + lines, + multibyte_chars, + non_narrow_chars, + normalized_pos, + name_hash, + .. + } = source_file_to_import; - // If this file is under $sysroot/lib/rustlib/src/ but has not been remapped - // during rust bootstrapping by `remap-debuginfo = true`, and the user - // wish to simulate that behaviour by -Z simulate-remapped-rust-src-base, - // then we change `name` to a similar state as if the rust was bootstrapped - // with `remap-debuginfo = true`. - // This is useful for testing so that tests about the effects of - // `try_to_translate_virtual_to_real` don't have to worry about how the - // compiler is bootstrapped. - if let Some(virtual_dir) = - &sess.opts.unstable_opts.simulate_remapped_rust_src_base - { - if let Some(real_dir) = &sess.opts.real_rust_source_base_dir { - if let rustc_span::FileName::Real(ref mut old_name) = name { - if let rustc_span::RealFileName::LocalPath(local) = old_name { - if let Ok(rest) = local.strip_prefix(real_dir) { - *old_name = rustc_span::RealFileName::Remapped { - local_path: None, - virtual_name: virtual_dir.join(rest), - }; - } + // If this file is under $sysroot/lib/rustlib/src/ but has not been remapped + // during rust bootstrapping by `remap-debuginfo = true`, and the user + // wish to simulate that behaviour by -Z simulate-remapped-rust-src-base, + // then we change `name` to a similar state as if the rust was bootstrapped + // with `remap-debuginfo = true`. + // This is useful for testing so that tests about the effects of + // `try_to_translate_virtual_to_real` don't have to worry about how the + // compiler is bootstrapped. + if let Some(virtual_dir) = &sess.opts.unstable_opts.simulate_remapped_rust_src_base + { + if let Some(real_dir) = &sess.opts.real_rust_source_base_dir { + if let rustc_span::FileName::Real(ref mut old_name) = name { + if let rustc_span::RealFileName::LocalPath(local) = old_name { + if let Ok(rest) = local.strip_prefix(real_dir) { + *old_name = rustc_span::RealFileName::Remapped { + local_path: None, + virtual_name: virtual_dir.join(rest), + }; } } } } + } - // If this file's path has been remapped to `/rustc/$hash`, - // we might be able to reverse that (also see comments above, - // on `try_to_translate_virtual_to_real`). - try_to_translate_virtual_to_real(&mut name); + // If this file's path has been remapped to `/rustc/$hash`, + // we might be able to reverse that (also see comments above, + // on `try_to_translate_virtual_to_real`). + try_to_translate_virtual_to_real(&mut name); - let source_length = (end_pos - start_pos).to_usize(); + let source_length = (end_pos - start_pos).to_usize(); - let local_version = sess.source_map().new_imported_source_file( - name, - src_hash, - name_hash, - source_length, - self.cnum, - lines, - multibyte_chars, - non_narrow_chars, - normalized_pos, - start_pos, - end_pos, - source_file_index - .try_into() - .expect("cannot import more than U32_MAX files"), - ); - debug!( - "CrateMetaData::imported_source_files alloc \ + let local_version = sess.source_map().new_imported_source_file( + name, + src_hash, + name_hash, + source_length, + self.cnum, + lines, + multibyte_chars, + non_narrow_chars, + normalized_pos, + start_pos, + end_pos, + source_file_index, + ); + debug!( + "CrateMetaData::imported_source_files alloc \ source_file {:?} original (start_pos {:?} end_pos {:?}) \ translated (start_pos {:?} end_pos {:?})", - local_version.name, - start_pos, - end_pos, - local_version.start_pos, - local_version.end_pos - ); + local_version.name, + start_pos, + end_pos, + local_version.start_pos, + local_version.end_pos + ); - ImportedSourceFile { - original_start_pos: start_pos, - original_end_pos: end_pos, - translated_source_file: local_version, - } - }) - .collect() - }) + ImportedSourceFile { + original_start_pos: start_pos, + original_end_pos: end_pos, + translated_source_file: local_version, + } + }) + .clone() } fn get_generator_diagnostic_data( @@ -1676,7 +1677,7 @@ impl CrateMetadata { trait_impls, incoherent_impls: Default::default(), raw_proc_macros, - source_map_import_info: OnceCell::new(), + source_map_import_info: RefCell::new(Vec::new()), def_path_hash_map, expn_hash_map: Default::default(), alloc_decoding_state, diff --git a/compiler/rustc_metadata/src/rmeta/decoder/cstore_impl.rs b/compiler/rustc_metadata/src/rmeta/decoder/cstore_impl.rs index 38ce50e8323..8b4220d4492 100644 --- a/compiler/rustc_metadata/src/rmeta/decoder/cstore_impl.rs +++ b/compiler/rustc_metadata/src/rmeta/decoder/cstore_impl.rs @@ -675,6 +675,9 @@ impl CrateStore for CStore { } fn import_source_files(&self, sess: &Session, cnum: CrateNum) { - self.get_crate_data(cnum).imported_source_files(sess); + let cdata = self.get_crate_data(cnum); + for file_index in 0..cdata.root.source_map.size() { + cdata.imported_source_file(file_index as u32, sess); + } } } diff --git a/compiler/rustc_metadata/src/rmeta/encoder.rs b/compiler/rustc_metadata/src/rmeta/encoder.rs index de78ab41f05..576875f08c2 100644 --- a/compiler/rustc_metadata/src/rmeta/encoder.rs +++ b/compiler/rustc_metadata/src/rmeta/encoder.rs @@ -272,7 +272,7 @@ impl<'a, 'tcx> Encodable> for Span { // Introduce a new scope so that we drop the 'lock()' temporary match &*s.source_file_cache.0.external_src.lock() { ExternalSource::Foreign { original_start_pos, metadata_index, .. } => { - (*original_start_pos, *metadata_index as usize) + (*original_start_pos, *metadata_index) } src => panic!("Unexpected external source {:?}", src), } @@ -286,6 +286,8 @@ impl<'a, 'tcx> Encodable> for Span { let source_files = s.required_source_files.as_mut().expect("Already encoded SourceMap!"); let (source_file_index, _) = source_files.insert_full(s.source_file_cache.1); + let source_file_index: u32 = + source_file_index.try_into().expect("cannot export more than U32_MAX files"); (TAG_VALID_SPAN_LOCAL, span.lo, span.hi, source_file_index) }; @@ -452,7 +454,7 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> { self.lazy(DefPathHashMapRef::BorrowedFromTcx(self.tcx.def_path_hash_to_def_index_map())) } - fn encode_source_map(&mut self) -> LazyArray { + fn encode_source_map(&mut self) -> LazyTable> { let source_map = self.tcx.sess.source_map(); let all_source_files = source_map.files(); @@ -463,65 +465,64 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> { let working_directory = &self.tcx.sess.opts.working_dir; + let mut adapted = TableBuilder::default(); + // Only serialize `SourceFile`s that were used during the encoding of a `Span`. // // The order in which we encode source files is important here: the on-disk format for // `Span` contains the index of the corresponding `SourceFile`. - let adapted = required_source_files - .iter() - .map(|&source_file_index| &all_source_files[source_file_index]) - .map(|source_file| { - // Don't serialize imported `SourceFile`s, unless we're in a proc-macro crate. - assert!(!source_file.is_imported() || self.is_proc_macro); + for (on_disk_index, &source_file_index) in required_source_files.iter().enumerate() { + let source_file = &all_source_files[source_file_index]; + // Don't serialize imported `SourceFile`s, unless we're in a proc-macro crate. + assert!(!source_file.is_imported() || self.is_proc_macro); - // At export time we expand all source file paths to absolute paths because - // downstream compilation sessions can have a different compiler working - // directory, so relative paths from this or any other upstream crate - // won't be valid anymore. - // - // At this point we also erase the actual on-disk path and only keep - // the remapped version -- as is necessary for reproducible builds. - match source_file.name { - FileName::Real(ref original_file_name) => { - let adapted_file_name = - source_map.path_mapping().to_embeddable_absolute_path( - original_file_name.clone(), - working_directory, - ); + // At export time we expand all source file paths to absolute paths because + // downstream compilation sessions can have a different compiler working + // directory, so relative paths from this or any other upstream crate + // won't be valid anymore. + // + // At this point we also erase the actual on-disk path and only keep + // the remapped version -- as is necessary for reproducible builds. + let mut source_file = match source_file.name { + FileName::Real(ref original_file_name) => { + let adapted_file_name = source_map + .path_mapping() + .to_embeddable_absolute_path(original_file_name.clone(), working_directory); - if adapted_file_name != *original_file_name { - let mut adapted: SourceFile = (**source_file).clone(); - adapted.name = FileName::Real(adapted_file_name); - adapted.name_hash = { - let mut hasher: StableHasher = StableHasher::new(); - adapted.name.hash(&mut hasher); - hasher.finish::() - }; - Lrc::new(adapted) - } else { - // Nothing to adapt - source_file.clone() - } + if adapted_file_name != *original_file_name { + let mut adapted: SourceFile = (**source_file).clone(); + adapted.name = FileName::Real(adapted_file_name); + adapted.name_hash = { + let mut hasher: StableHasher = StableHasher::new(); + adapted.name.hash(&mut hasher); + hasher.finish::() + }; + Lrc::new(adapted) + } else { + // Nothing to adapt + source_file.clone() } - // expanded code, not from a file - _ => source_file.clone(), } - }) - .map(|mut source_file| { - // We're serializing this `SourceFile` into our crate metadata, - // so mark it as coming from this crate. - // This also ensures that we don't try to deserialize the - // `CrateNum` for a proc-macro dependency - since proc macro - // dependencies aren't loaded when we deserialize a proc-macro, - // trying to remap the `CrateNum` would fail. - if self.is_proc_macro { - Lrc::make_mut(&mut source_file).cnum = LOCAL_CRATE; - } - source_file - }) - .collect::>(); + // expanded code, not from a file + _ => source_file.clone(), + }; - self.lazy_array(adapted.iter().map(|rc| &**rc)) + // We're serializing this `SourceFile` into our crate metadata, + // so mark it as coming from this crate. + // This also ensures that we don't try to deserialize the + // `CrateNum` for a proc-macro dependency - since proc macro + // dependencies aren't loaded when we deserialize a proc-macro, + // trying to remap the `CrateNum` would fail. + if self.is_proc_macro { + Lrc::make_mut(&mut source_file).cnum = LOCAL_CRATE; + } + + let on_disk_index: u32 = + on_disk_index.try_into().expect("cannot export more than U32_MAX files"); + adapted.set(on_disk_index, self.lazy(source_file)); + } + + adapted.encode(&mut self.opaque) } fn encode_crate_root(&mut self) -> LazyValue { diff --git a/compiler/rustc_metadata/src/rmeta/mod.rs b/compiler/rustc_metadata/src/rmeta/mod.rs index 66bdecc30db..7a39f4d4e6b 100644 --- a/compiler/rustc_metadata/src/rmeta/mod.rs +++ b/compiler/rustc_metadata/src/rmeta/mod.rs @@ -249,7 +249,7 @@ pub(crate) struct CrateRoot { def_path_hash_map: LazyValue>, - source_map: LazyArray, + source_map: LazyTable>, compiler_builtins: bool, needs_allocator: bool, From d74af405eb349922e7aca4ecc510991bb5ae9ab4 Mon Sep 17 00:00:00 2001 From: Camille GILLOT Date: Sat, 6 Aug 2022 23:09:15 +0200 Subject: [PATCH 3/8] Remove unused cache. --- compiler/rustc_metadata/src/rmeta/decoder.rs | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/compiler/rustc_metadata/src/rmeta/decoder.rs b/compiler/rustc_metadata/src/rmeta/decoder.rs index 29acacbf3b3..be5c7669b01 100644 --- a/compiler/rustc_metadata/src/rmeta/decoder.rs +++ b/compiler/rustc_metadata/src/rmeta/decoder.rs @@ -162,9 +162,6 @@ pub(super) struct DecodeContext<'a, 'tcx> { sess: Option<&'tcx Session>, tcx: Option>, - // Cache the last used source_file for translating spans as an optimization. - last_source_file_index: usize, - lazy_state: LazyState, // Used for decoding interpret::AllocIds in a cached & thread-safe manner. @@ -193,7 +190,6 @@ pub(super) trait Metadata<'a, 'tcx>: Copy { blob: self.blob(), sess: self.sess().or(tcx.map(|tcx| tcx.sess)), tcx, - last_source_file_index: 0, lazy_state: LazyState::NoNode, alloc_decoding_session: self .cdata() @@ -582,12 +578,6 @@ impl<'a, 'tcx> Decodable> for Span { cnum ); - // Decoding 'foreign' spans should be rare enough that it's - // not worth it to maintain a per-CrateNum cache for `last_source_file_index`. - // We just set it to 0, to ensure that we don't try to access something out - // of bounds for our initial 'guess' - decoder.last_source_file_index = 0; - let foreign_data = decoder.cdata().cstore.get_crate_data(cnum); foreign_data.imported_source_file(metadata_index, sess) }; From 7c2d722fb0b9ef5e0a2c3db8a8e35789f6ff8dc2 Mon Sep 17 00:00:00 2001 From: Camille GILLOT Date: Sat, 6 Aug 2022 23:13:09 +0200 Subject: [PATCH 4/8] Simplify encoding a bit. --- compiler/rustc_metadata/src/rmeta/encoder.rs | 69 ++++++++++---------- 1 file changed, 35 insertions(+), 34 deletions(-) diff --git a/compiler/rustc_metadata/src/rmeta/encoder.rs b/compiler/rustc_metadata/src/rmeta/encoder.rs index 576875f08c2..463cfece715 100644 --- a/compiler/rustc_metadata/src/rmeta/encoder.rs +++ b/compiler/rustc_metadata/src/rmeta/encoder.rs @@ -234,13 +234,17 @@ impl<'a, 'tcx> Encodable> for Span { s.source_file_cache = (source_map.files()[source_file_index].clone(), source_file_index); } + let (ref source_file, source_file_index) = s.source_file_cache; - if !s.source_file_cache.0.contains(span.hi) { + if !source_file.contains(span.hi) { // Unfortunately, macro expansion still sometimes generates Spans // that malformed in this way. return TAG_PARTIAL_SPAN.encode(s); } + // Length is independent of the span provenance. + let len = span.hi - span.lo; + // There are two possible cases here: // 1. This span comes from a 'foreign' crate - e.g. some crate upstream of the // crate we are writing metadata for. When the metadata for *this* crate gets @@ -257,47 +261,44 @@ impl<'a, 'tcx> Encodable> for Span { // if we're a proc-macro crate. // This allows us to avoid loading the dependencies of proc-macro crates: all of // the information we need to decode `Span`s is stored in the proc-macro crate. - let (tag, lo, hi, metadata_index) = - if s.source_file_cache.0.is_imported() && !s.is_proc_macro { - // To simplify deserialization, we 'rebase' this span onto the crate it originally came from - // (the crate that 'owns' the file it references. These rebased 'lo' and 'hi' values - // are relative to the source map information for the 'foreign' crate whose CrateNum - // we write into the metadata. This allows `imported_source_files` to binary - // search through the 'foreign' crate's source map information, using the - // deserialized 'lo' and 'hi' values directly. - // - // All of this logic ensures that the final result of deserialization is a 'normal' - // Span that can be used without any additional trouble. - let (external_start_pos, metadata_index) = { - // Introduce a new scope so that we drop the 'lock()' temporary - match &*s.source_file_cache.0.external_src.lock() { - ExternalSource::Foreign { original_start_pos, metadata_index, .. } => { - (*original_start_pos, *metadata_index) - } - src => panic!("Unexpected external source {:?}", src), + let (tag, lo, metadata_index) = if source_file.is_imported() && !s.is_proc_macro { + // To simplify deserialization, we 'rebase' this span onto the crate it originally came from + // (the crate that 'owns' the file it references. These rebased 'lo' and 'hi' values + // are relative to the source map information for the 'foreign' crate whose CrateNum + // we write into the metadata. This allows `imported_source_files` to binary + // search through the 'foreign' crate's source map information, using the + // deserialized 'lo' and 'hi' values directly. + // + // All of this logic ensures that the final result of deserialization is a 'normal' + // Span that can be used without any additional trouble. + let (external_start_pos, metadata_index) = { + // Introduce a new scope so that we drop the 'lock()' temporary + match &*source_file.external_src.lock() { + ExternalSource::Foreign { original_start_pos, metadata_index, .. } => { + (*original_start_pos, *metadata_index) } - }; - let lo = (span.lo - s.source_file_cache.0.start_pos) + external_start_pos; - let hi = (span.hi - s.source_file_cache.0.start_pos) + external_start_pos; - - (TAG_VALID_SPAN_FOREIGN, lo, hi, metadata_index) - } else { - // Record the fact that we need to encode the data for this `SourceFile` - let source_files = - s.required_source_files.as_mut().expect("Already encoded SourceMap!"); - let (source_file_index, _) = source_files.insert_full(s.source_file_cache.1); - let source_file_index: u32 = - source_file_index.try_into().expect("cannot export more than U32_MAX files"); - - (TAG_VALID_SPAN_LOCAL, span.lo, span.hi, source_file_index) + src => panic!("Unexpected external source {:?}", src), + } }; + let lo = (span.lo - source_file.start_pos) + external_start_pos; + + (TAG_VALID_SPAN_FOREIGN, lo, metadata_index) + } else { + // Record the fact that we need to encode the data for this `SourceFile` + let source_files = + s.required_source_files.as_mut().expect("Already encoded SourceMap!"); + let (metadata_index, _) = source_files.insert_full(source_file_index); + let metadata_index: u32 = + metadata_index.try_into().expect("cannot export more than U32_MAX files"); + + (TAG_VALID_SPAN_LOCAL, span.lo, metadata_index) + }; tag.encode(s); lo.encode(s); // Encode length which is usually less than span.hi and profits more // from the variable-length integer encoding that we use. - let len = hi - lo; len.encode(s); // Encode the index of the `SourceFile` for the span, in order to make decoding faster. From 448c25599747a777ff23cbb5d419021d6994c5cb Mon Sep 17 00:00:00 2001 From: Camille GILLOT Date: Sat, 6 Aug 2022 23:16:27 +0200 Subject: [PATCH 5/8] Bless ui tests. --- src/test/ui/issues/issue-31173.stderr | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/test/ui/issues/issue-31173.stderr b/src/test/ui/issues/issue-31173.stderr index 68337a715e1..85ad950f389 100644 --- a/src/test/ui/issues/issue-31173.stderr +++ b/src/test/ui/issues/issue-31173.stderr @@ -18,16 +18,16 @@ error[E0599]: the method `collect` exists for struct `Cloned, [closure@$DIR/issue-31173.rs:6:39: 6:43]>>` due to unsatisfied trait bounds | - ::: $SRC_DIR/core/src/iter/adapters/cloned.rs:LL:COL - | -LL | pub struct Cloned { - | -------------------- doesn't satisfy `_: Iterator` - | ::: $SRC_DIR/core/src/iter/adapters/take_while.rs:LL:COL | LL | pub struct TakeWhile { | -------------------------- doesn't satisfy `<_ as Iterator>::Item = &_` | + ::: $SRC_DIR/core/src/iter/adapters/cloned.rs:LL:COL + | +LL | pub struct Cloned { + | -------------------- doesn't satisfy `_: Iterator` + | = note: the following trait bounds were not satisfied: `, [closure@$DIR/issue-31173.rs:6:39: 6:43]> as Iterator>::Item = &_` which is required by `Cloned, [closure@$DIR/issue-31173.rs:6:39: 6:43]>>: Iterator` From 16ba778c12e3303ab4d334c1c3dbfa5173248a80 Mon Sep 17 00:00:00 2001 From: Camille GILLOT Date: Sat, 6 Aug 2022 23:27:45 +0200 Subject: [PATCH 6/8] Support parallel compiler. --- compiler/rustc_metadata/src/rmeta/decoder.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/compiler/rustc_metadata/src/rmeta/decoder.rs b/compiler/rustc_metadata/src/rmeta/decoder.rs index be5c7669b01..a58f6e6e1de 100644 --- a/compiler/rustc_metadata/src/rmeta/decoder.rs +++ b/compiler/rustc_metadata/src/rmeta/decoder.rs @@ -37,7 +37,6 @@ use rustc_span::symbol::{sym, Ident, Symbol}; use rustc_span::{self, BytePos, ExpnId, Pos, Span, SyntaxContext, DUMMY_SP}; use proc_macro::bridge::client::ProcMacro; -use std::cell::RefCell; use std::io; use std::iter::TrustedLen; use std::mem; @@ -100,7 +99,7 @@ pub(crate) struct CrateMetadata { /// Proc macro descriptions for this crate, if it's a proc macro crate. raw_proc_macros: Option<&'static [ProcMacro]>, /// Source maps for code from the crate. - source_map_import_info: RefCell>>, + source_map_import_info: Lock>>, /// For every definition in this crate, maps its `DefPathHash` to its `DefIndex`. def_path_hash_map: DefPathHashMapRef<'static>, /// Likewise for ExpnHash. @@ -1514,7 +1513,7 @@ impl<'a, 'tcx> CrateMetadataRef<'a> { } }; - let mut import_info = self.cdata.source_map_import_info.borrow_mut(); + let mut import_info = self.cdata.source_map_import_info.lock(); for _ in import_info.len()..=(source_file_index as usize) { import_info.push(None); } @@ -1667,7 +1666,7 @@ impl CrateMetadata { trait_impls, incoherent_impls: Default::default(), raw_proc_macros, - source_map_import_info: RefCell::new(Vec::new()), + source_map_import_info: Lock::new(Vec::new()), def_path_hash_map, expn_hash_map: Default::default(), alloc_decoding_state, From bacb4db48c7fe109a7d480b3b5b1cd03898db4a6 Mon Sep 17 00:00:00 2001 From: Camille GILLOT Date: Sun, 7 Aug 2022 12:27:38 +0200 Subject: [PATCH 7/8] Only encode position from start of file. --- compiler/rustc_metadata/src/rmeta/decoder.rs | 18 ++++++-------- compiler/rustc_metadata/src/rmeta/encoder.rs | 26 ++++++++++---------- compiler/rustc_span/src/lib.rs | 4 --- compiler/rustc_span/src/source_map.rs | 4 +-- 4 files changed, 22 insertions(+), 30 deletions(-) diff --git a/compiler/rustc_metadata/src/rmeta/decoder.rs b/compiler/rustc_metadata/src/rmeta/decoder.rs index a58f6e6e1de..4b2dcc7a70b 100644 --- a/compiler/rustc_metadata/src/rmeta/decoder.rs +++ b/compiler/rustc_metadata/src/rmeta/decoder.rs @@ -581,28 +581,26 @@ impl<'a, 'tcx> Decodable> for Span { foreign_data.imported_source_file(metadata_index, sess) }; - // Make sure our binary search above is correct. + // Make sure our span is well-formed. debug_assert!( - lo >= source_file.original_start_pos && lo <= source_file.original_end_pos, - "Bad binary search: lo={:?} source_file.original_start_pos={:?} source_file.original_end_pos={:?}", + lo + source_file.original_start_pos <= source_file.original_end_pos, + "Malformed encoded span: lo={:?} source_file.original_start_pos={:?} source_file.original_end_pos={:?}", lo, source_file.original_start_pos, source_file.original_end_pos ); - // Make sure we correctly filtered out invalid spans during encoding + // Make sure we correctly filtered out invalid spans during encoding. debug_assert!( - hi >= source_file.original_start_pos && hi <= source_file.original_end_pos, - "Bad binary search: hi={:?} source_file.original_start_pos={:?} source_file.original_end_pos={:?}", + hi + source_file.original_start_pos <= source_file.original_end_pos, + "Malformed encoded span: hi={:?} source_file.original_start_pos={:?} source_file.original_end_pos={:?}", hi, source_file.original_start_pos, source_file.original_end_pos ); - let lo = - (lo + source_file.translated_source_file.start_pos) - source_file.original_start_pos; - let hi = - (hi + source_file.translated_source_file.start_pos) - source_file.original_start_pos; + let lo = lo + source_file.translated_source_file.start_pos; + let hi = hi + source_file.translated_source_file.start_pos; // Do not try to decode parent for foreign spans. Span::new(lo, hi, ctxt, None) diff --git a/compiler/rustc_metadata/src/rmeta/encoder.rs b/compiler/rustc_metadata/src/rmeta/encoder.rs index 463cfece715..6b3118e7152 100644 --- a/compiler/rustc_metadata/src/rmeta/encoder.rs +++ b/compiler/rustc_metadata/src/rmeta/encoder.rs @@ -235,6 +235,7 @@ impl<'a, 'tcx> Encodable> for Span { (source_map.files()[source_file_index].clone(), source_file_index); } let (ref source_file, source_file_index) = s.source_file_cache; + debug_assert!(source_file.contains(span.lo)); if !source_file.contains(span.hi) { // Unfortunately, macro expansion still sometimes generates Spans @@ -242,9 +243,6 @@ impl<'a, 'tcx> Encodable> for Span { return TAG_PARTIAL_SPAN.encode(s); } - // Length is independent of the span provenance. - let len = span.hi - span.lo; - // There are two possible cases here: // 1. This span comes from a 'foreign' crate - e.g. some crate upstream of the // crate we are writing metadata for. When the metadata for *this* crate gets @@ -261,7 +259,7 @@ impl<'a, 'tcx> Encodable> for Span { // if we're a proc-macro crate. // This allows us to avoid loading the dependencies of proc-macro crates: all of // the information we need to decode `Span`s is stored in the proc-macro crate. - let (tag, lo, metadata_index) = if source_file.is_imported() && !s.is_proc_macro { + let (tag, metadata_index) = if source_file.is_imported() && !s.is_proc_macro { // To simplify deserialization, we 'rebase' this span onto the crate it originally came from // (the crate that 'owns' the file it references. These rebased 'lo' and 'hi' values // are relative to the source map information for the 'foreign' crate whose CrateNum @@ -271,18 +269,15 @@ impl<'a, 'tcx> Encodable> for Span { // // All of this logic ensures that the final result of deserialization is a 'normal' // Span that can be used without any additional trouble. - let (external_start_pos, metadata_index) = { + let metadata_index = { // Introduce a new scope so that we drop the 'lock()' temporary match &*source_file.external_src.lock() { - ExternalSource::Foreign { original_start_pos, metadata_index, .. } => { - (*original_start_pos, *metadata_index) - } + ExternalSource::Foreign { metadata_index, .. } => *metadata_index, src => panic!("Unexpected external source {:?}", src), } }; - let lo = (span.lo - source_file.start_pos) + external_start_pos; - (TAG_VALID_SPAN_FOREIGN, lo, metadata_index) + (TAG_VALID_SPAN_FOREIGN, metadata_index) } else { // Record the fact that we need to encode the data for this `SourceFile` let source_files = @@ -291,14 +286,19 @@ impl<'a, 'tcx> Encodable> for Span { let metadata_index: u32 = metadata_index.try_into().expect("cannot export more than U32_MAX files"); - (TAG_VALID_SPAN_LOCAL, span.lo, metadata_index) + (TAG_VALID_SPAN_LOCAL, metadata_index) }; - tag.encode(s); - lo.encode(s); + // Encode the start position relative to the file start, so we profit more from the + // variable-length integer encoding. + let lo = span.lo - source_file.start_pos; // Encode length which is usually less than span.hi and profits more // from the variable-length integer encoding that we use. + let len = span.hi - span.lo; + + tag.encode(s); + lo.encode(s); len.encode(s); // Encode the index of the `SourceFile` for the span, in order to make decoding faster. diff --git a/compiler/rustc_span/src/lib.rs b/compiler/rustc_span/src/lib.rs index bae05f2f02e..8d26cd6bee3 100644 --- a/compiler/rustc_span/src/lib.rs +++ b/compiler/rustc_span/src/lib.rs @@ -1094,10 +1094,6 @@ pub enum ExternalSource { Unneeded, Foreign { kind: ExternalSourceKind, - /// This SourceFile's byte-offset within the source_map of its original crate. - original_start_pos: BytePos, - /// The end of this SourceFile within the source_map of its original crate. - original_end_pos: BytePos, /// Index of the file inside metadata. metadata_index: u32, }, diff --git a/compiler/rustc_span/src/source_map.rs b/compiler/rustc_span/src/source_map.rs index 387777f7af6..c0fbb7f2c9f 100644 --- a/compiler/rustc_span/src/source_map.rs +++ b/compiler/rustc_span/src/source_map.rs @@ -336,7 +336,7 @@ impl SourceMap { mut file_local_non_narrow_chars: Vec, mut file_local_normalized_pos: Vec, original_start_pos: BytePos, - original_end_pos: BytePos, + _original_end_pos: BytePos, metadata_index: u32, ) -> Lrc { let start_pos = self @@ -382,8 +382,6 @@ impl SourceMap { src_hash, external_src: Lock::new(ExternalSource::Foreign { kind: ExternalSourceKind::AbsentOk, - original_start_pos, - original_end_pos, metadata_index, }), start_pos, From 0d41f9145ce4f884a53fddefe0624060eb22609b Mon Sep 17 00:00:00 2001 From: Camille GILLOT Date: Mon, 8 Aug 2022 21:12:04 +0200 Subject: [PATCH 8/8] Remove unused parameter. --- compiler/rustc_metadata/src/rmeta/decoder.rs | 1 - compiler/rustc_span/src/source_map.rs | 1 - compiler/rustc_span/src/source_map/tests.rs | 1 - 3 files changed, 3 deletions(-) diff --git a/compiler/rustc_metadata/src/rmeta/decoder.rs b/compiler/rustc_metadata/src/rmeta/decoder.rs index 4b2dcc7a70b..2a3693a360f 100644 --- a/compiler/rustc_metadata/src/rmeta/decoder.rs +++ b/compiler/rustc_metadata/src/rmeta/decoder.rs @@ -1581,7 +1581,6 @@ impl<'a, 'tcx> CrateMetadataRef<'a> { non_narrow_chars, normalized_pos, start_pos, - end_pos, source_file_index, ); debug!( diff --git a/compiler/rustc_span/src/source_map.rs b/compiler/rustc_span/src/source_map.rs index c0fbb7f2c9f..108b169e306 100644 --- a/compiler/rustc_span/src/source_map.rs +++ b/compiler/rustc_span/src/source_map.rs @@ -336,7 +336,6 @@ impl SourceMap { mut file_local_non_narrow_chars: Vec, mut file_local_normalized_pos: Vec, original_start_pos: BytePos, - _original_end_pos: BytePos, metadata_index: u32, ) -> Lrc { let start_pos = self diff --git a/compiler/rustc_span/src/source_map/tests.rs b/compiler/rustc_span/src/source_map/tests.rs index 2cada019b7f..3058ec45a64 100644 --- a/compiler/rustc_span/src/source_map/tests.rs +++ b/compiler/rustc_span/src/source_map/tests.rs @@ -251,7 +251,6 @@ fn t10() { non_narrow_chars, normalized_pos, start_pos, - end_pos, 0, );