Auto merge of #47269 - michaelwoerister:mangled-cgu-names, r=alexcrichton

Shorten names of some compiler generated artifacts.

This PR makes the compiler mangle codegen unit names by default. Every codegen unit name will now be a hash-derived string of up to 16 characters. It also shortens the file extensions of some intermediate compiler products. Hopefully, these changes will reduce the pressure on tools with path length restrictions like buildbot. The change should also solve problems with case-insensitive file systems.

cc #47186 and #47222

r? @alexcrichton
This commit is contained in:
bors 2018-01-09 16:04:21 +00:00
commit 61452e506f
10 changed files with 70 additions and 28 deletions

View file

@ -12,9 +12,11 @@ use syntax::ast::NodeId;
use syntax::symbol::InternedString; use syntax::symbol::InternedString;
use ty::Instance; use ty::Instance;
use util::nodemap::FxHashMap; use util::nodemap::FxHashMap;
use rustc_data_structures::base_n;
use rustc_data_structures::stable_hasher::{HashStable, StableHasherResult, use rustc_data_structures::stable_hasher::{HashStable, StableHasherResult,
StableHasher}; StableHasher};
use ich::{Fingerprint, StableHashingContext, NodeIdHashingMode}; use ich::{Fingerprint, StableHashingContext, NodeIdHashingMode};
use std::hash::Hash;
#[derive(PartialEq, Eq, Clone, Copy, Debug, Hash)] #[derive(PartialEq, Eq, Clone, Copy, Debug, Hash)]
pub enum MonoItem<'tcx> { pub enum MonoItem<'tcx> {
@ -119,6 +121,16 @@ impl<'tcx> CodegenUnit<'tcx> {
{ {
&mut self.items &mut self.items
} }
/// Turns a human-readable codegen unit name into a short, filename-friendly
/// identifier.
///
/// The name is fed through a `StableHasher`, the result is truncated to its
/// low 80 bits, and those bits are rendered with the `base_n::CASE_INSENSITIVE`
/// alphabet so the output does not rely on letter case to stay unique.
pub fn mangle_name(human_readable_name: &str) -> String {
    // An 80-bit hash should be enough to avoid collisions while still
    // producing a reasonably short string for use in file names.
    const HASH_BITS: u32 = 80;

    let mut state = StableHasher::new();
    human_readable_name.hash(&mut state);
    let full_hash: u128 = state.finish();

    // Discard everything above the low `HASH_BITS` bits before encoding.
    let low_bits_mask = (1u128 << HASH_BITS) - 1;
    base_n::encode(full_hash & low_bits_mask, base_n::CASE_INSENSITIVE)
}
} }
impl<'tcx> HashStable<StableHashingContext<'tcx>> for CodegenUnit<'tcx> { impl<'tcx> HashStable<StableHashingContext<'tcx>> for CodegenUnit<'tcx> {

View file

@ -1234,6 +1234,8 @@ options! {DebuggingOptions, DebuggingSetter, basic_debugging_options,
"rewrite operators on i128 and u128 into lang item calls (typically provided \ "rewrite operators on i128 and u128 into lang item calls (typically provided \
by compiler-builtins) so translation doesn't need to support them, by compiler-builtins) so translation doesn't need to support them,
overriding the default for the current target"), overriding the default for the current target"),
human_readable_cgu_names: bool = (false, parse_bool, [TRACKED],
"generate human-readable, predictable names for codegen units"),
} }
pub fn default_lib_output() -> CrateType { pub fn default_lib_output() -> CrateType {

View file

@ -13,18 +13,21 @@
use std::str; use std::str;
pub const MAX_BASE: u64 = 64; pub const MAX_BASE: usize = 64;
pub const ALPHANUMERIC_ONLY: u64 = 62; pub const ALPHANUMERIC_ONLY: usize = 62;
pub const CASE_INSENSITIVE: usize = 36;
const BASE_64: &'static [u8; MAX_BASE as usize] = const BASE_64: &'static [u8; MAX_BASE as usize] =
b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ@$"; b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ@$";
#[inline] #[inline]
pub fn push_str(mut n: u64, base: u64, output: &mut String) { pub fn push_str(mut n: u128, base: usize, output: &mut String) {
debug_assert!(base >= 2 && base <= MAX_BASE); debug_assert!(base >= 2 && base <= MAX_BASE);
let mut s = [0u8; 64]; let mut s = [0u8; 128];
let mut index = 0; let mut index = 0;
let base = base as u128;
loop { loop {
s[index] = BASE_64[(n % base) as usize]; s[index] = BASE_64[(n % base) as usize];
index += 1; index += 1;
@ -39,16 +42,16 @@ pub fn push_str(mut n: u64, base: u64, output: &mut String) {
} }
#[inline] #[inline]
pub fn encode(n: u64, base: u64) -> String { pub fn encode(n: u128, base: usize) -> String {
let mut s = String::with_capacity(13); let mut s = String::new();
push_str(n, base, &mut s); push_str(n, base, &mut s);
s s
} }
#[test] #[test]
fn test_encode() { fn test_encode() {
fn test(n: u64, base: u64) { fn test(n: u128, base: usize) {
assert_eq!(Ok(n), u64::from_str_radix(&encode(n, base), base as u32)); assert_eq!(Ok(n), u128::from_str_radix(&encode(n, base), base as u32));
} }
for base in 2..37 { for base in 2..37 {
@ -57,7 +60,8 @@ fn test_encode() {
test(35, base); test(35, base);
test(36, base); test(36, base);
test(37, base); test(37, base);
test(u64::max_value(), base); test(u64::max_value() as u128, base);
test(u128::max_value(), base);
for i in 0 .. 1_000 { for i in 0 .. 1_000 {
test(i * 983, base); test(i * 983, base);

View file

@ -137,7 +137,7 @@ const QUERY_CACHE_FILENAME: &'static str = "query-cache.bin";
// or hexadecimal numbers (we want short file and directory names). Since these // or hexadecimal numbers (we want short file and directory names). Since these
// numbers will be used in file names, we choose an encoding that is not // numbers will be used in file names, we choose an encoding that is not
// case-sensitive (as opposed to base64, for example). // case-sensitive (as opposed to base64, for example).
const INT_ENCODE_BASE: u64 = 36; const INT_ENCODE_BASE: usize = base_n::CASE_INSENSITIVE;
pub fn dep_graph_path(sess: &Session) -> PathBuf { pub fn dep_graph_path(sess: &Session) -> PathBuf {
in_incr_comp_dir_sess(sess, DEP_GRAPH_FILENAME) in_incr_comp_dir_sess(sess, DEP_GRAPH_FILENAME)
@ -357,7 +357,7 @@ pub fn finalize_session_directory(sess: &Session, svh: Svh) {
let mut new_sub_dir_name = String::from(&old_sub_dir_name[.. dash_indices[2] + 1]); let mut new_sub_dir_name = String::from(&old_sub_dir_name[.. dash_indices[2] + 1]);
// Append the svh // Append the svh
base_n::push_str(svh.as_u64(), INT_ENCODE_BASE, &mut new_sub_dir_name); base_n::push_str(svh.as_u64() as u128, INT_ENCODE_BASE, &mut new_sub_dir_name);
// Create the full path // Create the full path
let new_path = incr_comp_session_dir.parent().unwrap().join(new_sub_dir_name); let new_path = incr_comp_session_dir.parent().unwrap().join(new_sub_dir_name);
@ -465,7 +465,7 @@ fn generate_session_dir_path(crate_dir: &Path) -> PathBuf {
let directory_name = format!("s-{}-{}-working", let directory_name = format!("s-{}-{}-working",
timestamp, timestamp,
base_n::encode(random_number as u64, base_n::encode(random_number as u128,
INT_ENCODE_BASE)); INT_ENCODE_BASE));
debug!("generate_session_dir_path: directory_name = {}", directory_name); debug!("generate_session_dir_path: directory_name = {}", directory_name);
let directory_path = crate_dir.join(directory_name); let directory_path = crate_dir.join(directory_name);
@ -599,7 +599,7 @@ fn timestamp_to_string(timestamp: SystemTime) -> String {
let duration = timestamp.duration_since(UNIX_EPOCH).unwrap(); let duration = timestamp.duration_since(UNIX_EPOCH).unwrap();
let micros = duration.as_secs() * 1_000_000 + let micros = duration.as_secs() * 1_000_000 +
(duration.subsec_nanos() as u64) / 1000; (duration.subsec_nanos() as u64) / 1000;
base_n::encode(micros, INT_ENCODE_BASE) base_n::encode(micros as u128, INT_ENCODE_BASE)
} }
fn string_to_timestamp(s: &str) -> Result<SystemTime, ()> { fn string_to_timestamp(s: &str) -> Result<SystemTime, ()> {
@ -626,7 +626,8 @@ fn crate_path(sess: &Session,
// The full crate disambiguator is really long. 64 bits of it should be // The full crate disambiguator is really long. 64 bits of it should be
// sufficient. // sufficient.
let crate_disambiguator = crate_disambiguator.to_fingerprint().to_smaller_hash(); let crate_disambiguator = crate_disambiguator.to_fingerprint().to_smaller_hash();
let crate_disambiguator = base_n::encode(crate_disambiguator, INT_ENCODE_BASE); let crate_disambiguator = base_n::encode(crate_disambiguator as u128,
INT_ENCODE_BASE);
let crate_name = format!("{}-{}", crate_name, crate_disambiguator); let crate_name = format!("{}-{}", crate_name, crate_disambiguator);
incr_dir.join(crate_name) incr_dir.join(crate_name)

View file

@ -35,9 +35,9 @@ pub fn save_trans_partition(sess: &Session,
let extension = match kind { let extension = match kind {
WorkProductFileKind::Object => "o", WorkProductFileKind::Object => "o",
WorkProductFileKind::Bytecode => "bc", WorkProductFileKind::Bytecode => "bc",
WorkProductFileKind::BytecodeCompressed => "bc-compressed", WorkProductFileKind::BytecodeCompressed => "bc.z",
}; };
let file_name = format!("cgu-{}.{}", cgu_name, extension); let file_name = format!("{}.{}", cgu_name, extension);
let path_in_incr_dir = in_incr_comp_dir_sess(sess, &file_name); let path_in_incr_dir = in_incr_comp_dir_sess(sess, &file_name);
match link_or_copy(path, &path_in_incr_dir) { match link_or_copy(path, &path_in_incr_dir) {
Ok(_) => Some((kind, file_name)), Ok(_) => Some((kind, file_name)),

View file

@ -200,7 +200,16 @@ impl<'tcx> CodegenUnitExt<'tcx> for CodegenUnit<'tcx> {
} }
// Anything we can't find a proper codegen unit for goes into this. // Anything we can't find a proper codegen unit for goes into this.
const FALLBACK_CODEGEN_UNIT: &'static str = "__rustc_fallback_codegen_unit"; fn fallback_cgu_name(tcx: TyCtxt) -> InternedString {
const FALLBACK_CODEGEN_UNIT: &'static str = "__rustc_fallback_codegen_unit";
if tcx.sess.opts.debugging_opts.human_readable_cgu_names {
Symbol::intern(FALLBACK_CODEGEN_UNIT).as_str()
} else {
Symbol::intern(&CodegenUnit::mangle_name(FALLBACK_CODEGEN_UNIT)).as_str()
}
}
pub fn partition<'a, 'tcx, I>(tcx: TyCtxt<'a, 'tcx, 'tcx>, pub fn partition<'a, 'tcx, I>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
trans_items: I, trans_items: I,
@ -297,7 +306,7 @@ fn place_root_translation_items<'a, 'tcx, I>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
let codegen_unit_name = match characteristic_def_id { let codegen_unit_name = match characteristic_def_id {
Some(def_id) => compute_codegen_unit_name(tcx, def_id, is_volatile), Some(def_id) => compute_codegen_unit_name(tcx, def_id, is_volatile),
None => Symbol::intern(FALLBACK_CODEGEN_UNIT).as_str(), None => fallback_cgu_name(tcx),
}; };
let make_codegen_unit = || { let make_codegen_unit = || {
@ -381,7 +390,7 @@ fn place_root_translation_items<'a, 'tcx, I>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
// always ensure we have at least one CGU; otherwise, if we have a // always ensure we have at least one CGU; otherwise, if we have a
// crate with just types (for example), we could wind up with no CGU // crate with just types (for example), we could wind up with no CGU
if codegen_units.is_empty() { if codegen_units.is_empty() {
let codegen_unit_name = Symbol::intern(FALLBACK_CODEGEN_UNIT).as_str(); let codegen_unit_name = fallback_cgu_name(tcx);
codegen_units.insert(codegen_unit_name.clone(), codegen_units.insert(codegen_unit_name.clone(),
CodegenUnit::new(codegen_unit_name.clone())); CodegenUnit::new(codegen_unit_name.clone()));
} }
@ -630,10 +639,10 @@ fn compute_codegen_unit_name<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
// Unfortunately we cannot just use the `ty::item_path` infrastructure here // Unfortunately we cannot just use the `ty::item_path` infrastructure here
// because we need paths to modules and the DefIds of those are not // because we need paths to modules and the DefIds of those are not
// available anymore for external items. // available anymore for external items.
let mut mod_path = String::with_capacity(64); let mut cgu_name = String::with_capacity(64);
let def_path = tcx.def_path(def_id); let def_path = tcx.def_path(def_id);
mod_path.push_str(&tcx.crate_name(def_path.krate).as_str()); cgu_name.push_str(&tcx.crate_name(def_path.krate).as_str());
for part in tcx.def_path(def_id) for part in tcx.def_path(def_id)
.data .data
@ -644,15 +653,21 @@ fn compute_codegen_unit_name<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
_ => false, _ => false,
} }
}) { }) {
mod_path.push_str("-"); cgu_name.push_str("-");
mod_path.push_str(&part.data.as_interned_str()); cgu_name.push_str(&part.data.as_interned_str());
} }
if volatile { if volatile {
mod_path.push_str(".volatile"); cgu_name.push_str(".volatile");
} }
return Symbol::intern(&mod_path[..]).as_str(); let cgu_name = if tcx.sess.opts.debugging_opts.human_readable_cgu_names {
cgu_name
} else {
CodegenUnit::mangle_name(&cgu_name)
};
Symbol::intern(&cgu_name[..]).as_str()
} }
fn numbered_codegen_unit_name(crate_name: &str, index: usize) -> InternedString { fn numbered_codegen_unit_name(crate_name: &str, index: usize) -> InternedString {

View file

@ -28,8 +28,10 @@
//! perturb the reuse results. //! perturb the reuse results.
use rustc::dep_graph::{DepNode, DepConstructor}; use rustc::dep_graph::{DepNode, DepConstructor};
use rustc::mir::mono::CodegenUnit;
use rustc::ty::TyCtxt; use rustc::ty::TyCtxt;
use syntax::ast; use syntax::ast;
use syntax_pos::symbol::Symbol;
use rustc::ich::{ATTR_PARTITION_REUSED, ATTR_PARTITION_TRANSLATED}; use rustc::ich::{ATTR_PARTITION_REUSED, ATTR_PARTITION_TRANSLATED};
const MODULE: &'static str = "module"; const MODULE: &'static str = "module";
@ -71,9 +73,11 @@ impl<'a, 'tcx> AssertModuleSource<'a, 'tcx> {
} }
let mname = self.field(attr, MODULE); let mname = self.field(attr, MODULE);
let mangled_cgu_name = CodegenUnit::mangle_name(&mname.as_str());
let mangled_cgu_name = Symbol::intern(&mangled_cgu_name).as_str();
let dep_node = DepNode::new(self.tcx, let dep_node = DepNode::new(self.tcx,
DepConstructor::CompileCodegenUnit(mname.as_str())); DepConstructor::CompileCodegenUnit(mangled_cgu_name));
if let Some(loaded_from_cache) = self.tcx.dep_graph.was_loaded_from_cache(&dep_node) { if let Some(loaded_from_cache) = self.tcx.dep_graph.was_loaded_from_cache(&dep_node) {
match (disposition, loaded_from_cache) { match (disposition, loaded_from_cache) {

View file

@ -47,7 +47,7 @@ pub const RLIB_BYTECODE_OBJECT_MAGIC: &'static [u8] = b"RUST_OBJECT";
// The version number this compiler will write to bytecode objects in rlibs // The version number this compiler will write to bytecode objects in rlibs
pub const RLIB_BYTECODE_OBJECT_VERSION: u8 = 2; pub const RLIB_BYTECODE_OBJECT_VERSION: u8 = 2;
pub const RLIB_BYTECODE_EXTENSION: &str = "bytecode.encoded"; pub const RLIB_BYTECODE_EXTENSION: &str = "bc.z";
pub fn encode(identifier: &str, bytecode: &[u8]) -> Vec<u8> { pub fn encode(identifier: &str, bytecode: &[u8]) -> Vec<u8> {
let mut encoded = Vec::new(); let mut encoded = Vec::new();

View file

@ -572,7 +572,7 @@ impl<'b, 'tcx> CrateContext<'b, 'tcx> {
let mut name = String::with_capacity(prefix.len() + 6); let mut name = String::with_capacity(prefix.len() + 6);
name.push_str(prefix); name.push_str(prefix);
name.push_str("."); name.push_str(".");
base_n::push_str(idx as u64, base_n::ALPHANUMERIC_ONLY, &mut name); base_n::push_str(idx as u128, base_n::ALPHANUMERIC_ONLY, &mut name);
name name
} }

View file

@ -1520,6 +1520,10 @@ impl<'test> TestCx<'test> {
rustc.args(&["-Z", "incremental-queries"]); rustc.args(&["-Z", "incremental-queries"]);
} }
if self.config.mode == CodegenUnits {
rustc.args(&["-Z", "human_readable_cgu_names"]);
}
match self.config.mode { match self.config.mode {
CompileFail | ParseFail | Incremental => { CompileFail | ParseFail | Incremental => {
// If we are extracting and matching errors in the new // If we are extracting and matching errors in the new