From ed938f08a949cb487f7513d4f22f92355a40d2f3 Mon Sep 17 00:00:00 2001
From: Alex Crichton
Date: Fri, 25 Aug 2017 20:16:51 -0700
Subject: [PATCH] rustc: Attempt to handle super long linker invocations

This commit adds logic to the compiler to attempt to handle super long linker
invocations by falling back to the `@`-file syntax if the invoked command is too
large. Each OS has a limit on how many arguments and how large the arguments can
be when spawning a new process, and linkers tend to be one of those programs
that can hit the limit!

The logic implemented here is to unconditionally attempt to spawn a linker and
then if it fails to spawn with an error from the OS that indicates the command
line is too big we attempt a fallback. The fallback is roughly the same for all
linkers where an argument pointing to a file, prepended with `@`, is passed.
This file then contains all the various arguments that we want to pass to the
linker.

Closes #41190
---
 src/librustc_trans/back/command.rs            | 131 ++++++++++++++++++
 src/librustc_trans/back/link.rs               | 135 ++++++++++++++++-
 src/librustc_trans/back/linker.rs             |   2 +-
 src/librustc_trans/lib.rs                     |   1 +
 .../long-linker-command-lines/Makefile        |   5 +
 .../run-make/long-linker-command-lines/foo.rs |  88 ++++++++++++++
 src/test/run-make/tools.mk                    |   1 +
 7 files changed, 357 insertions(+), 6 deletions(-)
 create mode 100644 src/librustc_trans/back/command.rs
 create mode 100644 src/test/run-make/long-linker-command-lines/Makefile
 create mode 100644 src/test/run-make/long-linker-command-lines/foo.rs

diff --git a/src/librustc_trans/back/command.rs b/src/librustc_trans/back/command.rs
new file mode 100644
index 00000000000..ea68e3b28b6
--- /dev/null
+++ b/src/librustc_trans/back/command.rs
@@ -0,0 +1,131 @@
+// Copyright 2017 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+//! A thin wrapper around `Command` in the standard library which allows us to
+//! read the arguments that are built up.
+
+use std::ffi::{OsStr, OsString};
+use std::fmt;
+use std::io;
+use std::process::{self, Output, Child};
+
+/// A recorded process invocation: program, arguments and environment to set.
+///
+/// Nothing is handed to the OS until `command`, `output` or `spawn` is
+/// called, so what was recorded stays readable through the `get_*` accessors.
+pub struct Command {
+    program: OsString,
+    args: Vec<OsString>,
+    env: Vec<(OsString, OsString)>,
+}
+
+impl Command {
+    /// Creates a command that will run `program`, with no arguments yet.
+    pub fn new<P: AsRef<OsStr>>(program: P) -> Command {
+        Command::_new(program.as_ref())
+    }
+
+    fn _new(program: &OsStr) -> Command {
+        Command {
+            program: program.to_owned(),
+            args: Vec::new(),
+            env: Vec::new(),
+        }
+    }
+
+    /// Appends a single argument.
+    pub fn arg<P: AsRef<OsStr>>(&mut self, arg: P) -> &mut Command {
+        self._arg(arg.as_ref());
+        self
+    }
+
+    /// Appends every argument yielded by `args`, in order.
+    pub fn args<I>(&mut self, args: I) -> &mut Command
+        where I: IntoIterator,
+              I::Item: AsRef<OsStr>,
+    {
+        for arg in args {
+            self._arg(arg.as_ref());
+        }
+        self
+    }
+
+    fn _arg(&mut self, arg: &OsStr) {
+        self.args.push(arg.to_owned());
+    }
+
+    /// Records an environment variable to set for the spawned process.
+    pub fn env<K, V>(&mut self, key: K, value: V) -> &mut Command
+        where K: AsRef<OsStr>,
+              V: AsRef<OsStr>
+    {
+        self._env(key.as_ref(), value.as_ref());
+        self
+    }
+
+    /// Records every `(key, value)` pair yielded by `envs`, in order.
+    pub fn envs<I, K, V>(&mut self, envs: I) -> &mut Command
+        where I: IntoIterator<Item=(K, V)>,
+              K: AsRef<OsStr>,
+              V: AsRef<OsStr>
+    {
+        for (key, value) in envs {
+            self._env(key.as_ref(), value.as_ref());
+        }
+        self
+    }
+
+    fn _env(&mut self, key: &OsStr, value: &OsStr) {
+        self.env.push((key.to_owned(), value.to_owned()));
+    }
+
+    /// Builds the process command and runs it via `process::Command::output`.
+    pub fn output(&mut self) -> io::Result<Output> {
+        self.command().output()
+    }
+
+    /// Builds the process command and starts it via `process::Command::spawn`.
+    pub fn spawn(&mut self) -> io::Result<Child> {
+        self.command().spawn()
+    }
+
+    /// Builds a fresh `process::Command` from everything recorded so far.
+    pub fn command(&self) -> process::Command {
+        let mut ret = process::Command::new(&self.program);
+        ret.args(&self.args);
+        // Hand over borrowed pairs; cloning the whole list is unnecessary.
+        ret.envs(self.env.iter().map(|&(ref k, ref v)| (k, v)));
+        ret
+    }
+
+    // Extensions: read-only access to what has been recorded.
+
+    /// Returns the program this command runs.
+    pub fn get_program(&self) -> &OsStr {
+        &self.program
+    }
+
+    /// Returns the arguments recorded so far.
+    pub fn get_args(&self) -> &[OsString] {
+        &self.args
+    }
+
+    /// Returns the environment variables recorded so far.
+    pub fn get_env(&self) -> &[(OsString, OsString)] {
+        &self.env
+    }
+}
+
+/// Formats as the `process::Command` that `command` would build.
+impl fmt::Debug for Command {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        self.command().fmt(f)
+    }
+}
diff --git a/src/librustc_trans/back/link.rs b/src/librustc_trans/back/link.rs
index 4b56376ad9b..ff95acf5d98 100644
--- a/src/librustc_trans/back/link.rs
+++ b/src/librustc_trans/back/link.rs
@@ -12,6 +12,7 @@ extern crate rustc_trans_utils;
 
 use super::archive::{ArchiveBuilder, ArchiveConfig};
 use super::linker::Linker;
+use super::command::Command;
 use super::rpath::RPathConfig;
 use super::rpath;
 use metadata::METADATA_FILENAME;
@@ -38,11 +39,12 @@ use std::ascii;
 use std::char;
 use std::env;
 use std::ffi::OsString;
-use std::fs;
-use std::io::{self, Read, Write};
+use std::fmt;
+use std::fs::{self, File};
+use std::io::{self, Read, Write, BufWriter};
 use std::mem;
 use std::path::{Path, PathBuf};
-use std::process::Command;
+use std::process::{Output, Stdio};
 use std::str;
 use flate2::Compression;
 use flate2::write::DeflateEncoder;
@@ -125,8 +127,13 @@ pub fn msvc_link_exe_cmd(sess: &Session) -> (Command, Vec<(OsString, OsString)>)
     let tool = windows_registry::find_tool(target, "link.exe");
 
     if let Some(tool) = tool {
+        let mut cmd = Command::new(tool.path());
+        cmd.args(tool.args());
+        for &(ref k, ref v) in tool.env() {
+            cmd.env(k, v);
+        }
         let envs = tool.env().to_vec();
-        (tool.to_command(), envs)
+        (cmd, envs)
     } else {
         debug!("Failed to locate linker.");
         (Command::new("link.exe"), vec![])
@@ -797,7 +804,9 @@ fn link_natively(sess: &Session,
     let mut i = 0;
     loop {
         i += 1;
-        prog = time(sess.time_passes(), "running linker", || cmd.output());
+        prog = time(sess.time_passes(), "running linker", || {
+            exec_linker(sess, &mut cmd, tmpdir)
+        });
         if !retry_on_segfault || i > 3 {
             break
         }
@@ -875,6 +884,122 @@ fn link_natively(sess: &Session,
     }
 }
 
+/// Runs the linker described by `cmd` and returns its collected output.
+///
+/// The linker is first spawned with every argument on its command line. If
+/// the OS refuses to spawn it because the command line is too big, the
+/// arguments are written to a `linker-arguments` file inside `tmpdir` and the
+/// linker is run again with a single `@`-file argument naming that file.
+///
+/// Any other spawn failure is returned unchanged, as is any error from writing
+/// the file; an argument that is not valid unicode is reported as an error.
+fn exec_linker(sess: &Session, cmd: &mut Command, tmpdir: &Path)
+    -> io::Result<Output>
+{
+    // When attempting to spawn the linker we run a risk of blowing out the
+    // size limits for spawning a new process with respect to the arguments
+    // we pass on the command line.
+    //
+    // Here we attempt to handle errors from the OS saying "your list of
+    // arguments is too big" by reinvoking the linker again with an `@`-file
+    // that contains all the arguments. The theory is that this is then
+    // accepted on all linkers and the linker will read all its options out of
+    // there instead of looking at the command line.
+    match cmd.command().stdout(Stdio::piped()).stderr(Stdio::piped()).spawn() {
+        Ok(child) => return child.wait_with_output(),
+        Err(ref e) if command_line_too_big(e) => {}
+        Err(e) => return Err(e)
+    }
+
+    let file = tmpdir.join("linker-arguments");
+    let mut cmd2 = Command::new(cmd.get_program());
+    cmd2.arg(format!("@{}", file.display()));
+    for &(ref k, ref v) in cmd.get_env() {
+        cmd2.env(k, v);
+    }
+    let mut f = BufWriter::new(File::create(&file)?);
+    for arg in cmd.get_args() {
+        // The `@`-file is written as text, so an argument that is not valid
+        // unicode is reported as an error instead of panicking.
+        let arg = arg.to_str().ok_or_else(|| {
+            io::Error::new(io::ErrorKind::InvalidInput,
+                           format!("linker argument is not valid unicode: {:?}", arg))
+        })?;
+        writeln!(f, "{}", Escape {
+            arg: arg,
+            is_like_msvc: sess.target.target.options.is_like_msvc,
+        })?;
+    }
+    f.into_inner()?;
+    return cmd2.output();
+
+    #[cfg(unix)]
+    fn command_line_too_big(err: &io::Error) -> bool {
+        err.raw_os_error() == Some(::libc::E2BIG)
+    }
+
+    #[cfg(windows)]
+    fn command_line_too_big(err: &io::Error) -> bool {
+        // Win32 error 206: "The filename or extension is too long."
+        const ERROR_FILENAME_EXCED_RANGE: i32 = 206;
+        err.raw_os_error() == Some(ERROR_FILENAME_EXCED_RANGE)
+    }
+
+    #[cfg(not(any(unix, windows)))]
+    fn command_line_too_big(_: &io::Error) -> bool {
+        // No "command line too big" error code is known for other
+        // platforms, so the `@`-file fallback is never taken there.
+        false
+    }
+
+    /// Displays `arg` escaped so that it reads back as one `@`-file argument.
+    struct Escape<'a> {
+        arg: &'a str,
+        is_like_msvc: bool,
+    }
+
+    impl<'a> fmt::Display for Escape<'a> {
+        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+            if self.is_like_msvc {
+                // This is "documented" at
+                // https://msdn.microsoft.com/en-us/library/4xdcbak7.aspx
+                //
+                // Unfortunately there's not a great specification of the
+                // syntax I could find online (at least) but some local
+                // testing showed that this seemed sufficient-ish to catch
+                // at least a few edge cases.
+                write!(f, "\"")?;
+                for c in self.arg.chars() {
+                    match c {
+                        '"' => write!(f, "\\{}", c)?,
+                        c => write!(f, "{}", c)?,
+                    }
+                }
+                write!(f, "\"")?;
+            } else {
+                // This is documented at https://linux.die.net/man/1/ld, namely:
+                //
+                // > Options in file are separated by whitespace. A whitespace
+                // > character may be included in an option by surrounding the
+                // > entire option in either single or double quotes. Any
+                // > character (including a backslash) may be included by
+                // > prefixing the character to be included with a backslash.
+                //
+                // We put an argument on each line, so all we need to do is
+                // ensure the line is interpreted as one whole argument.
+                for c in self.arg.chars() {
+                    match c {
+                        '\\' |
+                        ' ' => write!(f, "\\{}", c)?,
+                        c => write!(f, "{}", c)?,
+                    }
+                }
+            }
+            Ok(())
+        }
+    }
+}
+
 fn link_args(cmd: &mut Linker,
              sess: &Session,
              crate_type: config::CrateType,
diff --git a/src/librustc_trans/back/linker.rs b/src/librustc_trans/back/linker.rs
index 9b0a5e3f4a5..487d9e05945 100644
--- a/src/librustc_trans/back/linker.rs
+++ b/src/librustc_trans/back/linker.rs
@@ -14,11 +14,11 @@ use std::fs::{self, File};
 use std::io::prelude::*;
 use std::io::{self, BufWriter};
 use std::path::{Path, PathBuf};
-use std::process::Command;
 
 use context::SharedCrateContext;
 
 use back::archive;
+use back::command::Command;
 use back::symbol_export::ExportedSymbols;
 use rustc::middle::dependency_format::Linkage;
 use rustc::hir::def_id::{LOCAL_CRATE, CrateNum};
diff --git a/src/librustc_trans/lib.rs b/src/librustc_trans/lib.rs
index 1758e331129..1bb9ce432df 100644
--- a/src/librustc_trans/lib.rs
+++ b/src/librustc_trans/lib.rs
@@ -68,6 +68,7 @@ pub use llvm_util::{init, target_features, print_version, print_passes, print, e
 
 pub mod back {
     mod archive;
+    mod command;
     pub(crate) mod linker;
     pub mod link;
     mod lto;
diff --git a/src/test/run-make/long-linker-command-lines/Makefile b/src/test/run-make/long-linker-command-lines/Makefile
new file mode 100644
index 00000000000..309a27fe503
--- /dev/null
+++ b/src/test/run-make/long-linker-command-lines/Makefile
@@ -0,0 +1,5 @@
+-include ../tools.mk
+
+all:
+	$(RUSTC) foo.rs -g
+	RUSTC="$(RUSTC_ORIGINAL)" $(call RUN,foo)
diff --git a/src/test/run-make/long-linker-command-lines/foo.rs b/src/test/run-make/long-linker-command-lines/foo.rs
new file mode 100644
index 00000000000..e6fd6b65366
--- /dev/null
+++ b/src/test/run-make/long-linker-command-lines/foo.rs
@@ -0,0 +1,88 @@
+// Copyright 2017 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// This is a test which attempts to blow out the system limit with how many
+// arguments can be passed to a process. This'll successively call rustc with
+// larger and larger argument lists in an attempt to find one that's way too
+// big for the system at hand. This file itself is then used as a "linker" to
+// detect when the process creation succeeds.
+//
+// Eventually we should see an argument that looks like `@` as we switch from
+// passing literal arguments to passing everything in the file.
+
+use std::env;
+use std::fs::{self, File};
+use std::io::{BufWriter, Write, Read};
+use std::path::PathBuf;
+use std::process::Command;
+
+fn main() {
+    let tmpdir = PathBuf::from(env::var_os("TMPDIR").unwrap());
+    let ok = tmpdir.join("ok");
+    if env::var("YOU_ARE_A_LINKER").is_ok() {
+        if let Some(file) = env::args().find(|a| a.starts_with("@")) {
+            fs::copy(&file[1..], &ok).unwrap(); // drop the leading `@`
+        }
+        return
+    }
+
+    let rustc = env::var_os("RUSTC").unwrap_or("rustc".into());
+    let me_as_linker = format!("linker={}", env::current_exe().unwrap().display());
+    for i in (1..).map(|i| i * 100) {
+        println!("attempt: {}", i);
+        let file = tmpdir.join("bar.rs");
+        let mut f = BufWriter::new(File::create(&file).unwrap());
+        let mut lib_name = String::new();
+        for _ in 0..i {
+            lib_name.push_str("foo");
+        }
+        for j in 0..i {
+            writeln!(f, "#[link(name = \"{}{}\")]", lib_name, j).unwrap();
+        }
+        writeln!(f, "extern {{}}\nfn main() {{}}").unwrap();
+        f.into_inner().unwrap();
+
+        drop(fs::remove_file(&ok));
+        let output = Command::new(&rustc)
+            .arg(&file)
+            .arg("-C").arg(&me_as_linker)
+            .arg("--out-dir").arg(&tmpdir)
+            .env("YOU_ARE_A_LINKER", "1")
+            .output()
+            .unwrap();
+
+        if !output.status.success() {
+            let stderr = String::from_utf8_lossy(&output.stderr);
+            panic!("status: {}\nstdout:\n{}\nstderr:\n{}",
+                   output.status,
+                   String::from_utf8_lossy(&output.stdout),
+                   stderr.lines().map(|l| {
+                       if l.len() > 200 {
+                           format!("{}...\n", l.chars().take(200).collect::<String>())
+                       } else {
+                           format!("{}\n", l)
+                       }
+                   }).collect::<String>());
+        }
+
+        if !ok.exists() {
+            continue
+        }
+
+        let mut contents = String::new();
+        File::open(&ok).unwrap().read_to_string(&mut contents).unwrap();
+
+        for j in 0..i {
+            assert!(contents.contains(&format!("{}{}", lib_name, j)));
+        }
+
+        break
+    }
+}
diff --git a/src/test/run-make/tools.mk b/src/test/run-make/tools.mk
index d13ba11e96a..27f235d54d4 100644
--- a/src/test/run-make/tools.mk
+++ b/src/test/run-make/tools.mk
@@ -5,6 +5,7 @@ HOST_RPATH_ENV = \
 TARGET_RPATH_ENV = \
     $(LD_LIB_PATH_ENVVAR)="$(TMPDIR):$(TARGET_RPATH_DIR):$($(LD_LIB_PATH_ENVVAR))"
 
+RUSTC_ORIGINAL := $(RUSTC)
 BARE_RUSTC := $(HOST_RPATH_ENV) '$(RUSTC)'
 RUSTC := $(BARE_RUSTC) --out-dir $(TMPDIR) -L $(TMPDIR) $(RUSTFLAGS)
 #CC := $(CC) -L $(TMPDIR)