Serialize modules into ThinBuffer after initial optimization

Instead of keeping all modules in memory until thin LTO starts and only serializing them at that point, serialize each module immediately after it finishes optimizing.
2018-12-04 16:24:20 +01:00 · 2018-12-04 16:24:20 +01:00 · 8128d0d1a9
commit 8128d0d1a9
parent bc2db43b9e
4 changed files with 64 additions and 49 deletions
--- a/src/librustc_codegen_llvm/back/lto.rs
+++ b/src/librustc_codegen_llvm/back/lto.rs
@ -159,7 +159,7 @@ pub(crate) fn run_fat(cgcx: &CodegenContext<LlvmCodegenBackend>,
 /// lists, one of the modules that need optimization and another for modules that
 /// can simply be copied over from the incr. comp. cache.
 pub(crate) fn run_thin(cgcx: &CodegenContext<LlvmCodegenBackend>,
-                       modules: Vec<ModuleCodegen<ModuleLlvm>>,
+                       modules: Vec<(String, ThinBuffer)>,
                       cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
                       timeline: &mut Timeline)
    -> Result<(Vec<LtoModuleCodegen<LlvmCodegenBackend>>, Vec<WorkProduct>), FatalError>
@ -182,6 +182,31 @@ pub(crate) fn run_thin(cgcx: &CodegenContext<LlvmCodegenBackend>,
             timeline)
 }

+pub(crate) fn prepare_thin(
+    cgcx: &CodegenContext<LlvmCodegenBackend>,
+    module: ModuleCodegen<ModuleLlvm>
+) -> (String, ThinBuffer) {
+    let name = module.name.clone();
+    let buffer = ThinBuffer::new(module.module_llvm.llmod());
+
+    // We emit the module after having serialized it into a ThinBuffer
+    // because only then it will contain the ThinLTO module summary.
+    if let Some(ref incr_comp_session_dir) = cgcx.incr_comp_session_dir {
+        if cgcx.config(module.kind).emit_pre_thin_lto_bc {
+            let path = incr_comp_session_dir
+                .join(pre_lto_bitcode_filename(&name));
+
+            fs::write(&path, buffer.data()).unwrap_or_else(|e| {
+                panic!("Error writing pre-lto-bitcode file `{}`: {}",
+                       path.display(),
+                       e);
+            });
+        }
+    }
+
+    (name, buffer)
+}
+
 fn fat_lto(cgcx: &CodegenContext<LlvmCodegenBackend>,
           diag_handler: &Handler,
           mut modules: Vec<ModuleCodegen<ModuleLlvm>>,
@ -341,7 +366,7 @@ impl Drop for Linker<'a> {
 /// they all go out of scope.
 fn thin_lto(cgcx: &CodegenContext<LlvmCodegenBackend>,
            diag_handler: &Handler,
-            modules: Vec<ModuleCodegen<ModuleLlvm>>,
+            modules: Vec<(String, ThinBuffer)>,
            serialized_modules: Vec<(SerializedModule<ModuleBuffer>, CString)>,
            cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
            symbol_white_list: &[*const libc::c_char],
@ -361,41 +386,17 @@ fn thin_lto(cgcx: &CodegenContext<LlvmCodegenBackend>,
        let mut module_names = Vec::with_capacity(full_scope_len);
        let mut thin_modules = Vec::with_capacity(full_scope_len);

-        // FIXME: right now, like with fat LTO, we serialize all in-memory
-        //        modules before working with them and ThinLTO. We really
-        //        shouldn't do this, however, and instead figure out how to
-        //        extract a summary from an in-memory module and then merge that
-        //        into the global index. It turns out that this loop is by far
-        //        the most expensive portion of this small bit of global
-        //        analysis!
-        for (i, module) in modules.into_iter().enumerate() {
-            info!("local module: {} - {}", i, module.name);
-            let name = CString::new(module.name.clone()).unwrap();
-            let buffer = ThinBuffer::new(module.module_llvm.llmod());
-
-            // We emit the module after having serialized it into a ThinBuffer
-            // because only then it will contain the ThinLTO module summary.
-            if let Some(ref incr_comp_session_dir) = cgcx.incr_comp_session_dir {
-                if cgcx.config(module.kind).emit_pre_thin_lto_bc {
-                    let path = incr_comp_session_dir
-                        .join(pre_lto_bitcode_filename(&module.name));
-
-                    fs::write(&path, buffer.data()).unwrap_or_else(|e| {
-                        panic!("Error writing pre-lto-bitcode file `{}`: {}",
-                               path.display(),
-                               e);
-                    });
-                }
-            }
-
+        for (i, (name, buffer)) in modules.into_iter().enumerate() {
+            info!("local module: {} - {}", i, name);
+            let cname = CString::new(name.clone()).unwrap();
            thin_modules.push(llvm::ThinLTOModule {
-                identifier: name.as_ptr(),
+                identifier: cname.as_ptr(),
                data: buffer.data().as_ptr(),
                len: buffer.data().len(),
            });
            thin_buffers.push(buffer);
-            module_names.push(name);
-            timeline.record(&module.name);
+            module_names.push(cname);
+            timeline.record(&name);
        }

        // FIXME: All upstream crates are deserialized internally in the
--- a/src/librustc_codegen_llvm/lib.rs
+++ b/src/librustc_codegen_llvm/lib.rs
@ -185,7 +185,7 @@ impl WriteBackendMethods for LlvmCodegenBackend {
    }
    fn run_thin_lto(
        cgcx: &CodegenContext<Self>,
-        modules: Vec<ModuleCodegen<Self::Module>>,
+        modules: Vec<(String, Self::ThinBuffer)>,
        cached_modules: Vec<(SerializedModule<Self::ModuleBuffer>, WorkProduct)>,
        timeline: &mut Timeline
    ) -> Result<(Vec<LtoModuleCodegen<Self>>, Vec<WorkProduct>), FatalError> {
@ -216,6 +216,12 @@ impl WriteBackendMethods for LlvmCodegenBackend {
    ) -> Result<CompiledModule, FatalError> {
        back::write::codegen(cgcx, diag_handler, module, config, timeline)
    }
+    fn prepare_thin(
+        cgcx: &CodegenContext<Self>,
+        module: ModuleCodegen<Self::Module>
+    ) -> (String, Self::ThinBuffer) {
+        back::lto::prepare_thin(cgcx, module)
+    }
    fn run_lto_pass_manager(
        cgcx: &CodegenContext<Self>,
        module: &ModuleCodegen<Self::Module>,
--- a/src/librustc_codegen_ssa/back/write.rs
+++ b/src/librustc_codegen_ssa/back/write.rs
@ -253,7 +253,7 @@ impl<B: WriteBackendMethods> CodegenContext<B> {
 fn generate_lto_work<B: ExtraBackendMethods>(
    cgcx: &CodegenContext<B>,
    needs_fat_lto: Vec<ModuleCodegen<B::Module>>,
-    needs_thin_lto: Vec<ModuleCodegen<B::Module>>,
+    needs_thin_lto: Vec<(String, B::ThinBuffer)>,
    import_only_modules: Vec<(SerializedModule<B::ModuleBuffer>, WorkProduct)>
 ) -> Vec<(WorkItem<B>, u64)> {
    let mut timeline = cgcx.time_graph.as_ref().map(|tg| {
@ -678,17 +678,17 @@ impl<B: WriteBackendMethods> WorkItem<B> {
    }
 }

-enum WorkItemResult<M> {
+enum WorkItemResult<B: WriteBackendMethods> {
    Compiled(CompiledModule),
-    NeedsFatLTO(ModuleCodegen<M>),
-    NeedsThinLTO(ModuleCodegen<M>),
+    NeedsFatLTO(ModuleCodegen<B::Module>),
+    NeedsThinLTO(String, B::ThinBuffer),
 }

 fn execute_work_item<B: ExtraBackendMethods>(
    cgcx: &CodegenContext<B>,
    work_item: WorkItem<B>,
    timeline: &mut Timeline
-) -> Result<WorkItemResult<B::Module>, FatalError> {
+) -> Result<WorkItemResult<B>, FatalError> {
    let module_config = cgcx.config(work_item.module_kind());

    match work_item {
@ -716,7 +716,7 @@ fn execute_optimize_work_item<B: ExtraBackendMethods>(
    module: ModuleCodegen<B::Module>,
    module_config: &ModuleConfig,
    timeline: &mut Timeline
-) -> Result<WorkItemResult<B::Module>, FatalError> {
+) -> Result<WorkItemResult<B>, FatalError> {
    let diag_handler = cgcx.create_diag_handler();

    unsafe {
@ -772,7 +772,10 @@ fn execute_optimize_work_item<B: ExtraBackendMethods>(
            };
            WorkItemResult::Compiled(module)
        }
-        ComputedLtoType::Thin => WorkItemResult::NeedsThinLTO(module),
+        ComputedLtoType::Thin => {
+            let (name, thin_buffer) = B::prepare_thin(cgcx, module);
+            WorkItemResult::NeedsThinLTO(name, thin_buffer)
+        }
        ComputedLtoType::Fat => WorkItemResult::NeedsFatLTO(module),
    })
 }
@ -782,7 +785,7 @@ fn execute_copy_from_cache_work_item<B: ExtraBackendMethods>(
    module: CachedModuleCodegen,
    module_config: &ModuleConfig,
    _: &mut Timeline
-) -> Result<WorkItemResult<B::Module>, FatalError> {
+) -> Result<WorkItemResult<B>, FatalError> {
    let incr_comp_session_dir = cgcx.incr_comp_session_dir
                                    .as_ref()
                                    .unwrap();
@ -844,7 +847,7 @@ fn execute_lto_work_item<B: ExtraBackendMethods>(
    mut module: lto::LtoModuleCodegen<B>,
    module_config: &ModuleConfig,
    timeline: &mut Timeline
-) -> Result<WorkItemResult<B::Module>, FatalError> {
+) -> Result<WorkItemResult<B>, FatalError> {
    let diag_handler = cgcx.create_diag_handler();

    unsafe {
@ -861,7 +864,8 @@ pub enum Message<B: WriteBackendMethods> {
        worker_id: usize,
    },
    NeedsThinLTO {
-        result: ModuleCodegen<B::Module>,
+        name: String,
+        thin_buffer: B::ThinBuffer,
        worker_id: usize,
    },
    Done {
@ -1423,10 +1427,10 @@ fn start_executing_work<B: ExtraBackendMethods>(
                    free_worker(worker_id);
                    needs_fat_lto.push(result);
                }
-                Message::NeedsThinLTO { result, worker_id } => {
+                Message::NeedsThinLTO { name, thin_buffer, worker_id } => {
                    assert!(!started_lto);
                    free_worker(worker_id);
-                    needs_thin_lto.push(result);
+                    needs_thin_lto.push((name, thin_buffer));
                }
                Message::AddImportOnlyModule { module_data, work_product } => {
                    assert!(!started_lto);
@ -1514,7 +1518,7 @@ fn spawn_work<B: ExtraBackendMethods>(
        // we exit.
        struct Bomb<B: ExtraBackendMethods> {
            coordinator_send: Sender<Box<dyn Any + Send>>,
-            result: Option<WorkItemResult<B::Module>>,
+            result: Option<WorkItemResult<B>>,
            worker_id: usize,
        }
        impl<B: ExtraBackendMethods> Drop for Bomb<B> {
@ -1527,8 +1531,8 @@ fn spawn_work<B: ExtraBackendMethods>(
                    Some(WorkItemResult::NeedsFatLTO(m)) => {
                        Message::NeedsFatLTO::<B> { result: m, worker_id }
                    }
-                    Some(WorkItemResult::NeedsThinLTO(m)) => {
-                        Message::NeedsThinLTO::<B> { result: m, worker_id }
+                    Some(WorkItemResult::NeedsThinLTO(name, thin_buffer)) => {
+                        Message::NeedsThinLTO::<B> { name, thin_buffer, worker_id }
                    }
                    None => Message::Done::<B> { result: Err(()), worker_id }
                };
--- a/src/librustc_codegen_ssa/traits/write.rs
+++ b/src/librustc_codegen_ssa/traits/write.rs
@ -36,7 +36,7 @@ pub trait WriteBackendMethods: 'static + Sized + Clone {
    /// can simply be copied over from the incr. comp. cache.
    fn run_thin_lto(
        cgcx: &CodegenContext<Self>,
-        modules: Vec<ModuleCodegen<Self::Module>>,
+        modules: Vec<(String, Self::ThinBuffer)>,
        cached_modules: Vec<(SerializedModule<Self::ModuleBuffer>, WorkProduct)>,
        timeline: &mut Timeline,
    ) -> Result<(Vec<LtoModuleCodegen<Self>>, Vec<WorkProduct>), FatalError>;
@ -60,6 +60,10 @@ pub trait WriteBackendMethods: 'static + Sized + Clone {
        config: &ModuleConfig,
        timeline: &mut Timeline,
    ) -> Result<CompiledModule, FatalError>;
+    fn prepare_thin(
+        cgcx: &CodegenContext<Self>,
+        module: ModuleCodegen<Self::Module>
+    ) -> (String, Self::ThinBuffer);
    fn run_lto_pass_manager(
        cgcx: &CodegenContext<Self>,
        llmod: &ModuleCodegen<Self::Module>,