Merge remote-tracking branch 'upstream/master'

This commit is contained in:
Seunghoon Lee 2024-04-28 13:00:19 +09:00
commit c2710a88f1
No known key found for this signature in database
GPG key ID: 436E38F4E70BD152
13 changed files with 2955 additions and 232 deletions

View file

@ -1,13 +1,13 @@
name: Rust
on:
push:
branches: ["master"]
branches: [master]
pull_request:
branches: ["master"]
branches: [master]
env:
CARGO_TERM_COLOR: always
ROCM_VERSION: "5.7.3"
jobs:
release:
@ -33,92 +33,77 @@ jobs:
draft: false
prerelease: true
build-linux:
needs: release
runs-on: ubuntu-22.04
permissions:
contents: write
pull-requests: write
repository-projects: write
build_lin:
name: Build and publish (Linux)
runs-on: ubuntu-20.04
steps:
- name: Prepare for build
- uses: jlumbroso/free-disk-space@main
- name: Install ROCm
run: |
sudo rm -rf "/usr/local/share/boost"
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
- name: Checkout repository
uses: actions/checkout@v3
- name: Initialize submodule
sudo mkdir --parents --mode=0755 /etc/apt/keyrings
sudo sh -c 'wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | gpg --dearmor | tee /etc/apt/keyrings/rocm.gpg > /dev/null'
sudo sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/${{ env.ROCM_VERSION }} focal main > /etc/apt/sources.list.d/rocm.list'
sudo apt-get update
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends rocm-smi-lib hip-runtime-amd comgr hipblaslt-dev hipfft-dev rocblas-dev rocsolver-dev rocsparse-dev miopen-hip-dev rocm-device-libs
echo 'export PATH="$PATH:/opt/rocm/bin"' | sudo tee /etc/profile.d/rocm.sh
echo '/opt/rocm/lib' | sudo tee /etc/ld.so.conf.d/rocm.conf
sudo ldconfig
- uses: actions/checkout@v4
with:
submodules: true
- uses: Swatinem/rust-cache@v2
- name: Build
# We use tar to unpack .tar.gz we've created because Github actions/upload-artifact
# is broken and will _always_ zip your artifact (even if it is a single file).
# See here: https://github.com/actions/upload-artifact/issues/39
# and here: https://github.com/actions/upload-artifact/issues/109
run: |
git submodule init
git submodule update
- name: Prepare AMD HIP SDK
run: |
sudo apt update
wget https://repo.radeon.com/amdgpu-install/5.7.1/ubuntu/jammy/amdgpu-install_5.7.50701-1_all.deb
sudo apt install ./amdgpu-install_5.7.50701-1_all.deb
sudo apt update
- name: Install AMD HIP SDK
# can fail at dkms. ignore and continue
continue-on-error: true
run: sudo amdgpu-install --usecase=rocm,hip,hiplibsdk
- name: Build for Linux
run: |
cargo xtask --release
- name: Prepare for release
run: |
cd ./target/release
tar -czvf ZLUDA-linux-amd64.tar.gz *.so*
- name: Upload Linux artifact
cargo xtask zip -r
tar -xzf target/release/zluda.tar.gz -C target/release
# https://stackoverflow.com/a/64195658
- name: Set revision hash
run: echo "SHORT_SHA=$(git rev-parse --short HEAD)" >> $GITHUB_ENV
- name: Upload (artifact)
uses: actions/upload-artifact@v4
with:
name: zluda-linux-${{ env.SHORT_SHA }}
path: target/release/zluda
- name: Upload (release)
uses: actions/upload-release-asset@v1
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
upload_url: ${{ needs.release.outputs.upload_url }}
asset_path: ./target/release/ZLUDA-linux-amd64.tar.gz
asset_path: target/release/zluda.tar.gz
asset_name: ZLUDA-linux-amd64.tar.gz
asset_content_type: application/gzip
build-windows:
needs: release
runs-on: windows-latest
permissions:
contents: write
pull-requests: write
repository-projects: write
build_win:
name: Build and publish (Windows)
runs-on: windows-2019
steps:
- name: Checkout repository
uses: actions/checkout@v3
- name: Initialize submodule
- uses: actions/checkout@v4
with:
submodules: true
- uses: Swatinem/rust-cache@v2
- name: Build
run: |
git submodule init
git submodule update
- name: Install AMD HIP SDK
run: |
C:\msys64\usr\bin\wget.exe https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-23.Q4-Win10-Win11-For-HIP.exe -O "amdgpu-install.exe"
.\amdgpu-install.exe -Install -View:1
Start-Sleep -Seconds 60
$setupId = (Get-Process ATISetup).id
Wait-Process -Id $setupId
- name: Build for Windows
run: |
$ROCm_PATH = "C:\Program Files\AMD\ROCm"
$Env:HIP_PATH = "$ROCm_PATH\$(Get-ChildItem -Path $ROCm_PATH -Name)"
$Env:PATH = "$Env:HIP_PATH\bin;$Env:PATH"
$Env:HIP_PATH = "$Env:HIP_PATH\"
cargo xtask --release
- name: Prepare for release
run: |
cd .\target\release
7z a .\ZLUDA-windows-amd64.zip .\*.dll
7z a .\ZLUDA-windows-amd64.zip .\*.exe
- name: Upload Windows artifact
cargo xtask zip -r
Expand-Archive -Path target/release/zluda.zip -DestinationPath target/release
# https://stackoverflow.com/a/74033027
- name: Set revision hash
run: echo "SHORT_SHA=$("${{ github.sha }}".SubString(0, 7))" >> $env:GITHUB_ENV
- name: Upload (artifact)
uses: actions/upload-artifact@v4
with:
name: zluda-windows-${{ env.SHORT_SHA }}
path: target/release/zluda
- name: Upload (release)
uses: actions/upload-release-asset@v1
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
upload_url: ${{ needs.release.outputs.upload_url }}
asset_path: ./target/release/ZLUDA-windows-amd64.zip
asset_path: target/release/zluda.zip
asset_name: ZLUDA-windows-amd64.zip
asset_content_type: application/zip

1
.gitignore vendored
View file

@ -1,5 +1,4 @@
target/
Cargo.lock
.vscode/
.idea/

2573
Cargo.lock generated Normal file

File diff suppressed because it is too large Load diff

View file

@ -60,4 +60,4 @@ opt-level = 3
opt-level = 3
[profile.dev.package.xtask]
opt-level = 3
opt-level = 2

View file

@ -1,65 +0,0 @@
[config]
default_to_workspace = false
skip_core_tasks = true
[tasks.build]
run_task = [
{ name = "build-windows", condition = { platforms = ["windows"] } },
{ name = "build-linux", condition = { platforms = ["linux"] } },
]
[tasks.build-windows]
command = "cargo"
args = [
"build",
"-p", "offline_compiler",
"-p", "zluda_blas",
"-p", "zluda_ccl",
"-p", "zluda_dnn",
"-p", "zluda_dump",
"-p", "zluda_inject",
"-p", "zluda_fft",
"-p", "zluda_lib",
"-p", "zluda_ml",
"-p", "zluda_sparse",
"-p", "zluda_rtc",
"-p", "zluda_redirect",
]
[tasks.build-linux]
command = "cargo"
args = [
"build",
"-p", "offline_compiler",
"-p", "zluda_blas",
"-p", "zluda_blaslt",
"-p", "zluda_ccl",
"-p", "zluda_dnn",
"-p", "zluda_dump",
"-p", "zluda_fft",
"-p", "zluda_lib",
"-p", "zluda_ml",
"-p", "zluda_rtc",
"-p", "zluda_sparse",
]
[tasks.build-release]
command = "cargo"
args = [
"build",
"--release",
"-p", "offline_compiler",
"-p", "zluda_blas",
"-p", "zluda_blaslt",
"-p", "zluda_ccl",
"-p", "zluda_dnn",
"-p", "zluda_dump",
"-p", "zluda_fft",
"-p", "zluda_lib",
"-p", "zluda_ml",
"-p", "zluda_rtc",
"-p", "zluda_sparse",
]
[tasks.default]
alias = "build"

View file

@ -92,13 +92,16 @@ If you are dumping original CUDA use:
### Linux
Known bug: when dumping from original CUDA you should remove (or rename) all the files in `<ZLUDA_DIRECTORY>/dump` except `libcuda.so` and `libcuda.so.1`.
Use it like this:
If dumping from ZLUDA use it like this:
```
LD_LIBRARY_PATH="<ZLUDA_DIRECTORY>/dump:$LD_LIBRARY_PATH" <APPLICATION> <APPLICATION_ARGUMENTS>
```
If dumping from NVIDIA CUDA use it like this:
```
LD_LIBRARY_PATH="<ZLUDA_DIRECTORY>/dump_nvidia:$LD_LIBRARY_PATH" <APPLICATION> <APPLICATION_ARGUMENTS>
```
### Result
If all went well you should see lines like this in the console output and in the log file specified by `ZLUDA_DUMP_DIR`:

View file

@ -18,3 +18,4 @@ features = [
[package.metadata.zluda]
debug_only = true
skip_zip = true

View file

@ -12,3 +12,11 @@ cargo_metadata = "=0.17.0"
cargo-platform = "=0.1.5"
serde = "1.0.193"
serde_json = "1.0.108"
time = { version = "=0.3.23", features = ["local-offset"] }
[target.'cfg(windows)'.dependencies]
zip = { version = "0.6.6", features = ["deflate", "time"], default-features = false }
[target.'cfg(unix)'.dependencies]
flate2 = { version = "1.0.28", features = ["cloudflare_zlib"], default-features = false }
tar = "0.4"

View file

@ -1,4 +1,5 @@
use argh::{EarlyExit, FromArgs, TopLevelCommand};
use cargo_metadata::camino::Utf8PathBuf;
use serde::Deserialize;
use std::{
env,
@ -60,7 +61,7 @@ struct BuildCommand {
}
#[derive(FromArgs)]
/// Package build artifacts into an archive (.zip or .tar.gz)
/// Compile ZLUDA and package binaries into an archive (.zip or .tar.gz)
#[argh(subcommand, name = "zip")]
struct ZipCommand {
/// use artifacts from release mode
@ -73,10 +74,15 @@ fn main() -> Result<(), DynError> {
let args: Arguments = argh::from_env();
std::process::exit(match args.command {
Subcommand::Build(BuildCommand { release }) => build(!release)?,
Subcommand::Zip(_) => panic!(),
Subcommand::Zip(ZipCommand { release }) => build_and_zip(!release),
})
}
fn build_and_zip(is_debug: bool) -> i32 {
let workspace = build_impl(is_debug).unwrap();
os::zip(workspace)
}
#[derive(Deserialize)]
struct ZludaMetadata {
zluda: Project,
@ -92,8 +98,6 @@ struct Project {
#[serde(skip_deserializing)]
kind: TargetKind,
#[serde(default)]
top_level: bool,
#[serde(default)]
windows_only: bool,
#[serde(default)]
linux_only: bool,
@ -104,9 +108,13 @@ struct Project {
#[serde(default)]
skip_dump_link: bool,
#[serde(default)]
skip_zip: bool,
#[serde(default)]
linux_names: Vec<String>,
#[serde(default)]
dump_names: Vec<String>,
#[serde(default)]
dump_nvidia_names: Vec<String>,
}
#[derive(Clone, Copy, Default, PartialEq, Debug)]
@ -116,14 +124,56 @@ enum TargetKind {
Cdylib,
}
impl Project {
fn new(json_pkg: cargo_metadata::Package) -> Self {
let mut project = serde_json::from_value::<Option<ZludaMetadata>>(json_pkg.metadata)
.unwrap()
.map_or(Default::default(), |x| x.zluda);
if project != Default::default() {
project.top_level = true;
struct Workspace {
pub cargo: String,
pub project_root: PathBuf,
pub projects: Vec<Project>,
pub target_directory: Utf8PathBuf,
}
impl Workspace {
fn open(is_debug: bool) -> Result<Self, DynError> {
let cargo = env::var("CARGO").unwrap_or_else(|_| "cargo".to_string());
let project_root = Self::project_root()?;
let mut cmd = cargo_metadata::MetadataCommand::new();
cmd.cargo_path(&cargo).current_dir(&project_root).no_deps();
let cargo_metadata = cmd.exec()?;
let projects = cargo_metadata
.packages
.into_iter()
.filter_map(Project::new)
.filter(|p| !p.skip_build(is_debug))
.collect::<Vec<_>>();
let mut target_directory = cargo_metadata.target_directory;
target_directory.push(if is_debug { "debug" } else { "release" });
Ok(Workspace {
cargo,
project_root,
projects,
target_directory,
})
}
fn project_root() -> Result<PathBuf, DynError> {
Ok(Path::new(&env!("CARGO_MANIFEST_DIR"))
.ancestors()
.nth(1)
.ok_or::<DynError>("CARGO_MANIFEST_DIR".into())?
.to_path_buf())
}
fn cargo_command(&self) -> Command {
let mut command = Command::new(&self.cargo);
command.current_dir(&self.project_root);
command
}
}
impl Project {
fn new(json_pkg: cargo_metadata::Package) -> Option<Self> {
let project_metadata =
serde_json::from_value::<Option<ZludaMetadata>>(json_pkg.metadata).unwrap()?;
let mut project = project_metadata.zluda;
project.name = json_pkg.name;
if let Some((target_name, kind)) = json_pkg.targets.into_iter().find_map(|t| {
match t.kind.first().map(std::ops::Deref::deref) {
@ -135,13 +185,10 @@ impl Project {
project.target_name = target_name;
project.kind = kind;
}
project
Some(project)
}
fn skip_build(&self, is_debug: bool) -> bool {
if !self.top_level {
return true;
}
if self.broken {
return true;
}
@ -159,20 +206,18 @@ impl Project {
}
fn build(is_debug: bool) -> Result<i32, DynError> {
let cargo = env::var("CARGO").unwrap_or_else(|_| "cargo".to_string());
let project_root = project_root()?;
let mut cmd = cargo_metadata::MetadataCommand::new();
cmd.cargo_path(&cargo).current_dir(&project_root).no_deps();
let metadata = cmd.exec()?;
let projects = metadata
.packages
.into_iter()
.map(Project::new)
.filter(|p| !p.skip_build(is_debug))
.collect::<Vec<_>>();
let mut command = Command::new(&cargo);
command.current_dir(&project_root).arg("build");
projects.iter().fold(&mut command, |command, proj| {
build_impl(is_debug)?;
Ok(0)
}
fn build_impl(is_debug: bool) -> Result<Workspace, DynError> {
let workspace = Workspace::open(is_debug)?;
let mut command = workspace.cargo_command();
command.arg("build");
workspace
.projects
.iter()
.fold(&mut command, |command, proj| {
command.args(["-p", &proj.name])
});
if !is_debug {
@ -180,46 +225,57 @@ fn build(is_debug: bool) -> Result<i32, DynError> {
}
let build_result = command.status()?.code().unwrap();
if build_result != 0 {
return Ok(build_result);
return Err(format!("{command:?} failed with exit code {build_result}").into());
}
os::create_dump_dir_and_symlinks(is_debug, metadata.target_directory, projects);
Ok(0)
os::create_dump_dir_and_symlinks(&workspace);
Ok(workspace)
}
fn project_root() -> Result<PathBuf, DynError> {
Ok(Path::new(&env!("CARGO_MANIFEST_DIR"))
.ancestors()
.nth(1)
.ok_or::<DynError>("CARGO_MANIFEST_DIR".into())?
.to_path_buf())
}
impl TargetKind {
#[cfg(unix)]
fn prefix(self) -> &'static str {
match self {
TargetKind::Binary => "",
TargetKind::Cdylib => "lib",
}
}
#[cfg(not(unix))]
mod os {
use super::Project;
use cargo_metadata::camino::Utf8PathBuf;
#[cfg(unix)]
fn suffix(self) -> &'static str {
match self {
TargetKind::Binary => "",
TargetKind::Cdylib => ".so",
}
}
// This is 100% intentional, we don't want symlinks on Windows since
// we use completely different scheme for injections here
pub(crate) fn create_dump_dir_and_symlinks(_: bool, _: Utf8PathBuf, _: Vec<Project>) {}
#[cfg(windows)]
fn suffix(self) -> &'static str {
match self {
TargetKind::Binary => ".exe",
TargetKind::Cdylib => ".dll",
}
}
}
#[cfg(unix)]
mod os {
use super::{Project, TargetKind};
use crate::Workspace;
use cargo_metadata::camino::Utf8PathBuf;
use flate2::{write::GzEncoder, Compression};
use std::{
fs::File,
time::{Duration, SystemTime},
};
pub(crate) fn create_dump_dir_and_symlinks(
is_debug: bool,
mut target_directory: Utf8PathBuf,
projects: Vec<Project>,
) {
pub(crate) fn create_dump_dir_and_symlinks(workspace: &Workspace) {
use std::fs;
target_directory.push(if is_debug { "debug" } else { "release" });
let mut dump_dir = target_directory.clone();
let mut dump_dir = workspace.target_directory.clone();
dump_dir.push("dump");
fs::create_dir_all(&dump_dir).unwrap();
for project in projects {
let mut dump_nvidia_dir = dump_dir.clone();
dump_nvidia_dir.set_file_name("dump_nvidia");
fs::create_dir_all(&dump_nvidia_dir).unwrap();
for project in workspace.projects.iter() {
let dst = format!(
"{}{}{}",
project.kind.prefix(),
@ -227,15 +283,18 @@ mod os {
project.kind.suffix()
);
let dump_dst = format!("../{}", dst);
for src_file in project.linux_names {
force_symlink(&dst, &target_directory, &src_file);
for src_file in project.linux_names.iter() {
force_symlink(&dst, &workspace.target_directory, src_file);
if project.skip_dump_link {
continue;
}
force_symlink(&dump_dst, &dump_dir, &src_file);
force_symlink(&dump_dst, &dump_dir, src_file);
}
for src_file in project.dump_names {
force_symlink(&dump_dst, &dump_dir, &src_file);
for src_file in project.dump_names.iter() {
force_symlink(&dump_dst, &dump_dir, src_file);
}
for src_file in project.dump_nvidia_names.iter() {
force_symlink(&dump_dst, &dump_nvidia_dir, src_file);
}
}
}
@ -263,19 +322,128 @@ mod os {
}
}
impl TargetKind {
fn prefix(self) -> &'static str {
match self {
TargetKind::Binary => "",
TargetKind::Cdylib => "lib",
pub fn zip(workspace: Workspace) -> i32 {
let mut target_file = workspace.target_directory.clone();
target_file.push("zluda.tar.gz");
let gz_file = File::create(target_file).unwrap();
let gz = GzEncoder::new(gz_file, Compression::default());
let mut tar = tar::Builder::new(gz);
let time = SystemTime::now()
.duration_since(SystemTime::UNIX_EPOCH)
.unwrap_or(Duration::ZERO);
for project in workspace.projects {
if project.skip_zip {
continue;
}
let mut src_file = File::open(format!(
"{}/{}{}{}",
&workspace.target_directory,
project.kind.prefix(),
project.target_name,
project.kind.suffix()
))
.unwrap();
let file_name = format!(
"{}{}{}",
project.kind.prefix(),
project.target_name,
project.kind.suffix()
);
tar.append_file(format!("zluda/{file_name}"), &mut src_file)
.unwrap();
for linux_name in project.linux_names.iter() {
let mut header = tar_header_symlink(time);
tar.append_link(&mut header, format!("zluda/{}", linux_name), &file_name)
.unwrap();
if project.skip_dump_link {
continue;
}
let mut header = tar_header_symlink(time);
tar.append_link(
&mut header,
format!("zluda/dump/{}", linux_name),
format!("../{file_name}"),
)
.unwrap();
}
for dump_name in project.dump_names.iter() {
let mut header = tar_header_symlink(time);
tar.append_link(
&mut header,
format!("zluda/dump/{}", dump_name),
format!("../{file_name}"),
)
.unwrap();
}
for dump_name in project.dump_nvidia_names.iter() {
let mut header = tar_header_symlink(time);
tar.append_link(
&mut header,
format!("zluda/dump_nvidia/{}", dump_name),
format!("../{file_name}"),
)
.unwrap();
}
}
tar.finish().unwrap();
0
}
fn suffix(self) -> &'static str {
match self {
TargetKind::Binary => "",
TargetKind::Cdylib => ".so",
}
}
fn tar_header_symlink(time: Duration) -> tar::Header {
let mut header = tar::Header::new_gnu();
header.set_mtime(time.as_secs());
header.set_entry_type(tar::EntryType::Symlink);
header
}
}
#[cfg(windows)]
mod os {
use crate::Workspace;
use std::{convert::TryFrom, fs::File};
// This is 100% intentional, we don't want symlinks on Windows since
// we use a completely different scheme for injections there
pub(crate) fn create_dump_dir_and_symlinks(_: &Workspace) {}
pub(crate) fn zip(workspace: Workspace) -> i32 {
fn get_zip_entry_options(
f: &File,
time_offset: time::UtcOffset,
) -> Option<zip::write::FileOptions> {
let time = f.metadata().unwrap().modified().unwrap();
let time = time::OffsetDateTime::from(time).to_offset(time_offset);
Some(
zip::write::FileOptions::default()
.last_modified_time(zip::DateTime::try_from(time).unwrap()),
)
}
let mut target_file = workspace.target_directory.clone();
target_file.push("zluda.zip");
let zip_archive = File::create(target_file).unwrap();
let mut zip_writer = zip::write::ZipWriter::new(zip_archive);
let time_offset = time::UtcOffset::current_local_offset().unwrap_or(time::UtcOffset::UTC);
for p in workspace.projects {
if p.skip_zip {
continue;
}
let mut src_file = File::open(format!(
"{}/{}{}",
&workspace.target_directory,
p.target_name,
p.kind.suffix()
))
.unwrap();
zip_writer
.start_file(
format!("zluda/{}{}", p.target_name, p.kind.suffix()),
get_zip_entry_options(&src_file, time_offset)
.unwrap_or(zip::write::FileOptions::default()),
)
.unwrap();
std::io::copy(&mut src_file, &mut zip_writer).unwrap();
}
zip_writer.finish().unwrap();
0
}
}

View file

@ -26,3 +26,4 @@ features = [
[package.metadata.zluda]
debug_only = true
windows_only = true
skip_zip = true

View file

@ -44,3 +44,4 @@ rand = "0.8.5"
# Nominally debug_only, but useful for power users
[package.metadata.zluda]
dump_names = ["libcuda.so", "libcuda.so.1"]
dump_nvidia_names = ["libcuda.so", "libcuda.so.1"]

View file

@ -24,11 +24,13 @@ use winapi::um::winbase::{INFINITE, WAIT_FAILED};
static REDIRECT_DLL: &'static str = "zluda_redirect.dll";
static NCCL_DLL: &'static str = "nccl.dll";
static NVRTC_DLL: &'static str = "nvrtc.dll";
static NVRTC_DLL: &'static str = "nvrtc64.dll";
static NVCUDA_DLL: &'static str = "nvcuda.dll";
static NVML_DLL: &'static str = "nvml.dll";
static NVAPI_DLL: &'static str = "nvapi64.dll";
static NVOPTIX_DLL: &'static str = "optix.6.6.0.dll";
static CUBLAS_DLL: &'static str = "cublas64.dll";
static CUSPARSE_DLL: &'static str = "cusparse64.dll";
include!("../../zluda_redirect/src/payload_guid.rs");
@ -39,7 +41,7 @@ struct ProgramArguments {
#[argh(option)]
nccl: Option<PathBuf>,
/// DLL to be injected instead of system nvrtc.dll. If not provided {0}, will use nvrtc.dll from its own directory
/// DLL to be injected instead of system nvrtc64.dll. If not provided, no injection will take place
#[argh(option)]
nvrtc: Option<PathBuf>,
@ -59,6 +61,14 @@ struct ProgramArguments {
#[argh(option)]
nvoptix: Option<PathBuf>,
/// DLL to be injected instead of system cublas64.dll. If not provided, no injection will take place
#[argh(option)]
cublas: Option<PathBuf>,
/// DLL to be injected instead of system cusparse64.dll. If not provided, no injection will take place
#[argh(option)]
cusparse: Option<PathBuf>,
/// executable to be injected with custom CUDA libraries
#[argh(positional)]
exe: String,
@ -76,17 +86,25 @@ pub fn main_impl() -> Result<(), Box<dyn Error>> {
let mut proc_info = unsafe { mem::zeroed::<detours_sys::_PROCESS_INFORMATION>() };
let mut dlls_to_inject = vec![
environment.nccl_path_zero_terminated.as_ptr() as _,
environment.nvrtc_path_zero_terminated.as_ptr() as _,
environment.nvcuda_path_zero_terminated.as_ptr() as _,
environment.nvml_path_zero_terminated.as_ptr() as *const i8,
environment.redirect_path_zero_terminated.as_ptr() as _,
];
if let Some(ref nvrtc) = environment.nvrtc_path_zero_terminated {
dlls_to_inject.push(nvrtc.as_ptr() as _);
}
if let Some(ref nvapi) = environment.nvapi_path_zero_terminated {
dlls_to_inject.push(nvapi.as_ptr() as _);
}
if let Some(ref nvoptix) = environment.nvoptix_path_zero_terminated {
dlls_to_inject.push(nvoptix.as_ptr() as _);
}
if let Some(ref cublas) = environment.cublas_path_zero_terminated {
dlls_to_inject.push(cublas.as_ptr() as _);
}
if let Some(ref cusparse) = environment.cusparse_path_zero_terminated {
dlls_to_inject.push(cusparse.as_ptr() as _);
}
os_call!(
detours_sys::DetourCreateProcessWithDllsW(
ptr::null(),
@ -159,11 +177,13 @@ pub fn main_impl() -> Result<(), Box<dyn Error>> {
struct NormalizedArguments {
nccl_path: PathBuf,
nvrtc_path: PathBuf,
nvrtc_path: Option<PathBuf>,
nvcuda_path: PathBuf,
nvml_path: PathBuf,
nvapi_path: Option<PathBuf>,
nvoptix_path: Option<PathBuf>,
cublas_path: Option<PathBuf>,
cusparse_path: Option<PathBuf>,
redirect_path: PathBuf,
winapi_command_line_zero_terminated: Vec<u16>,
}
@ -173,13 +193,14 @@ impl NormalizedArguments {
let current_exe = env::current_exe()?;
let nccl_path =
Self::get_absolute_path_or_default(&current_exe, prog_args.nccl, NCCL_DLL)?;
let nvrtc_path =
Self::get_absolute_path_or_default(&current_exe, prog_args.nvrtc, NVRTC_DLL)?;
let nvrtc_path = prog_args.nvrtc.map(Self::get_absolute_path).transpose()?;
let nvcuda_path =
Self::get_absolute_path_or_default(&current_exe, prog_args.nvcuda, NVCUDA_DLL)?;
let nvml_path = Self::get_absolute_path_or_default(&current_exe, prog_args.nvml, NVML_DLL)?;
let nvapi_path = prog_args.nvapi.map(Self::get_absolute_path).transpose()?;
let nvoptix_path = prog_args.nvoptix.map(Self::get_absolute_path).transpose()?;
let cublas_path = prog_args.cublas.map(Self::get_absolute_path).transpose()?;
let cusparse_path = prog_args.cusparse.map(Self::get_absolute_path).transpose()?;
let winapi_command_line_zero_terminated =
construct_command_line(std::iter::once(prog_args.exe).chain(prog_args.args));
let mut redirect_path = current_exe.parent().unwrap().to_path_buf();
@ -191,6 +212,8 @@ impl NormalizedArguments {
nvml_path,
nvapi_path,
nvoptix_path,
cublas_path,
cusparse_path,
redirect_path,
winapi_command_line_zero_terminated,
})
@ -245,11 +268,13 @@ impl NormalizedArguments {
struct Environment {
nccl_path_zero_terminated: String,
nvrtc_path_zero_terminated: String,
nvrtc_path_zero_terminated: Option<String>,
nvcuda_path_zero_terminated: String,
nvml_path_zero_terminated: String,
nvapi_path_zero_terminated: Option<String>,
nvoptix_path_zero_terminated: Option<String>,
cublas_path_zero_terminated: Option<String>,
cusparse_path_zero_terminated: Option<String>,
redirect_path_zero_terminated: String,
winapi_command_line_zero_terminated: Vec<u16>,
_temp_dir: TempDir,
@ -266,11 +291,14 @@ impl Environment {
&_temp_dir,
NCCL_DLL,
)?);
let nvrtc_path_zero_terminated = Self::zero_terminate(Self::copy_to_correct_name(
args.nvrtc_path,
&_temp_dir,
NVRTC_DLL,
)?);
let nvrtc_path_zero_terminated = args
.nvrtc_path
.map(|nvrtc| {
Ok::<_, io::Error>(Self::zero_terminate(Self::copy_to_correct_name(
nvrtc, &_temp_dir, NVRTC_DLL,
)?))
})
.transpose()?;
let nvcuda_path_zero_terminated = Self::zero_terminate(Self::copy_to_correct_name(
args.nvcuda_path,
&_temp_dir,
@ -299,6 +327,26 @@ impl Environment {
)?))
})
.transpose()?;
let cublas_path_zero_terminated = args
.cublas_path
.map(|cublas| {
Ok::<_, io::Error>(Self::zero_terminate(Self::copy_to_correct_name(
cublas,
&_temp_dir,
CUBLAS_DLL,
)?))
})
.transpose()?;
let cusparse_path_zero_terminated = args
.cusparse_path
.map(|cusparse| {
Ok::<_, io::Error>(Self::zero_terminate(Self::copy_to_correct_name(
cusparse,
&_temp_dir,
CUSPARSE_DLL,
)?))
})
.transpose()?;
let redirect_path_zero_terminated = Self::zero_terminate(args.redirect_path);
Ok(Self {
nccl_path_zero_terminated,
@ -307,6 +355,8 @@ impl Environment {
nvml_path_zero_terminated,
nvapi_path_zero_terminated,
nvoptix_path_zero_terminated,
cublas_path_zero_terminated,
cusparse_path_zero_terminated,
redirect_path_zero_terminated,
winapi_command_line_zero_terminated: args.winapi_command_line_zero_terminated,
_temp_dir,

View file

@ -15,5 +15,4 @@ atiadlxx-sys = { path = "../atiadlxx-sys" }
rocm_smi-sys = { path = "../rocm_smi-sys" }
[package.metadata.zluda]
top_level = true
linux_names = ["libnvidia-ml.so", "libnvidia-ml.so.1"]