Merge remote-tracking branch 'upstream/master'

This commit is contained in:
Seunghoon Lee 2024-04-28 13:00:19 +09:00
commit c2710a88f1
No known key found for this signature in database
GPG key ID: 436E38F4E70BD152
13 changed files with 2955 additions and 232 deletions

View file

@ -1,13 +1,13 @@
name: Rust name: Rust
on: on:
push: push:
branches: ["master"] branches: [master]
pull_request: pull_request:
branches: ["master"] branches: [master]
env: env:
CARGO_TERM_COLOR: always CARGO_TERM_COLOR: always
ROCM_VERSION: "5.7.3"
jobs: jobs:
release: release:
@ -33,92 +33,77 @@ jobs:
draft: false draft: false
prerelease: true prerelease: true
build-linux: build_lin:
needs: release name: Build and publish (Linux)
runs-on: ubuntu-22.04 runs-on: ubuntu-20.04
permissions:
contents: write
pull-requests: write
repository-projects: write
steps: steps:
- name: Prepare for build - uses: jlumbroso/free-disk-space@main
- name: Install ROCm
run: | run: |
sudo rm -rf "/usr/local/share/boost" sudo mkdir --parents --mode=0755 /etc/apt/keyrings
sudo rm -rf "$AGENT_TOOLSDIRECTORY" sudo sh -c 'wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | gpg --dearmor | tee /etc/apt/keyrings/rocm.gpg > /dev/null'
- name: Checkout repository sudo sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/${{ env.ROCM_VERSION }} focal main > /etc/apt/sources.list.d/rocm.list'
uses: actions/checkout@v3 sudo apt-get update
- name: Initialize submodule sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends rocm-smi-lib hip-runtime-amd comgr hipblaslt-dev hipfft-dev rocblas-dev rocsolver-dev rocsparse-dev miopen-hip-dev rocm-device-libs
echo 'export PATH="$PATH:/opt/rocm/bin"' | sudo tee /etc/profile.d/rocm.sh
echo '/opt/rocm/lib' | sudo tee /etc/ld.so.conf.d/rocm.conf
sudo ldconfig
- uses: actions/checkout@v4
with:
submodules: true
- uses: Swatinem/rust-cache@v2
- name: Build
# We use tar to unpack .tar.gz we've created because Github actions/upload-artifact
# is broken and will _always_ zip your artifact (even if it is a single file).
# See here: https://github.com/actions/upload-artifact/issues/39
# and here: https://github.com/actions/upload-artifact/issues/109
run: | run: |
git submodule init cargo xtask zip -r
git submodule update tar -xzf target/release/zluda.tar.gz -C target/release
- name: Prepare AMD HIP SDK # https://stackoverflow.com/a/64195658
run: | - name: Set revision hash
sudo apt update run: echo "SHORT_SHA=$(git rev-parse --short HEAD)" >> $GITHUB_ENV
wget https://repo.radeon.com/amdgpu-install/5.7.1/ubuntu/jammy/amdgpu-install_5.7.50701-1_all.deb - name: Upload (artifact)
sudo apt install ./amdgpu-install_5.7.50701-1_all.deb uses: actions/upload-artifact@v4
sudo apt update with:
- name: Install AMD HIP SDK name: zluda-linux-${{ env.SHORT_SHA }}
# can fail at dkms. ignore and continue path: target/release/zluda
continue-on-error: true - name: Upload (release)
run: sudo amdgpu-install --usecase=rocm,hip,hiplibsdk
- name: Build for Linux
run: |
cargo xtask --release
- name: Prepare for release
run: |
cd ./target/release
tar -czvf ZLUDA-linux-amd64.tar.gz *.so*
- name: Upload Linux artifact
uses: actions/upload-release-asset@v1 uses: actions/upload-release-asset@v1
env: env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with: with:
upload_url: ${{ needs.release.outputs.upload_url }} upload_url: ${{ needs.release.outputs.upload_url }}
asset_path: ./target/release/ZLUDA-linux-amd64.tar.gz asset_path: target/release/zluda.tar.gz
asset_name: ZLUDA-linux-amd64.tar.gz asset_name: ZLUDA-linux-amd64.tar.gz
asset_content_type: application/gzip asset_content_type: application/gzip
build-windows: build_win:
needs: release name: Build and publish (Windows)
runs-on: windows-latest runs-on: windows-2019
permissions:
contents: write
pull-requests: write
repository-projects: write
steps: steps:
- name: Checkout repository - uses: actions/checkout@v4
uses: actions/checkout@v3 with:
- name: Initialize submodule submodules: true
- uses: Swatinem/rust-cache@v2
- name: Build
run: | run: |
git submodule init cargo xtask zip -r
git submodule update Expand-Archive -Path target/release/zluda.zip -DestinationPath target/release
- name: Install AMD HIP SDK # https://stackoverflow.com/a/74033027
run: | - name: Set revision hash
C:\msys64\usr\bin\wget.exe https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-23.Q4-Win10-Win11-For-HIP.exe -O "amdgpu-install.exe" run: echo "SHORT_SHA=$("${{ github.sha }}".SubString(0, 7))" >> $env:GITHUB_ENV
.\amdgpu-install.exe -Install -View:1 - name: Upload (artifact)
Start-Sleep -Seconds 60 uses: actions/upload-artifact@v4
$setupId = (Get-Process ATISetup).id with:
Wait-Process -Id $setupId name: zluda-windows-${{ env.SHORT_SHA }}
- name: Build for Windows path: target/release/zluda
run: | - name: Upload (release)
$ROCm_PATH = "C:\Program Files\AMD\ROCm"
$Env:HIP_PATH = "$ROCm_PATH\$(Get-ChildItem -Path $ROCm_PATH -Name)"
$Env:PATH = "$Env:HIP_PATH\bin;$Env:PATH"
$Env:HIP_PATH = "$Env:HIP_PATH\"
cargo xtask --release
- name: Prepare for release
run: |
cd .\target\release
7z a .\ZLUDA-windows-amd64.zip .\*.dll
7z a .\ZLUDA-windows-amd64.zip .\*.exe
- name: Upload Windows artifact
uses: actions/upload-release-asset@v1 uses: actions/upload-release-asset@v1
env: env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with: with:
upload_url: ${{ needs.release.outputs.upload_url }} upload_url: ${{ needs.release.outputs.upload_url }}
asset_path: ./target/release/ZLUDA-windows-amd64.zip asset_path: target/release/zluda.zip
asset_name: ZLUDA-windows-amd64.zip asset_name: ZLUDA-windows-amd64.zip
asset_content_type: application/zip asset_content_type: application/zip

7
.gitignore vendored
View file

@ -1,5 +1,4 @@
target/ target/
Cargo.lock
.vscode/
.vscode/
.idea/ .idea/

2573
Cargo.lock generated Normal file

File diff suppressed because it is too large Load diff

View file

@ -60,4 +60,4 @@ opt-level = 3
opt-level = 3 opt-level = 3
[profile.dev.package.xtask] [profile.dev.package.xtask]
opt-level = 3 opt-level = 2

View file

@ -1,65 +0,0 @@
[config]
default_to_workspace = false
skip_core_tasks = true
[tasks.build]
run_task = [
{ name = "build-windows", condition = { platforms = ["windows"] } },
{ name = "build-linux", condition = { platforms = ["linux"] } },
]
[tasks.build-windows]
command = "cargo"
args = [
"build",
"-p", "offline_compiler",
"-p", "zluda_blas",
"-p", "zluda_ccl",
"-p", "zluda_dnn",
"-p", "zluda_dump",
"-p", "zluda_inject",
"-p", "zluda_fft",
"-p", "zluda_lib",
"-p", "zluda_ml",
"-p", "zluda_sparse",
"-p", "zluda_rtc",
"-p", "zluda_redirect",
]
[tasks.build-linux]
command = "cargo"
args = [
"build",
"-p", "offline_compiler",
"-p", "zluda_blas",
"-p", "zluda_blaslt",
"-p", "zluda_ccl",
"-p", "zluda_dnn",
"-p", "zluda_dump",
"-p", "zluda_fft",
"-p", "zluda_lib",
"-p", "zluda_ml",
"-p", "zluda_rtc",
"-p", "zluda_sparse",
]
[tasks.build-release]
command = "cargo"
args = [
"build",
"--release",
"-p", "offline_compiler",
"-p", "zluda_blas",
"-p", "zluda_blaslt",
"-p", "zluda_ccl",
"-p", "zluda_dnn",
"-p", "zluda_dump",
"-p", "zluda_fft",
"-p", "zluda_lib",
"-p", "zluda_ml",
"-p", "zluda_rtc",
"-p", "zluda_sparse",
]
[tasks.default]
alias = "build"

View file

@ -92,13 +92,16 @@ If you are dumping original CUDA use:
### Linux ### Linux
Known bug: when dumping from original CUDA you should remove (or rename) all the files in `<ZLUDA_DIRECTORY>/dump` except `libcuda.so` and `libcuda.so.1`. If dumping from ZLUDA use it like this:
Use it like this:
``` ```
LD_LIBRARY_PATH="<ZLUDA_DIRECTORY>/dump:$LD_LIBRARY_PATH" <APPLICATION> <APPLICATION_ARGUMENTS> LD_LIBRARY_PATH="<ZLUDA_DIRECTORY>/dump:$LD_LIBRARY_PATH" <APPLICATION> <APPLICATION_ARGUMENTS>
``` ```
If dumping from NVIDIA CUDA use it like this:
```
LD_LIBRARY_PATH="<ZLUDA_DIRECTORY>/dump_nvidia:$LD_LIBRARY_PATH" <APPLICATION> <APPLICATION_ARGUMENTS>
```
### Result ### Result
If all went well you should see lines like this in the console output and in the log file specified by `ZLUDA_DUMP_DIR`: If all went well you should see lines like this in the console output and in the log file specified by `ZLUDA_DUMP_DIR`:

View file

@ -18,3 +18,4 @@ features = [
[package.metadata.zluda] [package.metadata.zluda]
debug_only = true debug_only = true
skip_zip = true

View file

@ -11,4 +11,12 @@ cargo_metadata = "=0.17.0"
# cargo-platform is a cargo_metadata, version 0.1.6 requires rust 1.70 or higher # cargo-platform is a cargo_metadata, version 0.1.6 requires rust 1.70 or higher
cargo-platform = "=0.1.5" cargo-platform = "=0.1.5"
serde = "1.0.193" serde = "1.0.193"
serde_json = "1.0.108" serde_json = "1.0.108"
time = { version = "=0.3.23", features = ["local-offset"] }
[target.'cfg(windows)'.dependencies]
zip = { version = "0.6.6", features = ["deflate", "time"], default-features = false }
[target.'cfg(unix)'.dependencies]
flate2 = { version = "1.0.28", features = ["cloudflare_zlib"], default-features = false }
tar = "0.4"

View file

@ -1,4 +1,5 @@
use argh::{EarlyExit, FromArgs, TopLevelCommand}; use argh::{EarlyExit, FromArgs, TopLevelCommand};
use cargo_metadata::camino::Utf8PathBuf;
use serde::Deserialize; use serde::Deserialize;
use std::{ use std::{
env, env,
@ -60,7 +61,7 @@ struct BuildCommand {
} }
#[derive(FromArgs)] #[derive(FromArgs)]
/// Package build artifacts into an archive (.zip or .tar.gz) /// Compile ZLUDA and package binaries into an archive (.zip or .tar.gz)
#[argh(subcommand, name = "zip")] #[argh(subcommand, name = "zip")]
struct ZipCommand { struct ZipCommand {
/// use artifacts from release mode /// use artifacts from release mode
@ -73,10 +74,15 @@ fn main() -> Result<(), DynError> {
let args: Arguments = argh::from_env(); let args: Arguments = argh::from_env();
std::process::exit(match args.command { std::process::exit(match args.command {
Subcommand::Build(BuildCommand { release }) => build(!release)?, Subcommand::Build(BuildCommand { release }) => build(!release)?,
Subcommand::Zip(_) => panic!(), Subcommand::Zip(ZipCommand { release }) => build_and_zip(!release),
}) })
} }
fn build_and_zip(is_debug: bool) -> i32 {
let workspace = build_impl(is_debug).unwrap();
os::zip(workspace)
}
#[derive(Deserialize)] #[derive(Deserialize)]
struct ZludaMetadata { struct ZludaMetadata {
zluda: Project, zluda: Project,
@ -92,8 +98,6 @@ struct Project {
#[serde(skip_deserializing)] #[serde(skip_deserializing)]
kind: TargetKind, kind: TargetKind,
#[serde(default)] #[serde(default)]
top_level: bool,
#[serde(default)]
windows_only: bool, windows_only: bool,
#[serde(default)] #[serde(default)]
linux_only: bool, linux_only: bool,
@ -104,9 +108,13 @@ struct Project {
#[serde(default)] #[serde(default)]
skip_dump_link: bool, skip_dump_link: bool,
#[serde(default)] #[serde(default)]
skip_zip: bool,
#[serde(default)]
linux_names: Vec<String>, linux_names: Vec<String>,
#[serde(default)] #[serde(default)]
dump_names: Vec<String>, dump_names: Vec<String>,
#[serde(default)]
dump_nvidia_names: Vec<String>,
} }
#[derive(Clone, Copy, Default, PartialEq, Debug)] #[derive(Clone, Copy, Default, PartialEq, Debug)]
@ -116,14 +124,56 @@ enum TargetKind {
Cdylib, Cdylib,
} }
struct Workspace {
pub cargo: String,
pub project_root: PathBuf,
pub projects: Vec<Project>,
pub target_directory: Utf8PathBuf,
}
impl Workspace {
fn open(is_debug: bool) -> Result<Self, DynError> {
let cargo = env::var("CARGO").unwrap_or_else(|_| "cargo".to_string());
let project_root = Self::project_root()?;
let mut cmd = cargo_metadata::MetadataCommand::new();
cmd.cargo_path(&cargo).current_dir(&project_root).no_deps();
let cargo_metadata = cmd.exec()?;
let projects = cargo_metadata
.packages
.into_iter()
.filter_map(Project::new)
.filter(|p| !p.skip_build(is_debug))
.collect::<Vec<_>>();
let mut target_directory = cargo_metadata.target_directory;
target_directory.push(if is_debug { "debug" } else { "release" });
Ok(Workspace {
cargo,
project_root,
projects,
target_directory,
})
}
fn project_root() -> Result<PathBuf, DynError> {
Ok(Path::new(&env!("CARGO_MANIFEST_DIR"))
.ancestors()
.nth(1)
.ok_or::<DynError>("CARGO_MANIFEST_DIR".into())?
.to_path_buf())
}
fn cargo_command(&self) -> Command {
let mut command = Command::new(&self.cargo);
command.current_dir(&self.project_root);
command
}
}
impl Project { impl Project {
fn new(json_pkg: cargo_metadata::Package) -> Self { fn new(json_pkg: cargo_metadata::Package) -> Option<Self> {
let mut project = serde_json::from_value::<Option<ZludaMetadata>>(json_pkg.metadata) let project_metadata =
.unwrap() serde_json::from_value::<Option<ZludaMetadata>>(json_pkg.metadata).unwrap()?;
.map_or(Default::default(), |x| x.zluda); let mut project = project_metadata.zluda;
if project != Default::default() {
project.top_level = true;
}
project.name = json_pkg.name; project.name = json_pkg.name;
if let Some((target_name, kind)) = json_pkg.targets.into_iter().find_map(|t| { if let Some((target_name, kind)) = json_pkg.targets.into_iter().find_map(|t| {
match t.kind.first().map(std::ops::Deref::deref) { match t.kind.first().map(std::ops::Deref::deref) {
@ -135,13 +185,10 @@ impl Project {
project.target_name = target_name; project.target_name = target_name;
project.kind = kind; project.kind = kind;
} }
project Some(project)
} }
fn skip_build(&self, is_debug: bool) -> bool { fn skip_build(&self, is_debug: bool) -> bool {
if !self.top_level {
return true;
}
if self.broken { if self.broken {
return true; return true;
} }
@ -159,67 +206,76 @@ impl Project {
} }
fn build(is_debug: bool) -> Result<i32, DynError> { fn build(is_debug: bool) -> Result<i32, DynError> {
let cargo = env::var("CARGO").unwrap_or_else(|_| "cargo".to_string()); build_impl(is_debug)?;
let project_root = project_root()?; Ok(0)
let mut cmd = cargo_metadata::MetadataCommand::new(); }
cmd.cargo_path(&cargo).current_dir(&project_root).no_deps();
let metadata = cmd.exec()?; fn build_impl(is_debug: bool) -> Result<Workspace, DynError> {
let projects = metadata let workspace = Workspace::open(is_debug)?;
.packages let mut command = workspace.cargo_command();
.into_iter() command.arg("build");
.map(Project::new) workspace
.filter(|p| !p.skip_build(is_debug)) .projects
.collect::<Vec<_>>(); .iter()
let mut command = Command::new(&cargo); .fold(&mut command, |command, proj| {
command.current_dir(&project_root).arg("build"); command.args(["-p", &proj.name])
projects.iter().fold(&mut command, |command, proj| { });
command.args(["-p", &proj.name])
});
if !is_debug { if !is_debug {
command.arg("--release"); command.arg("--release");
} }
let build_result = command.status()?.code().unwrap(); let build_result = command.status()?.code().unwrap();
if build_result != 0 { if build_result != 0 {
return Ok(build_result); return Err(format!("{command:?} failed with exit code {build_result}").into());
} }
os::create_dump_dir_and_symlinks(is_debug, metadata.target_directory, projects); os::create_dump_dir_and_symlinks(&workspace);
Ok(0) Ok(workspace)
} }
fn project_root() -> Result<PathBuf, DynError> { impl TargetKind {
Ok(Path::new(&env!("CARGO_MANIFEST_DIR")) #[cfg(unix)]
.ancestors() fn prefix(self) -> &'static str {
.nth(1) match self {
.ok_or::<DynError>("CARGO_MANIFEST_DIR".into())? TargetKind::Binary => "",
.to_path_buf()) TargetKind::Cdylib => "lib",
} }
}
#[cfg(not(unix))] #[cfg(unix)]
mod os { fn suffix(self) -> &'static str {
use super::Project; match self {
use cargo_metadata::camino::Utf8PathBuf; TargetKind::Binary => "",
TargetKind::Cdylib => ".so",
}
}
// This is 100% intentional, we don't want symlinks on Windows since #[cfg(windows)]
// we use completely different scheme for injections here fn suffix(self) -> &'static str {
pub(crate) fn create_dump_dir_and_symlinks(_: bool, _: Utf8PathBuf, _: Vec<Project>) {} match self {
TargetKind::Binary => ".exe",
TargetKind::Cdylib => ".dll",
}
}
} }
#[cfg(unix)] #[cfg(unix)]
mod os { mod os {
use super::{Project, TargetKind}; use crate::Workspace;
use cargo_metadata::camino::Utf8PathBuf; use cargo_metadata::camino::Utf8PathBuf;
use flate2::{write::GzEncoder, Compression};
use std::{
fs::File,
time::{Duration, SystemTime},
};
pub(crate) fn create_dump_dir_and_symlinks( pub(crate) fn create_dump_dir_and_symlinks(workspace: &Workspace) {
is_debug: bool,
mut target_directory: Utf8PathBuf,
projects: Vec<Project>,
) {
use std::fs; use std::fs;
target_directory.push(if is_debug { "debug" } else { "release" }); let mut dump_dir = workspace.target_directory.clone();
let mut dump_dir = target_directory.clone();
dump_dir.push("dump"); dump_dir.push("dump");
fs::create_dir_all(&dump_dir).unwrap(); fs::create_dir_all(&dump_dir).unwrap();
for project in projects { let mut dump_nvidia_dir = dump_dir.clone();
dump_nvidia_dir.set_file_name("dump_nvidia");
fs::create_dir_all(&dump_nvidia_dir).unwrap();
for project in workspace.projects.iter() {
let dst = format!( let dst = format!(
"{}{}{}", "{}{}{}",
project.kind.prefix(), project.kind.prefix(),
@ -227,15 +283,18 @@ mod os {
project.kind.suffix() project.kind.suffix()
); );
let dump_dst = format!("../{}", dst); let dump_dst = format!("../{}", dst);
for src_file in project.linux_names { for src_file in project.linux_names.iter() {
force_symlink(&dst, &target_directory, &src_file); force_symlink(&dst, &workspace.target_directory, src_file);
if project.skip_dump_link { if project.skip_dump_link {
continue; continue;
} }
force_symlink(&dump_dst, &dump_dir, &src_file); force_symlink(&dump_dst, &dump_dir, src_file);
} }
for src_file in project.dump_names { for src_file in project.dump_names.iter() {
force_symlink(&dump_dst, &dump_dir, &src_file); force_symlink(&dump_dst, &dump_dir, src_file);
}
for src_file in project.dump_nvidia_names.iter() {
force_symlink(&dump_dst, &dump_nvidia_dir, src_file);
} }
} }
} }
@ -263,19 +322,128 @@ mod os {
} }
} }
impl TargetKind { pub fn zip(workspace: Workspace) -> i32 {
fn prefix(self) -> &'static str { let mut target_file = workspace.target_directory.clone();
match self { target_file.push("zluda.tar.gz");
TargetKind::Binary => "", let gz_file = File::create(target_file).unwrap();
TargetKind::Cdylib => "lib", let gz = GzEncoder::new(gz_file, Compression::default());
let mut tar = tar::Builder::new(gz);
let time = SystemTime::now()
.duration_since(SystemTime::UNIX_EPOCH)
.unwrap_or(Duration::ZERO);
for project in workspace.projects {
if project.skip_zip {
continue;
}
let mut src_file = File::open(format!(
"{}/{}{}{}",
&workspace.target_directory,
project.kind.prefix(),
project.target_name,
project.kind.suffix()
))
.unwrap();
let file_name = format!(
"{}{}{}",
project.kind.prefix(),
project.target_name,
project.kind.suffix()
);
tar.append_file(format!("zluda/{file_name}"), &mut src_file)
.unwrap();
for linux_name in project.linux_names.iter() {
let mut header = tar_header_symlink(time);
tar.append_link(&mut header, format!("zluda/{}", linux_name), &file_name)
.unwrap();
if project.skip_dump_link {
continue;
}
let mut header = tar_header_symlink(time);
tar.append_link(
&mut header,
format!("zluda/dump/{}", linux_name),
format!("../{file_name}"),
)
.unwrap();
}
for dump_name in project.dump_names.iter() {
let mut header = tar_header_symlink(time);
tar.append_link(
&mut header,
format!("zluda/dump/{}", dump_name),
format!("../{file_name}"),
)
.unwrap();
}
for dump_name in project.dump_nvidia_names.iter() {
let mut header = tar_header_symlink(time);
tar.append_link(
&mut header,
format!("zluda/dump_nvidia/{}", dump_name),
format!("../{file_name}"),
)
.unwrap();
} }
} }
tar.finish().unwrap();
0
}
fn suffix(self) -> &'static str { fn tar_header_symlink(time: Duration) -> tar::Header {
match self { let mut header = tar::Header::new_gnu();
TargetKind::Binary => "", header.set_mtime(time.as_secs());
TargetKind::Cdylib => ".so", header.set_entry_type(tar::EntryType::Symlink);
} header
} }
}
#[cfg(windows)]
mod os {
use crate::Workspace;
use std::{convert::TryFrom, fs::File};
// This is 100% intentional, we don't want symlinks on Windows since
// we use a completely different scheme for injections there
pub(crate) fn create_dump_dir_and_symlinks(_: &Workspace) {}
pub(crate) fn zip(workspace: Workspace) -> i32 {
fn get_zip_entry_options(
f: &File,
time_offset: time::UtcOffset,
) -> Option<zip::write::FileOptions> {
let time = f.metadata().unwrap().modified().unwrap();
let time = time::OffsetDateTime::from(time).to_offset(time_offset);
Some(
zip::write::FileOptions::default()
.last_modified_time(zip::DateTime::try_from(time).unwrap()),
)
}
let mut target_file = workspace.target_directory.clone();
target_file.push("zluda.zip");
let zip_archive = File::create(target_file).unwrap();
let mut zip_writer = zip::write::ZipWriter::new(zip_archive);
let time_offset = time::UtcOffset::current_local_offset().unwrap_or(time::UtcOffset::UTC);
for p in workspace.projects {
if p.skip_zip {
continue;
}
let mut src_file = File::open(format!(
"{}/{}{}",
&workspace.target_directory,
p.target_name,
p.kind.suffix()
))
.unwrap();
zip_writer
.start_file(
format!("zluda/{}{}", p.target_name, p.kind.suffix()),
get_zip_entry_options(&src_file, time_offset)
.unwrap_or(zip::write::FileOptions::default()),
)
.unwrap();
std::io::copy(&mut src_file, &mut zip_writer).unwrap();
}
zip_writer.finish().unwrap();
0
} }
} }

View file

@ -26,3 +26,4 @@ features = [
[package.metadata.zluda] [package.metadata.zluda]
debug_only = true debug_only = true
windows_only = true windows_only = true
skip_zip = true

View file

@ -44,3 +44,4 @@ rand = "0.8.5"
# Nominally debug_only, but useful for power users # Nominally debug_only, but useful for power users
[package.metadata.zluda] [package.metadata.zluda]
dump_names = ["libcuda.so", "libcuda.so.1"] dump_names = ["libcuda.so", "libcuda.so.1"]
dump_nvidia_names = ["libcuda.so", "libcuda.so.1"]

View file

@ -24,11 +24,13 @@ use winapi::um::winbase::{INFINITE, WAIT_FAILED};
static REDIRECT_DLL: &'static str = "zluda_redirect.dll"; static REDIRECT_DLL: &'static str = "zluda_redirect.dll";
static NCCL_DLL: &'static str = "nccl.dll"; static NCCL_DLL: &'static str = "nccl.dll";
static NVRTC_DLL: &'static str = "nvrtc.dll"; static NVRTC_DLL: &'static str = "nvrtc64.dll";
static NVCUDA_DLL: &'static str = "nvcuda.dll"; static NVCUDA_DLL: &'static str = "nvcuda.dll";
static NVML_DLL: &'static str = "nvml.dll"; static NVML_DLL: &'static str = "nvml.dll";
static NVAPI_DLL: &'static str = "nvapi64.dll"; static NVAPI_DLL: &'static str = "nvapi64.dll";
static NVOPTIX_DLL: &'static str = "optix.6.6.0.dll"; static NVOPTIX_DLL: &'static str = "optix.6.6.0.dll";
static CUBLAS_DLL: &'static str = "cublas64.dll";
static CUSPARSE_DLL: &'static str = "cusparse64.dll";
include!("../../zluda_redirect/src/payload_guid.rs"); include!("../../zluda_redirect/src/payload_guid.rs");
@ -39,7 +41,7 @@ struct ProgramArguments {
#[argh(option)] #[argh(option)]
nccl: Option<PathBuf>, nccl: Option<PathBuf>,
/// DLL to be injected instead of system nvrtc.dll. If not provided {0}, will use nvrtc.dll from its own directory /// DLL to be injected instead of system nvrtc64.dll. If not provided, no injection will take place
#[argh(option)] #[argh(option)]
nvrtc: Option<PathBuf>, nvrtc: Option<PathBuf>,
@ -59,6 +61,14 @@ struct ProgramArguments {
#[argh(option)] #[argh(option)]
nvoptix: Option<PathBuf>, nvoptix: Option<PathBuf>,
/// DLL to be injected instead of system cublas64.dll. If not provided, no injection will take place
#[argh(option)]
cublas: Option<PathBuf>,
/// DLL to be injected instead of system cusparse64.dll. If not provided, no injection will take place
#[argh(option)]
cusparse: Option<PathBuf>,
/// executable to be injected with custom CUDA libraries /// executable to be injected with custom CUDA libraries
#[argh(positional)] #[argh(positional)]
exe: String, exe: String,
@ -76,17 +86,25 @@ pub fn main_impl() -> Result<(), Box<dyn Error>> {
let mut proc_info = unsafe { mem::zeroed::<detours_sys::_PROCESS_INFORMATION>() }; let mut proc_info = unsafe { mem::zeroed::<detours_sys::_PROCESS_INFORMATION>() };
let mut dlls_to_inject = vec![ let mut dlls_to_inject = vec![
environment.nccl_path_zero_terminated.as_ptr() as _, environment.nccl_path_zero_terminated.as_ptr() as _,
environment.nvrtc_path_zero_terminated.as_ptr() as _,
environment.nvcuda_path_zero_terminated.as_ptr() as _, environment.nvcuda_path_zero_terminated.as_ptr() as _,
environment.nvml_path_zero_terminated.as_ptr() as *const i8, environment.nvml_path_zero_terminated.as_ptr() as *const i8,
environment.redirect_path_zero_terminated.as_ptr() as _, environment.redirect_path_zero_terminated.as_ptr() as _,
]; ];
if let Some(ref nvrtc) = environment.nvrtc_path_zero_terminated {
dlls_to_inject.push(nvrtc.as_ptr() as _);
}
if let Some(ref nvapi) = environment.nvapi_path_zero_terminated { if let Some(ref nvapi) = environment.nvapi_path_zero_terminated {
dlls_to_inject.push(nvapi.as_ptr() as _); dlls_to_inject.push(nvapi.as_ptr() as _);
} }
if let Some(ref nvoptix) = environment.nvoptix_path_zero_terminated { if let Some(ref nvoptix) = environment.nvoptix_path_zero_terminated {
dlls_to_inject.push(nvoptix.as_ptr() as _); dlls_to_inject.push(nvoptix.as_ptr() as _);
} }
if let Some(ref cublas) = environment.cublas_path_zero_terminated {
dlls_to_inject.push(cublas.as_ptr() as _);
}
if let Some(ref cusparse) = environment.cusparse_path_zero_terminated {
dlls_to_inject.push(cusparse.as_ptr() as _);
}
os_call!( os_call!(
detours_sys::DetourCreateProcessWithDllsW( detours_sys::DetourCreateProcessWithDllsW(
ptr::null(), ptr::null(),
@ -159,11 +177,13 @@ pub fn main_impl() -> Result<(), Box<dyn Error>> {
struct NormalizedArguments { struct NormalizedArguments {
nccl_path: PathBuf, nccl_path: PathBuf,
nvrtc_path: PathBuf, nvrtc_path: Option<PathBuf>,
nvcuda_path: PathBuf, nvcuda_path: PathBuf,
nvml_path: PathBuf, nvml_path: PathBuf,
nvapi_path: Option<PathBuf>, nvapi_path: Option<PathBuf>,
nvoptix_path: Option<PathBuf>, nvoptix_path: Option<PathBuf>,
cublas_path: Option<PathBuf>,
cusparse_path: Option<PathBuf>,
redirect_path: PathBuf, redirect_path: PathBuf,
winapi_command_line_zero_terminated: Vec<u16>, winapi_command_line_zero_terminated: Vec<u16>,
} }
@ -173,13 +193,14 @@ impl NormalizedArguments {
let current_exe = env::current_exe()?; let current_exe = env::current_exe()?;
let nccl_path = let nccl_path =
Self::get_absolute_path_or_default(&current_exe, prog_args.nccl, NCCL_DLL)?; Self::get_absolute_path_or_default(&current_exe, prog_args.nccl, NCCL_DLL)?;
let nvrtc_path = let nvrtc_path = prog_args.nvrtc.map(Self::get_absolute_path).transpose()?;
Self::get_absolute_path_or_default(&current_exe, prog_args.nvrtc, NVRTC_DLL)?;
let nvcuda_path = let nvcuda_path =
Self::get_absolute_path_or_default(&current_exe, prog_args.nvcuda, NVCUDA_DLL)?; Self::get_absolute_path_or_default(&current_exe, prog_args.nvcuda, NVCUDA_DLL)?;
let nvml_path = Self::get_absolute_path_or_default(&current_exe, prog_args.nvml, NVML_DLL)?; let nvml_path = Self::get_absolute_path_or_default(&current_exe, prog_args.nvml, NVML_DLL)?;
let nvapi_path = prog_args.nvapi.map(Self::get_absolute_path).transpose()?; let nvapi_path = prog_args.nvapi.map(Self::get_absolute_path).transpose()?;
let nvoptix_path = prog_args.nvoptix.map(Self::get_absolute_path).transpose()?; let nvoptix_path = prog_args.nvoptix.map(Self::get_absolute_path).transpose()?;
let cublas_path = prog_args.cublas.map(Self::get_absolute_path).transpose()?;
let cusparse_path = prog_args.cusparse.map(Self::get_absolute_path).transpose()?;
let winapi_command_line_zero_terminated = let winapi_command_line_zero_terminated =
construct_command_line(std::iter::once(prog_args.exe).chain(prog_args.args)); construct_command_line(std::iter::once(prog_args.exe).chain(prog_args.args));
let mut redirect_path = current_exe.parent().unwrap().to_path_buf(); let mut redirect_path = current_exe.parent().unwrap().to_path_buf();
@ -191,6 +212,8 @@ impl NormalizedArguments {
nvml_path, nvml_path,
nvapi_path, nvapi_path,
nvoptix_path, nvoptix_path,
cublas_path,
cusparse_path,
redirect_path, redirect_path,
winapi_command_line_zero_terminated, winapi_command_line_zero_terminated,
}) })
@ -245,11 +268,13 @@ impl NormalizedArguments {
struct Environment { struct Environment {
nccl_path_zero_terminated: String, nccl_path_zero_terminated: String,
nvrtc_path_zero_terminated: String, nvrtc_path_zero_terminated: Option<String>,
nvcuda_path_zero_terminated: String, nvcuda_path_zero_terminated: String,
nvml_path_zero_terminated: String, nvml_path_zero_terminated: String,
nvapi_path_zero_terminated: Option<String>, nvapi_path_zero_terminated: Option<String>,
nvoptix_path_zero_terminated: Option<String>, nvoptix_path_zero_terminated: Option<String>,
cublas_path_zero_terminated: Option<String>,
cusparse_path_zero_terminated: Option<String>,
redirect_path_zero_terminated: String, redirect_path_zero_terminated: String,
winapi_command_line_zero_terminated: Vec<u16>, winapi_command_line_zero_terminated: Vec<u16>,
_temp_dir: TempDir, _temp_dir: TempDir,
@ -266,11 +291,14 @@ impl Environment {
&_temp_dir, &_temp_dir,
NCCL_DLL, NCCL_DLL,
)?); )?);
let nvrtc_path_zero_terminated = Self::zero_terminate(Self::copy_to_correct_name( let nvrtc_path_zero_terminated = args
args.nvrtc_path, .nvrtc_path
&_temp_dir, .map(|nvrtc| {
NVRTC_DLL, Ok::<_, io::Error>(Self::zero_terminate(Self::copy_to_correct_name(
)?); nvrtc, &_temp_dir, NVRTC_DLL,
)?))
})
.transpose()?;
let nvcuda_path_zero_terminated = Self::zero_terminate(Self::copy_to_correct_name( let nvcuda_path_zero_terminated = Self::zero_terminate(Self::copy_to_correct_name(
args.nvcuda_path, args.nvcuda_path,
&_temp_dir, &_temp_dir,
@ -299,6 +327,26 @@ impl Environment {
)?)) )?))
}) })
.transpose()?; .transpose()?;
let cublas_path_zero_terminated = args
.cublas_path
.map(|cublas| {
Ok::<_, io::Error>(Self::zero_terminate(Self::copy_to_correct_name(
cublas,
&_temp_dir,
CUBLAS_DLL,
)?))
})
.transpose()?;
let cusparse_path_zero_terminated = args
.cusparse_path
.map(|cusparse| {
Ok::<_, io::Error>(Self::zero_terminate(Self::copy_to_correct_name(
cusparse,
&_temp_dir,
CUSPARSE_DLL,
)?))
})
.transpose()?;
let redirect_path_zero_terminated = Self::zero_terminate(args.redirect_path); let redirect_path_zero_terminated = Self::zero_terminate(args.redirect_path);
Ok(Self { Ok(Self {
nccl_path_zero_terminated, nccl_path_zero_terminated,
@ -307,6 +355,8 @@ impl Environment {
nvml_path_zero_terminated, nvml_path_zero_terminated,
nvapi_path_zero_terminated, nvapi_path_zero_terminated,
nvoptix_path_zero_terminated, nvoptix_path_zero_terminated,
cublas_path_zero_terminated,
cusparse_path_zero_terminated,
redirect_path_zero_terminated, redirect_path_zero_terminated,
winapi_command_line_zero_terminated: args.winapi_command_line_zero_terminated, winapi_command_line_zero_terminated: args.winapi_command_line_zero_terminated,
_temp_dir, _temp_dir,

View file

@ -15,5 +15,4 @@ atiadlxx-sys = { path = "../atiadlxx-sys" }
rocm_smi-sys = { path = "../rocm_smi-sys" } rocm_smi-sys = { path = "../rocm_smi-sys" }
[package.metadata.zluda] [package.metadata.zluda]
top_level = true
linux_names = ["libnvidia-ml.so", "libnvidia-ml.so.1"] linux_names = ["libnvidia-ml.so", "libnvidia-ml.so.1"]