cuda - 您如何将用 C 编写的自定义 CUDA 内核链接到 Rust 项目?

标签 cuda linker rust ffi nvcc

我尝试在 build.rs 文件中使用 cc crate,将我的 CUDA 内核链接到我的 Rust 项目中,但遇到了困难:

build.rs

extern crate cc;

// Build script entry point: tells Cargo which CUDA libraries to link and
// compiles `kernel.cu` into a static library through the `cc` crate.
fn main() {
    // Ask Cargo to link the crate against the `cuda`, `cudart` and `cudnn`
    // libraries.
    // NOTE(review): no `cargo:rustc-link-search` directive is emitted here, so
    // the linker has to find these libraries on its default search path —
    // confirm that the CUDA toolkit's library directory is actually on it.
    println!("cargo:rustc-link-lib=cuda");
    println!("cargo:rustc-link-lib=cudart");
    println!("cargo:rustc-link-lib=cudnn");

    cc::Build::new()
        // Switch the builder to CUDA mode.
        .cuda(true)
        // Extra compiler flag requesting the shared CUDA runtime.
        .flag("-cudart=shared")
        // `-gencode` and its argument are passed as two separate flags.
        .flag("-gencode")
        .flag("arch=compute_61,code=sm_61")
        .file("kernel.cu")
        // Archive name; the linker log in this post shows it as `libkernel.a`.
        .compile("kernel");
}

我收到这个错误:

error: linking with `cc` failed: exit code: 1
  |
  = note: "cc" "-Wl,--as-needed" "-Wl,-z,noexecstack" "-m64" "-L" "/home/ltei/.rustup/toolchains/stable-x86_64-unknown-linux-gnu/lib/rustlib/x86_64-unknown-linux-gnu/lib" "/home/ltei/Dev/Workspaces/rust_cudnn/target/debug/deps/rust_cudnn-c2a0c7c98bc21183.1y16o1qfye96o7m0.rcgu.o" "/home/ltei/Dev/Workspaces/rust_cudnn/target/debug/deps/rust_cudnn-c2a0c7c98bc21183.3rngp6bm2u2q5z0y.rcgu.o" "/home/ltei/Dev/Workspaces/rust_cudnn/target/debug/deps/rust_cudnn-c2a0c7c98bc21183.3vhdzx0ywzealo7m.rcgu.o" "/home/ltei/Dev/Workspaces/rust_cudnn/target/debug/deps/rust_cudnn-c2a0c7c98bc21183.4xq48u46a1pwiqn7.rcgu.o" "/home/ltei/Dev/Workspaces/rust_cudnn/target/debug/deps/rust_cudnn-c2a0c7c98bc21183.8xzrsc1ux72v29j.rcgu.o" "-o" "/home/ltei/Dev/Workspaces/rust_cudnn/target/debug/deps/rust_cudnn-c2a0c7c98bc21183" "/home/ltei/Dev/Workspaces/rust_cudnn/target/debug/deps/rust_cudnn-c2a0c7c98bc21183.crate.allocator.rcgu.o" "-Wl,--gc-sections" "-pie" "-Wl,-z,relro,-z,now" "-nodefaultlibs" "-L" "/home/ltei/Dev/Workspaces/rust_cudnn/target/debug/deps" "-L" "/home/ltei/Dev/Workspaces/rust_cudnn/target/debug/build/rust_cudnn-df924982e63c2363/out" "-L" "/home/ltei/.rustup/toolchains/stable-x86_64-unknown-linux-gnu/lib/rustlib/x86_64-unknown-linux-gnu/lib" "-l" "cudart" "-l" "cudnn" "-l" "cuda" "-Wl,-Bstatic" "-Wl,--whole-archive" "-l" "kernel" "-Wl,--no-whole-archive" "-Wl,-Bdynamic" "-l" "stdc++" "-Wl,-Bstatic" "/home/ltei/Dev/Workspaces/rust_cudnn/target/debug/deps/liblibc-dca5860987df25ef.rlib" "/home/ltei/.rustup/toolchains/stable-x86_64-unknown-linux-gnu/lib/rustlib/x86_64-unknown-linux-gnu/lib/libstd-0006dc6e9901bcad.rlib" "/home/ltei/.rustup/toolchains/stable-x86_64-unknown-linux-gnu/lib/rustlib/x86_64-unknown-linux-gnu/lib/libpanic_unwind-8d1c3982c0670998.rlib" "/home/ltei/.rustup/toolchains/stable-x86_64-unknown-linux-gnu/lib/rustlib/x86_64-unknown-linux-gnu/lib/liballoc_jemalloc-2a12cd93029b9807.rlib" 
"/home/ltei/.rustup/toolchains/stable-x86_64-unknown-linux-gnu/lib/rustlib/x86_64-unknown-linux-gnu/lib/libunwind-a5d3ff19e13d9f37.rlib" "/home/ltei/.rustup/toolchains/stable-x86_64-unknown-linux-gnu/lib/rustlib/x86_64-unknown-linux-gnu/lib/liballoc_system-c5f69e7df1f06d84.rlib" "/home/ltei/.rustup/toolchains/stable-x86_64-unknown-linux-gnu/lib/rustlib/x86_64-unknown-linux-gnu/lib/liblibc-e2e7ce88a6c41eea.rlib" "/home/ltei/.rustup/toolchains/stable-x86_64-unknown-linux-gnu/lib/rustlib/x86_64-unknown-linux-gnu/lib/liballoc-3d7473d271611dc2.rlib" "/home/ltei/.rustup/toolchains/stable-x86_64-unknown-linux-gnu/lib/rustlib/x86_64-unknown-linux-gnu/lib/libstd_unicode-58e7a51af24928de.rlib" "/home/ltei/.rustup/toolchains/stable-x86_64-unknown-linux-gnu/lib/rustlib/x86_64-unknown-linux-gnu/lib/libcore-6806ae6018eec5e7.rlib" "/home/ltei/.rustup/toolchains/stable-x86_64-unknown-linux-gnu/lib/rustlib/x86_64-unknown-linux-gnu/lib/libcompiler_builtins-85fd2d595ec0faf9.rlib" "-Wl,-Bdynamic" "-l" "util" "-l" "util" "-l" "dl" "-l" "rt" "-l" "pthread" "-l" "pthread" "-l" "gcc_s" "-l" "c" "-l" "m" "-l" "rt" "-l" "pthread" "-l" "util" "-l" "util"
  = note: /home/ltei/Dev/Workspaces/rust_cudnn/target/debug/deps/rust_cudnn-c2a0c7c98bc21183.3vhdzx0ywzealo7m.rcgu.o : In fonction « rust_cudnn::main » :
          /home/ltei/Dev/Workspaces/rust_cudnn/src/main.rs:213 : undefined reference to « Kernel_test »
          /home/ltei/Dev/Workspaces/rust_cudnn/target/debug/build/rust_cudnn-df924982e63c2363/out/libkernel.a(kernel.o) : In fonction « __sti____cudaRegisterAll_41_tmpxft_0000098e_00000000_7_kernel_cpp1_ii_a9220a05() » :
          /tmp/tmpxft_0000098e_00000000-4_kernel.cudafe1.stub.c:7 : undefined reference to « __cudaRegisterFatBinary »
          /home/ltei/Dev/Workspaces/rust_cudnn/target/debug/build/rust_cudnn-df924982e63c2363/out/libkernel.a(kernel.o) : In fonction « __cudaUnregisterBinaryUtil() » :
          /usr/include/crt/host_runtime.h:238 : undefined reference to « __cudaUnregisterFatBinary »
          collect2: error: ld returned 1 exit status

显然,当您没有链接 cudart 时会出现这种错误,但我认为我已经链接了。也许我哪里做错了?该错误仅在我尝试编译并链接我的 CUDA 内核时发生。如果不这样做,一切正常,我可以在 Rust 中调用 CUDA 函数,例如 cudaMalloc。

这是我的 kernel.cu 文件:

#include "kernel.h"

// CUDA kernel: adds `value` in place to the elements of `vector`.
// Each thread handles the single element at its global index; threads whose
// index is >= `len` do nothing.
__global__ void vectorAdd_ker(float* vector, int len, float value) {
      // Global thread index computed from the block and thread coordinates (x only).
      int tid = blockIdx.x * blockDim.x + threadIdx.x;
      // Bounds guard: the launch may contain more threads than elements.
      if (tid < len) { vector[tid] += value; }
}

// Host-side wrapper that launches `vectorAdd_ker` over `len` elements.
// Uses 1024 threads per block and as many blocks as needed to cover `len`.
// No error checking or synchronization is performed in this function.
// NOTE(review): presumably `vector` must point to device memory — confirm
// against the caller.
void Kernel_vectorAdd(float* vector, int len, float value) {
   dim3 gridDim;
   dim3 blockDim;

   blockDim.x = 1024;
   // Ceiling division so a partially filled last block is still launched.
   gridDim.x = (len + blockDim.x - 1) / blockDim.x;

  vectorAdd_ker <<<gridDim, blockDim>>> (vector, len, value);
}

// Returns the constant 7; the linker log in this post shows it being
// referenced from the Rust `main`.
// NOTE(review): nvcc compiles `.cu` files as C++, so unless `kernel.h`
// declares this function inside `extern "C"` the symbol will be name-mangled,
// which would match the `undefined reference to Kernel_test` error — confirm
// against kernel.h.
int Kernel_test() {
  return 7;
}

最佳答案

原始发布者诊断出这是一个路径问题,并将 build.rs 修改为如下内容(取自 here ):

extern crate cc;
use std::env;


fn main() {

    if let Ok(cuda_path) = env::var("CUDA_HOME") {
        println!("cargo:rustc-link-search=native={}/lib64", cuda_path);
    } else {
        println!("cargo:rustc-link-search=native=/usr/local/cuda/lib64");
    }

    println!("cargo:rustc-link-lib=dylib=cuda");
    println!("cargo:rustc-link-lib=dylib=cudart");
    println!("cargo:rustc-link-lib=dylib=cublas");
    println!("cargo:rustc-link-lib=dylib=curand");

    cc::Build::new().cuda(true)
        .flag("-gencode").flag("arch=compute_52,code=sm_52") // Generate code for Maxwell (GTX 970, 980, 980 Ti, Titan X).
        .flag("-gencode").flag("arch=compute_53,code=sm_53") // Generate code for Maxwell (Jetson TX1).
        .flag("-gencode").flag("arch=compute_61,code=sm_61") // Generate code for Pascal (GTX 1070, 1080, 1080 Ti, Titan Xp).
        .flag("-gencode").flag("arch=compute_60,code=sm_60") // Generate code for Pascal (Tesla P100).
        .flag("-gencode").flag("arch=compute_62,code=sm_62") // Generate code for Pascal (Jetson TX2).
        .file("kernels/vectorfragment.cu").compile("libvectorfragment.a");
    cc::Build::new().cuda(true).cpp_link_stdlib(None)
        .flag("-gencode").flag("arch=compute_52,code=sm_52")
        .flag("-gencode").flag("arch=compute_53,code=sm_53")
        .flag("-gencode").flag("arch=compute_61,code=sm_61")
        .flag("-gencode").flag("arch=compute_60,code=sm_60")
        .flag("-gencode").flag("arch=compute_62,code=sm_62")
        .file("kernels/vectorpacked.cu").compile("libvectorpacked.a");
    cc::Build::new().cuda(true).cpp_link_stdlib(None)
        .flag("-gencode").flag("arch=compute_52,code=sm_52")
        .flag("-gencode").flag("arch=compute_53,code=sm_53")
        .flag("-gencode").flag("arch=compute_61,code=sm_61")
        .flag("-gencode").flag("arch=compute_60,code=sm_60")
        .flag("-gencode").flag("arch=compute_62,code=sm_62")
        .file("kernels/matrix.cu").compile("libmatrix.a");
}

这一版本显然奏效了,而原来的版本则不行。

[注意:此答案是根据(现已删除的)评论整理、并以社区 wiki 的形式添加的,目的是将此问题从 CUDA 标签的未回答问题队列中移除。]

关于cuda - 您如何将用 C 编写的自定义 CUDA 内核链接到 Rust 项目?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/49348859/

相关文章:

numpy - 有什么方法可以使用多个 GPU 来提升矩阵乘法?

cuda - 在不同的源文件上使用相同的常量内存数组

c++ - 需要避免在需要使用链接器脚本 *.ld 文件的静态库中对 `WinMain' 的 undefined reference

c - 静态库链接问题(没有引用main,或者没有引用)

rust - 为什么 BTreeMap 是可散列的,而不是 HashMap?

rust - Rust 常量表达式可以使用 Default 之类的特性吗?

rust - 我可以有效地从 HashSet 中弹出吗?

ubuntu - 如何在elementaryOS(Ubuntu 14.04)上安装CUDA 7.0

cudaThreadSynchronize() 要求

c - objcopy 如何计算将 elf 文件的哪些部分插入到输出文件中?