memory - cudaMemGetInfo 在 GTX 690 的两个设备上返回相同数量的可用内存

标签 memory cuda multi-gpu

我在 GeForce GTX 690 上尝试追踪显存使用情况时遇到了问题。下面是一个简单的测试程序:

// Allocates a 100,000,000-float buffer on device 0 and then compares the free
// memory reported by cudaMemGetInfo for device 0 and device 1.
//
// Every CUDA runtime call is verified with BOOST_CHECK_EQUAL (non-fatal), so a
// failing call is reported at its own line while the test still runs to the
// end and releases the host buffer.
BOOST_AUTO_TEST_CASE(cudaMemoryTest) {

    size_t mem_tot_0 = 0;
    size_t mem_free_0 = 0;
    size_t mem_tot_1 = 0;
    size_t mem_free_1 = 0;

    const size_t mem_size = 100 * 1000000;             // number of floats
    const size_t mem_bytes = mem_size * sizeof(float); // size of the buffer in bytes

    // Host source buffer, zero-filled.
    float* h_P = new float[mem_size];
    for (size_t i = 0; i < mem_size; i++) {
        h_P[i] = 0.f;
    }

    // Baseline: free memory on each device right after a reset.
    BOOST_CHECK_EQUAL(cudaSetDevice(0), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaDeviceReset(), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaMemGetInfo(&mem_free_0, &mem_tot_0), cudaSuccess);
    std::cout<<"Free memory before copy dev 0: "<<mem_free_0<<std::endl;

    BOOST_CHECK_EQUAL(cudaSetDevice(1), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaDeviceReset(), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaMemGetInfo(&mem_free_1, &mem_tot_1), cudaSuccess);
    std::cout<<"Free memory before copy dev 1: "<<mem_free_1<<std::endl;

    // Allocate and fill the buffer on device 0 only.
    BOOST_CHECK_EQUAL(cudaSetDevice(0), cudaSuccess);
    float* P = 0; // stays null if cudaMalloc fails, so later calls fail cleanly
    BOOST_CHECK_EQUAL(cudaMalloc((void**)&P, mem_bytes), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaMemcpy((void*)P, h_P, mem_bytes, cudaMemcpyHostToDevice), cudaSuccess);

    // Query both devices again after the allocation.
    BOOST_CHECK_EQUAL(cudaSetDevice(0), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaMemGetInfo(&mem_free_0, &mem_tot_0), cudaSuccess);
    std::cout<<"Free memory after copy  dev 0: "<<mem_free_0<<std::endl;

    BOOST_CHECK_EQUAL(cudaSetDevice(1), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaMemGetInfo(&mem_free_1, &mem_tot_1), cudaSuccess);
    std::cout<<"Free memory after copy dev 1: "<<mem_free_1<<std::endl;

    // Only device 0 received an allocation, so the two figures are expected to differ.
    BOOST_CHECK(mem_free_0 != mem_free_1);

    // Report any error still recorded by the runtime, including its description.
    const cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess)
        std::cout<<"an error occurred: "<<cudaGetErrorString(err)<<std::endl;

    // Release the device buffer on the device that owns it, then the host buffer.
    BOOST_CHECK_EQUAL(cudaSetDevice(0), cudaSuccess);
    destroyMem(P); // NOTE(review): helper defined elsewhere; presumably frees the device buffer - confirm
    delete [] h_P;
}

测试打印出:

1>  Free memory before copy dev 0: 1733173248
1>  Free memory before copy dev 1: 1688424448
1>  Free memory after copy  dev 0: 1289940992
1>  Free memory after copy dev 1: 1289940992     
CudaUtilsTest.cpp(47): error in "cudaMemoryTest": check mem_free_0 != mem_free_1 failed

问题在于:分配之后,设备 1 上的空闲内存量与设备 0 上的完全相同,而这是不应该发生的,因此问题一定出在 cudaMemGetInfo 和/或 cudaSetDevice 上。有人遇到过同样的问题吗?或者有人能指出这个测试本身是否存在其他根本性的错误?

代码运行环境为 Windows 7、Visual Studio 2010、CUDA SDK 5.0,编译时的代码生成选项为 compute_30,sm_30。

编辑 22.4.2013

我继续对这个问题进行了试验。cudaSetDevice 似乎工作正常,这一点可以通过 cudaGetDevice 调用的结果得到验证。我在内存分配测试之后增加了对设备 0 的重置,结果 cudaMemGetInfo 为两个设备返回的可用内存大小再次相同。我在自己的代码中检查了所有 cudaError_t 返回值,所有函数调用都返回 cudaSuccess。有人在 GTX 690 上使用上述配置时遇到过类似的问题吗?

最新的测试代码:

// Extended version of the memory test: besides comparing the free memory
// reported for device 0 and device 1 after an allocation on device 0, it
// prints the device index returned by cudaGetDevice next to every reading and
// finally resets only device 0 and queries both devices once more.
//
// Every CUDA runtime call is verified with BOOST_CHECK_EQUAL (non-fatal), so a
// failing call is reported at its own line while the test still runs to the
// end and releases the host buffer.
BOOST_AUTO_TEST_CASE(cudaMemoryTest) {
    size_t mem_tot_0 = 0;
    size_t mem_free_0 = 0;
    size_t mem_tot_1 = 0;
    size_t mem_free_1 = 0;

    int device_num = 0; // device index as reported by cudaGetDevice

    const size_t mem_size = 100 * 1000000;             // number of floats
    const size_t mem_bytes = mem_size * sizeof(float); // size of the buffer in bytes

    // Host source buffer, zero-filled.
    float* h_P = new float[mem_size];
    for (size_t i = 0; i < mem_size; i++) {
        h_P[i] = 0.f;
    }

    // Baseline: free memory on each device right after a reset.
    BOOST_CHECK_EQUAL(cudaSetDevice(0), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaGetDevice(&device_num), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaDeviceReset(), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaMemGetInfo(&mem_free_0, &mem_tot_0), cudaSuccess);
    std::cout<<"Free memory before copy dev 0: "<<mem_free_0<<" Device: "<<device_num<<std::endl;
    BOOST_CHECK_EQUAL(cudaDeviceSynchronize(), cudaSuccess);

    BOOST_CHECK_EQUAL(cudaSetDevice(1), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaGetDevice(&device_num), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaDeviceReset(), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaMemGetInfo(&mem_free_1, &mem_tot_1), cudaSuccess);
    std::cout<<"Free memory before copy dev 1: "<<mem_free_1<<" Device: "<<device_num<<std::endl;
    BOOST_CHECK_EQUAL(cudaDeviceSynchronize(), cudaSuccess);

    // Allocate and fill the buffer on device 0 only, then query device 0.
    BOOST_CHECK_EQUAL(cudaSetDevice(0), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaGetDevice(&device_num), cudaSuccess);
    float* P = 0; // stays null if cudaMalloc fails, so cudaMemcpy fails cleanly
    BOOST_CHECK_EQUAL(cudaMalloc((void**)&P, mem_bytes), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaMemcpy((void*)P, h_P, mem_bytes, cudaMemcpyHostToDevice), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaMemGetInfo(&mem_free_0, &mem_tot_0), cudaSuccess);
    std::cout<<"Free memory after copy  dev 0: "<<mem_free_0<<" Device: "<<device_num<<std::endl;
    BOOST_CHECK_EQUAL(cudaDeviceSynchronize(), cudaSuccess);

    // Query device 1, which received no allocation.
    BOOST_CHECK_EQUAL(cudaSetDevice(1), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaGetDevice(&device_num), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaMemGetInfo(&mem_free_1, &mem_tot_1), cudaSuccess);
    std::cout<<"Free memory after copy dev 1: "<<mem_free_1<<" Device: "<<device_num<<std::endl;
    BOOST_CHECK_EQUAL(cudaDeviceSynchronize(), cudaSuccess);

    // Only device 0 received an allocation, so the two figures are expected to differ.
    BOOST_CHECK(mem_free_0 != mem_free_1);

    // Report any error still recorded by the runtime, including its description.
    const cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess)
        std::cout<<"an error occurred: "<<cudaGetErrorString(err)<<std::endl;

    // Reset only device 0 and check both.
    // P is not freed explicitly: the reset of device 0 releases its allocations,
    // so P must not be used after this point.
    BOOST_CHECK_EQUAL(cudaSetDevice(0), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaGetDevice(&device_num), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaDeviceReset(), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaMemGetInfo(&mem_free_0, &mem_tot_0), cudaSuccess);
    std::cout<<"Free memory after second reset of device 0, dev 0: "<<mem_free_0<<" Device: "<<device_num<<std::endl;
    BOOST_CHECK_EQUAL(cudaDeviceSynchronize(), cudaSuccess);

    BOOST_CHECK_EQUAL(cudaSetDevice(1), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaGetDevice(&device_num), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaMemGetInfo(&mem_free_1, &mem_tot_1), cudaSuccess);
    std::cout<<"Free memory after second device reset of device 0, dev 1: "<<mem_free_1<<" Device: "<<device_num<<std::endl;
    BOOST_CHECK_EQUAL(cudaDeviceSynchronize(), cudaSuccess);

    delete [] h_P;
}

测试输出:

1>  Free memory before copy dev 0: 1794379776 Device: 0
1>  Free memory before copy dev 1: 1751728128 Device: 1
1>  Free memory after copy  dev 0: 1351696384 Device: 0
1>  Free memory after copy dev 1: 1351696384 Device: 1
1>  CudaUtilsTest.cpp(353): error in "cudaMemoryTest": check mem_free_0 != mem_free_1 failed
1>  Free memory after second reset of device 0, dev 0: 1751728128 Device: 0
1>  Free memory after second device reset of device 0, dev 1: 1751728128 Device: 1

最佳答案

这已通过更改 WDDM 驱动程序设置解决,如下所示:

在 NVIDIA 控制面板的 "3D-settings" -> "Configure Multi-GPU, Surround, PhysX" 页面中,选择 "Disable multi-GPU mode"(禁用多 GPU 模式)。

[此答案作为社区 wiki 条目从评论中添加,以将问题从 CUDA 标记的未回答队列中移除]

关于memory - cudaMemGetInfo 在 GTX 690 的两个设备上返回相同数量的可用内存,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/34999737/

相关文章:

linux - 在 Linux 上对 vm_copy 的等效系统调用

java - 为什么将 ""附加到字符串可以节省内存?

潜在未定义类型的 C++ 类型、函数和值别名

c++ - thrust::max_element 比 cublasIsamax 慢 - 有更高效的实现吗?

python - 为什么我在 Keras 中使用 multi_gpu_model 的训练速度比单 gpu 差?

linux - Nvidia GTX 590 的多 GPU GPUDirect 对等通信问题

linux - 软虚拟内存限制 (ulimit -v)

c - CFSTR() 是否分配内存?

linux - CUDA 6.5/Ubuntu 14.04/AWS EC2 GPU 实例 g2.2xlarge 缺少 drm.ko

ffmpeg - 如何使用 FFMPEG 多 GPU 进程