cuda - 按 map 计数扩展和增加数据

标签 cuda thrust

我对 Thrust(CUDA)还很陌生,遇到了一个颇具挑战性的问题。


(为简化问题已做编辑)我有一个输入向量和一个 map(每个元素对应的计数):

 vector = [8,23,46,500,2,7,91,91]
    map = [1, 0, 4,  3,1,0, 5, 3]

我想扩展它并将值增加为:

new_vec = [8,46,47,48,49,500,501,502,2,91,92,93,94,95,91,92,93]
  • 我知道 thrust/examples/expand.cu 示例已经基本实现了这一点,但我不知道如何根据 map 中的计数高效地递增数据值。
  • 如果有人能够解释如何修改此示例以实现此目的,将会很有帮助。

最佳答案

调整 Thrust expand 示例以使用 exclusive_scan_by_key 对子序列中的每个输出元素进行排名,然后按该排名递增:

#include <thrust/device_vector.h>
#include <thrust/advance.h>
#include <thrust/copy.h>
#include <thrust/distance.h>
#include <thrust/fill.h>
#include <thrust/functional.h>
#include <thrust/gather.h>
#include <thrust/iterator/constant_iterator.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/iterator_traits.h>
#include <thrust/reduce.h>
#include <thrust/scan.h>
#include <thrust/scatter.h>
#include <thrust/transform.h>

#include <iostream>
#include <iterator>
#include <string>


// Writes the label `s` to std::cout, followed by every element of `v`
// (each one followed by a single space), then ends the line and flushes.
// Elements are streamed through thrust::copy into an ostream_iterator.
template<typename Vector>
void print(const std::string& s, const Vector& v)
{
  typedef typename Vector::value_type          element_type;
  typedef std::ostream_iterator<element_type>  writer_type;

  std::cout << s;

  // Space-delimited sink bound to standard output.
  writer_type sink(std::cout, " ");
  thrust::copy(v.begin(), v.end(), sink);

  std::cout << std::endl;
}


// Expands a sequence of values according to per-element counts and offsets
// every copy by its position within its run.
//
// For each input position i, the value first2[i] is written first1[i] times
// as first2[i], first2[i] + 1, ..., first2[i] + first1[i] - 1. Positions whose
// count is zero contribute nothing to the output.
//
//   first1, last1  range of counts (one per input element)
//   first2         start of the values range (same length as the counts)
//   output         start of the destination; it must have room for
//                  sum(counts) elements
//
// The intermediate arrays (output_offsets, output_indices, ranks) are printed
// to std::cout via print() as a trace of the algorithm.
template<typename InputIterator1,
         typename InputIterator2,
         typename OutputIterator>
void expand_and_increment(InputIterator1 first1,
                          InputIterator1 last1,
                          InputIterator2 first2,
                          OutputIterator output)
{
  typedef typename thrust::iterator_difference<InputIterator1>::type difference_type;

  difference_type input_size  = thrust::distance(first1, last1);
  difference_type output_size = thrust::reduce(first1, last1);

  // scan the counts to obtain the output offset of each input element
  thrust::device_vector<difference_type> output_offsets(input_size);
  thrust::exclusive_scan(first1, last1, output_offsets.begin());

  print("output_offsets ", output_offsets);

  // scatter the index of every input element with a nonzero count to the
  // first output position of its run; all other slots keep their initial 0
  thrust::device_vector<difference_type> output_indices(output_size);
  thrust::scatter_if
    (thrust::counting_iterator<difference_type>(0),
     thrust::counting_iterator<difference_type>(input_size),
     output_offsets.begin(),
     first1,
     output_indices.begin());

  // max-scan over the output indices fills the holes, so every output slot
  // ends up holding the index of the input element it was expanded from
  thrust::inclusive_scan
    (output_indices.begin(),
     output_indices.end(),
     output_indices.begin(),
     thrust::maximum<difference_type>());

  print("output_indices ", output_indices);

  // one-past-the-end of the destination, computed with thrust::advance so the
  // output iterator is never required to support operator+
  OutputIterator output_end = output;
  thrust::advance(output_end, output_size);

  // gather input values according to index array (output = first2[output_indices])
  thrust::gather(output_indices.begin(),
                 output_indices.end(),
                 first2,
                 output);

  // rank each output element within its run: an exclusive scan of ones keyed
  // by source index yields 0, 1, 2, ... and restarts whenever the key changes
  thrust::device_vector<difference_type> ranks(output_size);
  thrust::exclusive_scan_by_key(output_indices.begin(), output_indices.end(),
                                thrust::make_constant_iterator<difference_type>(1),
                                ranks.begin());

  print("ranks ", ranks);

  // add each element's rank to its gathered value, in place
  thrust::transform(output, output_end,
                    ranks.begin(),
                    output,
                    thrust::placeholders::_1 + thrust::placeholders::_2);
}


int main(void)
{
  int values[] = {8,23,46,500,2,7,91,91};
  int counts[] = {1, 0, 4,  3,1,0, 5, 3};

  size_t input_size  = sizeof(counts) / sizeof(int);
  size_t output_size = thrust::reduce(counts, counts + input_size);

  // copy inputs to device
  thrust::device_vector<int> d_counts(counts, counts + input_size);
  thrust::device_vector<int> d_values(values, values + input_size);
  thrust::device_vector<int> d_output(output_size);

  // expand values according to counts
  expand_and_increment(d_counts.begin(), d_counts.end(),
                       d_values.begin(),
                       d_output.begin());

  std::cout << "Expanding and incrementing values according to counts" << std::endl;
  print(" counts ", d_counts);
  print(" values ", d_values);
  print(" output ", d_output);

  return 0;
}

输出:

$ nvcc expand_and_increment.cu -run
output_offsets 0 1 1 5 8 9 9 14 
output_indices 0 2 2 2 2 3 3 3 4 6 6 6 6 6 7 7 7 
ranks 0 0 1 2 3 0 1 2 0 0 1 2 3 4 0 1 2 
Expanding and incrementing values according to counts
 counts 1 0 4 3 1 0 5 3 
 values 8 23 46 500 2 7 91 91 
 output 8 46 47 48 49 500 501 502 2 91 92 93 94 95 91 92 93 

关于cuda - 按 map 计数扩展和增加数据,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/15822635/

相关文章:

python - 使用 Python 和 numba-pro 的 CUDA 内核中的数组

c++ - 分解在 Visual C++ 中不起作用的函数

c++ - cuda thrust::for_each with thrust::counting_iterator

c++ - rand() 在通过仿函数调用时生成相同的随机数集(即使在使用 srand(time(NULL)) 播种后)

c++ - 使用构造函数中本地声明的 device_vector 中的 device_vector::data() 方法初始化结构内部的指针是否安全?

c++ - CUDA Thrust reduce_by_key 使用更少的内存

c++ - 如何存储 CUDA 内核函数的 bool 结果

gcc - 由于 gcc 编译器版本不受支持,Caffe 编译失败

visual-studio - 使用 nsight 的 "Start CUDA debugging"时忽略断点

cuda - 如何将 Thrust::host_vector<char> 复制到 char*