c++ - 采用迭代器元组并生成基本类型的一元函数

标签 c++ thrust

我有一个由迭代器元组构成的 zip 迭代器。我想把它连同一个仿函数一起传给 thrust::transform,该仿函数从元组中取出元素并生成一个标量输出。

我的程序无法运行,我不知道为什么。

我认为这可能与这个问题有关:CUDA thrust zip_iterator tuple transform_reduce,但是更改仿函数的模板参数并没有解决问题。

以下代码编译:

#include <iostream>
#include <thrust/transform.h>
#include <thrust/functional.h>
#include <thrust/device_vector.h>
#include <thrust/device_ptr.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/tuple.h>

// Iterator type of a device_vector<double>.
typedef thrust::device_vector<double>::iterator realIter;
// Tuple of two such iterators; the zip iterator below is built from it.
typedef thrust::tuple<realIter,realIter> Tup;
typedef thrust::zip_iterator<Tup> Zip;
typedef thrust::tuple<double,double> Tup2; //I tried replacing Tup with this in the functor

// Unary functor meant to map one zipped element to a double.
// NOTE: operator() is declared to take Tup (a tuple of *iterators*). When this
// functor is passed to thrust::transform over a Zip range, nvcc reports that it
// is called with tuple_of_iterator_references<double &, double &, ...> instead,
// so the call does not compile.
struct dummyOp : public thrust::unary_function<Tup, double> {
 __host__ __device__ double operator()(Tup &tup){
   // Dereferences the first iterator stored in the tuple.
   double result = *thrust::get<0>(tup);
   return result;
 }
};

// Builds two 4-element device vectors, zips their begin iterators, and prints
// the vectors plus two elements read through the zip iterator.
int main(){
  thrust::device_vector<double> A(4);
  thrust::device_vector<double> B(4);
  // Intended destination of the transform; nothing writes to it while the
  // transform call below stays commented out.
  thrust::device_vector<double> C(4);

  A[0] =  1.; A[1] = 2.;
  A[2] =  3.; A[3] = 4.;

  B[0] =  4.;   B[1] = 3.;
  B[2] =  2.;   B[3] = 1.;

  // Zip iterator positioned at the first elements of A and B.
  Tup tup   = thrust::tuple<realIter,realIter>(A.begin(),B.begin());
  Zip zippy = thrust::zip_iterator<Tup>(tup);
  dummyOp f;

  // The following does not work:
  //thrust::transform(zippy, zippy + 4, C.begin(), f);

  std::cout << "A:\n";
  thrust::copy(A.begin(), A.end(), std::ostream_iterator<double>(std::cout, " "));
  std::cout << "\nB:\n";
  thrust::copy(B.begin(), B.end(), std::ostream_iterator<double>(std::cout, " "));
  std::cout << "\nC:\n";
  thrust::copy(C.begin(), C.end(), std::ostream_iterator<double>(std::cout, " "));
  std::cout << std::endl;
  // Indexing the zip iterator yields a tuple; get<0> reads the A component,
  // get<1> the B component.
  std::cout <<"get<0>(zippy[0]) returns:\n" << thrust::get<0>(zippy[0]) << std::endl;
  std::cout <<"get<1>(zippy[1]) returns:\n" << thrust::get<1>(zippy[1]) << std::endl;

  return 0;
}

运行它会给出:

$ ./so2
A:
1 2 3 4
B:
4 3 2 1
C:
0 0 0 0
get<0>(zippy[0]) returns:
1
get<1>(zippy[1]) returns:
3

取消注释有问题的行后,我们有:

$ nvcc -arch=compute_35 so2.cu -o so2
/usr/local/cuda/bin/../targets/x86_64-linux/include/thrust/detail/internal_functional.h(322): error: function "dummyOp::operator()" cannot be called with the given argument list
            argument types are: (thrust::detail::tuple_of_iterator_references<double &, double &, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>)
            object type is: dummyOp
          detected during:
            instantiation of "thrust::detail::enable_if_non_const_reference_or_tuple_of_iterator_references<thrust::tuple_element<1, Tuple>::type>::type thrust::detail::unary_transform_functor<UnaryFunction>::operator()(Tuple) [with UnaryFunction=dummyOp, Tuple=thrust::detail::tuple_of_iterator_references<thrust::detail::tuple_of_iterator_references<double &, double &, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>, double &, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>]"
/usr/local/cuda/bin/../targets/x86_64-linux/include/thrust/detail/function.h(60): here
            instantiation of "Result thrust::detail::wrapped_function<Function, Result>::operator()(const Argument &) const [with Function=thrust::detail::unary_transform_functor<dummyOp>, Result=void, Argument=thrust::detail::tuple_of_iterator_references<thrust::detail::tuple_of_iterator_references<thrust::device_reference<double>, thrust::device_reference<double>, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>, thrust::device_reference<double>, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>]"
/usr/local/cuda/bin/../targets/x86_64-linux/include/thrust/system/cuda/detail/for_each.inl(57): here
            instantiation of "void thrust::system::cuda::detail::for_each_n_detail::for_each_kernel::operator()(thrust::system::cuda::detail::bulk_::parallel_group<thrust::system::cuda::detail::bulk_::concurrent_group<thrust::system::cuda::detail::bulk_::agent<1UL>, 0UL>, 0UL> &, Iterator, Function, Size) [with Iterator=thrust::zip_iterator<thrust::tuple<Zip, thrust::detail::normal_iterator<thrust::device_ptr<double>>, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>>, Function=thrust::detail::wrapped_function<thrust::detail::unary_transform_functor<dummyOp>, void>, Size=unsigned int]"
/usr/local/cuda/bin/../targets/x86_64-linux/include/thrust/system/cuda/detail/bulk/detail/apply_from_tuple.hpp(71): here
            instantiation of "void thrust::system::cuda::detail::bulk_::detail::apply_from_tuple(Function, const thrust::tuple<Arg1, Arg2, Arg3, Arg4, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type> &) [with Function=thrust::system::cuda::detail::for_each_n_detail::for_each_kernel, Arg1=thrust::system::cuda::detail::bulk_::parallel_group<thrust::system::cuda::detail::bulk_::concurrent_group<thrust::system::cuda::detail::bulk_::agent<1UL>, 0UL>, 0UL> &, Arg2=thrust::zip_iterator<thrust::tuple<Zip, thrust::detail::normal_iterator<thrust::device_ptr<double>>, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>>, Arg3=thrust::detail::wrapped_function<thrust::detail::unary_transform_functor<dummyOp>, void>, Arg4=unsigned int]"
/usr/local/cuda/bin/../targets/x86_64-linux/include/thrust/system/cuda/detail/bulk/detail/closure.hpp(50): here
            instantiation of "void thrust::system::cuda::detail::bulk_::detail::closure<Function, Tuple>::operator()() [with Function=thrust::system::cuda::detail::for_each_n_detail::for_each_kernel, Tuple=thrust::tuple<thrust::system::cuda::detail::bulk_::parallel_group<thrust::system::cuda::detail::bulk_::concurrent_group<thrust::system::cuda::detail::bulk_::agent<1UL>, 0UL>, 0UL> &, thrust::zip_iterator<thrust::tuple<Zip, thrust::detail::normal_iterator<thrust::device_ptr<double>>, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>>, thrust::detail::wrapped_function<thrust::detail::unary_transform_functor<dummyOp>, void>, unsigned int, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>]"
/usr/local/cuda/bin/../targets/x86_64-linux/include/thrust/system/cuda/detail/bulk/detail/cuda_task.hpp(58): here
            [ 9 instantiation contexts not shown ]
            instantiation of "RandomAccessIterator thrust::system::cuda::detail::for_each(thrust::system::cuda::detail::execution_policy<DerivedPolicy> &, RandomAccessIterator, RandomAccessIterator, UnaryFunction) [with DerivedPolicy=thrust::system::cuda::detail::tag, RandomAccessIterator=thrust::zip_iterator<thrust::tuple<Zip, thrust::detail::normal_iterator<thrust::device_ptr<double>>, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>>, UnaryFunction=thrust::detail::unary_transform_functor<dummyOp>]"
/usr/local/cuda/bin/../targets/x86_64-linux/include/thrust/detail/for_each.inl(44): here
            instantiation of "InputIterator thrust::for_each(const thrust::detail::execution_policy_base<DerivedPolicy> &, InputIterator, InputIterator, UnaryFunction) [with DerivedPolicy=thrust::system::cuda::detail::tag, InputIterator=thrust::zip_iterator<thrust::tuple<Zip, thrust::detail::normal_iterator<thrust::device_ptr<double>>, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>>, UnaryFunction=thrust::detail::unary_transform_functor<dummyOp>]"
/usr/local/cuda/bin/../targets/x86_64-linux/include/thrust/system/detail/generic/transform.inl(57): here
            instantiation of "OutputIterator thrust::system::detail::generic::transform(thrust::execution_policy<DerivedPolicy> &, InputIterator, InputIterator, OutputIterator, UnaryFunction) [with DerivedPolicy=thrust::system::cuda::detail::tag, InputIterator=Zip, OutputIterator=thrust::detail::normal_iterator<thrust::device_ptr<double>>, UnaryFunction=dummyOp]"
/usr/local/cuda/bin/../targets/x86_64-linux/include/thrust/detail/transform.inl(44): here
            instantiation of "OutputIterator thrust::transform(const thrust::detail::execution_policy_base<DerivedPolicy> &, InputIterator, InputIterator, OutputIterator, UnaryFunction) [with DerivedPolicy=thrust::system::cuda::detail::tag, InputIterator=Zip, OutputIterator=thrust::detail::normal_iterator<thrust::device_ptr<double>>, UnaryFunction=dummyOp]"
/usr/local/cuda/bin/../targets/x86_64-linux/include/thrust/detail/transform.inl(142): here
            instantiation of "OutputIterator thrust::transform(InputIterator, InputIterator, OutputIterator, UnaryFunction) [with InputIterator=Zip, OutputIterator=thrust::detail::normal_iterator<thrust::device_ptr<double>>, UnaryFunction=dummyOp]"
so2.cu(36): here

更新:虽然我仍然非常希望有人能帮我理解上面代码中的问题,但以下写法是可行的:

// Functor with a templated call operator: the tuple type is deduced from
// whatever the calling algorithm passes in, rather than being spelled out.
struct dummyOp {
 // Returns element 0 of the received tuple, converted to double.
 template <typename Tuple>
 __host__ __device__ double operator()(Tuple tup){
   return thrust::get<0>(tup);
 }
};

这个想法是从这里偷来的:https://github.com/thrust/thrust/blob/master/examples/arbitrary_transformation.cu

这并不构成我自己问题的答案,因为我仍然不明白原始代码中的类型有什么问题。

另外:将参数显式声明为引用(Tuple &)是行不通的。这是否意味着 tup 是按值传递的?

最佳答案

一般来说,当 zip 迭代器在 Thrust 算法内部被解引用时,会生成一个由底层元素(而不是迭代器或指针)组成的元组,并把它传递给相应的仿函数。

当我们分析启用“出错行”之后的编译器输出时:

argument types are: (thrust::detail::tuple_of_iterator_references<double &, double &, 

我们观察到,当 Thrust 解引用 zippy 这个 zip 迭代器时,它会生成一个由 double 引用组成的元组。我们可以据此确定仿函数应当接受的输入类型,即:

thrust::tuple<double &, double &>

由于这些是对基本类型的引用,因此在仿函数中没有必要再对它们解引用(它们不是指针或迭代器),直接使用即可得到它们的值。

以下修改后的代码合并了这些想法并且编译没有错误:

$ cat t4.cu
#include <iostream>
#include <thrust/transform.h>
#include <thrust/functional.h>
#include <thrust/device_vector.h>
#include <thrust/device_ptr.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/tuple.h>

// Iterator type of a device_vector<double>.
typedef thrust::device_vector<double>::iterator realIter;
// Tuple of two such iterators; the zip iterator below is built from it.
typedef thrust::tuple<realIter,realIter> Tup;
typedef thrust::zip_iterator<Tup> Zip;
typedef thrust::tuple<double &,double &> Tup2; // tuple of references to double: the type the functor below accepts

// Unary functor mapping one zipped element to a double.
// It takes the tuple of references (Tup2) rather than the tuple of iterators
// (Tup), so it can be passed to thrust::transform over a Zip range.
struct dummyOp : public thrust::unary_function<Tup2, double> {
 __host__ __device__ double operator()(Tup2 &tup){
   // The tuple element is a double&, so it is read directly; there is no
   // iterator to dereference.
   double result = thrust::get<0>(tup);
   return result;
 }
};

// Builds two 4-element device vectors, zips their begin iterators, runs the
// functor over the zipped range into C, and prints the results.
int main(){
  thrust::device_vector<double> A(4);
  thrust::device_vector<double> B(4);
  // Destination of the transform below.
  thrust::device_vector<double> C(4);

  A[0] =  1.; A[1] = 2.;
  A[2] =  3.; A[3] = 4.;

  B[0] =  4.;   B[1] = 3.;
  B[2] =  2.;   B[3] = 1.;

  // Zip iterator positioned at the first elements of A and B.
  Tup tup   = thrust::tuple<realIter,realIter>(A.begin(),B.begin());
  Zip zippy = thrust::zip_iterator<Tup>(tup);
  dummyOp f;

  // With the functor taking a tuple of references, this call now compiles;
  // each element of C receives the first component of the zipped element.
  thrust::transform(zippy, zippy + 4, C.begin(), f);

  std::cout << "A:\n";
  thrust::copy(A.begin(), A.end(), std::ostream_iterator<double>(std::cout, " "));
  std::cout << "\nB:\n";
  thrust::copy(B.begin(), B.end(), std::ostream_iterator<double>(std::cout, " "));
  std::cout << "\nC:\n";
  thrust::copy(C.begin(), C.end(), std::ostream_iterator<double>(std::cout, " "));
  std::cout << std::endl;
  // Indexing the zip iterator yields a tuple; get<0> reads the A component,
  // get<1> the B component.
  std::cout <<"get<0>(zippy[0]) returns:\n" << thrust::get<0>(zippy[0]) << std::endl;
  std::cout <<"get<1>(zippy[1]) returns:\n" << thrust::get<1>(zippy[1]) << std::endl;

  return 0;
}
$ nvcc -arch=sm_61 -o t4 t4.cu
$ ./t4
A:
1 2 3 4
B:
4 3 2 1
C:
1 2 3 4
get<0>(zippy[0]) returns:
1
get<1>(zippy[1]) returns:
3
$

作为替代方案,在元组类型上模板化仿函数运算符当然是可行的,因为编译器会推导出必要的类型细节并实例化仿函数运算符的适当版本。

关于c++ - 采用迭代器元组并生成基本类型的一元函数,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/39796783/

相关文章:

c++ - QJson数据序列化顺序

c++ - 为什么结构体的 sizeof 不等于每个成员的 sizeof 之和?

c++ - 这个算法实现有什么问题 [Sieve of Erathosthene]

cuda - 推力:填充编译错误

memory-management - 了解 Thrust (CUDA) 内存使用情况

c++ - 是否存在一些 thrust::device_vector 等效库,以在 CUDA 内核中使用?

c++ - 使用折叠表达式填充数组时的语法问题

c++ - GNU/Linux : getting value of option 15 of DHCP without using `getnameinfo`

cuda - Thrust cuda中使用float4时出现内存问题

c++ - 从 Thrust::device_vector 到原始指针并返回?