python - 用 cython 声明一个 numpy 数组会奇怪地产生大量开销

标签 python numpy cython

我正在使用 Cython 重写我的一些 Python 代码。

按照建议in the documentation我开始用优化的 cython 定义替换我的 python 数组。

特别是,以下应该是声明 numpy 数组的“最佳”方式:

# cython: profile=True
# cython: boundscheck=False
# cython: wraparound=False

import numpy as np
cimport numpy as np

cpdef test():

    cdef np.ndarray[np.int_t, ndim=1] seeds_idx = np.empty(10, dtype=np.int)

    pass

但是,通过 cython -a my_file.pyx 分析上述代码生成的 html 文件显示如下:

+10:     cdef np.ndarray[np.int_t, ndim=1] seeds_idx = np.empty(10, dtype=np.int)
  __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 10, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_empty); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 10, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_t_1 = PyDict_New(); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 10, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_t_3 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 10, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_int); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 10, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_4);
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  if (PyDict_SetItem(__pyx_t_1, __pyx_n_s_dtype, __pyx_t_4) < 0) __PYX_ERR(0, 10, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
  __pyx_t_4 = __Pyx_PyObject_Call(__pyx_t_2, __pyx_tuple_, __pyx_t_1); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 10, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_4);
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  if (!(likely(((__pyx_t_4) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_4, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 10, __pyx_L1_error)
  __pyx_t_5 = ((PyArrayObject *)__pyx_t_4);
  {
    __Pyx_BufFmt_StackElem __pyx_stack[1];
    if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_seeds_idx.rcbuffer->pybuffer, (PyObject*)__pyx_t_5, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int_t, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) {
      __pyx_v_seeds_idx = ((PyArrayObject *)Py_None); __Pyx_INCREF(Py_None); __pyx_pybuffernd_seeds_idx.rcbuffer->pybuffer.buf = NULL;
      __PYX_ERR(0, 10, __pyx_L1_error)
    } else {__pyx_pybuffernd_seeds_idx.diminfo[0].strides = __pyx_pybuffernd_seeds_idx.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_seeds_idx.diminfo[0].shape = __pyx_pybuffernd_seeds_idx.rcbuffer->pybuffer.shape[0];
    }
  }
  __pyx_t_5 = 0;
  __pyx_v_seeds_idx = ((PyArrayObject *)__pyx_t_4);
  __pyx_t_4 = 0;
/* … */
  __pyx_tuple_ = PyTuple_Pack(1, __pyx_int_10); if (unlikely(!__pyx_tuple_)) __PYX_ERR(0, 10, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_tuple_);
  __Pyx_GIVEREF(__pyx_tuple_);

这是在 Python 2.7 上使用 cython 0.24 和 numpy 1.10.4 获得的。

另一方面,非常简单的声明 seeds_idx = np.empty(10) 导致:

+10:     seeds_idx = np.empty(10)
  __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 10, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_empty); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 10, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_2, __pyx_tuple_, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 10, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  __pyx_v_seeds_idx = __pyx_t_1;
  __pyx_t_1 = 0;
/* … */
  __pyx_tuple_ = PyTuple_Pack(1, __pyx_int_10); if (unlikely(!__pyx_tuple_)) __PYX_ERR(0, 10, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_tuple_);
  __Pyx_GIVEREF(__pyx_tuple_);

这里出了什么问题(如果有的话)?谢谢!

最佳答案

如评论所述,这里没有任何问题,因此无需担心。另外,请记住,您正在检查为简单分配生成的代码,任何差异都不会影响性能。

尽管有一个小勘误,在第二种情况下 seeds_idx = np.empty(10) 应该更改为 seeds_idx = np.empty(10, dtype=np.int) 匹配第一个。

如果你添加它,那么为存储函数调用的参数而创建的字典 (np.empty) 也会被添加:

__pyx_t_1 = PyDict_New(); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 8, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_1);

np.int 的查找:

__pyx_t_3 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 10, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_3);
__pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_int); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 10, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_4);
__Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;

新创建的字典中的参数设置完成:

if (PyDict_SetItem(__pyx_t_1, __pyx_n_s_dtype, __pyx_t_4) < 0) __PYX_ERR(0, 8, __pyx_L1_error)
__Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;

除此之外,它们之间的唯一区别如下:

if (!(likely(((__pyx_t_4) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_4, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 10, __pyx_L1_error)
  __pyx_t_5 = ((PyArrayObject *)__pyx_t_4);
  {
    __Pyx_BufFmt_StackElem __pyx_stack[1];
    if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_seeds_idx.rcbuffer->pybuffer, (PyObject*)__pyx_t_5, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int_t, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) {
      __pyx_v_seeds_idx = ((PyArrayObject *)Py_None); __Pyx_INCREF(Py_None); __pyx_pybuffernd_seeds_idx.rcbuffer->pybuffer.buf = NULL;
      __PYX_ERR(0, 10, __pyx_L1_error)
    } else {__pyx_pybuffernd_seeds_idx.diminfo[0].strides = __pyx_pybuffernd_seeds_idx.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_seeds_idx.diminfo[0].shape = __pyx_pybuffernd_seeds_idx.rcbuffer->pybuffer.shape[0];
    }
  }

其中,as stated in the documentation you linked很可能是为了快速访问数据缓冲区而执行的。

到目前为止,最好的选择是使用 typed memoryviews .这些是在 cython 中使用数组的原生方式,很可能是最简单的方式。 Their performance is usually on par with numpy arrays如果没有,您可以随时轻松地在它们之间切换。

关于python - 用 cython 声明一个 numpy 数组会奇怪地产生大量开销,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/37408151/

相关文章:

python - 尝试使用 beautifulsoup 抓取页面,获取大量我想消除的元素数据(我假设)

python - Django - 指定 Django 管理员应该使用哪个模型管理器

python - 如何通过 numpy 获得与 pandas.autocorr() 相同的结果?

python - 如何打包cython模块?

python:运行超时进程并捕获stdout、stderr和退出状态

python - 有问题的文件名 - 是拉丁文吗?我如何将其转换为 ascii?

python - 如何从终端输出 python 图形?

Python:将一个数据框中的一列与另一个数据框中的列相除,累积总和

c++ - Cython 迭代器错误

python - 将内置函数类型转换为方法类型(在 Python 3 中)