python - 自定义 numpy 类型导致 numpy.mean 崩溃

我用 C++ 中的基本功能定义了我自己的 numpy 类型。数组创建有效，并且可以添加和划分这种类型的数组。可以使用 np.sum 减少数组，但在使用 np.mean 时解释器会崩溃。

import numpy as np
from example import Dual

x = np.array([Dual(1, 2), Dual(1, 3), Dual(1, 1)])
print(x.dtype)  # dtype(Dual)  -- correct type

# Mean 1
m = x.sum() / x.size
print(m.x, m.y)  # 1.0 2.0  -- correct result

# Mean 2
x.mean()  # -- CRASH

通过在我的 C++ 代码中插入调试输出，我验证了在我的代码中正确计算了加法和除法(实际执行了 3/3 和 6/3)。所以函数在计算我的结果后崩溃了。 (Windows 告诉我 Python 停止工作，所以大概发生了后台错误。)

mean 与 sum()/n 有何不同？如何修改我的类型以使其工作？

我试图将我的 C++ 代码简化以创建一个最小的完整示例。不幸的是，minimal 在定义工作 numpy 类型时仍然相当长。这是实现:

#include <Python.h>
#include <numpy/arrayobject.h>
#include <numpy/npy_math.h>
#include <numpy/ufuncobject.h>

#include "structmember.h"

#include <iostream>
#include <math.h>


struct Dual
{
public:
    double x;
    double y;

    Dual(double x, double y) : x(x), y(y) { }

    inline static Dual add(const Dual& a, const Dual& b) {
        return Dual(a.x + b.x, a.y + b.y); }

    inline static Dual div(const Dual& a, const Dual& b) {
        return Dual(a.x / b.x, a.y / b.y); }
};


typedef struct {
    PyObject_HEAD
    Dual ob_val;
} PyDual;


PyArray_Descr* dual_descr;


static PyObject *
PyDual_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
    PyDual *self;

    self = (PyDual *)type->tp_alloc(type, 0);
    if (self == NULL) {
        return NULL;
    }

    double x = 0;
    double y = 0;

    static char *kwlist[] = {"x", "y", NULL};
    if(!PyArg_ParseTupleAndKeywords(args, kwds, "d|d", kwlist, &x, &y))
        return NULL;

    self->ob_val = Dual(x, y);
    return (PyObject *)self;
}


static PyTypeObject PyDual_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "example.Dual",               /* tp_name */
    sizeof(PyDual),                /* tp_basicsize */
    0,                             /* tp_itemsize */
    0,                             /* tp_dealloc */
    0,                             /* tp_print */
    0,                             /* tp_getattr */
    0,                             /* tp_setattr */
    0,                             /* tp_reserved */
    0,                             /* tp_repr */
    0,                             /* tp_as_number */
    0,                             /* tp_as_sequence */
    0,                             /* tp_as_mapping */
    0,                             /* tp_hash  */
    0,                             /* tp_call */
    0,                             /* tp_str */
    0,                             /* tp_getattro */
    0,                             /* tp_setattro */
    0,                             /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,            /* tp_flags */
    "Dual value/derivative",       /* tp_doc */
    0,                             /* tp_traverse */
    0,                             /* tp_clear */
    0,                             /* tp_richcompare */
    0,                             /* tp_weaklistoffset */
    0,                             /* tp_iter */
    0,                             /* tp_iternext */
    0,                             /* tp_methods */
    0,                             /* tp_members */
    0,                             /* tp_getset */
    0,                             /* tp_base */
    0,                             /* tp_dict */
    0,                             /* tp_descr_get */
    0,                             /* tp_descr_set */
    0,                             /* tp_dictoffset */
    0,                             /* tp_init */
    0,                             /* tp_alloc */
    PyDual_new,                    /* tp_new */
};


static int PyDual_Check(PyObject* obj)
{
    return PyObject_IsInstance(obj, (PyObject*)&PyDual_Type);
}


static PyObject*
PyDual_from_Dual(Dual d)
{
    PyDual *obj = (PyDual*)PyDual_Type.tp_alloc(&PyDual_Type, 0);
    if(obj)
        obj->ob_val = d;
    return (PyObject*)obj;
}


static PyMemberDef PyDual_members[] = {
    {"x", T_DOUBLE, offsetof(PyDual, ob_val) + offsetof(Dual, x), READONLY, "value"},
    {"y", T_DOUBLE, offsetof(PyDual, ob_val) + offsetof(Dual, y), READONLY, "derivative"},
    {NULL}
};


//----------------------------------------------------------------------------

static PyArray_ArrFuncs _PyDual_ArrFuncs;


static void
DUAL_copyswap(Dual *dst, Dual *src, int swap, void *NPY_UNUSED(arr))
{
  PyArray_Descr *descr;
  descr = PyArray_DescrFromType(NPY_DOUBLE);
  descr->f->copyswapn(dst, sizeof(double), src, sizeof(double), 2, swap, NULL);
  Py_DECREF(descr);
}


static int DUAL_setitem(PyObject* item, void* data, void* ap)
{
  Dual d(0, 0);
  if(PyDual_Check(item)) {
    memcpy(data, &(((PyDual *)item)->ob_val),sizeof(Dual));
  } else if(PySequence_Check(item) && PySequence_Length(item)==4) {
    auto element = PySequence_GetItem(item, 0);
    if(element == NULL) { return -1; } /* Not a sequence, or other failure */
    d.x = PyFloat_AsDouble(element);
    Py_DECREF(element);
    element = PySequence_GetItem(item, 1);
    if(element == NULL) { return -1; } /* Not a sequence, or other failure */
    d.y = PyFloat_AsDouble(element);
    Py_DECREF(element);
  } else {
    PyErr_SetString(PyExc_TypeError,
                    "Unknown input to DUAL_setitem");
    return -1;
  }
  return 0;
}


static PyObject *DUAL_getitem(void* data, void* arr)
{
  Dual d(0, 0);
  memcpy(&d, data, sizeof(Dual));
  return PyDual_from_Dual(d);
}


template<typename T>
scalar_to_dual(T* ip, Dual* op, npy_intp n,
               PyArrayObject *NPY_UNUSED(aip), PyArrayObject *NPY_UNUSED(aop))
{
    while(n--)
    {
        op->x = *ip++;
        op->y = op->x;
    }
}


static void register_cast_function(int sourceType, int destType, PyArray_VectorUnaryFunc *castfunc)
{
  PyArray_Descr *descr = PyArray_DescrFromType(sourceType);
  PyArray_RegisterCastFunc(descr, destType, castfunc);
  PyArray_RegisterCanCast(descr, destType, NPY_NOSCALAR);
  Py_DECREF(descr);
}


static void sum_ufunc(char** args, npy_intp* dimensions, npy_intp* steps, void* data)
{
    char *ip1 = args[0], *ip2 = args[1], *op1 = args[2];
    npy_intp is1 = steps[0], is2 = steps[1], os1 = steps[2];
    npy_intp n = dimensions[0];
    for(npy_intp i=0; i<n; i++, ip1+=is1, ip2+=is2, op1+=os1)
    {
        const Dual in1 = *(Dual*)ip1;
        const Dual in2 = *(Dual*)ip2;
        *((Dual*)op1) = Dual::add(in1, in2);
    }
}


static void div_ufunc(char** args, npy_intp* dimensions, npy_intp* steps, void* data)
{
    char *ip1 = args[0], *ip2 = args[1], *op1 = args[2];
    npy_intp is1 = steps[0], is2 = steps[1], os1 = steps[2];
    npy_intp n = dimensions[0];
    for(npy_intp i=0; i<n; i++, ip1+=is1, ip2+=is2, op1+=os1)
    {
        const Dual in1 = *(Dual*)ip1;
        const Dual in2 = *(Dual*)ip2;
        *((Dual*)op1) = Dual::div(in1, in2);

        std::cout << in1.x << "/" << in2.x << std::endl;
        std::cout << in1.y << "/" << in2.y << std::endl;
    }
}


//----------------------------------------------------------------------------


static struct PyModuleDef examplemodule = {
   PyModuleDef_HEAD_INIT,
   "example",   /* name of module */
   NULL, /* module documentation, may be NULL */
   -1,       /* size of per-interpreter state of the module,
                or -1 if the module keeps state in global variables. */
   NULL, NULL, NULL, NULL, NULL
};


PyMODINIT_FUNC PyInit_example(void)
{
    // initialize numpy
    import_array(); if(PyErr_Occurred()) return NULL;
    import_umath(); if(PyErr_Occurred()) return NULL;
    auto numpy = PyImport_ImportModule("numpy"); if(!numpy) return NULL;
    auto numpy_dict = PyModule_GetDict(numpy); if(!numpy_dict) return NULL;

    PyDual_Type.tp_members = PyDual_members;

    PyDual_Type.tp_base = &PyGenericArrType_Type;

    if( PyType_Ready(&PyDual_Type) < 0)
        return NULL;

    PyArray_InitArrFuncs(&_PyDual_ArrFuncs);
    _PyDual_ArrFuncs.copyswap = (PyArray_CopySwapFunc*)DUAL_copyswap;
    _PyDual_ArrFuncs.setitem = (PyArray_SetItemFunc*)DUAL_setitem;
    _PyDual_ArrFuncs.getitem = (PyArray_GetItemFunc*)DUAL_getitem;

    dual_descr = PyObject_New(PyArray_Descr, &PyArrayDescr_Type);
    dual_descr->typeobj = &PyDual_Type;
    dual_descr->kind = 'x';
    dual_descr->type = 'x';
    dual_descr->byteorder = '=';
    dual_descr->flags = 0;
    dual_descr->type_num = 0; // assigned at registration
    dual_descr->elsize = 8*2;
    dual_descr->alignment = 8;
    dual_descr->subarray = NULL;
    dual_descr->fields = NULL;
    dual_descr->names = NULL;
    dual_descr->f = &_PyDual_ArrFuncs;
    dual_descr->metadata = NULL;
    dual_descr->c_metadata = NULL;

    Py_INCREF(&PyDual_Type);
    auto dualNum = PyArray_RegisterDataType(dual_descr);
    if(dualNum < 0) return NULL;

    int AD = dual_descr->type_num;

    register_cast_function(NPY_BOOL, dualNum, (PyArray_VectorUnaryFunc*)scalar_to_dual<npy_bool>);
    register_cast_function(NPY_LONG, dualNum, (PyArray_VectorUnaryFunc*)scalar_to_dual<npy_long>);
    register_cast_function(NPY_DOUBLE, dualNum, (PyArray_VectorUnaryFunc*)scalar_to_dual<npy_double>);

    int arg_types[] = {AD, AD, AD};

    PyUFuncObject* ufunc = (PyUFuncObject*)PyObject_GetAttrString(numpy, "add");
    PyUFunc_RegisterLoopForType(ufunc, AD, sum_ufunc, arg_types, NULL);

    ufunc = (PyUFuncObject*)PyObject_GetAttrString(numpy, "true_divide");
    PyUFunc_RegisterLoopForType(ufunc, AD, div_ufunc, arg_types, NULL);

    auto module = PyModule_Create(&examplemodule);
    if( module == NULL )
        return NULL;

    Py_INCREF(&PyDual_Type);
    PyModule_AddObject(module, "Dual", (PyObject*)&PyDual_Type);
    return module;
}

最佳答案

回想起来，错误非常明显，与实现的 numpy 部分无关，而是与 Python 对象的初始化有关。感谢@hpaulj 的评论，这让我走上了正确的轨道。

两者都是 x.mean和 np.mean调用np.core._methods.mean ，崩溃发生在行中

ret = ret.dtype.type(ret / rcount)

这是一个多余的类型转换(至少在我的例子中)，它等同于

Dual(dual_object)

换句话说，Dual使用 Dual 的实例调用构造函数作为论据。但是，构造函数的实现需要两个 double 作为参数:

static char *kwlist[] = {"x", "y", NULL};
if(!PyArg_ParseTupleAndKeywords(args, kwds, "d|d", kwlist, &x, &y))
    return NULL;

PyArg_ParseTupleAndKeywords尝试强制施放时崩溃 Dual至 double .这是另一个需要研究的问题，但是通过在构造函数中实现相关转换，原来的问题就解决了。

关于python - 自定义 numpy 类型导致 numpy.mean 崩溃，我们在Stack Overflow上找到一个类似的问题： https://stackoverflow.com/questions/44288911/

python - 自定义 numpy 类型导致 numpy.mean 崩溃

上一篇：c++ - 如何指示 g++ 在 n 秒后停止

下一篇：c++ - 在模板类的模板成员函数中设置优先于构造函数