Python幂律适用于使用ODR的数据中的上限和不对称错误

我正在尝试使用 python 将一些数据拟合到幂律。问题是我的一些点是上限，我不知道如何将其包含在拟合例程中。

在数据中，我将上限作为 y 中的误差设置为 1，而其余的要小得多。您可以将此错误设置为 0 并更改 uplims 列表生成器，但这样就很糟糕了。

代码如下:

import numpy as np
import matplotlib.pyplot as plt
from scipy.odr import *

# Initiate some data
x = [1.73e-04, 5.21e-04, 1.57e-03, 4.71e-03, 1.41e-02, 4.25e-02, 1.28e-01, 3.84e-01, 1.15e+00]
x_err = [1e-04, 1e-04, 1e-03, 1e-03, 1e-02, 1e-02, 1e-01, 1e-01, 1e-01]
y = [1.26e-05, 8.48e-07, 2.09e-08, 4.11e-09, 8.22e-10, 2.61e-10, 4.46e-11, 1.02e-11, 3.98e-12]
y_err = [1, 1, 2.06e-08, 2.5e-09, 5.21e-10, 1.38e-10, 3.21e-11, 1, 1]

# Define upper limits
uplims = np.ones(len(y_err),dtype='bool')
for i in range(len(y_err)):
    if y_err[i]<1:
        uplims[i]=0
    else:
        uplims[i]=1

# Define a function (power law in our case) to fit the data with.
def function(p, x):
     m, c = p
     return m*x**(-c)

# Create a model for fitting.
model = Model(function)

# Create a RealData object using our initiated data from above.
data = RealData(x, y, sx=x_err, sy=y_err)

# Set up ODR with the model and data.
odr = ODR(data, model, beta0=[1e-09, 2])
odr.set_job(fit_type=0)   # 0 is full ODR and 2 is least squares; AFAIK, it doesn't change within errors
# more details in https://docs.scipy.org/doc/scipy/reference/generated/scipy.odr.ODR.set_job.html

# Run the regression.
out = odr.run()


# Use the in-built pprint method to give us results.
#out.pprint()   #this prints much information, but normally we don't need it, just the parameters and errors; the residual variation is the reduced chi square estimator

print('amplitude = %5.2e +/- %5.2e \nindex = %5.2f +/- %5.2f \nchi square = %12.8f'% (out.beta[0], out.sd_beta[0], out.beta[1], out.sd_beta[1], out.res_var))

# Generate fitted data.
x_fit = np.linspace(x[0], x[-1], 1000)    #to do the fit only within the x interval; we can always extrapolate it, of course
y_fit = function(out.beta, x_fit)


# Generate a plot to show the data, errors, and fit.
fig, ax = plt.subplots()

ax.errorbar(x, y, xerr=x_err, yerr=y_err, uplims=uplims, linestyle='None', marker='x')
ax.loglog(x_fit, y_fit)
ax.set_xlabel(r'$x$')
ax.set_ylabel(r'$f(x) = m·x^{-c}$')
ax.set_title('Power Law fit')

plt.show()

拟合结果为:

amplitude = 3.42e-12 +/- 5.32e-13
index =  1.33 +/-  0.04
chi square =   0.01484021

正如您在图中看到的那样，第一个点和最后两个点是上限，拟合并未将它们考虑在内。此外，在倒数第二点，即使严格禁止，拟合也会超过它。

我需要拟合知道这个限制非常严格，而不是尝试拟合点本身，而只是将它们视为限制。我如何使用 odr 例程(或任何其他使我适合并给我卡方式估计器的代码)来做到这一点？

请考虑到我需要轻松地将函数更改为其他泛化，因此 powerlaw 模块之类的东西是不可取的。

谢谢!

最佳答案

此答案与 this 有关帖子，我在这里讨论与 x 的拟合和 y错误。因此，这不需要 ODR模块，但可以手动完成。因此，可以使用 leastsq或 minimize .关于限制，我在其他帖子中明确表示，如果可能，我会尽量避免它们。这也可以在这里完成，虽然编程和数学的细节有点麻烦，特别是如果它应该是稳定和万无一失的。我只会给出一个大概的想法。假设我们想要 y0 > m * x0**(-c) .在日志形式中，我们可以将其写为 eta0 > mu - c * xeta0 . IE。有一个alpha这样 eta0 = mu - c * xeta0 + alpha**2 .其他不等式也一样。对于第二个上限，您会得到 beta**2但是你可以决定哪个是较小的，所以你自动满足另一个条件。同样的事情适用于 gamma**2 的下限。和一个 delta**2 .假设我们可以使用 alpha和 gamma .我们也可以结合不平等条件来将这两者联系起来。最后我们可以安装一个 sigma和 alpha = sqrt(s-t)* sigma / sqrt( sigma**2 + 1 ) , 其中s和 t源自不等式。 sigma / sqrt( sigma**2 + 1 )函数只是让 alpha 的一个选项在一定范围内变化，即alpha**2 < s-t radicand 可能变为负数的事实表明存在无解的情况。与 alpha已知，mu因此 m被计算。所以拟合参数是c和 sigma ，它考虑了不平等并使m取决于。我厌倦了它并且它可以工作，但手头的版本不是最稳定的版本。我会根据要求发布它。

由于我们已经有了手工制作的残差函数，因此我们还有第二个选择。我们只是介绍我们自己的chi**2功能与使用minimize ，这允许约束。作为minimize和 constraints关键字解决方案非常灵活，残差函数很容易为其他函数修改，而不仅仅是 m * x**( -c )整体构造相当灵活。它看起来如下:

import matplotlib.pyplot as plt
import numpy as np
from random import random, seed
from scipy.optimize import minimize,leastsq

seed(7563)
fig1 = plt.figure(1)


###for gaussion distributed errors
def boxmuller(x0,sigma):
    u1=random()
    u2=random()
    ll=np.sqrt(-2*np.log(u1))
    z0=ll*np.cos(2*np.pi*u2)
    z1=ll*np.cos(2*np.pi*u2)
    return sigma*z0+x0, sigma*z1+x0


###for plotting ellipses
def ell_data(a,b,x0=0,y0=0):
    tList=np.linspace(0,2*np.pi,150)
    k=float(a)/float(b)
    rList=[a/np.sqrt((np.cos(t))**2+(k*np.sin(t))**2) for t in tList]
    xyList=np.array([[x0+r*np.cos(t),y0+r*np.sin(t)] for t,r in zip(tList,rList)])
    return xyList

###function to fit
def f(x,m,c):
    y = abs(m) * abs(x)**(-abs(c))
    #~ print y,x,m,c
    return y


###how to rescale the ellipse to make fitfunction a tangent
def elliptic_rescale(x, m, c, x0, y0, sa, sb):
    #~ print "e,r",x,m,c
    y=f( x, m, c ) 
    #~ print "e,r",y
    r=np.sqrt( ( x - x0 )**2 + ( y - y0 )**2 )
    kappa=float( sa ) / float( sb )
    tau=np.arctan2( y - y0, x - x0 )
    new_a=r*np.sqrt( np.cos( tau )**2 + ( kappa * np.sin( tau ) )**2 )
    return new_a

###residual function to calculate chi-square
def residuals(parameters,dataPoint):#data point is (x,y,sx,sy)
    m, c = parameters
    #~ print "m c", m, c
    theData = np.array(dataPoint)
    best_t_List=[]
    for i in range(len(dataPoint)):
        x, y, sx, sy = dataPoint[i][0], dataPoint[i][1], dataPoint[i][2], dataPoint[i][3]
        #~ print "x, y, sx, sy",x, y, sx, sy
        ###getthe point on the graph where it is tangent to an error-ellipse
        ed_fit = minimize( elliptic_rescale, x , args = ( m, c, x, y, sx, sy ) )
        best_t = ed_fit['x'][0]
        best_t_List += [best_t]
        #~ exit(0)
    best_y_List=[ f( t, m, c ) for t in best_t_List ]
    ##weighted distance not squared yet, as this is done by scipy.optimize.leastsq
    wighted_dx_List = [ ( x_b - x_f ) / sx for x_b, x_f, sx in zip( best_t_List,theData[:,0], theData[:,2] ) ]
    wighted_dy_List = [ ( x_b - x_f ) / sx for x_b, x_f, sx in zip( best_y_List,theData[:,1], theData[:,3] ) ]
    return wighted_dx_List + wighted_dy_List


def chi2(params, pnts):  
    r = np.array( residuals( params, pnts ) )
    s = sum( [ x**2 for x in  r]  )
    #~ print params,s,r
    return s


def myUpperIneq(params,pnt):
    m, c = params
    x,y=pnt
    return y - f( x, m, c )


def myLowerIneq(params,pnt):
    m, c = params
    x,y=pnt
    return f( x, m, c ) - y


###to create some test data
def test_data(m,c, xList,const_sx,rel_sx,const_sy,rel_sy):
    yList=[f(x,m,c) for x in xList]
    xErrList=[ boxmuller(x,const_sx+x*rel_sx)[0] for x in xList]
    yErrList=[ boxmuller(y,const_sy+y*rel_sy)[0] for y in yList]
    return xErrList,yErrList


###some start values
mm_0=2.3511
expo_0=.3588
csx,rsx=.01,.07
csy,rsy=.04,.09,

limitingPoints=dict()
limitingPoints[0]=np.array([[.2,5.4],[.5,5.0],[5.1,.9],[5.7,.9]])
limitingPoints[1]=np.array([[.2,5.4],[.5,5.0],[5.1,1.5],[5.7,1.2]])
limitingPoints[2]=np.array([[.2,3.4],[.5,5.0],[5.1,1.1],[5.7,1.2]])
limitingPoints[3]=np.array([[.2,3.4],[.5,5.0],[5.1,1.7],[5.7,1.2]])

####some data
xThData=np.linspace(.2,5,15)
yThData=[ f(x, mm_0, expo_0) for x in xThData]

#~ ###some noisy data
xNoiseData,yNoiseData=test_data(mm_0,  expo_0, xThData, csx,rsx, csy,rsy)
xGuessdError=[csx+rsx*x for x in xNoiseData]
yGuessdError=[csy+rsy*y for y in yNoiseData]



for testing in range(4):
    ###Now fitting with limits
    zipData=zip(xNoiseData,yNoiseData, xGuessdError, yGuessdError)    
    estimate = [ 2.4, .3 ]
    con0={'type': 'ineq', 'fun': myUpperIneq, 'args': (limitingPoints[testing][0],)}
    con1={'type': 'ineq', 'fun': myUpperIneq, 'args': (limitingPoints[testing][1],)}
    con2={'type': 'ineq', 'fun': myLowerIneq, 'args': (limitingPoints[testing][2],)}
    con3={'type': 'ineq', 'fun': myLowerIneq, 'args': (limitingPoints[testing][3],)}
    myResult = minimize( chi2 , estimate , args=( zipData, ), constraints=[ con0, con1, con2, con3 ]  )
    print "############"
    print myResult


    ###plot that
    ax=fig1.add_subplot(4,2,2*testing+1)
    ax.plot(xThData,yThData)
    ax.errorbar(xNoiseData,yNoiseData, xerr=xGuessdError, yerr=yGuessdError, fmt='none',ecolor='r')


    testX = np.linspace(.2,6,25)
    testY = np.fromiter( ( f( x, myResult.x[0], myResult.x[1] ) for x in testX ), np.float)

    bx=fig1.add_subplot(4,2,2*testing+2)
    bx.plot(xThData,yThData)
    bx.errorbar(xNoiseData,yNoiseData, xerr=xGuessdError, yerr=yGuessdError, fmt='none',ecolor='r')
    ax.plot(limitingPoints[testing][:,0],limitingPoints[testing][:,1],marker='x', linestyle='')
    bx.plot(limitingPoints[testing][:,0],limitingPoints[testing][:,1],marker='x', linestyle='')
    ax.plot(testX, testY, linestyle='--')
    bx.plot(testX, testY, linestyle='--')

    bx.set_xscale('log')
    bx.set_yscale('log')

plt.show()

提供结果

############
  status: 0
 success: True
    njev: 8
    nfev: 36
     fun: 13.782127248002116
       x: array([ 2.15043226,  0.35646436])
 message: 'Optimization terminated successfully.'
     jac: array([-0.00377715,  0.00350225,  0.        ])
     nit: 8
############
  status: 0
 success: True
    njev: 7
    nfev: 32
     fun: 41.372277637885716
       x: array([ 2.19005695,  0.23229378])
 message: 'Optimization terminated successfully.'
     jac: array([ 123.95069313, -442.27114677,    0.        ])
     nit: 7
############
  status: 0
 success: True
    njev: 5
    nfev: 23
     fun: 15.946621924326545
       x: array([ 2.06146362,  0.31089065])
 message: 'Optimization terminated successfully.'
     jac: array([-14.39131606, -65.44189298,   0.        ])
     nit: 5
############
  status: 0
 success: True
    njev: 7
    nfev: 34
     fun: 88.306027468763432
       x: array([ 2.16834392,  0.14935514])
 message: 'Optimization terminated successfully.'
     jac: array([ 224.11848736, -791.75553417,    0.        ])
     nit: 7

我检查了四个不同的限制点(行)。结果以对数刻度(列)正常显示。通过一些额外的工作，您也可能会遇到错误。

非对称错误更新 老实说，目前我不知道如何处理这个属性(property)。天真地，我会定义自己的非对称损失函数，类似于 this post . 与 x和 y错误我是按象限来做的，而不是只检查正面或负面。因此，我的错误椭圆变成了四个相连的部分。尽管如此，还是有些道理的。为了测试和展示它是如何工作的，我做了一个线性函数的例子。我想OP可以根据他的要求将这两段代码组合起来。

在线性拟合的情况下，它看起来像这样:

import matplotlib.pyplot as plt
import numpy as np
from random import random, seed
from scipy.optimize import minimize,leastsq

#~ seed(7563)
fig1 = plt.figure(1)
ax=fig1.add_subplot(2,1,1)
bx=fig1.add_subplot(2,1,2)

###function to fit, here only linear for testing.
def f(x,m,y0):
    y = m * x +y0
    return y

###for gaussion distributed errors
def boxmuller(x0,sigma):
    u1=random()
    u2=random()
    ll=np.sqrt(-2*np.log(u1))
    z0=ll*np.cos(2*np.pi*u2)
    z1=ll*np.cos(2*np.pi*u2)
    return sigma*z0+x0, sigma*z1+x0


###for plotting ellipse quadrants
def ell_data(aN,aP,bN,bP,x0=0,y0=0):
    tPPList=np.linspace(0, 0.5 * np.pi, 50)
    kPP=float(aP)/float(bP)
    rPPList=[aP/np.sqrt((np.cos(t))**2+(kPP*np.sin(t))**2) for t in tPPList]

    tNPList=np.linspace( 0.5 * np.pi, 1.0 * np.pi, 50)
    kNP=float(aN)/float(bP)
    rNPList=[aN/np.sqrt((np.cos(t))**2+(kNP*np.sin(t))**2) for t in tNPList]

    tNNList=np.linspace( 1.0 * np.pi, 1.5 * np.pi, 50)
    kNN=float(aN)/float(bN)
    rNNList=[aN/np.sqrt((np.cos(t))**2+(kNN*np.sin(t))**2) for t in tNNList]

    tPNList = np.linspace( 1.5 * np.pi, 2.0 * np.pi, 50)
    kPN = float(aP)/float(bN)
    rPNList = [aP/np.sqrt((np.cos(t))**2+(kPN*np.sin(t))**2) for t in tPNList]

    tList = np.concatenate( [ tPPList, tNPList, tNNList, tPNList] )
    rList = rPPList + rNPList+ rNNList + rPNList

    xyList=np.array([[x0+r*np.cos(t),y0+r*np.sin(t)] for t,r in zip(tList,rList)])
    return xyList


###how to rescale the ellipse to touch fitfunction at point (x,y)
def elliptic_rescale_asymmetric(x, m, c, x0, y0, saN, saP, sbN, sbP , getQuadrant=False):
    y=f( x, m, c ) 
    ###distance to function
    r=np.sqrt( ( x - x0 )**2 + ( y - y0 )**2 )
    ###angle to function
    tau=np.arctan2( y - y0, x - x0 )
    quadrant=0
    if tau >0:
        if tau < 0.5 * np.pi: ## PP
            kappa=float( saP ) / float( sbP )
            quadrant=1
        else:
            kappa=float( saN ) / float( sbP )
            quadrant=2
    else:
        if tau < -0.5 * np.pi: ## PP
            kappa=float( saN ) / float( sbN)
            quadrant=3
        else:
            kappa=float( saP ) / float( sbN )
            quadrant=4
    new_a=r*np.sqrt( np.cos( tau )**2 + ( kappa * np.sin( tau ) )**2 )
    if quadrant == 1 or quadrant == 4:
        rel_a=new_a/saP
    else:
        rel_a=new_a/saN
    if getQuadrant:
        return rel_a, quadrant, tau
    else:
        return rel_a

### residual function to calculate chi-square
def residuals(parameters,dataPoint):#data point is (x,y,sxN,sxP,syN,syP)
    m, c = parameters
    theData = np.array(dataPoint)
    bestTList=[]
    qqList=[]
    weightedDistanceList = []
    for i in range(len(dataPoint)):
        x, y, sxN, sxP, syN, syP = dataPoint[i][0], dataPoint[i][1], dataPoint[i][2], dataPoint[i][3], dataPoint[i][4], dataPoint[i][5]
        ### get the point on the graph where it is tangent to an error-ellipse
        ### i.e. smallest ellipse touching the graph
        edFit = minimize(  elliptic_rescale_asymmetric, x , args = ( m, c, x, y, sxN, sxP, syN, syP ) )
        bestT = edFit['x'][0]
        bestTList += [ bestT ]
        bestA,qq , tau= elliptic_rescale_asymmetric( bestT, m, c , x, y, aN, aP, bN, bP , True)
        qqList += [ qq ]
    bestYList=[ f( t, m, c ) for t in bestTList ]
    ### weighted distance not squared yet, as this is done by scipy.optimize.leastsq or manual chi2 function
    for counter in range(len(dataPoint)):
        xb=bestTList[counter]
        xf=dataPoint[counter][0]
        yb=bestYList[counter]
        yf=dataPoint[counter][1]
        quadrant=qqList[counter]
        if quadrant == 1:
            sx, sy = sxP, syP
        elif quadrant == 2:
            sx, sy = sxN, syP
        elif quadrant == 3:
            sx, sy = sxN, syN
        elif quadrant == 4:
            sx, sy = sxP, syN
        else:
            assert 0
        weightedDistanceList += [ ( xb - xf ) / sx, ( yb - yf ) / sy ]
    return weightedDistanceList


def chi2(params, pnts):  
    r = np.array( residuals( params, pnts ) )
    s = np.fromiter( ( x**2 for x in  r), np.float ).sum()
    return s

####...to make data with asymmetric error (fixed); for testing only
def noisy_data(xList,m0,y0, sxN,sxP,syN,syP):
    yList=[ f(x, m0, y0) for x in xList]
    gNList=[boxmuller(0,1)[0] for dummy in range(len(xList))]
    xerrList=[]
    for x,err in zip(xList,gNList):
        if err < 0:
            xerrList += [ sxP * err + x ]
        else:
            xerrList += [ sxN * err + x ]
    gNList=[boxmuller(0,1)[0] for dummy in range(len(xList))]
    yerrList=[]
    for y,err in zip(yList,gNList):
        if err < 0:
            yerrList += [ syP * err + y ]
        else:
            yerrList += [ syN * err + y ]
    return xerrList, yerrList


###some start values
m0=1.3511
y0=-2.2
aN, aP, bN, bP=.2,.5, 0.9, 1.6

#### some data
xThData=np.linspace(.2,5,15)
yThData=[ f(x, m0, y0) for x in xThData]
xThData0=np.linspace(-1.2,7,3)
yThData0=[ f(x, m0, y0) for x in xThData0]

### some noisy data
xErrList,yErrList = noisy_data(xThData, m0, y0, aN, aP, bN, bP)

###...and the fit
dataToFit=zip(xErrList,yErrList,  len(xThData)*[aN], len(xThData)*[aP], len(xThData)*[bN], len(xThData)*[bP])
fitResult = minimize(chi2, (m0,y0) , args=(dataToFit,) )
fittedM, fittedY=fitResult.x
yThDataF=[ f(x, fittedM, fittedY) for x in xThData0]


### plot that
for cx in [ax,bx]:
    cx.plot([-2,7], [f(x, m0, y0 ) for x in [-2,7]])

ax.errorbar(xErrList,yErrList, xerr=[ len(xThData)*[aN],len(xThData)*[aP] ], yerr=[ len(xThData)*[bN],len(xThData)*[bP] ], fmt='ro')

for x,y in zip(xErrList,yErrList)[:]:
    xEllList,yEllList = zip( *ell_data(aN,aP,bN,bP,x,y) )
    ax.plot(xEllList,yEllList ,c='#808080')
    ### rescaled
    ### ...as well as a scaled version that touches the original graph. This gives the error shortest distance to that graph
    ed_fit = minimize( elliptic_rescale_asymmetric, 0 ,args=(m0, y0, x, y, aN, aP, bN, bP ) )
    best_t = ed_fit['x'][0]
    best_a,qq , tau= elliptic_rescale_asymmetric( best_t, m0, y0 , x, y, aN, aP, bN, bP , True)
    xEllList,yEllList = zip( *ell_data( aN * best_a, aP * best_a, bN * best_a, bP * best_a, x, y) )
    ax.plot( xEllList, yEllList, c='#4040a0' )

###plot the fit

bx.plot(xThData0,yThDataF)
bx.errorbar(xErrList,yErrList, xerr=[ len(xThData)*[aN],len(xThData)*[aP] ], yerr=[ len(xThData)*[bN],len(xThData)*[bP] ], fmt='ro')
for x,y in zip(xErrList,yErrList)[:]:
    xEllList,yEllList = zip( *ell_data(aN,aP,bN,bP,x,y) )
    bx.plot(xEllList,yEllList ,c='#808080')
    ####rescaled
    ####...as well as a scaled version that touches the original graph. This gives the error shortest distance to that graph
    ed_fit = minimize( elliptic_rescale_asymmetric, 0 ,args=(fittedM, fittedY, x, y, aN, aP, bN, bP ) )
    best_t = ed_fit['x'][0]
    #~ print best_t
    best_a,qq , tau= elliptic_rescale_asymmetric( best_t, fittedM, fittedY , x, y, aN, aP, bN, bP , True)
    xEllList,yEllList = zip( *ell_data( aN * best_a, aP * best_a, bN * best_a, bP * best_a, x, y) )
    bx.plot( xEllList, yEllList, c='#4040a0' )

plt.show()

哪些地 block

上图显示了原始线性函数和使用非对称高斯误差从中生成的一些数据。绘制了误差线，以及分段误差椭圆(灰色……并重新缩放以触及线性函数，蓝色)。下图还显示了拟合函数以及重新缩放的分段椭圆，触及拟合函数。

关于Python幂律适用于使用ODR的数据中的上限和不对称错误，我们在Stack Overflow上找到一个类似的问题： https://stackoverflow.com/questions/46832659/

Python幂律适用于使用ODR的数据中的上限和不对称错误

上一篇：Python/Pandas 分箱数据 Timedelta

下一篇：python - Jupyter notebook python 在 Windows 10 上崩溃