python - Python 中的多元线性回归?

标签 python machine-learning linear-regression

我正在尝试在虚拟数据上构建多元线性回归,但我不断收到溢出错误。 假设这是一个虚拟数据。

print(x_train)
col1 col2 target 
0.18 0.89 109.85
1.0  0.26 155.72
0.92 0.11 137.66
0.07 0.37 76.17
0.85 0.16 139.75
0.99 0.41 162.6
0.87 0.47 151.77

print(x_test)
0.49 0.18
0.57 0.83
0.56 0.64
0.76 0.18

这是我为实现多个特征的线性回归而编写的代码。谁能告诉我我的线性回归的实现是否正确?如果它是正确的那么为什么我总是收到溢出错误。

import numpy as np

def data():
    # prepare data
    x_train = np.array(train_data)[:, :-1]
    y_train = np.array(train_data)[:, -1]
    x_test = np.array(test_data)
    return x_train, y_train, x_test

def normalize(y):
    return (y - y.min()) / (y.max() - y.min()) 

def linear_regression(x_train, y_train, epochs=300):
    y_train = normalize(y_train)
    rows, columns = x_train.shape
    weights = np.zeros((columns))
    intercept = 0
    for x in range(epochs):
        for i in range(len(x_train)):
            prev_weights = weights
            weights += intercept + prev_weights * x_train[i] - y_train[i]
            intercept += (intercept+(prev_weights*x_train[i])-y_train[i]).dot(x_train[i])
    return weights, intercept

def predict(x_test, weights, intercept):
    y_pred = []
    for i in range(len(x_test)):
        y_pred.append(weights.dot(x_test[i]) + intercept)
    return y_pred

def main():
    x_train, y_train, x_test = data()
    weights, intercept = linear_regression(x_train, y_train, epochs=300)
    y_pred = predict(x_test, weights, intercept)
    for i in y_pred:
        print(str(i))

if __name__=='__main__':
    main()

结果:

-inf
-inf
-inf
-inf

/srv/conda/lib/python3.6/site-packages/ipykernel_launcher.py:25: RuntimeWarning: overflow encountered in add

最佳答案

这是一种不同的方法,Python 3D 曲面拟合器使用您的数据进行 3D 散点图、3D 曲面图和等高线图。您应该能够在 3 空间中单击、拖动和旋转 3D 绘图以进行目视检查。这里拟合的曲面是一个平面,不需要测试和训练分割,因为直接给出 RMSE 和 R 平方,您可以看到曲面。只需重新拟合所有数据即可。

scatter

surface

contour

import numpy, scipy, scipy.optimize
import matplotlib
from mpl_toolkits.mplot3d import  Axes3D
from matplotlib import cm # to colormap 3D surfaces from blue to red
import matplotlib.pyplot as plt

graphWidth = 800 # units are pixels
graphHeight = 600 # units are pixels

# 3D contour plot lines
numberOfContourLines = 16

# x, y, z = col1, col2, target
xData = numpy.array([0.18, 1.0, 0.92, 0.07, 0.85, 0.99, 0.87])
yData = numpy.array([0.89, 0.26, 0.11, 0.37, 0.16, 0.41, 0.47])
zData = numpy.array([109.85, 155.72, 137.66, 76.17, 139.75, 162.6, 151.77])


def func(data, a, b, c):
    x = data[0]
    y = data[1]
    return (a * x) + (y * b) + c


def SurfacePlot(func, data, fittedParameters):
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)

    matplotlib.pyplot.grid(True)
    axes = Axes3D(f)

    x_data = data[0]
    y_data = data[1]
    z_data = data[2]

    xModel = numpy.linspace(min(x_data), max(x_data), 20)
    yModel = numpy.linspace(min(y_data), max(y_data), 20)
    X, Y = numpy.meshgrid(xModel, yModel)

    Z = func(numpy.array([X, Y]), *fittedParameters)

    axes.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.coolwarm, linewidth=1, antialiased=True)

    axes.scatter(x_data, y_data, z_data) # show data along with plotted surface

    axes.set_title('Surface Plot (click-drag with mouse)') # add a title for surface plot
    axes.set_xlabel('X Data') # X axis data label
    axes.set_ylabel('Y Data') # Y axis data label
    axes.set_zlabel('Z Data') # Z axis data label

    plt.show()
    plt.close('all') # clean up after using pyplot or else there can be memory and process problems


def ContourPlot(func, data, fittedParameters):
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
    axes = f.add_subplot(111)

    x_data = data[0]
    y_data = data[1]
    z_data = data[2]

    xModel = numpy.linspace(min(x_data), max(x_data), 20)
    yModel = numpy.linspace(min(y_data), max(y_data), 20)
    X, Y = numpy.meshgrid(xModel, yModel)

    Z = func(numpy.array([X, Y]), *fittedParameters)

    axes.plot(x_data, y_data, 'o')

    axes.set_title('Contour Plot') # add a title for contour plot
    axes.set_xlabel('X Data') # X axis data label
    axes.set_ylabel('Y Data') # Y axis data label

    CS = matplotlib.pyplot.contour(X, Y, Z, numberOfContourLines, colors='k')
    matplotlib.pyplot.clabel(CS, inline=1, fontsize=10) # labels for contours

    plt.show()
    plt.close('all') # clean up after using pyplot or else there can be memory and process problems


def ScatterPlot(data):
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)

    matplotlib.pyplot.grid(True)
    axes = Axes3D(f)
    x_data = data[0]
    y_data = data[1]
    z_data = data[2]

    axes.scatter(x_data, y_data, z_data)

    axes.set_title('Scatter Plot (click-drag with mouse)')
    axes.set_xlabel('X Data')
    axes.set_ylabel('Y Data')
    axes.set_zlabel('Z Data')

    plt.show()
    plt.close('all') # clean up after using pyplot or else there can be memory and process problems



if __name__ == "__main__":

    data = [xData, yData, zData]

    initialParameters = [1.0, 1.0, 1.0] # these are the same as scipy default values in this example

    # here a non-linear surface fit is made with scipy's curve_fit()
    fittedParameters, pcov = scipy.optimize.curve_fit(func, [xData, yData], zData, p0 = initialParameters)

    ScatterPlot(data)
    SurfacePlot(func, data, fittedParameters)
    ContourPlot(func, data, fittedParameters)

    print('fitted prameters', fittedParameters)

    modelPredictions = func(data, *fittedParameters) 

    absError = modelPredictions - zData

    SE = numpy.square(absError) # squared errors
    MSE = numpy.mean(SE) # mean squared errors
    RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
    Rsquared = 1.0 - (numpy.var(absError) / numpy.var(zData))
    print('RMSE:', RMSE)
    print('R-squared:', Rsquared)

关于python - Python 中的多元线性回归?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/58681666/

相关文章:

python - 找出一个点是否位于点云的凸包中的有效方法是什么?

java - encog java 导出网络权重

tensorflow - 最小化前馈神经网络的tensorflow.js中的损失

python - Gensim doest_match 函数如何工作?

python - 使用月份日期时间合并多个数据框

tensorflow - 为什么在 Google Cloud ML 上训练的 TensorFlow 模型比本地训练的模型更准确?

algorithm - 在这种情况下,哪种机器学习算法最好?

matlab - Fminsearch Matlab(非线性回归)

machine-learning - 梯度下降随机更新 - 停止准则和更新规则 - 机器学习

python - 如何在 python 中为 OrderedDict 列表中的重复值抛出 AssertionError