python - Implementation of Gaussian process regression in Python, y(n_samples, n_targets)

Tags: python, machine-learning, gaussian

I am working with some price data, with x = day1, day2, day3, etc. On day 1 I have 15 price points (y), on day 2 I have 30 price points (y2), and so on.

When I read the documentation for Gaussian process regression: http://scikit-learn.org/stable/modules/generated/sklearn.gaussian_process.GaussianProcess.html#sklearn.gaussian_process.GaussianProcess.fit

y is of shape (n_samples, n_targets), with the observations of the output to be predicted.

I assume n_targets refers to all the price points I observed on each day. However, the number of price points differs from day to day, and I am wondering how to handle such a case.
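(For concreteness, a small sketch, not from the question, of the data layout being described and one common way to flatten it into single-target samples so that every price becomes its own (day, price) pair; the array names and values below are made up for illustration.)

import numpy as np

# hypothetical price data: a different number of observations per day,
# so the days cannot be stacked into one rectangular (n_samples, n_targets) array
prices_per_day = {
    1: np.random.uniform(10, 12, size=15),  # day 1: 15 price points
    2: np.random.uniform(10, 12, size=30),  # day 2: 30 price points
}

# treat every price as its own sample and repeat the day index,
# which gives X of shape (n_samples, 1) and y of shape (n_samples,)
X = np.concatenate(
    [np.full(len(v), day, dtype=float) for day, v in prices_per_day.items()]
).reshape(-1, 1)
y = np.concatenate(list(prices_per_day.values()))
print(X.shape, y.shape)  # (45, 1) (45,)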

Thanks a lot!

Best Answer

I implemented Gaussian process regression in Python using only numpy. My goal was to understand it by implementing it; it may be helpful to you.

https://github.com/muatik/machine-learning-examples/blob/master/gaussianprocess2.ipynb

import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
sns.set(color_codes=True)

%matplotlib inline


class GP(object):

    @classmethod
    def kernel_bell_shape(cls, x, y, delta=1.0):
        # squared-exponential ("bell shape") kernel
        return np.exp(-1/2.0 * np.power(x - y, 2) / delta)

    @classmethod
    def kernel_laplacian(cls, x, y, delta=1):
        # Laplacian (exponential) kernel based on the absolute distance
        return np.exp(-1/2.0 * np.abs(x - y) / delta)

    @classmethod
    def generate_kernel(cls, kernel, delta=1):
        # bind a fixed delta to the given kernel function
        def wrapper(*args, **kwargs):
            kwargs.update({"delta": delta})
            return kernel(*args, **kwargs)
        return wrapper

    def __init__(self, x, y, cov_f=None, R=0):
        super().__init__()
        self.x = x
        self.y = y
        self.N = len(self.x)
        self.R = R  # noise/regularization term added to the covariance diagonal

        self.sigma = []
        self.mean = []
        self.cov_f = cov_f if cov_f else self.kernel_bell_shape
        self.setup_sigma()

    @classmethod
    def calculate_sigma(cls, x, cov_f, R=0):
        # covariance matrix of the training inputs; np.ones supplies the unit
        # diagonal, since cov_f(x, x) == 1 for both kernels above
        N = len(x)
        sigma = np.ones((N, N))
        for i in range(N):
            for j in range(i+1, N):
                cov = cov_f(x[i], x[j])
                sigma[i][j] = cov
                sigma[j][i] = cov

        sigma = sigma + R * np.eye(N)
        return sigma

    def setup_sigma(self):
        self.sigma = self.calculate_sigma(self.x, self.cov_f, self.R)

    def predict(self, x):
        # posterior mean and variance at a single test point x
        cov = 1 + self.R * self.cov_f(x, x)
        sigma_1_2 = np.zeros((self.N, 1))
        for i in range(self.N):
            # covariances between the test point and each training point
            sigma_1_2[i] = self.cov_f(self.x[i], x)

        # SIGMA_1_2 * SIGMA_1_1.I * (Y.T -M)
        # M IS ZERO
        m_expt = (sigma_1_2.T * np.mat(self.sigma).I) * np.mat(self.y).T
        # sigma_expt = cov - (sigma_1_2.T * np.mat(self.sigma).I) * sigma_1_2
        sigma_expt = cov + self.R - (sigma_1_2.T * np.mat(self.sigma).I) * sigma_1_2
        return m_expt, sigma_expt

    @staticmethod
    def get_probability(sigma, y, R):
        # density of y under a zero-mean Gaussian with covariance matrix sigma
        multiplier = np.power(np.linalg.det(2 * np.pi * sigma), -0.5)
        return multiplier * np.exp(
            (-0.5) * (np.mat(y) * np.dot(np.mat(sigma).I, y).T))

    def optimize(self, R_list, B_list):
        # grid search over the noise term R and the kernel width delta,
        # scoring each combination by its marginal likelihood

        def cov_f_proxy(delta, f):
            def wrapper(*args, **kwargs):
                kwargs.update({"delta": delta})
                return f(*args, **kwargs)
            return wrapper

        best = (0, 0, 0)
        history = []
        for r in R_list:
            best_beta = (0, 0)
            for b in B_list:
                sigma = self.calculate_sigma(self.x, cov_f_proxy(b, self.cov_f), r)
                marginal = b * float(self.get_probability(sigma, self.y, r))
                if marginal > best_beta[0]:
                    best_beta = (marginal, b)
            history.append((best_beta[0], r, best_beta[1]))
        return sorted(history)[-1], np.mat(history)
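For reference, these are the standard zero-mean GP predictive equations that the predict() method above is modelled on; here K is the noise-free kernel matrix (calculate_sigma stores K + R*I in self.sigma), k_* is the vector sigma_1_2, and x_* is the test point:

\mu_* = k_*^\top (K + R I)^{-1} y
\sigma_*^2 = k(x_*, x_*) + R - k_*^\top (K + R I)^{-1} k_*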

Now you can try it like this:

# setting up a GP
x = np.array([-2, -1, 0, 3.5, 4])
y = np.array([4.1, 0.9, 2, 12.3, 15.8])
gaus = GP(x, y)

x_guess = np.linspace(-5, 16, 400)
y_pred = np.vectorize(gaus.predict)(x_guess)

plt.scatter(x, y, c="black")
plt.plot(x_guess, y_pred[0], c="b")
plt.plot(x_guess, y_pred[0] - np.sqrt(y_pred[1]) * 3, "r:")
plt.plot(x_guess, y_pred[0] + np.sqrt(y_pred[1]) * 3, "r:")

(figure: the GP posterior mean with a ±3σ band around the five training points)
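The optimize method defined above is not exercised in the original answer; here is a minimal usage sketch, assuming hypothetical search grids for the noise term R and the kernel width delta (B_list), and reusing the gaus object fitted above:

# hypothetical search grids; optimize() returns the best (marginal, R, delta)
# triple together with the full search history as a matrix
R_list = np.linspace(0.01, 1.0, 20)
B_list = np.linspace(0.1, 5.0, 20)
best, history = gaus.optimize(R_list, B_list)
print(best)  # (best marginal likelihood value, best R, best delta)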

The effect of the regularization parameter

def create_case(kernel, R=0):
    x = np.array([-2, -1, 0, 3.5, 4])
    y = np.array([4.1, 0.9, 2, 12.3, 15.8])
    gaus = GP(x, y, kernel, R=R)

    x_guess = np.linspace(-4, 6, 400)
    y_pred = np.vectorize(gaus.predict)(x_guess)

    plt.scatter(x, y, c="black")
    plt.plot(x_guess, y_pred[0], c="b")
    plt.plot(x_guess, y_pred[0] - np.sqrt(y_pred[1]) * 3, "r:")
    plt.plot(x_guess, y_pred[0] + np.sqrt(y_pred[1]) * 3, "r:")

plt.figure(figsize=(16, 16))
for i, r in enumerate([0.0001, 0.03, 0.09, 0.8, 1.5, 5.0]):
    plt.subplot("32{}".format(i+1))
    plt.title("kernel={}, delta={}, beta={}".format("bell shape", 1, r))
    create_case(
        GP.generate_kernel(GP.kernel_bell_shape, delta=1), R=r)

(figure: six subplots showing how the fit changes as R increases with the bell-shaped kernel)

plt.figure(figsize=(16, 16))
for i, d in enumerate([0.05, 0.5, 1, 3.2, 5.0, 7.0]):
    plt.subplot("32{}".format(i+1))
    plt.title("kernel={}, delta={}, beta={}".format("kernel_laplacian", d, 1))
    create_case(
        GP.generate_kernel(GP.kernel_laplacian, delta=d), R=0)

(figure: six subplots showing how the fit changes as delta increases with the Laplacian kernel)
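For reference (this is not part of the original answer), a similar fit on the same toy data can be obtained with scikit-learn's current GaussianProcessRegressor API, which replaced the GaussianProcess class linked in the question; the kernel choice and noise level below are illustrative:

import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, WhiteKernel

x = np.array([-2, -1, 0, 3.5, 4]).reshape(-1, 1)  # sklearn expects a 2-D X
y = np.array([4.1, 0.9, 2, 12.3, 15.8])

# RBF is the "bell shape" kernel above; WhiteKernel plays the role of R
kernel = RBF(length_scale=1.0) + WhiteKernel(noise_level=0.1)
gpr = GaussianProcessRegressor(kernel=kernel).fit(x, y)

x_guess = np.linspace(-5, 16, 400).reshape(-1, 1)
mean, std = gpr.predict(x_guess, return_std=True)  # posterior mean and standard deviation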

Regarding python - Implementation of Gaussian process regression in Python, y(n_samples, n_targets), we found a similar question on Stack Overflow: https://stackoverflow.com/questions/35166684/
