python - 进行反向传播和梯度下降时,numpy 数组和矩阵出现问题

标签 python numpy neural-network backpropagation gradient-descent

我正在学习 Dan Shiffman 制作的这个视频教程系列,内容是创建一个小型“玩具”神经网络库。

该教程使用 JS 以及一个矩阵库(他在本系列前面的视频中讲解了如何编写这个库)。不过,我使用的是 numpy。

在这个视频中,他编写了梯度下降和反向传播的代码。但是,由于我使用的是 numpy,我的代码似乎无法正常运行。如果有人可以帮助我,我将非常感激!

这是我的代码:

import numpy as np
import math

def sigmoid(x):
    """Return the logistic function ``1 / (1 + e**-x)`` of ``x``.

    ``x`` may be a number or a numpy array; arrays are processed
    element-wise because the computation goes through ``np.exp``.
    """
    decay = np.exp(-x)
    return 1 / (1 + decay)

def dsigmoid(x):
    """Return the sigmoid's slope given a value the sigmoid already produced.

    ``x`` is expected to be a sigmoid *output* ``s`` (not the raw
    pre-activation); the slope at that point is ``s * (1 - s)``.
    """
    complement = 1 - x
    return x * complement

class NeuralNetwork:
    """Network with one hidden layer; both layers use the sigmoid activation.

    NOTE(review): ``train`` does not run to completion when ``inputs`` is a
    flat sequence -- see the shape notes inside the method.
    """

    def __init__(self, Inum, Hnum, Onum):
        """Store the layer sizes and draw random initial parameters.

        Args:
            Inum: Number of input nodes.
            Hnum: Number of hidden nodes.
            Onum: Number of output nodes.
        """
        self.Inum = Inum
        self.Hnum = Hnum
        self.Onum = Onum

        # Learning rate: the factor the gradients are multiplied by in train()
        self.lr = 0.1

        # np.random.rand yields values in [0, 1); "* 2 - 1" maps them to [-1, 1).
        # Shapes are (Hnum, Inum) and (Onum, Hnum).
        self.weightsIH = np.random.rand(self.Hnum, self.Inum) * 2 - 1
        self.weightsHO = np.random.rand(self.Onum, self.Hnum) * 2 - 1

        # One bias per hidden node and per output node, also in [-1, 1)
        self.biasH = np.random.rand(self.Hnum) * 2 - 1
        self.biasO = np.random.rand(self.Onum) * 2 - 1

    def feedForward(self, inputs):
        """Return the output-layer activations for ``inputs``.

        Args:
            inputs: Input values; converted with ``np.array`` before use.
        """
        # Hidden layer: weighted sum + bias, squashed by the sigmoid
        hidden = np.dot(self.weightsIH, np.array(inputs))
        hidden = hidden + self.biasH
        hidden = sigmoid(hidden)

        # Output layer: same three steps applied to the hidden activations
        outputs = np.dot(self.weightsHO, hidden)
        outputs = outputs + self.biasO
        outputs = sigmoid(outputs)

        return outputs

    def train(self, inputs, targets):
        """Run one forward pass and attempt one gradient-descent update.

        Args:
            inputs: Input values for one sample.
            targets: Expected output(s) for that sample.

        NOTE(review): with a flat two-element ``inputs`` on a 2-2-1 network
        the final ``np.dot`` raises ``ValueError: shapes (2,2) and (1,2) not
        aligned``, so the input-to-hidden update is never reached.
        """
        # Feed Forward
        hidden = np.dot(self.weightsIH, np.array(inputs))
        hidden = hidden + self.biasH
        hidden = sigmoid(hidden)

        outputs = np.dot(self.weightsHO, hidden)
        outputs = outputs + self.biasO
        outputs = sigmoid(outputs)

        # Calculate errors
        errorsO = np.array(targets) - outputs

        # Calculate gradients with derivative of sigmoid
        # TODO: Use numpy for gradient calculation (if possible)
        gradients = dsigmoid(outputs)
        gradients = gradients * errorsO
        gradients = gradients * self.lr

        # Calculate deltas
        # [np.newaxis] prepends an axis: a 1-D `hidden` becomes a row (1, n),
        # not a column. NOTE(review): the name suggests a transpose was intended.
        hiddenT = hidden[np.newaxis]
        weightsHODeltas = np.dot(gradients, hiddenT)

        # Adjust weights by deltas
        self.weightsHO = self.weightsHO + weightsHODeltas

        # Adjust bias by gradients
        self.biasO = self.biasO + gradients


        # `*` on numpy arrays is element-wise with broadcasting, not a matrix
        # product. NOTE(review): a matrix product (np.dot) looks intended here.
        errorsH = np.transpose(self.weightsHO) * errorsO


        # Calculate gradients with derivative of sigmoid
        # TODO: Use numpy for gradient calculation (if possible)
        gradientsH = dsigmoid(hidden)
        gradientsH = gradientsH * errorsH
        gradientsH = gradientsH * self.lr

        # Calculate deltas
        # As above, [np.newaxis] makes a row (1, n) out of a flat input list
        inputsT = np.array(inputs)[np.newaxis]
        weightsIHDeltas = np.dot(gradientsH, inputsT)

        # Adjust weights by deltas
        self.weightsIH = self.weightsIH + weightsIHDeltas

        # Adjust bias by gradients
        # NOTE(review): this adds the hidden-layer gradients to biasO; biasH is
        # never updated in this method -- presumably biasH was meant.
        self.biasO = self.biasO + gradientsH

这是我正在运行的代码:

from NN import NeuralNetwork
from random import shuffle

def main():
    """Train a 2-2-1 network on the XOR truth table, then feed each sample through it."""
    nn = NeuralNetwork(2, 2, 1)

    # XOR truth table as (inputs, expected output) pairs
    truth_table = (
        ([0, 0], 0),
        ([0, 1], 1),
        ([1, 0], 1),
        ([1, 1], 0),
    )
    dataset = [
        {"inputs": pair, "outputs": label} for pair, label in truth_table
    ]

    epochs = 100
    for _ in range(epochs):
        for data in dataset:
            print(data)
            nn.train(data["inputs"], data["outputs"])
        # Reorder in place so the next pass visits the samples differently
        shuffle(dataset)

    for data in dataset:
        print(data)
        # The returned prediction is neither stored nor printed
        nn.feedForward(data["inputs"])

if __name__ == '__main__':
    main()

这是我收到的错误消息:

Traceback (most recent call last):
File "c:\Users\ghost\Desktop\Notes\Programming\Machine Learning\NN From Scratch\Yet Another Neural Network Library\main.py", line 38, in <module>
main()
File "c:\Users\ghost\Desktop\Notes\Programming\Machine Learning\NN From Scratch\Yet Another Neural Network Library\main.py", line 30, in main
nn.train(data["inputs"], data["outputs"])
File "c:\Users\ghost\Desktop\Notes\Programming\Machine Learning\NN From Scratch\Yet Another Neural Network Library\NN.py", line 77, in train
weightsIHDeltas = np.dot(gradientsH, inputsT)
ValueError: shapes (2,2) and (1,2) not aligned: 2 (dim 1) != 1 (dim 0)

最佳答案

问题是您对 numpy 数组的维度感到困惑。在 numpy 中编写 ML 代码时,处理列向量会更容易,因为这是在纸上推导方程时所做的事情。另外,您的代码中存在逻辑错误。下面是更正后的代码:

import numpy as np
import math
from random import shuffle

def sigmoid(x):
    """Logistic activation ``1 / (1 + e**-x)``.

    Works on plain numbers and, element-wise, on numpy arrays, since the
    exponential is taken with ``np.exp``.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def dsigmoid(x):
    """Derivative of the sigmoid, written in terms of the sigmoid's output.

    Pass in a value ``s`` that already went through the sigmoid; the
    result is ``s * (1 - s)``.
    """
    return (1 - x) * x

class NeuralNetwork:
    """Fully connected network with one hidden layer and sigmoid activations.

    Training is stochastic gradient descent on the squared error, one sample
    per ``train`` call. Biases, activations and error terms are all kept as
    1-D arrays; weight updates are built with ``np.outer`` so that their
    shape always equals the shape of the weight matrix they are added to.
    """

    def __init__(self, Inum, Hnum, Onum):
        """Store the layer sizes and draw random parameters in [-1, 1).

        Args:
            Inum: Number of input nodes.
            Hnum: Number of hidden nodes.
            Onum: Number of output nodes.
        """
        self.Inum = Inum
        self.Hnum = Hnum
        self.Onum = Onum

        # Step size applied to every weight and bias update.
        self.lr = 0.1

        # weightsIH[h, i] links input i to hidden node h, shape (Hnum, Inum);
        # weightsHO[o, h] links hidden node h to output o, shape (Onum, Hnum).
        self.weightsIH = np.random.rand(self.Hnum, self.Inum) * 2 - 1
        self.weightsHO = np.random.rand(self.Onum, self.Hnum) * 2 - 1

        # One bias per hidden node and per output node.
        self.biasH = np.random.rand(self.Hnum) * 2 - 1
        self.biasO = np.random.rand(self.Onum) * 2 - 1

    def _forward(self, inputs):
        """Run one forward pass; return ``(inputs, hidden, outputs)`` as 1-D arrays."""
        inputs = np.asarray(inputs, dtype=float).ravel()
        hidden = sigmoid(np.dot(self.weightsIH, inputs) + self.biasH)
        outputs = sigmoid(np.dot(self.weightsHO, hidden) + self.biasO)
        return inputs, hidden, outputs

    def feedForward(self, inputs):
        """Return the network's prediction for one sample.

        Args:
            inputs: Sequence or array holding ``Inum`` values.

        Returns:
            1-D array of ``Onum`` sigmoid activations.

        Raises:
            ValueError: If ``inputs`` does not hold ``Inum`` values (raised
                by ``np.dot``).
        """
        _, _, outputs = self._forward(inputs)
        return outputs

    def train(self, inputs, targets):
        """Perform one gradient-descent step on a single sample.

        Args:
            inputs: Sequence or array holding ``Inum`` values.
            targets: Expected output: ``Onum`` values, or a single value
                that is broadcast to every output node.

        Raises:
            ValueError: If ``inputs`` or ``targets`` has an incompatible
                length (raised by numpy).
        """
        inputs, hidden, outputs = self._forward(inputs)

        # Output layer: error scaled by the sigmoid slope at each output.
        errorsO = np.asarray(targets, dtype=float).ravel() - outputs
        deltasO = dsigmoid(outputs) * errorsO

        # Hidden layer: push the output deltas back through the weights that
        # produced `outputs`, i.e. before those weights are modified below.
        # The learning rate is deliberately not part of the propagated error.
        errorsH = np.dot(self.weightsHO.T, deltasO)
        deltasH = dsigmoid(hidden) * errorsH

        # outer(delta, activation)[r, c] = delta[r] * activation[c], which is
        # exactly the (rows = receiving layer, cols = sending layer) layout of
        # the weight matrices. Reshaping the transposed product instead would
        # scramble the entries whenever both dimensions are larger than 1.
        self.weightsHO = self.weightsHO + self.lr * np.outer(deltasO, hidden)
        self.biasO = self.biasO + self.lr * deltasO

        self.weightsIH = self.weightsIH + self.lr * np.outer(deltasH, inputs)
        self.biasH = self.biasH + self.lr * deltasH

def main(epochs=100):
    """Train a 2-2-1 network on XOR and print its prediction for every sample.

    Args:
        epochs: Number of passes over the four XOR samples. The default
            keeps the original run length; it is usually far too short for
            the network to actually learn XOR at this learning rate, so
            pass a much larger value to see meaningful predictions.
    """
    nn = NeuralNetwork(2, 2, 1)

    dataset = [
        {
            "inputs": [0, 0],
            "outputs": 0
        },
        {
            "inputs": [0, 1],
            "outputs": 1
        },
        {
            "inputs": [1, 0],
            "outputs": 1
        },
        {
            "inputs": [1, 1],
            "outputs": 0
        }
    ]

    for _ in range(epochs):
        for data in dataset:
            nn.train(data["inputs"], data["outputs"])
        # Present the samples in a different order on the next pass
        shuffle(dataset)

    for data in dataset:
        # Show the prediction next to the sample instead of discarding it
        prediction = nn.feedForward(data["inputs"])
        print(data, "->", prediction)

if __name__ == '__main__':
    main()

P.S.:此外,您还可以通过避免重复代码来提高代码质量——例如,前馈部分的计算在 feedForward 和 train 中各写了一遍。

关于python - 进行反向传播和梯度下降时,numpy 数组和矩阵出现问题,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/51949122/

相关文章:

python - 如何强制Python忽略re.findall()语句中的re.DOTALL?

python - 使用Python在Zapier中获取图像

python - NumPy 数组元素的自定义排列

python - 如何将列表列表转换为字节?

neural-network - 在 pytorch 中反向传播时自动更新自定义层参数

machine-learning - 如何仅评估某些类别的 Keras 模型精度

python - read\xHH 在 Python 中作为原始二进制文件从文件中转义

python - Eager Execution 函数的输入不能是 Keras 符号张量

python - 将列表转换为 Numpy ndarray

matlab - 用神经网络确定函数参数