python - 进行反向传播和梯度下降时，numpy 数组和矩阵出现问题

我正在学习 Dan Shiffman 制作的这个视频教程系列，内容是创建一个小型“玩具”神经网络库。

该教程使用 JS，以及他在本系列前面教大家编写的一个矩阵库。不过，我使用的是 numpy。

在这个视频中，他编写了梯度下降和反向传播的程序。但是，因为我使用的是 numpy，我的代码似乎无法正常工作。如果有人可以帮助我，我将非常感激！

这是我的代码:

import numpy as np
import math

def sigmoid(x):
    """Return the logistic function 1 / (1 + e^(-x)) of ``x``.

    ``x`` may be a scalar or a numpy array; arrays are processed element-wise.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def dsigmoid(x):
    """Return ``x * (1 - x)``.

    This is the sigmoid derivative expressed in terms of the sigmoid's
    *output*, so ``x`` should already be an activated value.
    """
    complement = 1 - x
    return x * complement

class NeuralNetwork:
    """Network with one hidden layer and sigmoid activations, trained one sample at a time."""

    def __init__(self, Inum, Hnum, Onum):
        """Store the layer sizes and create random weights and biases.

        Inum, Hnum and Onum are the number of input, hidden and output units.
        """
        self.Inum = Inum
        self.Hnum = Hnum
        self.Onum = Onum

        # Learning rate applied to every update in train().
        self.lr = 0.1

        # rand() samples from [0, 1); "* 2 - 1" rescales that to [-1, 1).
        # Weight matrices are laid out as (units in next layer, units in previous layer).
        self.weightsIH = np.random.rand(self.Hnum, self.Inum) * 2 - 1
        self.weightsHO = np.random.rand(self.Onum, self.Hnum) * 2 - 1

        self.biasH = np.random.rand(self.Hnum) * 2 - 1
        self.biasO = np.random.rand(self.Onum) * 2 - 1

    def feedForward(self, inputs):
        """Run ``inputs`` through both layers and return the output activations."""
        hidden = np.dot(self.weightsIH, np.array(inputs))
        hidden = hidden + self.biasH
        hidden = sigmoid(hidden)

        outputs = np.dot(self.weightsHO, hidden)
        outputs = outputs + self.biasO
        outputs = sigmoid(outputs)

        return outputs

    def train(self, inputs, targets):
        """Perform one gradient-descent step for a single (inputs, targets) pair."""
        # Feed Forward
        hidden = np.dot(self.weightsIH, np.array(inputs))
        hidden = hidden + self.biasH
        hidden = sigmoid(hidden)

        outputs = np.dot(self.weightsHO, hidden)
        outputs = outputs + self.biasO
        outputs = sigmoid(outputs)

        # Calculate errors
        errorsO = np.array(targets) - outputs

        # Calculate gradients with derivative of sigmoid
        # TODO: Use numpy for gradient calculation (if possible)
        gradients = dsigmoid(outputs)
        gradients = gradients * errorsO
        gradients = gradients * self.lr

        # Calculate deltas
        # NOTE(review): indexing with np.newaxis prepends an axis; it does not
        # transpose, despite the "T" suffix in the name.
        hiddenT = hidden[np.newaxis]
        weightsHODeltas = np.dot(gradients, hiddenT)

        # Adjust weights by deltas
        self.weightsHO = self.weightsHO + weightsHODeltas

        # Adjust bias by gradients
        self.biasO = self.biasO + gradients


        # NOTE(review): "*" is an element-wise (broadcast) product, not a matrix
        # product, and weightsHO has already been updated at this point.
        errorsH = np.transpose(self.weightsHO) * errorsO


        # Calculate gradients with derivative of sigmoid
        # TODO: Use numpy for gradient calculation (if possible)
        gradientsH = dsigmoid(hidden)
        gradientsH = gradientsH * errorsH
        gradientsH = gradientsH * self.lr

        # Calculate deltas
        inputsT = np.array(inputs)[np.newaxis]
        # NOTE(review): the reported ValueError ("shapes (2,2) and (1,2) not
        # aligned") is raised by this call.
        weightsIHDeltas = np.dot(gradientsH, inputsT)

        # Adjust weights by deltas
        self.weightsIH = self.weightsIH + weightsIHDeltas

        # Adjust bias by gradients
        # NOTE(review): this assigns biasO from the hidden-layer gradients;
        # biasH is never updated anywhere in train().
        self.biasO = self.biasO + gradientsH

这是我正在运行的代码:

from NN import NeuralNetwork
from random import shuffle

def main():
    """Train a 2-2-1 network on the XOR truth table, then feed each sample through it."""
    network = NeuralNetwork(2, 2, 1)

    # XOR truth table: every record pairs an input vector with its target value.
    samples = [
        {"inputs": [0, 0], "outputs": 0},
        {"inputs": [0, 1], "outputs": 1},
        {"inputs": [1, 0], "outputs": 1},
        {"inputs": [1, 1], "outputs": 0},
    ]

    epochs = 100
    for _ in range(epochs):
        for sample in samples:
            print(sample)
            network.train(sample["inputs"], sample["outputs"])
        # Reorder the samples before the next pass over the data.
        shuffle(samples)

    for sample in samples:
        print(sample)
        # The value returned by feedForward is not used here.
        network.feedForward(sample["inputs"])


if __name__ == "__main__":
    main()

这是我收到的错误消息:

Traceback (most recent call last):
File "c:\Users\ghost\Desktop\Notes\Programming\Machine Learning\NN From Scratch\Yet Another Neural Network Library\main.py", line 38, in <module>
main()
File "c:\Users\ghost\Desktop\Notes\Programming\Machine Learning\NN From Scratch\Yet Another Neural Network Library\main.py", line 30, in main
nn.train(data["inputs"], data["outputs"])
File "c:\Users\ghost\Desktop\Notes\Programming\Machine Learning\NN From Scratch\Yet Another Neural Network Library\NN.py", line 77, in train
weightsIHDeltas = np.dot(gradientsH, inputsT)
ValueError: shapes (2,2) and (1,2) not aligned: 2 (dim 1) != 1 (dim 0)

最佳答案

问题是您对 numpy 数组的维度感到困惑。在 numpy 中编写 ML 代码时，处理列向量会更容易，因为这是在纸上推导方程时所做的事情。另外，您的代码中存在逻辑错误。下面是更正后的代码:

import numpy as np
import math
from random import shuffle

def sigmoid(x):
    """Logistic activation: map ``x`` (scalar or numpy array) into (0, 1).

    Arrays are handled element-wise.
    """
    decay = np.exp(-x)
    return 1 / (1 + decay)

def dsigmoid(x):
    """Sigmoid derivative written in terms of the sigmoid's output.

    ``x`` is expected to be an already-activated value ``y = sigmoid(z)``;
    the result is ``y * (1 - y)``.
    """
    one_minus = 1 - x
    return x * one_minus

class NeuralNetwork:
    """Fully-connected network with one hidden layer and sigmoid activations.

    Weight matrices are stored as ``(fan_out, fan_in)``, so a layer computes
    ``sigmoid(W @ x + b)`` on 1-D vectors. Training is plain stochastic
    gradient descent, one sample per call to :meth:`train`.
    """

    def __init__(self, Inum, Hnum, Onum):
        """Create a network with weights and biases drawn uniformly from [-1, 1).

        Args:
            Inum: number of input units.
            Hnum: number of hidden units.
            Onum: number of output units.
        """
        self.Inum = Inum
        self.Hnum = Hnum
        self.Onum = Onum

        # Step size used for every parameter update in train().
        self.lr = 0.1

        # rand() samples from [0, 1); "* 2 - 1" rescales that to [-1, 1).
        self.weightsIH = np.random.rand(self.Hnum, self.Inum) * 2 - 1
        self.weightsHO = np.random.rand(self.Onum, self.Hnum) * 2 - 1

        self.biasH = np.random.rand(self.Hnum) * 2 - 1
        self.biasO = np.random.rand(self.Onum) * 2 - 1

    def _forward(self, inputs):
        """Return ``(x, hidden, outputs)`` for one sample.

        ``x`` is ``inputs`` as a float array; ``hidden`` and ``outputs`` are
        the activations of the hidden and output layers.
        """
        x = np.asarray(inputs, dtype=float)
        hidden = sigmoid(np.dot(self.weightsIH, x) + self.biasH)
        outputs = sigmoid(np.dot(self.weightsHO, hidden) + self.biasO)
        return x, hidden, outputs

    def feedForward(self, inputs):
        """Return the output-layer activations for ``inputs``."""
        return self._forward(inputs)[2]

    def train(self, inputs, targets):
        """Update all weights and biases with one gradient-descent step.

        Args:
            inputs: sequence of ``Inum`` input values.
            targets: desired output, either a sequence of ``Onum`` values or a
                scalar (broadcast against the output vector).
        """
        x, hidden, outputs = self._forward(inputs)

        # Output layer: error scaled by the sigmoid slope. dsigmoid expects
        # the already-activated value, which is what `outputs` holds.
        errorsO = np.asarray(targets, dtype=float) - outputs
        deltaO = dsigmoid(outputs) * errorsO

        # Hidden layer: push the output delta back through the weights that
        # produced this forward pass, i.e. before they are modified below.
        # The learning rate is deliberately not part of the propagated signal;
        # it only scales the parameter updates.
        errorsH = np.dot(self.weightsHO.T, deltaO)
        deltaH = dsigmoid(hidden) * errorsH

        # np.outer(delta, activation) is laid out as (fan_out, fan_in), which
        # matches the weight matrices directly, so no reshape is needed.
        self.weightsHO = self.weightsHO + self.lr * np.outer(deltaO, hidden)
        self.biasO = self.biasO + self.lr * deltaO

        self.weightsIH = self.weightsIH + self.lr * np.outer(deltaH, x)
        self.biasH = self.biasH + self.lr * deltaH

def main():
    """Train a 2-2-1 network on the XOR truth table and print its predictions."""
    nn = NeuralNetwork(2, 2, 1)

    # XOR truth table: every record pairs an input vector with its target value.
    dataset = [
        {
            "inputs": [0, 0],
            "outputs": 0
        },
        {
            "inputs": [0, 1],
            "outputs": 1
        },
        {
            "inputs": [1, 0],
            "outputs": 1
        },
        {
            "inputs": [1, 1],
            "outputs": 0
        }
    ]

    for _ in range(100):
        for data in dataset:
            nn.train(data["inputs"], data["outputs"])
        # Present the samples in a different order on the next pass.
        shuffle(dataset)

    for data in dataset:
        # Print the network's output next to the sample so the result is visible.
        print(data, nn.feedForward(data["inputs"]))


if __name__ == '__main__':
    main()

P.S.：此外，您可以通过避免重复代码（例如 train 中重复的前馈部分）来提高代码质量。

关于python - 进行反向传播和梯度下降时，numpy 数组和矩阵出现问题，我们在Stack Overflow上找到一个类似的问题： https://stackoverflow.com/questions/51949122/

python - 进行反向传播和梯度下降时，numpy 数组和矩阵出现问题

上一篇：python - Django - 重写保存方法

下一篇：python - 为什么这不打印所有内容而只给出文件中的总字数？