c++ - 我的神经网络只学习一些数据集

我创建了以下应该基于反向传播进行学习的神经网络。

我通过大量阅读和大量不同的教程将它组合在一起。

为了测试，我给了它一个 XOR 问题。每组数据有 2 个输入和 2 个输出。两个输入均为 1 或 0，两个输出分别指示应输出 0(第一个输出)还是应输出 1(第二个输出)。

当我给它以下数据时，得到的结果如下:

___________________________________________________________________________
| Input 1 | Input 2 | Expected 1 | Expected 2 | NN Output 1 | NN Output 2 |
|-------------------------------------------------------------------------|
|    0    |    1    |     1      |     0      |    0.49     |    0.50     |
|    1    |    0    |     1      |     0      |    0.98     |    0.01     |
|    1    |    1    |     0      |     1      |    0.01     |    0.98     |
|    0    |    0    |     0      |     1      |    0.49     |    0.50     |
---------------------------------------------------------------------------

从上表应该可以清楚地看出，对于其中两组输入，它基本奏效；考虑到存在误差范围，结果与期望值相差不超过 0.01 已经非常好了。

但对于另外两组输入，结果还相差甚远。诚然，经过阶跃函数处理后会得到正确的结果，但网络的输出基本上等于在说两种可能各占 50%。

以上是使用 100,000 个训练周期(epoch)和 0.03 的学习率得到的结果，表中的数据就是实际的训练数据。

如果我将学习率提高到 0.9，结果会有所不同，但同样让我感到疑惑:

___________________________________________________________________________
| Input 1 | Input 2 | Expected 1 | Expected 2 | NN Output 1 | NN Output 2 |
|-------------------------------------------------------------------------|
|    0    |    1    |     1      |     0      |    0.99     |    0.00     |
|    1    |    0    |     1      |     0      |    0.99     |    0.00     |
|    1    |    1    |     0      |     1      |    0.49     |    0.99     |
|    0    |    0    |     0      |     1      |    0.00     |    0.99     |
---------------------------------------------------------------------------

好多了；但是 1,1 输入仍然有奇怪的输出。

我的代码很短，如下所示。这是完整的代码:

#include <array>
#include <cmath>
#include <cstddef>
#include <iostream>
#include <random>
#include <vector>

/// Callable source of uniformly distributed doubles.
///
/// A std::random_device supplies the seed for a 64-bit Mersenne Twister, and
/// every call maps the engine's output through a
/// std::uniform_real_distribution<double> built from the constructor bounds.
class RandomGenerator
{
    using Engine       = std::mt19937_64;
    using Distribution = std::uniform_real_distribution<double>;

public:
    /// @param min lower bound handed to the uniform real distribution
    /// @param max upper bound handed to the uniform real distribution
    RandomGenerator(const double min, const double max)
        : m_ran(), m_twister(m_ran()), m_distrib(min, max)
    {
    }

    /// Draws and returns the next sample; advances the engine state.
    double operator()(void)
    {
        const double sample = m_distrib(m_twister);
        return sample;
    }

private:
    // Members are initialised in declaration order, so the device must stay
    // ahead of the engine that is seeded from it.
    std::random_device m_ran;
    Engine             m_twister;
    Distribution       m_distrib;
};

/// Shared generator instance constructed with the bounds -2 and 2.
RandomGenerator randGen(-2, 2);

/// Logistic function 1 / (1 + e^(-x)); the result lies between 0 and 1.
///
/// @param x pre-activation value
/// @return the logistic of x
double sigmoid(const double x)
{
    const double decay       = std::exp(-x);
    const double denominator = 1.0 + decay;
    return 1.0 / denominator;
}

/// Softplus activation, ln(1 + e^x), evaluated in a numerically stable way.
///
/// The textbook form std::log(1.0 + std::exp(x)) overflows to infinity once
/// exp(x) exceeds the range of double (x above roughly 709) and rounds to
/// exactly 0 for moderately negative x. The identity
///     ln(1 + e^x) = max(x, 0) + ln(1 + e^(-|x|))
/// keeps the exponent non-positive, and std::log1p keeps precision when the
/// exponential term is tiny.
///
/// @param x pre-activation value
/// @return softplus of x; NaN input propagates as NaN
double softplus(const double x)
{
    const double positivePart = x > 0.0 ? x : 0.0;
    const double magnitude    = std::fabs(x);
    return positivePart + std::log1p(std::exp(-magnitude));
}

/// Heaviside-style step: 1 for strictly positive x, 0 for everything else.
///
/// @param x pre-activation value
/// @return 1.0 when x > 0, otherwise 0.0
double step(const double x)
{
    if(x > 0)
        return 1.0;
    return 0.0;
}

/// A single neuron holding NumInputs (input value, weight) pairs plus a bias
/// weight.
///
/// All inputs, weights and the bias weight start at 0.0, so every accessor and
/// GetOutput() are well defined even before any setter has been called.
///
/// @tparam NumInputs          number of inputs feeding this neuron
/// @tparam ActivationFunction function applied to the weighted sum
template<int NumInputs, double(*ActivationFunction)(const double)>
class Neuron
{
public:

    /// Stores the value currently presented on input `index` (not bounds-checked).
    void SetInput(const std::size_t index, const double value)
    {
        m_inputsAndWeights[index].value = value;
    }

    /// Returns the value last stored on input `index` (not bounds-checked).
    double GetInput(const std::size_t index) const { return m_inputsAndWeights[index].value; }

    /// Sets the weight applied to input `index` (not bounds-checked).
    void SetWeight(const std::size_t index, const double weight)
    {
        m_inputsAndWeights[index].weight = weight;
    }

    /// Returns the weight applied to input `index` (not bounds-checked).
    double GetWeight(const std::size_t index) const { return m_inputsAndWeights[index].weight; }

    /// Sets the weight of the bias term.
    void SetBiasWeight(const double weight) { m_biasWeight = weight; }

    /// Returns the weight of the bias term.
    double GetBiasWeight() const { return m_biasWeight; }

    /// Computes ActivationFunction(sum(value * weight) + bias weight).
    double GetOutput() const
    {
        // The bias behaves like an extra input fixed at 1.0, so it enters the
        // sum as the bias weight itself.
        double weightedSum = m_biasWeight;
        for(const auto& pair : m_inputsAndWeights)
            weightedSum += pair.value * pair.weight;
        return ActivationFunction(weightedSum);
    }

private:
    /// One input line: the value presented on it and the weight applied to it.
    struct DataPair
    {
        double value  = 0.0;
        double weight = 0.0;
    };

    std::array<DataPair,NumInputs> m_inputsAndWeights{};
    double m_biasWeight = 0.0;
};

template<std::size_t NumInputs, std::size_t NumOutputs>
class NeuralNetwork
{
public:
    static constexpr NumHidden() { return (NumInputs+NumOutputs) / 2; }

    SetInputs(std::array<double,NumInputs> inputData)
    {
        for(auto& i : m_hiddenNeurons)
        {
            for(auto index = 0; index < inputData.size(); ++index)
                i.SetInput(index,inputData[index]);
        }
    }

    std::array<double,NumOutputs> GetOutputs() const
    {
        std::array<double,NumOutputs> outputs;
        for(auto i = 0; i < NumOutputs; ++i)
        {
            outputs[i] = m_outputNeurons[i].GetOutput();
        }
        return outputs;
    }

    void PassForward(std::array<double,NumInputs> inputData)
    {
        SetInputs(inputData);
        for(auto i = 0; i < NumHidden(); ++i)
        {
            for(auto& o : m_outputNeurons)
            {
                o.SetInput(i,m_hiddenNeurons[i].GetOutput());
            }
        }
    }

    void Train(std::vector<std::array<double,NumInputs>> trainingData,
               std::vector<std::array<double,NumOutputs>> targetData,
               double learningRate, std::size_t numEpochs)
    {
        for(auto& h : m_hiddenNeurons)
        {
            for(auto i = 0; i < NumInputs; ++i)
                h.SetWeight(i,randGen());
            h.SetBiasWeight(randGen());
        }
        for(auto& o : m_outputNeurons)
        {
            for(auto h = 0; h < NumHidden(); ++h)
                o.SetWeight(h,randGen());
            o.SetBiasWeight(randGen());
        }

        for(std::size_t e = 0; e < numEpochs; ++e)
        {
            for(std::size_t dataIndex = 0; dataIndex < trainingData.size(); ++dataIndex)
            {
                PassForward(trainingData[dataIndex]);

                std::array<double,NumHidden()+1> deltaHidden;
                std::array<double,NumOutputs> deltaOutput;

                for(auto i = 0; i < NumOutputs; ++i)
                {
                    auto output = m_outputNeurons[i].GetOutput();
                    deltaOutput[i] = output * (1.0 - output) * (targetData[dataIndex][i] - output);
                }
                for(auto i = 0; i < NumHidden(); ++i)
                {
                    double error = 0;
                    for(auto j = 0; j < NumOutputs; ++j)
                    {
                        error += m_outputNeurons[j].GetWeight(i) * deltaOutput[j];
                    }
                    auto output = m_hiddenNeurons[i].GetOutput();
                    deltaHidden[i] = output * (1.0 - output) * error;
                }
                for(auto i = 0; i < NumOutputs; ++i)
                {
                    for(auto j = 0; j < NumHidden(); ++j)
                    {
                        auto currentWeight = m_outputNeurons[i].GetWeight(j);
                        m_outputNeurons[i].SetWeight(j,currentWeight + learningRate * deltaOutput[i] * m_hiddenNeurons[j].GetOutput());
                    }
                    auto currentWeight = m_outputNeurons[i].GetBiasWeight();
                    m_outputNeurons[i].SetBiasWeight(currentWeight + learningRate * deltaOutput[i] * (1.0*currentWeight));
                }
                for(auto i = 0; i < NumHidden(); ++i)
                {
                    for(auto j = 0; j < NumInputs; ++j)
                    {
                        auto currentWeight = m_hiddenNeurons[i].GetWeight(j);
                        m_hiddenNeurons[i].SetWeight(j,currentWeight + learningRate * deltaHidden[i] * m_hiddenNeurons[i].GetInput(j));
                    }
                    auto currentWeight = m_hiddenNeurons[i].GetBiasWeight();
                    m_hiddenNeurons[i].SetBiasWeight(currentWeight + learningRate * deltaHidden[i] * (1.0*currentWeight));
                }
            }
        }
    }

private:
    std::array<Neuron<NumInputs,sigmoid>,NumHidden()> m_hiddenNeurons;
    std::array<Neuron<NumHidden(),sigmoid>,NumOutputs> m_outputNeurons;
};

/// Trains a 2-input / 2-output network on the four XOR samples and prints the
/// two network outputs for each training input.
int main()
{
    NeuralNetwork<2,2> NN;

    // Each target pair lines up with the input pair at the same position.
    const std::vector<std::array<double,2>> trainingData = {{0.0,1.0},{1.0,0.0},{1.0,1.0},{0.0,0.0}};
    const std::vector<std::array<double,2>> targetData   = {{1.0,0.0},{1.0,0.0},{0.0,1.0},{0.0,1.0}};

    NN.Train(trainingData,targetData,0.03,100000);

    for(const auto& sample : trainingData)
    {
        NN.PassForward(sample);
        const auto outputs = NN.GetOutputs();
        for(std::size_t o = 0; o < outputs.size(); ++o)
        {
            // '\n' instead of std::endl: no need to flush after every line.
            std::cout << "Out " << o << ":\t" << outputs[o] << '\n';
        }
    }

    return 0;
}

最佳答案

几天前我也做过同样的事情。我可以告诉你，如果权重的初始值碰巧不理想，100 000 次反向传播迭代是不够的。不要完全随机地初始化权重: 权重过大时 sigmoid 很容易陷入饱和状态，另一方面，权重为 0 也无济于事。我把权重初始化在 ±(0.3, 0.7) 的范围内，收敛性得到了显著改善。

关于c++ - 我的神经网络只学习一些数据集，我们在Stack Overflow上找到一个类似的问题： https://stackoverflow.com/questions/47171170/

c++ - 我的神经网络只学习一些数据集

上一篇：c++ - (C++) 夹持圆内二维位置(使用中点圆算法绘制)

下一篇：C++ 重用调用同一函数的线程 vector