我创建了以下应该基于反向传播进行学习的神经网络。
我通过大量阅读和大量不同的教程将它组合在一起。
为了测试,我让它学习 XOR 问题。每个数据集有 2 个输入和 2 个输出。两个输入的取值都是 1 或 0,两个输出分别表示结果应为 0(第一个输出)还是应为 1(第二个输出)。
当我给它以下数据时发生了什么:
___________________________________________________________________________
| Input 1 | Input 2 | Expected 1 | Expected 2 | NN Output 1 | NN Output 2 |
|-------------------------------------------------------------------------|
| 0 | 1 | 1 | 0 | 0.49 | 0.50 |
| 1 | 0 | 1 | 0 | 0.98 | 0.01 |
| 1 | 1 | 0 | 1 | 0.01 | 0.98 |
| 0 | 0 | 0 | 1 | 0.49 | 0.50 |
---------------------------------------------------------------------------
从上表可以清楚地看到,对于其中两组输入,网络基本奏效:考虑到允许存在一定的误差范围,输出与期望值相差不超过 0.01 已经非常好了。
但对于另外两组输入,结果还差得很远。当然,经过阶跃函数之后仍会得到正确的结果,但网络实际上只是给出了 50/50 的判断。
这是在 100,000 个训练周期(epoch)、学习率为 0.03 的条件下得到的结果,上表所列的就是实际的训练数据。
如果我将学习率提高到 0.9,结果有所不同,但同样让我感到疑惑:
___________________________________________________________________________
| Input 1 | Input 2 | Expected 1 | Expected 2 | NN Output 1 | NN Output 2 |
|-------------------------------------------------------------------------|
| 0 | 1 | 1 | 0 | 0.99 | 0.00 |
| 1 | 0 | 1 | 0 | 0.99 | 0.00 |
| 1 | 1 | 0 | 1 | 0.49 | 0.99 |
| 0 | 0 | 0 | 1 | 0.00 | 0.99 |
---------------------------------------------------------------------------
好多了;但输入为 1,1 时仍然出现奇怪的输出。
我的代码很短,如下所示。这是完整的代码:
#include <array>
#include <cmath>
#include <cstddef>
#include <iostream>
#include <random>
#include <vector>
/**
 * Callable source of uniformly distributed doubles.
 *
 * A 64-bit Mersenne Twister engine is seeded once, at construction, with a
 * single value drawn from std::random_device; every call then samples a
 * std::uniform_real_distribution<double> built from (min, max).
 */
class RandomGenerator
{
public:
    /// @param min lower bound handed to the uniform distribution
    /// @param max upper bound handed to the uniform distribution
    RandomGenerator(const double min, const double max)
        : m_seedSource()
        , m_engine(m_seedSource())
        , m_range(min, max)
    {
    }

    /// Draws the next sample from the distribution.
    double operator()() { return m_range(m_engine); }

private:
    // Declaration order is significant: the seed source has to be constructed
    // before the engine that is seeded from it.
    std::random_device m_seedSource;
    std::mt19937_64 m_engine;
    std::uniform_real_distribution<double> m_range;
};

// File-wide generator constructed with the bounds -2 and 2.
RandomGenerator randGen(-2, 2);
/// Logistic function: maps any real x to 1 / (1 + e^-x).
double sigmoid(const double x)
{
    const double decay = std::exp(-x);
    const double denominator = 1.0 + decay;
    return 1.0 / denominator;
}
/**
 * Softplus activation, ln(1 + e^x), evaluated in a numerically stable form.
 *
 * The direct formula overflows to infinity once e^x exceeds the range of a
 * double, and rounds to exactly 0 for strongly negative x. Using the identity
 *     ln(1 + e^x) = max(x, 0) + ln(1 + e^-|x|)
 * keeps the exponential's argument non-positive, so it can never overflow,
 * and log1p keeps precision when e^-|x| is tiny.
 *
 * @param x pre-activation value
 * @return  ln(1 + e^x); NaN if x is NaN
 */
double softplus(const double x)
{
    const double positivePart = x > 0.0 ? x : 0.0;
    return positivePart + std::log1p(std::exp(-std::fabs(x)));
}
/// Heaviside-style threshold: 1 for strictly positive x, otherwise 0.
double step(const double x)
{
    if (x > 0)
    {
        return 1;
    }
    return 0;
}
/**
 * A single neuron with a fixed number of inputs and a compile-time
 * activation function.
 *
 * Each input slot stores both the most recently supplied value and the weight
 * applied to it; a separate bias weight is applied to a constant input of 1.
 * All values, weights and the bias start at zero, so a freshly constructed
 * neuron can be queried safely before anything has been set.
 *
 * @tparam NumInputs          number of input slots
 * @tparam ActivationFunction function applied to the weighted sum
 */
template<int NumInputs, double(*ActivationFunction)(const double)>
class Neuron
{
public:
    /// Stores the value presented on input slot `index` (not bounds-checked).
    void SetInput(const std::size_t index, const double value)
    {
        m_inputsAndWeights[index].value = value;
    }

    /// Returns the value last stored on input slot `index`.
    double GetInput(const std::size_t index) const { return m_inputsAndWeights[index].value; }

    /// Sets the weight applied to input slot `index` (not bounds-checked).
    void SetWeight(const std::size_t index, const double weight)
    {
        m_inputsAndWeights[index].weight = weight;
    }

    /// Returns the weight applied to input slot `index`.
    double GetWeight(const std::size_t index) const { return m_inputsAndWeights[index].weight; }

    /// Sets the weight applied to the constant bias input.
    void SetBiasWeight(const double weight) { m_biasWeight = weight; }

    /// Returns the weight applied to the constant bias input.
    double GetBiasWeight() const { return m_biasWeight; }

    /// Returns ActivationFunction(sum(value_i * weight_i) + bias weight),
    /// recomputed from the stored values on every call.
    double GetOutput() const
    {
        double weightedSum = 0.0;
        for (const auto& slot : m_inputsAndWeights)
        {
            weightedSum += slot.value * slot.weight;
        }
        weightedSum += 1.0 * m_biasWeight; // the bias input is the constant 1
        return ActivationFunction(weightedSum);
    }

private:
    /// One input slot: the current input value and its weight.
    struct DataPair
    {
        double value = 0.0;
        double weight = 0.0;
    };

    std::array<DataPair,NumInputs> m_inputsAndWeights{};
    double m_biasWeight = 0.0;
};
template<std::size_t NumInputs, std::size_t NumOutputs>
class NeuralNetwork
{
public:
static constexpr NumHidden() { return (NumInputs+NumOutputs) / 2; }
SetInputs(std::array<double,NumInputs> inputData)
{
for(auto& i : m_hiddenNeurons)
{
for(auto index = 0; index < inputData.size(); ++index)
i.SetInput(index,inputData[index]);
}
}
std::array<double,NumOutputs> GetOutputs() const
{
std::array<double,NumOutputs> outputs;
for(auto i = 0; i < NumOutputs; ++i)
{
outputs[i] = m_outputNeurons[i].GetOutput();
}
return outputs;
}
void PassForward(std::array<double,NumInputs> inputData)
{
SetInputs(inputData);
for(auto i = 0; i < NumHidden(); ++i)
{
for(auto& o : m_outputNeurons)
{
o.SetInput(i,m_hiddenNeurons[i].GetOutput());
}
}
}
void Train(std::vector<std::array<double,NumInputs>> trainingData,
std::vector<std::array<double,NumOutputs>> targetData,
double learningRate, std::size_t numEpochs)
{
for(auto& h : m_hiddenNeurons)
{
for(auto i = 0; i < NumInputs; ++i)
h.SetWeight(i,randGen());
h.SetBiasWeight(randGen());
}
for(auto& o : m_outputNeurons)
{
for(auto h = 0; h < NumHidden(); ++h)
o.SetWeight(h,randGen());
o.SetBiasWeight(randGen());
}
for(std::size_t e = 0; e < numEpochs; ++e)
{
for(std::size_t dataIndex = 0; dataIndex < trainingData.size(); ++dataIndex)
{
PassForward(trainingData[dataIndex]);
std::array<double,NumHidden()+1> deltaHidden;
std::array<double,NumOutputs> deltaOutput;
for(auto i = 0; i < NumOutputs; ++i)
{
auto output = m_outputNeurons[i].GetOutput();
deltaOutput[i] = output * (1.0 - output) * (targetData[dataIndex][i] - output);
}
for(auto i = 0; i < NumHidden(); ++i)
{
double error = 0;
for(auto j = 0; j < NumOutputs; ++j)
{
error += m_outputNeurons[j].GetWeight(i) * deltaOutput[j];
}
auto output = m_hiddenNeurons[i].GetOutput();
deltaHidden[i] = output * (1.0 - output) * error;
}
for(auto i = 0; i < NumOutputs; ++i)
{
for(auto j = 0; j < NumHidden(); ++j)
{
auto currentWeight = m_outputNeurons[i].GetWeight(j);
m_outputNeurons[i].SetWeight(j,currentWeight + learningRate * deltaOutput[i] * m_hiddenNeurons[j].GetOutput());
}
auto currentWeight = m_outputNeurons[i].GetBiasWeight();
m_outputNeurons[i].SetBiasWeight(currentWeight + learningRate * deltaOutput[i] * (1.0*currentWeight));
}
for(auto i = 0; i < NumHidden(); ++i)
{
for(auto j = 0; j < NumInputs; ++j)
{
auto currentWeight = m_hiddenNeurons[i].GetWeight(j);
m_hiddenNeurons[i].SetWeight(j,currentWeight + learningRate * deltaHidden[i] * m_hiddenNeurons[i].GetInput(j));
}
auto currentWeight = m_hiddenNeurons[i].GetBiasWeight();
m_hiddenNeurons[i].SetBiasWeight(currentWeight + learningRate * deltaHidden[i] * (1.0*currentWeight));
}
}
}
}
private:
std::array<Neuron<NumInputs,sigmoid>,NumHidden()> m_hiddenNeurons;
std::array<Neuron<NumHidden(),sigmoid>,NumOutputs> m_outputNeurons;
};
/// Trains a 2-input / 2-output network on the four XOR samples, then prints
/// both network outputs for every training sample.
int main()
{
    NeuralNetwork<2,2> NN;
    const std::vector<std::array<double,2>> trainingData = {{{0,1},{1,0},{1,1},{0,0}}};
    const std::vector<std::array<double,2>> targetData = {{{1,0},{1,0},{0,1},{0,1}}};

    NN.Train(trainingData, targetData, 0.03, 100000);

    for (const auto& sample : trainingData)
    {
        NN.PassForward(sample);
        const auto outputs = NN.GetOutputs();
        // Unsigned index: matches the type returned by outputs.size().
        for (std::size_t o = 0; o < outputs.size(); ++o)
        {
            std::cout << "Out " << o << ":\t" << outputs[o] << '\n';
        }
    }
    return 0;
}
最佳答案
几天前我也做过同样的事情。可以告诉你,如果碰上不走运的权重初始值,100 000 次反向传播迭代是不够的。不要完全随机地初始化权重:权重过大时 sigmoid 很容易陷入饱和,而权重为 0 同样无济于事。我把权重初始化在 ±(0.3, 0.7) 的范围内,收敛性得到了显著改善。
关于c++ - 我的神经网络只学习一些数据集,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/47171170/