java - 人工智能(神经网络) - 实际输出永远不会接近正确输出

标签 java neural-network artificial-intelligence

我正在开发一个应该像异或运算符一样工作的程序。

为了调整权重,我使用反向传播。

我还加入了深度学习(多个隐藏层)的支持(它基本能按预期工作,但存在同样的问题),不过这对本问题并不重要。(当代码中出现如下的 if 子句时:
if(hiddenNeurons.size() > 1)
{
 .....
}

花括号里的代码只有在使用多个隐藏层时才起作用。(本问题并不属于这种情况。))

困难:无论输入是什么,输出几乎总是相同的(大约 0.5)。

权重和偏差得到调整。

这是代码(还有更多,但其他代码并不重要):

/**
 * Trains the network on the stored training set.
 *
 * <p>Every pass walks the samples in list order: the sample is fed forward
 * through {@code computeOutput}, then {@code learn} is called with the
 * matching target so the weights are adjusted after each single sample.
 * The absolute error of the first sample is printed once per pass as a
 * rough progress indicator.
 *
 * <p>If {@code inputs} and {@code outputs} differ in size, an error message
 * is printed and no training takes place.
 *
 * @param iterations number of full passes over the training set
 */
public void learnFromData(int iterations)
{
    // Guard: every input needs exactly one target vector.
    if (inputs.size() != outputs.size())
    {
        System.out.println("\nERROR: the number of inputs and outputs have to match!\n");
        return;
    }

    for (int pass = 0; pass < iterations; pass++)
    {
        for (int sample = 0; sample < inputs.size(); sample++)
        {
            // Forward pass; only the first output neuron is inspected for the report.
            double actualOutput = computeOutput(inputs.get(sample))[0];
            double[] target = outputs.get(sample);

            if (sample == 0)
            {
                double error = Math.abs(actualOutput - target[0]);
                System.out.println(String.format("Error: %.10f", error));
            }

            // Backward pass: uses the activations left behind by computeOutput.
            learn(target);
        }
    }
}
/**
 * Runs one forward pass through the network.
 *
 * <p>The input values are copied into {@code inputNeurons}; then every hidden
 * layer and finally the output layer are evaluated as
 * {@code sigmoid(weighted sum of previous layer + bias)}. The weighted sums
 * are kept in {@code hSums}/{@code hoSums} and the activations in
 * {@code hiddenNeurons}/{@code aOutputNeurons}.
 *
 * <p>Fix over the previous version: for hidden layers after the first one,
 * only the weighted sum used to be computed. The bias was never added and
 * the activation was never written to {@code hiddenNeurons}, so the output
 * layer read stale values from the last hidden layer whenever more than one
 * hidden layer was configured. Results for a single hidden layer are
 * unchanged.
 *
 * @param inputValues one value per input neuron
 * @return the internal {@code aOutputNeurons} array (not a copy), holding the
 *         activations of the output layer
 */
public double[] computeOutput(double[] inputValues)
{
    // Load the sample into the input layer.
    for (int i = 0; i < inputValues.length; i++)
    {
        inputNeurons[i] = inputValues[i];
    }

    // First hidden layer: fed by the input neurons through ihWeights.
    for (int b = 0; b < hiddenNeurons.get(0).length; b++)
    {
        double sum = 0.0;
        for (int i = 0; i < inputNeurons.length; i++)
        {
            sum += inputNeurons[i] * ihWeights[i][b];
        }
        sum += hBiases.get(0)[b];
        hSums.get(0)[b] = sum;
        hiddenNeurons.get(0)[b] = Helper.sig(sum); // output-function = sigmoid
    }

    // Remaining hidden layers: each one is fed by the previous hidden layer
    // through hhWeights.get(layer - 1).
    // Assumes hBiases holds one bias array per hidden layer (learn() indexes
    // it by layer as well) - TODO confirm against the field initialisation.
    for (int layer = 1; layer < hiddenNeurons.size(); layer++)
    {
        for (int next = 0; next < hiddenNeurons.get(layer).length; next++)
        {
            double sum = 0.0;
            for (int prev = 0; prev < hiddenNeurons.get(layer - 1).length; prev++)
            {
                sum += hiddenNeurons.get(layer - 1)[prev] * hhWeights.get(layer - 1)[prev][next];
            }
            sum += hBiases.get(layer)[next];
            hSums.get(layer)[next] = sum;
            // Without this assignment the next layer would read stale activations.
            hiddenNeurons.get(layer)[next] = Helper.sig(sum);
        }
    }

    // Output layer: fed by the last hidden layer through hoWeights.
    int lastHiddenLayer = hiddenNeurons.size() - 1;
    for (int i = 0; i < aOutputNeurons.length; i++)
    {
        double sum = 0.0;
        for (int b = 0; b < hiddenNeurons.get(lastHiddenLayer).length; b++)
        {
            sum += hiddenNeurons.get(lastHiddenLayer)[b] * hoWeights[b][i];
        }
        sum += hoBiases[i];
        hoSums[i] = sum;
        aOutputNeurons[i] = Helper.sig(sum);
    }
    return aOutputNeurons;
}
   /**
    * Performs one online backpropagation step for the sample that was most
    * recently passed through {@code computeOutput}.
    *
    * <p>All gradients are computed first, from the activations currently
    * stored in {@code aOutputNeurons} and {@code hiddenNeurons}. Only then
    * are weights and biases adjusted by {@code eta * gradient * activation},
    * so the backward pass always sees the weights used in the forward pass.
    * Each applied delta is also stored in the matching {@code ...Prev...Deltas}
    * array.
    *
    * <p>Fixes over the previous version:
    * <ul>
    *   <li>The hidden-to-output weight loop was bounded by the size of the
    *       first hidden layer although it indexes the last hidden layer.</li>
    *   <li>Hidden biases were updated inside the loop over the neurons of the
    *       next layer, i.e. several times per step (and once more for layer
    *       0), while the bias of the last hidden layer was never updated when
    *       more than one hidden layer existed. Every hidden bias is now
    *       updated exactly once.</li>
    * </ul>
    * With a single hidden layer the result is identical to the previous version.
    *
    * @param cValues the correct (target) value for each output neuron
    */
   public void learn(double[] cValues) //correctValues
   {
       final int lastHiddenLayer = hiddenNeurons.size() - 1;
       double sum; // sum of (gradient of a following neuron * weight leading to it)

       // ---- 1. gradients ----

       // Output layer: error times the activation derivative.
       // NOTE(review): this is only correct if Helper.invSig returns the sigmoid
       // derivative expressed in terms of the activation, i.e. y * (1 - y).
       // Helper is not visible here - confirm it is not the inverse sigmoid.
       for (int i = 0; i < aOutputNeurons.length; i++)
       {
           oGradients[i] = (cValues[i] - aOutputNeurons[i]) * Helper.invSig(aOutputNeurons[i]);
       }

       // Last hidden layer: back-propagate the output gradients through hoWeights.
       for (int i = 0; i < hiddenNeurons.get(lastHiddenLayer).length; i++)
       {
           sum = 0;
           for (int b = 0; b < aOutputNeurons.length; b++)
           {
               sum += oGradients[b] * hoWeights[i][b];
           }
           hGradients.get(lastHiddenLayer)[i] = Helper.invSig(hiddenNeurons.get(lastHiddenLayer)[i]) * sum;
       }

       // Earlier hidden layers, walking from the output side towards the input side.
       for (int layer = lastHiddenLayer; layer > 0; layer--)
       {
           for (int current = 0; current < hiddenNeurons.get(layer - 1).length; current++)
           {
               sum = 0;
               for (int next = 0; next < hiddenNeurons.get(layer).length; next++)
               {
                   sum += hGradients.get(layer)[next] * hhWeights.get(layer - 1)[current][next];
               }
               hGradients.get(layer - 1)[current] = Helper.invSig(hiddenNeurons.get(layer - 1)[current]) * sum;
           }
       }

       // ---- 2. weights ----

       // Input -> first hidden layer.
       for (int i = 0; i < inputNeurons.length; i++)
       {
           for (int b = 0; b < hiddenNeurons.get(0).length; b++)
           {
               ihPrevWeightsDeltas[i][b] = eta * hGradients.get(0)[b] * inputNeurons[i];
               ihWeights[i][b] += ihPrevWeightsDeltas[i][b];
           }
       }

       // Hidden -> hidden (no iterations when there is a single hidden layer).
       for (int layer = 0; layer < lastHiddenLayer; layer++)
       {
           for (int current = 0; current < hiddenNeurons.get(layer).length; current++)
           {
               for (int next = 0; next < hiddenNeurons.get(layer + 1).length; next++)
               {
                   double delta = eta * hGradients.get(layer + 1)[next] * hiddenNeurons.get(layer)[current];
                   hhPrevWeightDeltas.get(layer)[current][next] = delta;
                   hhWeights.get(layer)[current][next] += delta;
               }
           }
       }

       // Last hidden layer -> output. The bound must be the size of the LAST
       // hidden layer, because that is the layer indexed here.
       for (int i = 0; i < hiddenNeurons.get(lastHiddenLayer).length; i++)
       {
           for (int b = 0; b < aOutputNeurons.length; b++)
           {
               hoPrevWeightsDeltas[i][b] = eta * oGradients[b] * hiddenNeurons.get(lastHiddenLayer)[i];
               hoWeights[i][b] += hoPrevWeightsDeltas[i][b];
           }
       }

       // ---- 3. biases ----

       // Hidden biases: exactly one update per neuron and layer.
       // Assumes hBiases holds one bias array per hidden layer - TODO confirm.
       for (int layer = 0; layer <= lastHiddenLayer; layer++)
       {
           for (int n = 0; n < hiddenNeurons.get(layer).length; n++)
           {
               double delta = eta * hGradients.get(layer)[n];
               if (layer == 0)
               {
                   ihPrevBiasDeltas[n] = delta;
               }
               if (layer < lastHiddenLayer)
               {
                   // Only written for the layers the previous version wrote to,
                   // so no assumption is made about an entry for the last layer.
                   hhPrevBiasDeltas.get(layer)[n] = delta;
               }
               hBiases.get(layer)[n] += delta;
           }
       }

       // Output biases.
       for (int i = 0; i < aOutputNeurons.length; i++)
       {
           hoPrevBiasDeltas[i] = eta * oGradients[i];
           hoBiases[i] += hoPrevBiasDeltas[i];
       }
   }

最佳答案

因为这是您自己的代码,请尝试使用一些成熟的项目(例如 Neuroph 库)执行相同的网络并比较结果,这应该可以帮助您缩小问题范围。

还请记住,XOR 函数是一个非线性分类问题,您至少需要 2 个层(1 个输入层和 1 个隐藏层),并且要使用非线性激活函数。这是因为线性分类器无论怎样调整都无法完成非线性分类,所以必须使用非线性分类器。

关于java - 人工智能(神经网络) - 实际输出永远不会接近正确输出,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/38714872/

相关文章:

machine-learning - 神经网络训练中纪元的含义

machine-learning - keras.fit() 重新初始化权重

neural-network - Theano:在自动编码器中用步幅(子采样)重建卷积

artificial-intelligence - 人工智能领域有哪些令人印象深刻的算法或软件?

machine-learning - 机器学习中的PCA

java - 翻转 gridview 的项目,如窗口瓷砖

java - 无法从 MySql 数据库获取结果集

java - jquery ajax 调用时在浏览器上显示的缓存数据

java - 使所有对 mysite.com/user/specified/path 的请求运行相同的 JSP

java - AI 如何为战舰建模遗传编程