已关闭。这个问题需要更多细节或更清晰的表述,目前不接受回答。
想改进这个问题?请通过编辑本帖补充详细信息,把问题澄清。
7年前关闭。
Improve this question
只是为了好玩而不是为了盈利,编写了一个神经网络应用程序,该应用程序预测来自交易比特币的交易市场的实时数据的输出。
现在澄清一下,我不是在问我的算法是否正确,或者我的模型是否会让我变得富有——我正在研究神经网络和实时预测,所以请这样阅读。
我可以从两个来源(市场)获得真实数据。
我正在考虑作为输入的数据显然是当前的购买价格,并且网络正在尝试猜测下一个价格。但是我不关心这里的时间安排,我想预测下一个可能的价格,所以我不考虑将未改变的买入价格作为输入。我每 100 毫秒调查一次市场并询问当前价格,如果价格发生变化,则我存储它,如果价格没有变化,我忽略它。
我通过输入历史价格来训练网络,每个市场大约 2k - 网络配置如下:
输入:3 个输入
隐藏:输入 *2 +1
输出:1
训练直到误差达到 0.001 因子。
现在回答问题。
1)我只存储发生变化的值:如果价格没有变化,我就不保存它。这种做法可以吗?还是即使价格不变也应该记录价格?这会影响预测吗?影响有多大?我并不想预测某个具体时刻(比如 15:00)的价格,而是希望网络预测下一个可能出现的买入价——时间在这里并不重要。
2)如果你看下面的图表,可以清楚地看到网络的输出有点“滞后”(尤其是在第二张截图上),而且它处理不好“峰值”——更糟的是,它甚至无法预测这些峰值,总是预测出相反的趋势——这是正常现象吗?还是对这种行为有什么解释?
源代码:
#include <math.h>

#include <algorithm>
#include <chrono>
#include <cstdlib>
#include <iostream>
#include <string>
#include <thread>
#include <vector>

#include "Core/CMemTracer.h"
#include "Core/CDatabase.h"
#include "Core/CCalcModule.h"
#include "Core/CCalcModuleNN.h"
#include "Core/CNeuralNetwork.h"
CNeuralNetwork _NeuralNetwork;
CDatabase _Database;
int main(int argc, const char * argv[])
{
std::string m_strDatabaseHost;
std::string m_strDatabaseName;
std::string m_strDatabaseUsername;
std::string m_strDatabasePassword;
std::string m_strExchange;
int m_iNumOfHistoryForTraining = 0;
int iNeuralNetworkInputs = 5;
int iNeuralNetworkHidden = 2 * iNeuralNetworkInputs + 1;
int iNeuralNetworkOutputs = 1;
int iMaximumTrainingEpoch = 10000000;
float fMinimum = 0;
float fMaximum = 1000;
float fMaximumNetworkError = 0.000720;
float fNeuralNetworkLearningRate = 0.5;
float fNeuralNetworkMomentum = 0.1;
std::vector<float> vHistory;
std::vector<float> vNormalisedData;
m_strDatabaseHost = "192.168.0.10";
m_strDatabaseName = "Trader";
m_strDatabasePassword = "password";
m_strDatabaseUsername = "root";
m_strExchange = "exBitMarket";
// How much data we fetch from the DB
m_iNumOfHistoryForTraining = 2000;
CLogger::Instance()->Write(XLOGEVENT_LOCATION, "Info, Connecting to Database");
// Load up Database
if(_Database.Connect(m_strDatabaseUsername, m_strDatabasePassword, m_strDatabaseHost) == false)
{
CLogger::Instance()->Write(XLOGEVENT_LOCATION, "Error, cant connect to Database");
return false;
}
CLogger::Instance()->Write(XLOGEVENT_LOCATION, "Info, Selecting Database");
// Select Database
if(_Database.SelectDatabase(m_strDatabaseName) == false)
{
CLogger::Instance()->Write(XLOGEVENT_LOCATION, "Error, cant select Database");
return false;
}
// Get x Data from Database
std::string strQuery = "SELECT * FROM (SELECT * FROM exData WHERE Exchange='"+m_strExchange+"' ORDER BY Epoch DESC LIMIT "+stringify(m_iNumOfHistoryForTraining)+")sub ORDER BY Epoch ASC";
// Query DB
CLogger::Instance()->Write(XLOGEVENT_LOCATION, "Info, Querying database");
CDatabase::tDatabaseQueryResult _QuerySelect;
if(_Database.Query(strQuery, _QuerySelect) == false)
{
//
CLogger::Instance()->Write(XLOGEVENT_LOCATION, "Error, cannot query database");
//
return false;
}
//
CLogger::Instance()->Write(XLOGEVENT_LOCATION, "Info, Got %i results", _QuerySelect.m_iRows);
// If Data available
if(_QuerySelect.m_iRows >= m_iNumOfHistoryForTraining )
{
// Push back Buy value to Historical Data Vector
for(int c = 0; c < _QuerySelect.m_vRows.size(); c++)
vHistory.push_back(atof(_QuerySelect.m_vRows[c].m_vstrColumns[3].data()));
vNormalisedData = vHistory;
}
else
{
//
CLogger::Instance()->Write(XLOGEVENT_LOCATION, "Error, not enough data returned (%i of %i required)", _QuerySelect.m_iRows,m_iNumOfHistoryForTraining);
//
return false;
}
//
CLogger::Instance()->Write(XLOGEVENT_LOCATION, "Info, Normalising data for Neural network input");
// Normalise
// Find max, min values from the dataset for later normalization
std::vector<float>::iterator itMax = std::max_element(vHistory.begin(), vHistory.end(),[](const float& x, const float& y) { return x < y; });
std::vector<float>::iterator itMin = std::min_element(vHistory.begin(), vHistory.end(),[](const float& x, const float& y) { return x < y; });
// Store Min/Max
fMinimum = itMin[0];
fMaximum = itMax[0];
//
CLogger::Instance()->Write(XLOGEVENT_LOCATION, "Info, Normalised data <%f, %f>", fMinimum, fMaximum);
// Important - Neural Network has to be setup correctly for activation function
// both this normalization and NN has to be setup the same way.
// Log sigmoid activation function (0,1)
// logistic sigmoid function [0, 1]
for(int a = 0; a < vHistory.size(); a++)
vNormalisedData[a] = (vHistory[a] - itMin[0]) / (itMax[0] - itMin[0]);
//
CLogger::Instance()->Write(XLOGEVENT_LOCATION, "Info, Initializing neural network with the setup %i/%i/%i Learning Rate: %f, Momentum: %f",
iNeuralNetworkInputs,
iNeuralNetworkHidden,
iNeuralNetworkOutputs,
fNeuralNetworkLearningRate,
fNeuralNetworkMomentum);
// Build the network with arguments passed
_NeuralNetwork.Initialize(iNeuralNetworkInputs, iNeuralNetworkHidden, iNeuralNetworkOutputs);
_NeuralNetwork.SetLearningRate(fNeuralNetworkLearningRate);
_NeuralNetwork.SetMomentum(false, fNeuralNetworkMomentum);
// Train
double dMaxError = 100.0;
double dLastError = 12345.0;
int iEpoch = 0;
int iLastDump = 0;
int iNumberOfDataForTraining = (vNormalisedData.size() / 2) - iNeuralNetworkInputs + iNeuralNetworkOutputs;
//
CLogger::Instance()->Write(XLOGEVENT_LOCATION, "Info, starting training with %i data out of %i", iNumberOfDataForTraining, vNormalisedData.size());
// Perform training on the training data
while ( (dMaxError > fMaximumNetworkError) && (iEpoch < iMaximumTrainingEpoch) )
{
//
dMaxError = 0;
// Now the input is normalized and ready for use perform the training
// Use 1/2 of the Normalised Data for training purposes, the rest will be used to
// Validate the network.
for(int a = 0; a < iNumberOfDataForTraining; a++)
{
// Set Inputs
for(int b = 0; b < iNeuralNetworkInputs; b++)
_NeuralNetwork.SetInput(b, vNormalisedData[a+b]);
// Set desired Output for the newest value
_NeuralNetwork.SetDesiredOutput(0, vNormalisedData[a + iNeuralNetworkInputs]);
// Feed data
_NeuralNetwork.FeedForward();
//
dMaxError += _NeuralNetwork.CalculateError();
// Backpropagate to learn
_NeuralNetwork.BackPropagate();
}
// Divide by the number of total array size to get global network error
dMaxError /= vNormalisedData.size();
// Dump some stats now
if(CUtils::GetEpoch() - iLastDump > 1)
{
CLogger::Instance()->Write(XLOGEVENT_LOCATION, "Training Error Factor: %f / %f Epoch: %i", dMaxError, fMaximumNetworkError, iEpoch);
iLastDump = CUtils::GetEpoch();
}
// Increment the epoch count
iEpoch++;
// Store last error for early-stop
dLastError = dMaxError;
}
//
CLogger::Instance()->Write(XLOGEVENT_LOCATION, "Info, starting validation with %i data", vNormalisedData.size() - iNumberOfDataForTraining);
//
dMaxError = 0;
// Now check against 'Validation' Data
for(int a = iNumberOfDataForTraining; a < vNormalisedData.size(); a++)
{
// Set Inputs
for(int b = 0; b < iNeuralNetworkInputs; b++)
_NeuralNetwork.SetInput(b, vNormalisedData[a+b]);
// Set desired Output for the newest value
_NeuralNetwork.SetDesiredOutput(0, vNormalisedData[a + iNeuralNetworkInputs]);
// Feed data
_NeuralNetwork.FeedForward();
//
dMaxError += _NeuralNetwork.CalculateError();
}
// Divide by the number of total array size to get global network error
dMaxError /= vNormalisedData.size();
CLogger::Instance()->Write(XLOGEVENT_LOCATION, "%i Network Trained, Error Factor on Validation data = %f",
CUtils::GetEpoch(),
dMaxError);
// Save the network to an output filer
return 0;
}
不问算法,只问网络的输出,这是正常的,还是看起来像网络过度拟合?
更新:
添加了更新后的代码,以体现“用训练数据进行训练、用验证数据进行验证”。
最佳答案
您正在进行机器学习,而在机器学习中,您从不使用训练数据来评估您的模型。
要回答您的问题,您是否过拟合,或者这是否正常:如果您不将数据集分成基本的训练和测试,您将过拟合。
第一步 :拆分您的数据,使其成为 50/50,或者拥有 90% 的训练数据和 10% 的测试数据就足够了。你可以用训练数据做你想做的事,但你唯一可以使用测试数据的就是看看你的模型有多好。理想情况下,您只需执行一次。
进一步的步骤:这在某种程度上被简化了。您可以使用交叉验证(即您使用不同的拆分)。或者您可以使用评估集来适应您的参数或玩弄一些东西,因此您只需触摸一次测试集。确实,您可以做的事情没有限制,但是基本的拆分可以创造奇迹。
如何检查过拟合
好的,我已经告诉你如何正确地做到这一点,但这并不能告诉你你是否过度拟合。
您学习模型的数据(在您的情况下是 NN)被污染了。除非学习算法真的很糟糕,否则它总是很适合。问题是,它是否适合来自同一分布的所有其他数据?为此,您使用测试集。如果模型可以很好地拟合,则说明您没有过度拟合。如果它完全是一团糟,那么你已经过度拟合(或有其他问题)。
关于c++ - 神经网络预测实时市场数据,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/27154385/