python - Why does Skorch show NAN in every epoch?

Tags: python scikit-learn pytorch skorch

I want to build my own dataset class on top of Skorch's Dataset class, because I need to distinguish categorical columns from continuous columns; the categorical columns are passed through embedding layers in the model. The training output is strange, though, showing nan at every epoch:

  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1           nan           nan  0.2187
      2           nan           nan  0.1719
      3           nan           nan  0.1719
      4           nan           nan  0.1562
      5           nan           nan  0.1406

Can you help me fix this? I am using the data from this Kaggle competition: Here

from skorch import NeuralNetRegressor
from skorch.dataset import Dataset
import torch
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder


class TabularDataset(Dataset):
    def __init__(self, data, cat_cols=None, output_col=None):
        self.n = data.shape[0]

        if output_col:
            self.y = data[output_col].astype(np.float32).values.reshape(-1, 1)
        else:
            self.y = np.zeros((self.n, 1))

        self.cat_cols = cat_cols if cat_cols else []
        self.cont_cols = [col for col in data.columns
                          if col not in self.cat_cols + [output_col]]

        if self.cont_cols:
            self.cont_X = data[self.cont_cols].astype(np.float32).values
        else:
            self.cont_X = np.zeros((self.n, 1))

        if self.cat_cols:
            self.cat_X = data[self.cat_cols].astype(np.int64).values
        else:
            self.cat_X = np.zeros((self.n, 1))

    def __len__(self):
        # Denotes the total number of samples
        return self.n

    def __getitem__(self, idx):
        # generates one sample of data
        return [self.cont_X[idx], self.cat_X[idx]], self.y[idx]


class FeedForwardNN(nn.Module):

    def __init__(self, emb_dims, no_of_cont, lin_layer_sizes,
                 output_size, emb_dropout, lin_layer_dropouts):

        """
        Parameters
        ----------
        emb_dims: List of two element tuples
          This list will contain a two element tuple for each
          categorical feature. The first element of a tuple will
          denote the number of unique values of the categorical
          feature. The second element will denote the embedding
          dimension to be used for that feature.
        no_of_cont: Integer
          The number of continuous features in the data.
        lin_layer_sizes: List of integers.
          The size of each linear layer. The length will be equal
          to the total number
          of linear layers in the network.
        output_size: Integer
          The size of the final output.
        emb_dropout: Float
          The dropout to be used after the embedding layers.
        lin_layer_dropouts: List of floats
          The dropouts to be used after each linear layer.
        """

        super().__init__()

        # Embedding layers
        self.emb_layers = nn.ModuleList([nn.Embedding(x, y)
                                         for x, y in emb_dims])

        no_of_embs = sum([y for x, y in emb_dims])
        self.no_of_embs = no_of_embs
        self.no_of_cont = no_of_cont

        # Linear Layers
        first_lin_layer = nn.Linear(self.no_of_embs + self.no_of_cont,
                                    lin_layer_sizes[0])

        self.lin_layers = \
            nn.ModuleList([first_lin_layer] + \
                          [nn.Linear(lin_layer_sizes[i], lin_layer_sizes[i + 1])
                           for i in range(len(lin_layer_sizes) - 1)])

        for lin_layer in self.lin_layers:
            nn.init.kaiming_normal_(lin_layer.weight.data)

        # Output Layer
        self.output_layer = nn.Linear(lin_layer_sizes[-1],
                                      output_size)
        nn.init.kaiming_normal_(self.output_layer.weight.data)

        # Batch Norm Layers
        self.first_bn_layer = nn.BatchNorm1d(self.no_of_cont)
        self.bn_layers = nn.ModuleList([nn.BatchNorm1d(size)
                                        for size in lin_layer_sizes])

        # Dropout Layers
        self.emb_dropout_layer = nn.Dropout(emb_dropout)
        self.dropout_layers = nn.ModuleList([nn.Dropout(size)
                                             for size in lin_layer_dropouts])

    def forward(self, X):
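        # X arrives as [cont_X, cat_X], the order returned by TabularDataset.__getitem__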
        cont_data = X[0]
        cat_data = X[1]
        if self.no_of_embs != 0:
            x = [emb_layer(cat_data[:, i])
                 for i, emb_layer in enumerate(self.emb_layers)]
            x = torch.cat(x, 1)
            x = self.emb_dropout_layer(x)

        if self.no_of_cont != 0:
            normalized_cont_data = self.first_bn_layer(cont_data)

            if self.no_of_embs != 0:
                x = torch.cat([x, normalized_cont_data], 1)
            else:
                x = normalized_cont_data

        for lin_layer, dropout_layer, bn_layer in \
                zip(self.lin_layers, self.dropout_layers, self.bn_layers):
            x = F.relu(lin_layer(x))
            x = bn_layer(x)
            x = dropout_layer(x)

        x = self.output_layer(x)

        return x


# Read data
data = pd.read_csv("data/train.csv", usecols=["SalePrice", "MSSubClass", "MSZoning", "LotFrontage", "LotArea",
                                              "Street", "YearBuilt", "LotShape", "1stFlrSF", "2ndFlrSF"]).dropna()

categorical_features = ["MSSubClass", "MSZoning", "Street", "LotShape", "YearBuilt"]
output_feature = "SalePrice"

# Label encode categorical features
label_encoders = {}
for cat_col in categorical_features:
    label_encoders[cat_col] = LabelEncoder()
    data[cat_col] = label_encoders[cat_col].fit_transform(data[cat_col])

# Feed-forward NN
cat_dims = [int(data[col].nunique()) for col in categorical_features]

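# Embedding size rule of thumb: about half the cardinality, capped at 50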
emb_dims = [(x, min(50, (x + 1) // 2)) for x in cat_dims]


net = FeedForwardNN(emb_dims, no_of_cont=4, lin_layer_sizes=[50, 100],
                    output_size=1, emb_dropout=0.04,
                    lin_layer_dropouts=[0.001, 0.01])

# Fit
ds = TabularDataset(data=data, cat_cols=categorical_features,
                    output_col=output_feature)
X = data.drop(['SalePrice'], axis=1)
y = data['SalePrice'].values.reshape(-1, 1)
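# Hand the custom TabularDataset instance to skorch via the `dataset` argument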
net = NeuralNetRegressor(
    net,
    max_epochs=5,
    lr=0.1,
    dataset=ds
)
net.fit(X, y)

Best Answer

For anyone who runs into a similar problem on a classification task: the loss function (criterion) that Skorch uses by default is NLLLoss, and Skorch computes the log for you (doc, related issue). A Softmax layer (in the multi-class case) is therefore expected as the last step of the architecture, so that the module can produce probabilities.

You can either:

  1. add a Softmax layer to produce probabilities and keep the default NLLLoss (a minimal sketch follows this list); or
  2. change the default loss to CrossEntropyLoss:
    net = NeuralNetClassifier(
        ...
        criterion=torch.nn.CrossEntropyLoss
    )
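
For option 1, here is a minimal sketch of what such a module could look like. This is illustrative only: the ClassifierModule name, layer sizes, and class count are assumptions, not code from the question.

import torch
import torch.nn as nn
from skorch import NeuralNetClassifier

class ClassifierModule(nn.Module):
    # Hypothetical module; feature and class counts are placeholders.
    def __init__(self, num_features=20, num_classes=3):
        super().__init__()
        self.hidden = nn.Linear(num_features, 10)
        self.out = nn.Linear(10, num_classes)
        # Softmax as the final step, so the module outputs probabilities;
        # with the default NLLLoss criterion, skorch takes the log itself.
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, X):
        X = torch.relu(self.hidden(X))
        return self.softmax(self.out(X))

net = NeuralNetClassifier(ClassifierModule, max_epochs=5, lr=0.1)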

Regarding "python - Why does Skorch show NAN in every epoch?", we found a similar question on Stack Overflow: https://stackoverflow.com/questions/53386245/
