python - 如何使用smac进行卷积神经网络的超参数优化?

标签 python conv-neural-network pytorch hyperparameters automl

注意:长篇文章。请耐心等待

我在 PyTorch 中的 KMNIST 数据集上实现了一个卷积神经网络。我需要使用 SMAC 来优化 CNN 的学习率和随机梯度下降的动量。我是超参数优化方面的新手,我从 smac 文档中学到的是,

  1. SMAC 通过目标算法评估器 (TAE) 调用要优化的算法来对其进行评估。
  2. 我们需要一个场景对象来配置优化过程。
  3. Scenario 对象中的 run_obj 参数指定 SMAC 应优化的内容。

我的最终目标是获得良好的准确性或低损失

这是我到目前为止所做的:

卷积神经网络

import numpy as np
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms 
import torchvision.datasets as datasets
from torch.autograd import Variable
from datasets import *
import torch.utils.data
import torch.nn.functional as F
import matplotlib.pyplot as plt

# Create the model class

class CNN(nn.Module):
    def __init__(self):

        super(CNN, self).__init__() # to inherent the features of nn.Module

        self.cnn1 = nn.Conv2d(in_channels = 1, out_channels = 8, kernel_size = 3, stride = 1, padding =1)

        # in_channels =1 because of grey scale image
        # kernel_size = feature_size
        # padding = 1 because for same padding = [(filter_size -1)/2]
        # the output size of the 8 feature maps is [(input_size - filter_size +2(padding)/stride)+1]

        #Batch Normalization

        self.batchnorm1 = nn.BatchNorm2d(8)

        # RELU

        self.relu = nn.ReLU()
        self.maxpool1 = nn.MaxPool2d(kernel_size =2)

        # After maxpooling, the output of each feature map is 28/2 =14

        self.cnn2 = nn.Conv2d(in_channels = 8, out_channels = 32, kernel_size = 5, stride = 1, padding =2)

        #Batch Normalization

        self.batchnorm2 = nn.BatchNorm2d(32)

        # RELU

        #self.relu = nn.ReLU()
        self.maxpool2 = nn.MaxPool2d(kernel_size =2)

        # After maxpooling , the output of each feature map is 14/2 =7of them is of size 7x7 --> 32*7*7=1568
        # Flatten the feature maps. You have 32 feature maps, each 
        self.fc1 = nn.Linear(in_features=1568, out_features = 600)
        self.dropout = nn.Dropout(p=0.5)
        self.fc2 = nn.Linear(in_features=600, out_features = 10)

    def forward(self,x):

        out = self.cnn1(x)
        #out = F.relu(self.cnn1(x))
        out = self.batchnorm1(out)
        out = self.relu(out)
        out = self.maxpool1(out)

        out = self.cnn2(out)
        out = self.batchnorm2(out)
        out = self.relu(out)
        out = self.maxpool2(out)

        #Now we have to flatten the output. This is where we apply the feed forward neural network as learned
        #before!

        #It will the take the shape (batch_size, 1568) = (100, 1568)

        out = out.view(-1, 1568)

        #Then we forward through our fully connected layer

        out = self.fc1(out)
        out = self.relu(out)
        out = self.dropout(out)
        out = self.fc2(out)

        return out

def train(model, train_loader, optimizer, epoch, CUDA, loss_fn):
        model.train()
        cum_loss=0
        iter_count = 0

        for i, (images, labels) in enumerate(train_load):

            if CUDA:

               images = Variable(images.cuda())
               images = images.unsqueeze(1)
               images = images.type(torch.FloatTensor)
               images = images.cuda()

               labels = Variable(labels.cuda())
               labels = labels.type(torch.LongTensor)
               labels = labels.cuda()

            else:

               images = Variable(images)
               images = images.unsqueeze(1)
               images = images.type(torch.DoubleTensor)

               labels = Variable(labels)
               labels = labels.type(torch.DoubleTensor)

            optimizer.zero_grad()
            outputs = model(images)
            loss = loss_fn(outputs, labels)
            loss.backward()
            optimizer.step()
            cum_loss += loss


            if (i+1) % batch_size == 0:
               correct = 0
               total = 0
               acc = 0
               _, predicted = torch.max(outputs.data,1)
               total += labels.size(0)
               if CUDA:
                  correct += (predicted.cpu()==labels.cpu()).sum()
               else:
                  correct += (predicted==labels).sum()

               accuracy = 100*correct/total

            if i % len(train_load) == 0:

               iter_count += 1
               ave_loss = cum_loss/batch_size
        return ave_loss

batch_size = 100 
epochs = 5
e = range(epochs)
#print(e)

#Load datasets

variable_name=KMNIST()

train_images = variable_name.images
train_images = torch.from_numpy(train_images)

#print(train_images.shape)
#print(type(train_images))

train_labels = variable_name.labels
train_labels = torch.from_numpy(train_labels)

#print(train_labels.shape)
#print(type(train_labels))

train_dataset = torch.utils.data.TensorDataset(train_images, train_labels)

# Make the dataset iterable

train_load = torch.utils.data.DataLoader(dataset = train_dataset, batch_size = batch_size, shuffle = True)

print('There are {} images in the training set' .format(len(train_dataset)))
print('There are {} images in the loaded training set' .format(len(train_load)))



def net(learning_rate, Momentum):
    model = CNN()
    CUDA = torch.cuda.is_available()
    if CUDA:
        model = model.cuda()

    loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate,momentum = Momentum, nesterov= True)

    iteration = 0
    total_loss=[]

    for epoch in range(epochs):
        ave_loss = train(model, train_load, optimizer, epoch, CUDA, loss_fn)

        total_loss.append(ave_loss)

    return optimizer, loss_fn, model, total_loss

optimizer, loss_fn, model, total_loss = net(learning_rate= 0.01, Momentum = 0.09)

# Print model's state_dict

print("---------------")

print("Model's state_dict:")

for param_tensor in model.state_dict():
    print(param_tensor, "\t", model.state_dict()[param_tensor].size())

print("---------------")

#print("Optimizer's state_dict:")

#for var_name in optimizer.state_dict():
 #   print(var_name, "\t", optimizer.state_dict()[var_name])

torch.save(model.state_dict(), "kmnist_cnn.pt")

plt.plot(e, (np.array(total_loss)))
plt.xlabel("# Epoch")
plt.ylabel("Loss")
plt.show()

print('Done!')

smac 超参数优化:

from smac.configspace import ConfigurationSpace
from ConfigSpace.hyperparameters import CategoricalHyperparameter, \
    UniformFloatHyperparameter, UniformIntegerHyperparameter

from smac.configspace.util import convert_configurations_to_array
#from ConfigSpace.conditions import InCondition

# Import SMAC-utilities
from smac.tae.execute_func import ExecuteTAFuncDict
from smac.scenario.scenario import Scenario
from smac.facade.smac_facade import SMAC

# Build Configuration Space which defines all parameters and their ranges
cs = ConfigurationSpace()

# We define a few possible types of SVM-kernels and add them as "kernel" to our cs

lr = UniformFloatHyperparameter('learning_rate', 1e-4, 1e-1, default_value='1e-2')
momentum = UniformFloatHyperparameter('Momentum', 0.01, 0.1, default_value='0.09')

cs.add_hyperparameters([lr, momentum])

def kmnist_from_cfg(cfg):

    cfg = {k : cfg[k] for k in cfg if cfg[k]}
    print('Config is', cfg)

    #optimizer, loss_fn, model, total_loss = net(**cfg)
    #optimizer, loss_fn, model, total_loss = net(learning_rate= cfg["learning_rate"], Momentum= cfg["Momentum"])

    optimizer, loss_fn, model, total_loss = net(learning_rate= 0.02, Momentum= 0.05)

    return optimizer, loss_fn, model, total_loss

# Scenario object
scenario = Scenario({"run_obj": "quality",   # we optimize quality (alternatively runtime)
                     "runcount-limit": 200,  # maximum function evaluations
                     "cs": cs,               # configuration space
                     "deterministic": "true"
                     })

#def_value = kmnist_from_cfg(cs.get_default_configuration())
#print("Default Value: %.2f" % (def_value))


# Optimize, using a SMAC-object

print("Optimizing! Depending on your machine, this might take a few minutes.")
smac = SMAC(scenario=scenario,tae_runner=kmnist_from_cfg) #rng=np.random.RandomState(42)
smac.solver.intensifier.tae_runner.use_pynisher = False

print("SMAC", smac)
incumbent = smac.optimize()


inc_value = kmnist_from_cfg(incumbent)

print("Optimized Value: %.2f" % (inc_value))

当我将损失作为 run_obj 参数时,我收到错误消息

ArgumentError: argument --run-obj/--run_obj: invalid choice: 'total_loss' (choose from 'runtime', 'quality')

说实话,我不知道“品质”是什么意思。无论如何,当我将quality作为run_obj参数时,我收到错误消息

TypeError: ufunc 'isfinite' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''

如果我理解正确的话,当需要 int 但给出 str 时,会得到上述错误消息。为了检查问题是否与配置空间有关,我尝试了

optimizer, loss_fn, model, total_loss = net(learning_rate= 0.02, Momentum= 0.05)

而不是这些:

optimizer, loss_fn, model, total_loss = net(**cfg)
optimizer, loss_fn, model, total_loss = net(learning_rate= cfg["learning_rate"], Momentum= cfg["Momentum"])

错误仍然存​​在。

关于如何使用 smac 优化 CNN 超参数的任何想法以及为什么我会收到此错误消息?我尝试在网上寻找类似的问题。 This post有点帮助。不幸的是,由于NN上没有smac的实现(至少我没有找到它),我无法找出解决方案。我已经没有了所有的想法。

感谢任何帮助、想法或有用的链接。

谢谢!

最佳答案

我相信 tae_runner (在您的情况下为 kmnist_from_cfg)必须是一个可调用的,它采用您正确提供的配置空间点,并输出一个数字。你输出一个元组。也许只返回验证集上的total_loss?我基于 smac github 中的 svm 示例,地址为 https://github.com/automl/SMAC3/blob/master/examples/svm.py .

关于python - 如何使用smac进行卷积神经网络的超参数优化?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/55619716/

相关文章:

conv-neural-network - 在 pytorch 中汇集 channel

Anaconda更新时总是想把我的GPU Pytorch版本替换为CPU Pytorch版本

machine-learning - 如何让 pytorch 读取 numpy 格式?

python - 使用 pytorch 和多处理在 CPU 上运行推理

python - 在 Python 2.x 中,使用反引号从 int 对象获取十进制字符串很糟糕吗?

python - 如何使 'while' 循环将循环条件的结果打印到单行(串联)中?

python - 在 Python 中一致地格式化数字

python - 将 Numpy 数组图像编码为图像类型(.png 等)以将其与 GCloud Vision API 一起使用 - 无需 OpenCV

python - 在处理高光谱图像时,3D-CNN 输入的第五维应该是多少?

python - 将 batchnorm(TensorFlow) 的 is_training 变为 False