python - 为什么我的完全卷积自动编码器不对称？

我正在开发一个全卷积自动编码器，它采用 3 个 channel 作为输入并输出 2 个 channel (输入:LAB，输出:AB)。因为输出应该与输入大小相同，所以我使用全卷积。

代码:

import torch.nn as nn


class AE(nn.Module):
    """Fully convolutional autoencoder: 3 input channels in, 2 channels out.

    This is the network from the question. Every 5x5 layer deliberately
    keeps ``padding=1`` so the size mismatch being asked about stays
    reproducible:

    * each ``Conv2d(kernel_size=5, padding=1)`` shrinks H and W by 2,
    * each ``MaxPool2d(2, 2)`` halves H and W, rounding down,
    * each ``ConvTranspose2d(kernel_size=5, padding=1)`` grows H and W by 2,
    * each ``Upsample(scale_factor=2)`` doubles H and W.

    Consequently the output is generally smaller than the input, e.g. a
    ``(1, 3, 199, 253)`` input produces a ``(1, 2, 190, 238)`` output.
    """

    def __init__(self):
        super().__init__()

        # Encoder: five 5x5 convolutions, the first four followed by 2x pooling.
        self.encoder = nn.Sequential(
            # conv 1
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=5, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # conv 2
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=5, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # conv 3
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=5, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # conv 4
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=5, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # conv 5 (bottleneck, no pooling)
            nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=5, stride=1, padding=1),
            nn.BatchNorm2d(1024),
            nn.ReLU(),
        )

        # Decoder: Upsample stands in for each MaxPool2d, ConvTranspose2d for
        # each Conv2d. align_corners=False is PyTorch's default, spelled out.
        self.decoder = nn.Sequential(
            # conv 6
            nn.ConvTranspose2d(in_channels=1024, out_channels=512, kernel_size=5, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),

            # conv 7
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
            nn.ConvTranspose2d(in_channels=512, out_channels=256, kernel_size=5, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),

            # conv 8
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
            nn.ConvTranspose2d(in_channels=256, out_channels=128, kernel_size=5, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),

            # conv 9
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
            nn.ConvTranspose2d(in_channels=128, out_channels=64, kernel_size=5, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),

            # conv 10 out
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
            nn.ConvTranspose2d(in_channels=64, out_channels=2, kernel_size=5, stride=1, padding=1),
            # Softmax over the channel axis. dim=1 is what the deprecated
            # implicit form selected for 4-D input, now stated explicitly.
            nn.Softmax(dim=1),
        )

    def forward(self, x):
        """Encode then decode ``x`` of shape (N, 3, H, W); returns (N, 2, H', W')."""
        x = self.encoder(x)
        x = self.decoder(x)
        return x

输出张量的大小应该是:torch.Size([1, 2, 199, 253])

输出张量真正的大小:torch.Size([1, 2, 190, 238])

我的主要问题是如何把 Conv2d 和 MaxPool2d 组合起来，并在 ConvTranspose2d 中设置正确的参数值。因此，我用 Upsample 来对应 MaxPool2d，而 ConvTranspose2d 只用来对应 Conv2d。但是输出尺寸仍然有一点不对称，我真的不知道为什么。

感谢您的帮助!

最佳答案

有两个问题。

首先是填充不足:使用 kernel_size=5 时，每应用一次卷积，图像的高和宽都会缩小 4(每边 2 个像素)，因此所有卷积层都需要 padding=2，而不是 1。

其次是“不均匀”的输入大小。我的意思是，一旦你的卷积被适当地填充，剩下的就是下采样操作，它们每一次都试图将图像分辨率减半。当尺寸不能被 2 整除时，它们只会返回一个较小的结果(整数除法丢弃余数)。由于您的网络有 4 个连续的 2x 下采样操作，您需要输入的 H, W 维度是 2^4=16 的倍数。这样你才会真正得到与输入形状相同的输出。下面是一个例子

import torch
import torch.nn as nn

class AE(nn.Module):
    """Fully convolutional autoencoder: 3 input channels in, 2 channels out.

    Every 5x5 ``Conv2d`` / ``ConvTranspose2d`` uses ``padding=2`` so the
    convolutions keep H and W unchanged. The only size changes come from
    the four ``MaxPool2d(2, 2)`` layers (each halves H and W, rounding
    down) and the four matching ``Upsample(scale_factor=2)`` layers.

    The output has the same spatial size as the input only when the input
    H and W are multiples of 2**4 = 16; otherwise the rounding in the
    pooling layers makes the output smaller.
    """

    def __init__(self):
        super().__init__()

        # Encoder: five size-preserving 5x5 convolutions, the first four
        # each followed by a 2x max-pool.
        self.encoder = nn.Sequential(
            # conv 1
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # conv 2
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # conv 3
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # conv 4
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # conv 5 (bottleneck, no pooling)
            nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(1024),
            nn.ReLU(),
        )

        # Decoder mirrors the encoder: Upsample undoes each MaxPool2d and
        # ConvTranspose2d mirrors each Conv2d. align_corners=False is
        # PyTorch's default behaviour, spelled out explicitly.
        self.decoder = nn.Sequential(
            # conv 6
            nn.ConvTranspose2d(in_channels=1024, out_channels=512, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(512),
            nn.ReLU(),

            # conv 7
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
            nn.ConvTranspose2d(in_channels=512, out_channels=256, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(256),
            nn.ReLU(),

            # conv 8
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
            nn.ConvTranspose2d(in_channels=256, out_channels=128, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(128),
            nn.ReLU(),

            # conv 9
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
            nn.ConvTranspose2d(in_channels=128, out_channels=64, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(64),
            nn.ReLU(),

            # conv 10 out
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
            nn.ConvTranspose2d(in_channels=64, out_channels=2, kernel_size=5, stride=1, padding=2),
            # Softmax over the channel axis. dim=1 is what the deprecated
            # implicit form selected for 4-D input, now stated explicitly.
            nn.Softmax(dim=1),
        )

    def forward(self, x):
        """Encode then decode ``x`` of shape (N, 3, H, W); returns (N, 2, H', W')."""
        x = self.encoder(x)
        x = self.decoder(x)
        return x

# Demo: H=96 and W=112 are both multiples of 16, so the two printed shapes
# agree in their spatial dimensions. The variable is not called `input`
# to avoid shadowing the Python builtin of the same name.
sample = torch.randn(1, 3, 6 * 16, 7 * 16)
result = AE()(sample)
print(sample.shape)
print(result.shape)

关于python - 为什么我的完全卷积自动编码器不对称？，我们在Stack Overflow上找到一个类似的问题： https://stackoverflow.com/questions/58198305/

python - 为什么我的完全卷积自动编码器不对称？

上一篇：python - 如何通过跳过 NaN 从数组/数据帧中获取第一个和最后一个数字对

下一篇：python - 哪个版本的 TensorFlow.js 与在 TensorFlow 1.12.0 (Python) 中训练的模型兼容？