我正在开发一个全卷积自动编码器,它采用 3 个 channel 作为输入并输出 2 个 channel (输入:LAB,输出:AB)。因为输出应该与输入大小相同,所以我使用全卷积。
代码:
import torch.nn as nn
class AE(nn.Module):
    """Fully convolutional autoencoder: 3 input channels -> 2 output channels.

    The encoder is five 5x5 convolutions (each followed by batch norm and
    ReLU) separated by four 2x2 max-poolings.  The decoder mirrors it with
    five 5x5 transposed convolutions and four 2x bilinear upsamplings.

    NOTE: every (transposed) convolution here uses ``padding=1`` together
    with ``kernel_size=5``.  Each ``Conv2d`` therefore shrinks H and W by 2,
    each ``ConvTranspose2d`` grows them by 2, and ``MaxPool2d`` floors odd
    sizes, so the output does not in general have the input's spatial size.
    """

    def __init__(self):
        super().__init__()

        # Settings shared by every Conv2d / ConvTranspose2d in the network.
        conv_args = dict(kernel_size=5, stride=1, padding=1)

        # Encoder: conv 1-5, with a 2x2 max-pool between consecutive stages.
        down = []
        for c_in, c_out in ((3, 64), (64, 128), (128, 256), (256, 512), (512, 1024)):
            if down:
                down.append(nn.MaxPool2d(kernel_size=2, stride=2))
            down.extend([
                nn.Conv2d(in_channels=c_in, out_channels=c_out, **conv_args),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
            ])
        self.encoder = nn.Sequential(*down)

        # Decoder: conv 6-9, with a 2x bilinear upsample between stages.
        up = []
        for c_in, c_out in ((1024, 512), (512, 256), (256, 128), (128, 64)):
            if up:
                up.append(nn.Upsample(scale_factor=2, mode='bilinear'))
            up.extend([
                nn.ConvTranspose2d(in_channels=c_in, out_channels=c_out, **conv_args),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
            ])
        # conv 10 (output head): last upsample, projection to 2 channels,
        # then softmax (multi-class classification).
        # TODO: calling nn.Softmax() without an explicit dim is deprecated.
        up.extend([
            nn.Upsample(scale_factor=2, mode='bilinear'),
            nn.ConvTranspose2d(in_channels=64, out_channels=2, **conv_args),
            nn.Softmax(),
        ])
        self.decoder = nn.Sequential(*up)

    def forward(self, x):
        """Encode ``x`` and decode the result; returns the decoder output."""
        return self.decoder(self.encoder(x))
输出张量的大小应该是:torch.Size([1, 2, 199, 253])
输出张量真正的大小:torch.Size([1, 2, 190, 238])
我的主要问题在于如何组合 Conv2d 和 MaxPool2d,并为 ConvTranspose2d 设置正确的参数值。因此我把两者分开处理:用 Upsample 来对应(还原)MaxPool2d,而 ConvTranspose2d 只用来对应 Conv2d。但是结果仍然有些不对称,我真的不知道为什么。
感谢您的帮助!
最佳答案
有两个问题。
首先是填充不足:使用 kernel_size=5 时,如果不做填充,每次卷积都会把图像的高和宽各缩小 4 个像素(每边 2 个像素),因此所有卷积层都需要 padding=2,而不只是 1。
其次是“不整齐”的输入尺寸。我的意思是,即使卷积已经被适当地填充,网络中仍然有下采样操作,它们每次都试图把图像分辨率减半;当尺寸不能被 2 整除时,它们只会返回一个更小的结果(整数除法会丢弃余数)。由于您的网络有 4 个连续的 2x 下采样操作,输入的 H、W 维度需要是 2^4=16 的倍数,这样输出才会与输入形状相同。下面是一个例子:
import torch
import torch.nn as nn
class AE(nn.Module):
    """Fully convolutional autoencoder: 3 input channels -> 2 output channels.

    Every 5x5 (transposed) convolution uses ``padding=2`` and ``stride=1``,
    so convolutions keep H and W unchanged.  The only size changes come from
    the four 2x2 max-poolings in the encoder and the four 2x bilinear
    upsamplings in the decoder.

    The output has the same spatial size as the input only when the input's
    H and W are multiples of 2**4 = 16; otherwise the poolings floor the
    size and the output comes out smaller.
    """

    def __init__(self):
        super().__init__()
        self.encoder = nn.Sequential(
            # conv 1
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # conv 2
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # conv 3
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # conv 4
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # conv 5 (bottleneck, no pooling afterwards)
            nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(1024),
            nn.ReLU(),
        )
        self.decoder = nn.Sequential(
            # conv 6 (mirrors conv 5, no upsampling before it)
            nn.ConvTranspose2d(in_channels=1024, out_channels=512, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            # conv 7
            # align_corners=False is the default; spelled out so the
            # interpolation behaviour is explicit.
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
            nn.ConvTranspose2d(in_channels=512, out_channels=256, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            # conv 8
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
            nn.ConvTranspose2d(in_channels=256, out_channels=128, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            # conv 9
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
            nn.ConvTranspose2d(in_channels=128, out_channels=64, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            # conv 10 out
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
            nn.ConvTranspose2d(in_channels=64, out_channels=2, kernel_size=5, stride=1, padding=2),
            # Multi-class classification over the channel axis.  dim=1 is
            # what the deprecated implicit choice resolves to for 4-D
            # (N, C, H, W) input; naming it avoids the deprecation warning.
            nn.Softmax(dim=1),
        )

    def forward(self, x):
        """Run the encoder, then the decoder, and return the decoder output.

        Args:
            x: input batch with 3 channels, laid out as (N, 3, H, W).

        Returns:
            Tensor with 2 channels whose values sum to 1 along the channel
            axis; it has the input's H and W when both are multiples of 16.
        """
        x = self.encoder(x)
        x = self.decoder(x)
        return x
# Demo: H = 6*16 and W = 7*16 are both multiples of 16, so the four 2x
# poolings divide evenly and the two printed shapes agree in H and W.
# The tensor is named `sample` rather than `input` to avoid shadowing the
# builtin input().
sample = torch.randn(1, 3, 6 * 16, 7 * 16)
output = AE()(sample)
print(sample.shape)
print(output.shape)
关于python - 为什么我的完全卷积自动编码器不对称?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/58198305/