环境:PyTorch-1.7.1
问题描述:LeNet在MNIST上的loss不收敛,训练集和测试集准确率始终在10%上下。不论如何调整learning rate都无济于事。
解决方案:将激活函数由Sigmoid替换为ReLU即可。Sigmoid容易发生梯度消失,导致网络不收敛。修改后的代码如下:
import argparse
import os
import torch
import torch.nn as nn
import torchvision
def _str2bool(v):
    """Parse a truthy/falsy command-line string into a bool.

    argparse's ``type=bool`` is a well-known trap: ``bool("False")`` is True,
    so any non-empty value would enable the flag.  This parser makes
    ``--use_gpu False`` actually work while keeping the same default.
    """
    if isinstance(v, bool):
        return v
    return v.strip().lower() in ("1", "true", "t", "yes", "y")


parser = argparse.ArgumentParser(description='VT-attack in PyTorch')
parser.add_argument('--epoch', default=20, type=int, help='training epoch')
# Fixed copy-pasted help text (originally said "training epoch").
parser.add_argument('--batch_size', default=2560, type=int, help='training batch size')
parser.add_argument('--device', default='0', type=str, help='gpu device')
parser.add_argument('--use_gpu', default=True, type=_str2bool, help='use CUDA if available')
args = parser.parse_args()
class Reshape(torch.nn.Module):
    """Adapter module that views a batch of MNIST inputs as 1x28x28 images."""

    def forward(self, x):
        # -1 lets PyTorch infer the batch dimension from the input size;
        # channels/height/width are fixed for MNIST.
        images = x.view(-1, 1, 28, 28)
        return images
def evaluate(net, test_set):
    """Compute top-1 accuracy of `net` on `test_set`.

    Args:
        net: classifier producing per-class logits of shape (batch, n_classes).
        test_set: torch Dataset yielding (image, label) pairs.

    Returns:
        float: fraction of correctly classified samples, in [0, 1].
    """
    net.eval()
    # Guard with is_available() so a CPU-only machine doesn't crash.
    use_cuda = args.use_gpu and torch.cuda.is_available()
    if use_cuda:
        net = net.cuda()
    test_loader = torch.utils.data.DataLoader(test_set, batch_size=args.batch_size,
                                              shuffle=False, num_workers=4)
    correct = 0
    with torch.no_grad():  # inference only: skip autograd bookkeeping
        for x, y in test_loader:
            if use_cuda:
                x, y = x.cuda(), y.cuda()
            output = net(x)
            # argmax over the class dim replaces topk(k=1)+squeeze(); unlike
            # squeeze(), it cannot drop the batch dim when the last batch has
            # a single sample.
            pred = output.argmax(dim=1)
            correct += (pred == y).sum().item()
    return correct / len(test_set)
def init_weights(m):
    """Xavier-initialize the weights of Linear/Conv2d layers.

    Intended for use with ``net.apply(init_weights)``; non-matching modules
    are left untouched.
    """
    # isinstance is the idiomatic type check (and also covers subclasses),
    # unlike the original `type(m) == ...` comparison.
    if isinstance(m, (nn.Linear, nn.Conv2d)):
        nn.init.xavier_uniform_(m.weight)
def train(net, train_loader, test_set):
    """Train `net` with SGD + momentum and cosine LR decay on MNIST.

    Re-initializes the network's weights, then runs ``args.epoch`` epochs,
    printing the summed epoch loss and test accuracy after each one.

    Args:
        net: the model to train (weights are re-initialized here).
        train_loader: DataLoader over the training set.
        test_set: Dataset passed to evaluate() after every epoch.
    """
    net.apply(init_weights)
    # Guard with is_available() so a CPU-only machine doesn't crash.
    use_cuda = args.use_gpu and torch.cuda.is_available()
    if use_cuda:
        net = net.cuda()
    # BUG FIX: the original called .cuda() on the loss unconditionally,
    # which crashes when no GPU is present; keep it under the GPU guard.
    criterion = nn.CrossEntropyLoss()
    if use_cuda:
        criterion = criterion.cuda()
    optimizer = torch.optim.SGD(net.parameters(), lr=0.01, momentum=0.9)
    # NOTE(review): T_max=10 with the default 20 epochs makes the LR decay
    # and then rise again (one full cosine period) — confirm this is intended.
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10, eta_min=1e-6)
    for epoch in range(args.epoch):
        loss_epoch = 0.0
        net.train()
        for x, y in train_loader:
            if use_cuda:
                x, y = x.cuda(), y.cuda()
            output = net(x)
            loss = criterion(output, y)
            # .item() already synchronizes to host; the original .cpu() was redundant.
            loss_epoch += loss.item()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        scheduler.step()
        acc = evaluate(net, test_set)
        print("epoch: %d, loss: %.2f, accuracy: %.4f" % (epoch, loss_epoch, acc))
def main():
    """Build LeNet-5 (ReLU variant), sanity-print per-layer output shapes,
    then train it on MNIST."""
    lenet = torch.nn.Sequential(
        Reshape(),
        nn.Conv2d(1, 6, kernel_size=5, padding=2), nn.ReLU(),
        nn.AvgPool2d(kernel_size=2, stride=2),
        nn.Conv2d(6, 16, kernel_size=5), nn.ReLU(),
        nn.AvgPool2d(kernel_size=2, stride=2),
        nn.Flatten(),
        nn.Linear(16 * 5 * 5, 120), nn.ReLU(),
        nn.Linear(120, 84), nn.ReLU(),
        nn.Linear(84, 10),
    )
    # Push a dummy batch through each layer and report the resulting shapes.
    probe = torch.rand(size=(1, 1, 28, 28), dtype=torch.float32)
    for layer in lenet:
        probe = layer(probe)
        print(layer.__class__.__name__, "output shape: \t", probe.shape)
    transform = torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
    ])
    train_set = torchvision.datasets.MNIST(root="./data", train=True,
                                           transform=transform, download=True)
    loader = torch.utils.data.DataLoader(train_set, batch_size=args.batch_size,
                                         shuffle=True, num_workers=4)
    test_set = torchvision.datasets.MNIST(root="./data", train=False,
                                          transform=transform, download=True)
    train(lenet, loader, test_set)
if __name__ == "__main__":
    # Restrict which GPU CUDA sees.  Setting this after `import torch` still
    # works because CUDA contexts are created lazily — but it must happen
    # before any CUDA call (e.g. .cuda() inside train/evaluate).
    os.environ["CUDA_VISIBLE_DEVICES"] = args.device
    main()
更多推荐
记录一次激活函数导致的LeNet不收敛
发布评论